AutoRig/Dl/Puppeteer/PuppeteerLoader.cs

Loader and detokenizer for the Puppeteer skeleton model. PuppeteerModel holds a shared MagicArticulate core, a target position embedding tensor, and an optional skin model. PuppeteerLoader.LoadModel extracts the target_aware_pos_embed tensor, reshapes it and calls MagicArticulateLoader to load the shared core. PuppeteerSkeleton.Detokenize converts generated token ids into 3D joint coords and parent indices, builds bone pairs and returns a MagicArticulateSkeleton via FromBonePairs.

File Access
using AutoRig.Dl.MagicArticulate;

namespace AutoRig.Dl.Puppeteer;

using AutoRig.Dl;
using Vector3 = System.Numerics.Vector3;

/// <summary>
/// Puppeteer (Seed3D, 2025) skeleton network — the SkeletonGPT frame shared
/// with MagicArticulate (same ShapeVAE encoder, cond projections, post-LN
/// opt-350m, vocab 131) in its joint-token + seq-shuffle configuration:
/// 4 tokens per JOINT (x, y, z, parent-index; parent bin 0 = root, else
/// parent = bin − 1), a 7-row bone-position table (cycle (k−2)%4+3), and a
/// learned target-aware positional embedding — row 0 added to the cond
/// prefix, row 1+(k−2)/4 added to generated token k (BOS exempt).
/// </summary>
/// <remarks>
/// Plain data holder: it carries the shared MagicArticulate core, the
/// Puppeteer-specific positional embedding and an optional skinning model.
/// It contains no logic of its own.
/// </remarks>
public sealed class PuppeteerModel
{
    /// <summary>
    /// Number of consecutive tokens that form one joint group
    /// (x, y, z, parent index). Despite the name, this is "tokens per joint",
    /// not "bones per token"; the detokenizer uses it as the group stride.
    /// </summary>
    public const int BonePerToken = 4;

    /// <summary>Everything shared with MagicArticulate (encoder, decoder, tables).</summary>
    public required MagicArticulateModel Core;

    /// <summary>
    /// The checkpoint's target_aware_pos_embed tensor, stored as a 2-D
    /// [rows, cols] tensor built from the last two dimensions of the
    /// checkpoint tensor ([101, 1024] for the expected checkpoint).
    /// </summary>
    public required Tensor TargetPosEmbed;

    /// <summary>The skinning transformer (separate checkpoint) — null until the
    /// user downloads it; the solver falls back to geodesic skinning then.</summary>
    // NOTE(review): documented as possibly null but declared without a nullable
    // annotation — confirm whether nullable reference types are enabled here.
    public PuppeteerSkinModel Skin { get; set; }
}

/// <summary>Builds a <see cref="PuppeteerModel"/> from a checkpoint's tensor table.</summary>
public static class PuppeteerLoader
{
    private const string ModelPrefix = "model.";
    private const string TargetPosEmbedName = "target_aware_pos_embed";

    /// <summary>
    /// Looks up the target-aware positional embedding in <paramref name="tensors"/>,
    /// flattens it to two dimensions, and loads the shared MagicArticulate core
    /// from the same tensor table.
    /// </summary>
    /// <param name="tensors">Checkpoint tensors keyed by name; keys may carry a "model." prefix.</param>
    /// <returns>A model holding the shared core and the 2-D embedding; no skin model is attached.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tensors"/> is null.</exception>
    /// <exception cref="FormatException">The embedding tensor is not present in the checkpoint.</exception>
    public static PuppeteerModel LoadModel( IReadOnlyDictionary<string, Tensor> tensors )
    {
        ArgumentNullException.ThrowIfNull( tensors );

        // If any key is namespaced with "model.", the embedding is looked up
        // under that namespace as well; otherwise the bare name is used.
        var prefix = string.Empty;
        foreach ( var key in tensors.Keys )
        {
            if ( !key.StartsWith( ModelPrefix, StringComparison.Ordinal ) )
                continue;

            prefix = ModelPrefix;
            break;
        }

        var embedKey = prefix + TargetPosEmbedName;
        if ( !tensors.TryGetValue( embedKey, out var embed ) )
            throw new FormatException(
                "Puppeteer checkpoint is missing 'target_aware_pos_embed' (expected the seq-shuffle joint-token variant)." );

        // Keep only the trailing two dimensions: [1, 101, 1024] → [101, 1024].
        var rowCount = embed.Shape[^2];
        var columnCount = embed.Shape[^1];

        // The shared core is loaded before the embedding tensor is rebuilt.
        var core = MagicArticulateLoader.LoadModel( tensors );
        var targetPosEmbed = Tensor.From( embed.Data, rowCount, columnCount );

        return new PuppeteerModel
        {
            Core = core,
            TargetPosEmbed = targetPosEmbed,
        };
    }
}

/// <summary>Puppeteer's joint-token detokenizer (detokenize_joint_token):
/// groups of 4 values = (x, y, z, parent index); bones reconstructed from the
/// parent indices, then MagicArticulate's shared post-processing.</summary>
public static class PuppeteerSkeleton
{
    /// <summary>
    /// Turns a generated token stream into a skeleton. Ids below 3 (specials)
    /// are dropped, the rest become bins (id − 3), and every complete group of
    /// <see cref="PuppeteerModel.BonePerToken"/> bins is one joint:
    /// three coordinate bins followed by a parent bin. A trailing incomplete
    /// group is ignored.
    /// </summary>
    /// <param name="ids">Generated stream with BOS/EOS stripped.</param>
    /// <returns>
    /// The result of <c>MagicArticulateSkeleton.FromBonePairs</c> applied to the
    /// (parent position, child position) pairs of all joints with a usable parent.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="ids"/> is null.</exception>
    /// <exception cref="FormatException">
    /// The stream holds no complete joint, or no joint references a valid parent.
    /// </exception>
    public static MagicArticulateSkeleton.Decoded Detokenize( IReadOnlyList<int> ids )
    {
        // Guard at the entry point (as LoadModel does) so the failure names
        // this method's parameter rather than LINQ's internal "source".
        ArgumentNullException.ThrowIfNull( ids );

        var bins = ids.Where( id => id >= 3 ).Select( id => id - 3 ).ToList();
        var jointCount = bins.Count / PuppeteerModel.BonePerToken;
        if ( jointCount == 0 )
            throw new FormatException( "Puppeteer produced no complete joints." );

        // Pass 1: decode every joint first. A parent bin may point at a joint
        // that appears later in the stream, so bones cannot be built yet.
        var coords = new Vector3[jointCount];
        var parents = new int[jointCount];
        for ( var j = 0; j < jointCount; j++ )
        {
            var at = j * PuppeteerModel.BonePerToken;
            coords[j] = new Vector3(
                MagicArticulateModel.Undiscretize( bins[at] ),
                MagicArticulateModel.Undiscretize( bins[at + 1] ),
                MagicArticulateModel.Undiscretize( bins[at + 2] ) );
            parents[j] = bins[at + 3];   // 0 = root, else parent joint = value − 1
        }

        // Pass 2: one (parent, child) bone per non-root joint. At most one bone
        // per joint, so jointCount is an upper bound for the list size.
        var pairs = new List<(Vector3, Vector3)>( jointCount );
        for ( var j = 0; j < jointCount; j++ )
        {
            var p = parents[j] - 1;
            // NOTE(review): a joint naming itself as parent (p == j) passes this
            // check and yields a zero-length bone — confirm FromBonePairs copes.
            if ( parents[j] > 0 && p < jointCount )   // ref crashes out-of-range; we skip
                pairs.Add( (coords[p], coords[j]) );
        }
        if ( pairs.Count == 0 )
            throw new FormatException( "Puppeteer produced no valid bones." );

        return MagicArticulateSkeleton.FromBonePairs( pairs );
    }
}