Loader and detokenizer for the Puppeteer skeleton model. PuppeteerModel holds a shared MagicArticulate core, a target position embedding tensor, and an optional skin model. PuppeteerLoader.LoadModel extracts the target_aware_pos_embed tensor, reshapes it and calls MagicArticulateLoader to load the shared core. PuppeteerSkeleton.Detokenize converts generated token ids into 3D joint coords and parent indices, builds bone pairs and returns a MagicArticulateSkeleton via FromBonePairs.
using AutoRig.Dl.MagicArticulate;
namespace AutoRig.Dl.Puppeteer;
using AutoRig.Dl;
using Vector3 = System.Numerics.Vector3;
/// <summary>
/// Puppeteer (Seed3D, 2025) skeleton network — the SkeletonGPT frame shared
/// with MagicArticulate (same ShapeVAE encoder, cond projections, post-LN
/// opt-350m, vocab 131) in its joint-token + seq-shuffle configuration:
/// 4 tokens per JOINT (x, y, z, parent-index; parent bin 0 = root, else
/// parent = bin − 1), a 7-row bone-position table (cycle (k−2)%4+3), and a
/// learned target-aware positional embedding — row 0 added to the cond
/// prefix, row 1+(k−2)/4 added to generated token k (BOS exempt).
/// </summary>
/// <remarks>
/// This type is a plain container: it holds the shared core, the
/// Puppeteer-specific embedding table and the optional skinning model, and
/// contains no logic of its own.
/// </remarks>
public sealed class PuppeteerModel
{
/// <summary>
/// Number of consecutive tokens that encode one joint (x, y, z, parent index).
/// Despite the name, the detokenizer uses this as "tokens per joint": it
/// divides the bin count by it to get the joint count and uses it as the
/// stride between joints.
/// </summary>
public const int BonePerToken = 4;
/// <summary>Everything shared with MagicArticulate (encoder, decoder, tables).</summary>
public required MagicArticulateModel Core;
/// <summary>
/// The checkpoint's target_aware_pos_embed tensor, reshaped to its last two
/// dimensions (expected [101, 1024]).
/// </summary>
public required Tensor TargetPosEmbed;
/// <summary>The skinning transformer (separate checkpoint) — null until the
/// user downloads it; the solver falls back to geodesic skinning then.</summary>
/// <remarks>Not <c>required</c>: the loader leaves it unset, so callers must
/// handle the null case.</remarks>
public PuppeteerSkinModel Skin { get; set; }
}
/// <summary>Builds a <see cref="PuppeteerModel"/> from a checkpoint's tensor dictionary.</summary>
public static class PuppeteerLoader
{
    /// <summary>
    /// Picks the Puppeteer-only <c>target_aware_pos_embed</c> tensor out of
    /// <paramref name="tensors"/>, flattens it to its last two dimensions, and
    /// passes the same dictionary to <c>MagicArticulateLoader.LoadModel</c> to
    /// load the shared core.
    /// </summary>
    /// <param name="tensors">Checkpoint tensors keyed by name. If any key starts
    /// with <c>model.</c>, the embedding is looked up under that prefix.</param>
    /// <returns>A model with <c>Core</c> and <c>TargetPosEmbed</c> populated;
    /// <c>Skin</c> is left unset.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tensors"/> is null.</exception>
    /// <exception cref="FormatException">The embedding tensor is not present.</exception>
    public static PuppeteerModel LoadModel( IReadOnlyDictionary<string, Tensor> tensors )
    {
        ArgumentNullException.ThrowIfNull( tensors );

        // Some checkpoints wrap every key in "model."; a single prefixed key is
        // enough to decide that the embedding is stored under the prefix too.
        const string ModelPrefix = "model.";
        var isPrefixed = tensors.Keys.Any( name => name.StartsWith( ModelPrefix, StringComparison.Ordinal ) );
        var embedKey = ( isPrefixed ? ModelPrefix : "" ) + "target_aware_pos_embed";

        if ( !tensors.TryGetValue( embedKey, out var embed ) )
        {
            throw new FormatException(
                "Puppeteer checkpoint is missing 'target_aware_pos_embed' "
                + "(expected the seq-shuffle joint-token variant)." );
        }

        // [1, 101, 1024] → [101, 1024]: keep only the trailing two dimensions.
        var shape = embed.Shape;
        var rowCount = shape[^2];
        var colCount = shape[^1];

        // The shared core is loaded before the embedding is re-wrapped.
        var core = MagicArticulateLoader.LoadModel( tensors );
        var flattened = Tensor.From( embed.Data, rowCount, colCount );

        return new PuppeteerModel
        {
            Core = core,
            TargetPosEmbed = flattened,
        };
    }
}
/// <summary>Puppeteer's joint-token detokenizer (detokenize_joint_token):
/// groups of 4 values = (x, y, z, parent index); bones reconstructed from the
/// parent indices, then MagicArticulate's shared post-processing.</summary>
public static class PuppeteerSkeleton
{
    /// <summary>
    /// Converts a generated token stream into a skeleton. Ids below 3 (the
    /// special tokens) are dropped; every remaining id becomes a bin
    /// (id − 3), and each complete group of
    /// <see cref="PuppeteerModel.BonePerToken"/> bins is one joint:
    /// three coordinate bins followed by a parent bin (0 = root, else
    /// parent joint = bin − 1). A trailing incomplete group is ignored.
    /// </summary>
    /// <param name="ids">Generated stream with BOS/EOS stripped.</param>
    /// <returns>The result of <c>MagicArticulateSkeleton.FromBonePairs</c> over
    /// the (parent position, child position) pairs.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="ids"/> is null.</exception>
    /// <exception cref="FormatException">The stream holds no complete joint, or
    /// no joint references a valid parent.</exception>
    public static MagicArticulateSkeleton.Decoded Detokenize( IReadOnlyList<int> ids )
    {
        // Same guard style as PuppeteerLoader.LoadModel; without it a null
        // argument only surfaces inside the enumeration below.
        ArgumentNullException.ThrowIfNull( ids );

        const int SpecialTokenCount = 3; // ids 0..2 are specials, bins start at 3
        const int TokensPerJoint = PuppeteerModel.BonePerToken;

        var bins = new List<int>( ids.Count );
        foreach ( var id in ids )
        {
            if ( id >= SpecialTokenCount )
            {
                bins.Add( id - SpecialTokenCount );
            }
        }

        // Integer division: leftover bins that do not fill a joint are discarded.
        var jointCount = bins.Count / TokensPerJoint;
        if ( jointCount == 0 )
        {
            throw new FormatException( "Puppeteer produced no complete joints." );
        }

        var coords = new Vector3[jointCount];
        var parents = new int[jointCount];
        for ( var j = 0; j < jointCount; j++ )
        {
            var at = j * TokensPerJoint;
            coords[j] = new Vector3(
                MagicArticulateModel.Undiscretize( bins[at] ),
                MagicArticulateModel.Undiscretize( bins[at + 1] ),
                MagicArticulateModel.Undiscretize( bins[at + 2] ) );
            parents[j] = bins[at + 3]; // 0 = root, else parent joint = value − 1
        }

        // At most one bone per joint, so jointCount is an upper bound.
        var pairs = new List<(Vector3, Vector3)>( jointCount );
        for ( var j = 0; j < jointCount; j++ )
        {
            var parentBin = parents[j];
            if ( parentBin <= 0 )
            {
                continue; // root joint: contributes no bone
            }

            var p = parentBin - 1;
            if ( p >= jointCount )
            {
                continue; // ref crashes out-of-range; we skip
            }

            // NOTE(review): p == j (a joint naming itself as parent) still yields
            // a zero-length pair here; presumably FromBonePairs tolerates that — confirm.
            pairs.Add( (coords[p], coords[j]) );
        }

        if ( pairs.Count == 0 )
        {
            throw new FormatException( "Puppeteer produced no valid bones." );
        }

        return MagicArticulateSkeleton.FromBonePairs( pairs );
    }
}