Code/AutoRig/Dl/UniRig/SkinTokensInput.cs

Static helper that prepares the point-cloud input for the SkinTokens model. It samples a 54,000-point cloud (including 16,384 vertex picks), flattens a fixed 2,048-point (PreCount) subsample of that cloud into an x,y,z float array, runs farthest point sampling (FPS) on that pre-subsample, maps the FPS picks back to indices in the full cloud, and returns the points, normals, sampled indices, and the center/scale needed to undo the normalization.

Native Interop
using AutoRig.Dl.RigNet;
using AutoRig.Mesh;

namespace AutoRig.Dl.UniRig;

using AutoRig.Dl;
using Vector3 = System.Numerics.Vector3;

/// <summary>
/// Input preparation for SkinTokens, mirroring the checkpoint's
/// predict_transform hparams and the encoder's eval path:
/// <list type="number">
/// <item>bbox-normalize to [-1,1]³ and SamplerMix a 54000-point cloud
/// (16384 vertex picks + area-weighted surface samples) — the sampling is
/// delegated to <c>UniRigInput.SampleCloud</c>;</item>
/// <item>take the constant seed-0 2048-point subsample of that cloud and fps
/// it down to 512 queries;</item>
/// <item>hand back the queries as row indices into the WHOLE cloud, because
/// the perceiver reduction runs with use_full_input=TRUE and cross-attends
/// against all 54000 points (UniRig, by contrast, attends only its
/// 4096-point subsample).</item>
/// </list>
/// </summary>
public static class SkinTokensInput
{
    /// <summary>Number of points requested for the full cloud.</summary>
    public const int NumSamples = 54000;

    /// <summary>How many of the cloud's points are vertex picks.</summary>
    public const int VertexSamples = 16384;

    /// <summary>Size of the pre-subsample that fps runs over (token_num 512 · 4).</summary>
    public const int PreCount = 2048;

    /// <summary>Number of fps queries (fps ratio 1/4 of the pre-cloud).</summary>
    public const int LatentCount = 512;

    // The fps ratio expressed through the two counts above; evaluates to exactly 0.25f.
    private const float FpsRatio = (float)LatentCount / PreCount;

    /// <summary>Everything <see cref="Prepare"/> produces for one mesh.</summary>
    public sealed class Prepared
    {
        /// <summary>The FULL 54000-point cloud (perceiver "data", use_full_input).</summary>
        public required Vector3[] Points;

        /// <summary>
        /// Normals returned by the sampler together with <see cref="Points"/>
        /// (presumably one per point, in the same order — confirm against SampleCloud).
        /// </summary>
        public required Vector3[] Normals;

        /// <summary>fps picks (512) as indices into Points (perceiver queries).</summary>
        public required int[] SampledIndices;

        /// <summary>Undo the normalization: world = p * Scale + Center.</summary>
        public required Vector3 Center;

        /// <summary>Scale factor of the normalization; see <see cref="Center"/>.</summary>
        public required float Scale;
    }

    /// <summary>
    /// Samples the cloud for <paramref name="mesh"/>, runs fps over the fixed
    /// pre-subsample, and packages the result.
    /// </summary>
    /// <param name="mesh">Mesh to sample; must not be null.</param>
    /// <returns>
    /// The sampled cloud with its normals, the fps picks translated into
    /// indices of that cloud, and the center/scale reported by the sampler.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="mesh"/> is null.</exception>
    public static Prepared Prepare( RigMesh mesh )
    {
        ArgumentNullException.ThrowIfNull( mesh );

        var (cloud, cloudNormals, normCenter, normScale) =
            UniRigInput.SampleCloud( mesh, NumSamples, VertexSamples );

        // fps only ever sees the seed-0 pre-subsample, laid out as a PreCount×3 tensor.
        var preCloud = Tensor.From( FlattenPreSubsample( cloud ), PreCount, 3 );
        var picks = PointNet.FarthestPointSample( preCloud, ratio: FpsRatio );

        // Each pick is a row of the pre-subsample; translate it through the
        // subsample table so it addresses the full cloud instead (queries are
        // literal rows of the data in the reference too).
        var queryRows = new int[picks.Length];
        for ( var q = 0; q < queryRows.Length; q++ )
        {
            queryRows[q] = SkinTokensSubsample.Indices[picks[q]];
        }

        return new Prepared
        {
            Points = cloud,
            Normals = cloudNormals,
            SampledIndices = queryRows,
            Center = normCenter,
            Scale = normScale,
        };
    }

    // Gathers the first PreCount entries of the fixed subsample table from the
    // cloud and packs them as consecutive x,y,z triples.
    private static float[] FlattenPreSubsample( Vector3[] cloud )
    {
        var flat = new float[PreCount * 3];
        var cursor = 0;
        for ( var row = 0; row < PreCount; row++ )
        {
            var point = cloud[SkinTokensSubsample.Indices[row]];
            flat[cursor++] = point.X;
            flat[cursor++] = point.Y;
            flat[cursor++] = point.Z;
        }

        return flat;
    }
}