AutoRig/Dl/UniRig/SkinTokensInput.cs

Preparation pipeline for SkinTokens inputs. Samples a 54,000-point cloud from a RigMesh (16,384 of the points are vertex picks), gathers a fixed 2,048-point pre-subsample of that cloud, and runs farthest-point sampling over it to select 512 query points, which are reported as indices into the full cloud. Returns the points, normals, and sampled indices, together with the center and scale needed to undo the normalization.

Native Interop
using AutoRig.Dl.RigNet;
using AutoRig.Mesh;

namespace AutoRig.Dl.UniRig;

using AutoRig.Dl;
using Vector3 = System.Numerics.Vector3;

/// <summary>
/// Input preparation for SkinTokens, following the checkpoint hparams
/// predict_transform and the encoder's eval path:
/// <list type="number">
/// <item>bbox-normalize the mesh to [-1,1]³;</item>
/// <item>SamplerMix a 54000-point cloud (16384 vertex picks plus
/// area-weighted surface samples);</item>
/// <item>apply the perceiver reduction with use_full_input=TRUE: the constant
/// seed-0 2048-point subsample is fps'd down to 512 queries.</item>
/// </list>
/// Those queries cross-attend against the WHOLE 54000-point cloud. UniRig, by
/// contrast, attends only its 4096-point subsample.
/// </summary>
public static class SkinTokensInput
{
    /// <summary>Total number of points in the sampled cloud.</summary>
    public const int NumSamples = 54000;

    /// <summary>How many of the cloud's points are vertex picks.</summary>
    public const int VertexSamples = 16384;

    /// <summary>Size of the pre-subsample that fps runs over (token_num 512 · 4).</summary>
    public const int PreCount = 2048;

    /// <summary>Number of fps queries (fps ratio 1/4 of the pre-cloud).</summary>
    public const int LatentCount = 512;

    /// <summary>Result of <see cref="Prepare"/>.</summary>
    public sealed class Prepared
    {
        /// <summary>The FULL 54000-point cloud (perceiver "data", use_full_input).</summary>
        public required Vector3[] Points;

        /// <summary>Normals returned by the cloud sampler together with <see cref="Points"/>.</summary>
        public required Vector3[] Normals;

        /// <summary>The fps picks (512), expressed as indices into <see cref="Points"/> (perceiver queries).</summary>
        public required int[] SampledIndices;

        /// <summary>Normalization offset; world = p * Scale + Center.</summary>
        public required Vector3 Center;

        /// <summary>Normalization factor; world = p * Scale + Center.</summary>
        public required float Scale;
    }

    /// <summary>
    /// Samples the full cloud for <paramref name="mesh"/>, gathers the
    /// pre-subsample selected by <c>SkinTokensSubsample.Indices</c>, runs
    /// farthest-point sampling over it, and translates the picks back into
    /// indices of the full cloud.
    /// </summary>
    /// <param name="mesh">Mesh to sample; must not be null.</param>
    /// <returns>The full cloud, its normals, the query indices, and the center/scale of the normalization.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="mesh"/> is null.</exception>
    public static Prepared Prepare( RigMesh mesh )
    {
        ArgumentNullException.ThrowIfNull( mesh );

        var (cloudPoints, cloudNormals, cloudCenter, cloudScale) =
            UniRigInput.SampleCloud( mesh, NumSamples, VertexSamples );

        // Flatten the seed-0 pre-subsample into an xyz-interleaved buffer; fps
        // operates on this reduced cloud rather than on the full one.
        var preCoords = new float[PreCount * 3];
        var cursor = 0;
        for ( var row = 0; row < PreCount; row++ )
        {
            var point = cloudPoints[SkinTokensSubsample.Indices[row]];
            preCoords[cursor++] = point.X;
            preCoords[cursor++] = point.Y;
            preCoords[cursor++] = point.Z;
        }

        var preCloud = Tensor.From( preCoords, PreCount, 3 );
        var picks = PointNet.FarthestPointSample( preCloud, ratio: 0.25f );

        // Each pick is a row of the pre-subsample; route it through the
        // subsample table so it addresses the full cloud instead (the queries
        // are literal rows of the data in the reference too).
        var queryIndices = new int[picks.Length];
        for ( var k = 0; k < picks.Length; k++ )
        {
            queryIndices[k] = SkinTokensSubsample.Indices[picks[k]];
        }

        var prepared = new Prepared
        {
            Points = cloudPoints,
            Normals = cloudNormals,
            SampledIndices = queryIndices,
            Center = cloudCenter,
            Scale = cloudScale,
        };
        return prepared;
    }
}