glTF/GLB importer layer for the HumanoidRetargeter. Parses a GLB container or plain .gltf JSON, resolves inline/base64 buffers, decodes nodes (TRS or matrix), skins (joint indices) and animation samplers into simple runtime types, and reads VRM humanoid bone maps.
using System;
using System.Collections.Generic;
using System.Numerics;
using System.Text;
using System.Text.Json;
namespace HumanoidRetargeter.Formats.Gltf;
using Vector3 = System.Numerics.Vector3; // s&box compat: shadow engine's global-namespace Vector3 (see Code/HumanoidRetargeter/Assembly.cs)
/// <summary>
/// A single glTF node carrying only the data skeleton import uses: its name, its place in
/// the hierarchy, whether it references a mesh, and its rest-pose local transform.
/// </summary>
internal sealed class GltfNode
{
    // ---- rest local transform ----
    // Filled either from the node's TRS properties or from its decomposed "matrix"
    // property (the spec makes them exclusive; animated nodes must use TRS). Shear is
    // not representable.

    /// <summary>Rest translation, in meters.</summary>
    public Vector3 Translation;

    /// <summary>Rest rotation (components ordered x, y, z, w); identity by default.</summary>
    public Quaternion Rotation = Quaternion.Identity;

    /// <summary>Rest scale; one on every axis by default.</summary>
    public Vector3 Scale = Vector3.One;

    // ---- identity + hierarchy ----

    /// <summary>Node name from the file, or null when none was read.</summary>
    public string? Name;

    /// <summary>Index of the parent node; -1 while no parent has been assigned.</summary>
    public int Parent = -1;

    /// <summary>Indices of the child nodes; empty by default.</summary>
    public int[] Children = Array.Empty<int>();

    /// <summary>True when the node has a <c>mesh</c> property.</summary>
    public bool HasMesh;
}
/// <summary>
/// A decoded animation channel: the keyframe times and flattened values that drive one
/// property (rotation or translation) of one node.
/// </summary>
internal sealed class GltfChannel
{
    /// <summary>Index of the node this channel animates.</summary>
    public required int NodeIndex;

    /// <summary>True for a rotation channel (VEC4 quaternion), false for translation (VEC3).</summary>
    public required bool IsRotation;

    /// <summary>Key times in seconds, ascending.</summary>
    public required float[] Times;

    /// <summary>Flattened key values; 4 (rotation) or 3 (translation) floats per element.</summary>
    public required float[] Values;

    /// <summary>Interpolation mode name: LINEAR, STEP or CUBICSPLINE.</summary>
    public required string Interpolation;

    /// <summary>Floats per element: 4 for rotation, 3 for translation.</summary>
    public int Comps
    {
        get { return IsRotation ? 4 : 3; }
    }

    /// <summary>
    /// Elements stored per key: 3 for CUBICSPLINE (in-tangent, value, out-tangent),
    /// otherwise 1.
    /// </summary>
    public int ElementsPerKey
    {
        get
        {
            if (string.Equals(Interpolation, "CUBICSPLINE", StringComparison.Ordinal))
                return 3;
            return 1;
        }
    }

    /// <summary>Number of keys, i.e. the number of entries in <see cref="Times"/>.</summary>
    public int KeyCount
    {
        get { return Times.Length; }
    }
}
/// <summary>
/// A glTF animation: an optional name plus the rotation/translation channels decoded
/// from it.
/// </summary>
internal sealed class GltfAnimation
{
    /// <summary>Animation name from the file, or null when none was read.</summary>
    public string? Name;

    /// <summary>Decoded channels; starts empty and is filled by the importer.</summary>
    public List<GltfChannel> Channels { get; } = new List<GltfChannel>();
}
/// <summary>
/// Container + JSON layer of the glTF importer: parses a GLB binary container or a plain
/// .gltf JSON document, resolves buffers (GLB BIN chunk and base64 <c>data:</c> URIs — file
/// IO is banned in Code/, so external file URIs throw), and decodes nodes, skin joints and
/// animation samplers into plain arrays. Throws <see cref="FormatException"/> on anything
/// malformed or unsupported.
/// </summary>
internal sealed class GltfDocument
{
// GLB container tags. Each constant is the tag's four ASCII bytes read as one
// little-endian u32, so it can be compared directly with a ReadU32 result.
private const uint GlbMagic = 0x46546C67; // 'glTF' little-endian (file magic at offset 0)
private const uint ChunkJson = 0x4E4F534A; // 'JSON' chunk type
private const uint ChunkBin = 0x004E4942; // 'BIN\0' chunk type
/// <summary>All nodes, indexed as in the file, with parents resolved from children lists.</summary>
public List<GltfNode> Nodes { get; } = new();
/// <summary>Union of all skins' joint node indices (only indices that address an
/// existing entry of <see cref="Nodes"/> are kept).</summary>
public HashSet<int> SkinJoints { get; } = new();
/// <summary>All animations with decoded rotation/translation channels (scale/weights ignored).</summary>
public List<GltfAnimation> Animations { get; } = new();
/// <summary>
/// The VRM humanoid bone map authored in the file, when present: VRM bone name
/// (<c>hips</c>, <c>leftUpperArm</c>, …) → node index. Read from BOTH extension layouts:
/// VRM 0.x <c>extensions.VRM.humanoid.humanBones</c> (an ARRAY of
/// <c>{ "bone": "hips", "node": 14 }</c> entries) and VRM 1.0
/// <c>extensions.VRMC_vrm.humanoid.humanBones</c> (an OBJECT
/// <c>{ "hips": { "node": 14 }, … }</c>). Null when the file carries neither, or when
/// no entry of the map survived validation.
/// </summary>
public Dictionary<string, int>? VrmHumanBones { get; private set; }
/// <summary>Which VRM extension supplied <see cref="VrmHumanBones"/>: <c>0</c> for the
/// 0.x <c>VRM</c> extension, <c>1</c> for the 1.0 <c>VRMC_vrm</c> extension, <c>-1</c>
/// when none.</summary>
public int VrmVersion { get; private set; } = -1;
// Private: instances are created only by Parse, which populates them before returning.
private GltfDocument()
{
}
/// <summary>
/// Parses GLB or plain-JSON glTF bytes. Input starting with the GLB magic is unwrapped
/// as a binary container; anything else is treated as the JSON document itself.
/// </summary>
/// <param name="data">The complete file contents.</param>
/// <returns>The decoded document (nodes, skin joints, animations, VRM bone map).</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="FormatException">Truncated/malformed container, invalid JSON,
/// JSON values of an unexpected type, unresolvable buffers, or unsupported accessor
/// layouts.</exception>
public static GltfDocument Parse(byte[] data)
{
    ArgumentNullException.ThrowIfNull(data);
    byte[] json;
    byte[]? bin = null;
    if (data.Length >= 4 && ReadU32(data, 0) == GlbMagic)
        (json, bin) = ParseGlbContainer(data);
    else
        json = data;
    JsonElement root;
    try
    {
        // Parse via string: Memory<T>/ReadOnlyMemory<T> are not on the s&box runtime
        // whitelist (SB1000), and the string path also lets us strip a UTF-8 BOM
        // (Utf8JsonReader rejects raw BOM bytes). Clone detaches from the disposed
        // JsonDocument.
        var text = System.Text.Encoding.UTF8.GetString(json).TrimStart('\uFEFF');
        using var doc = JsonDocument.Parse(text);
        root = doc.RootElement.Clone();
    }
    catch (JsonException e)
    {
        // Keep the parser's exception as InnerException so line/position details survive.
        throw new FormatException($"glTF: invalid JSON ({e.Message})", e);
    }
    if (root.ValueKind != JsonValueKind.Object || !root.TryGetProperty("asset", out _))
        throw new FormatException("glTF: missing required 'asset' object (not a glTF file?).");
    try
    {
        var document = new GltfDocument();
        var buffers = ResolveBuffers(root, bin);
        document.ReadNodes(root);
        document.ReadSkins(root);
        document.ReadAnimations(root, buffers);
        document.ReadVrmHumanoid(root);
        return document;
    }
    catch (InvalidOperationException e)
    {
        // JsonElement accessors (GetInt32, GetString, EnumerateArray, TryGetProperty, …)
        // throw InvalidOperationException when a value has the wrong JSON kind, e.g.
        // "children": "x". That is a malformed file, so surface it under the documented
        // FormatException contract instead of leaking the implementation exception.
        throw new FormatException(
            $"glTF: malformed document, a JSON value has an unexpected type ({e.Message})", e);
    }
}
// ================================================================== VRM humanoid
/// <summary>
/// Extracts the humanoid bone map a VRM file authors in its root <c>extensions</c>
/// object and stores it in <see cref="VrmHumanBones"/> / <see cref="VrmVersion"/>.
/// The VRM 1.0 <c>VRMC_vrm</c> layout is tried first and wins when it yields at least
/// one usable entry; otherwise the VRM 0.x <c>VRM</c> layout is tried. Nothing here
/// throws on bad data: entries that are malformed or point at a node index outside
/// <see cref="Nodes"/> are skipped, and when no entry survives both properties keep
/// their defaults.
/// </summary>
private void ReadVrmHumanoid(JsonElement root)
{
    if (!root.TryGetProperty("extensions", out var extensions))
        return;
    if (extensions.ValueKind != JsonValueKind.Object)
        return;

    // VRM 1.0 layout: humanBones is an object, { "<bone>": { "node": n }, ... }.
    if (TryGetHumanBones(extensions, "VRMC_vrm", out var bonesV1)
        && bonesV1.ValueKind == JsonValueKind.Object)
    {
        var found = new Dictionary<string, int>(StringComparer.Ordinal);
        foreach (var bone in bonesV1.EnumerateObject())
        {
            if (bone.Value.ValueKind != JsonValueKind.Object)
                continue;
            if (!TryReadNodeIndex(bone.Value, out int nodeIndex))
                continue;
            found[bone.Name] = nodeIndex;
        }
        if (found.Count > 0)
        {
            VrmHumanBones = found;
            VrmVersion = 1;
            return;
        }
    }

    // VRM 0.x layout: humanBones is an array, [ { "bone": "...", "node": n }, ... ].
    if (TryGetHumanBones(extensions, "VRM", out var bonesV0)
        && bonesV0.ValueKind == JsonValueKind.Array)
    {
        var found = new Dictionary<string, int>(StringComparer.Ordinal);
        foreach (var entry in bonesV0.EnumerateArray())
        {
            if (entry.ValueKind != JsonValueKind.Object)
                continue;
            if (!entry.TryGetProperty("bone", out var boneName)
                || boneName.ValueKind != JsonValueKind.String)
                continue;
            if (!TryReadNodeIndex(entry, out int nodeIndex))
                continue;
            found[boneName.GetString()!] = nodeIndex;
        }
        if (found.Count > 0)
        {
            VrmHumanBones = found;
            VrmVersion = 0;
        }
    }

    // Reads holder.node and accepts it only as an int32 that addresses an existing node.
    bool TryReadNodeIndex(JsonElement holder, out int resolved)
    {
        resolved = -1;
        return holder.TryGetProperty("node", out var nodeProp)
            && nodeProp.ValueKind == JsonValueKind.Number
            && nodeProp.TryGetInt32(out resolved)
            && resolved >= 0
            && resolved < Nodes.Count;
    }
}
/// <summary>
/// Looks up <c>extensions.&lt;extensionName&gt;.humanoid.humanBones</c>.
/// </summary>
/// <param name="extensions">The root <c>extensions</c> object.</param>
/// <param name="extensionName">Name of the extension property to descend into.</param>
/// <param name="humanBones">The <c>humanBones</c> value when found (its kind is not
/// checked here); the default element otherwise.</param>
/// <returns>True when the extension and its <c>humanoid</c> are objects and
/// <c>humanBones</c> exists.</returns>
private static bool TryGetHumanBones(JsonElement extensions, string extensionName, out JsonElement humanBones)
{
    humanBones = default;

    if (!extensions.TryGetProperty(extensionName, out var extension)
        || extension.ValueKind != JsonValueKind.Object)
        return false;

    if (!extension.TryGetProperty("humanoid", out var humanoid)
        || humanoid.ValueKind != JsonValueKind.Object)
        return false;

    return humanoid.TryGetProperty("humanBones", out humanBones);
}
// ================================================================== GLB container
/// <summary>
/// Splits a GLB container into its JSON chunk and optional BIN chunk. Layout: a 12-byte
/// header (magic 'glTF', u32 version = 2, u32 total length) followed by chunks of
/// (u32 length, u32 type, payload). The first JSON chunk and the first BIN chunk are
/// kept; every other chunk is skipped.
/// </summary>
/// <param name="data">The whole file; the caller has already matched the magic.</param>
/// <returns>A copy of the JSON payload and a copy of the BIN payload (null when absent).</returns>
/// <exception cref="FormatException">Truncated header/file/chunk, a container version
/// other than 2, or no JSON chunk.</exception>
private static (byte[] Json, byte[]? Bin) ParseGlbContainer(byte[] data)
{
    const int HeaderSize = 12;
    const int ChunkHeaderSize = 8;

    if (data.Length < HeaderSize)
        throw new FormatException("GLB: truncated header (need 12 bytes).");

    uint version = ReadU32(data, 4);
    if (version != 2)
        throw new FormatException($"GLB: unsupported container version {version} (expected 2).");

    long declared = ReadU32(data, 8);
    if (declared > data.Length)
        throw new FormatException($"GLB: truncated file (header declares {declared} bytes, got {data.Length}).");

    byte[]? jsonChunk = null;
    byte[]? binChunk = null;
    long cursor = HeaderSize;
    while (cursor + ChunkHeaderSize <= declared)
    {
        long chunkLength = ReadU32(data, (int)cursor);
        uint chunkType = ReadU32(data, (int)cursor + 4);
        long payloadStart = cursor + ChunkHeaderSize;
        if (payloadStart + chunkLength > data.Length)
            throw new FormatException("GLB: truncated chunk (declared length exceeds the file).");

        switch (chunkType)
        {
            case ChunkJson when jsonChunk is null:
                jsonChunk = data.AsSpan((int)payloadStart, (int)chunkLength).ToArray();
                break;
            case ChunkBin when binChunk is null:
                binChunk = data.AsSpan((int)payloadStart, (int)chunkLength).ToArray();
                break;
            // Anything else (unknown types, repeated JSON/BIN chunks) is skipped per spec.
        }

        // Chunks are 4-aligned: step over the payload plus padding up to the next multiple of 4.
        long padding = (4 - chunkLength % 4) % 4;
        cursor = payloadStart + chunkLength + padding;
    }

    if (jsonChunk is null)
        throw new FormatException("GLB: no JSON chunk found.");
    return (jsonChunk, binChunk);
}
/// <summary>Reads a little-endian unsigned 32-bit integer from four consecutive bytes
/// of <paramref name="data"/>, starting at <paramref name="offset"/>.</summary>
private static uint ReadU32(byte[] data, int offset)
{
    uint b0 = data[offset];
    uint b1 = data[offset + 1];
    uint b2 = data[offset + 2];
    uint b3 = data[offset + 3];
    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}
// ================================================================== buffers
/// <summary>
/// Resolves every entry of <c>buffers</c> to its bytes, in file order: an entry with no
/// <c>uri</c> resolves to the GLB BIN chunk (per spec only buffer 0 may do this; that
/// restriction is not enforced here), and a <c>data:</c> URI is base64-decoded inline.
/// External file URIs are NOT supported — this library does no file IO; users should
/// export .glb.
/// </summary>
/// <param name="root">The glTF document root object.</param>
/// <param name="bin">The GLB BIN chunk payload, or null when there is none.</param>
/// <returns>One byte array per <c>buffers</c> entry; empty when the document has no
/// <c>buffers</c> array.</returns>
/// <exception cref="FormatException">A buffer entry is not an object, its <c>uri</c> is
/// not a string, it needs a BIN chunk that is absent, its data URI is not valid base64,
/// or it references an external file.</exception>
private static List<byte[]> ResolveBuffers(JsonElement root, byte[]? bin)
{
    var buffers = new List<byte[]>();
    if (!root.TryGetProperty("buffers", out var array) || array.ValueKind != JsonValueKind.Array)
        return buffers;
    foreach (var buffer in array.EnumerateArray())
    {
        // TryGetProperty throws InvalidOperationException on a non-object; report the
        // malformed entry as FormatException like every other malformed-file condition.
        if (buffer.ValueKind != JsonValueKind.Object)
            throw new FormatException("glTF: buffer entry is not a JSON object.");
        if (!buffer.TryGetProperty("uri", out var uriProp))
        {
            buffers.Add(bin ?? throw new FormatException(
                "glTF: buffer has no uri but the file has no GLB BIN chunk."));
            continue;
        }
        // GetString throws InvalidOperationException for anything but String/Null.
        if (uriProp.ValueKind is not (JsonValueKind.String or JsonValueKind.Null))
            throw new FormatException("glTF: buffer uri is not a string.");
        var uri = uriProp.GetString() ?? "";
        if (uri.StartsWith("data:", StringComparison.OrdinalIgnoreCase))
        {
            int comma = uri.IndexOf(',');
            if (comma < 0 || !uri[..comma].EndsWith(";base64", StringComparison.OrdinalIgnoreCase))
                throw new FormatException("glTF: only base64 data: URIs are supported for buffers.");
            try
            {
                buffers.Add(Convert.FromBase64String(uri[(comma + 1)..]));
            }
            catch (Exception e) when (e is FormatException or ArgumentException)
            {
                // Preserve the decoder's exception as InnerException for diagnostics.
                throw new FormatException("glTF: invalid base64 in buffer data: URI.", e);
            }
        }
        else
        {
            throw new FormatException(
                $"glTF: buffer references an external file ('{uri}') which this importer cannot "
                + "read (no file IO). Export as .glb (binary, self-contained) instead.");
        }
    }
    return buffers;
}
// ================================================================== nodes + skins
/// <summary>
/// Decodes the <c>nodes</c> array into <see cref="Nodes"/> (same order as the file):
/// name, mesh presence, children, and the rest local transform taken from the
/// <c>matrix</c> property when present, otherwise from translation/rotation/scale.
/// Then resolves each node's <see cref="GltfNode.Parent"/> from the children lists and
/// rejects hierarchies that contain a cycle. Does nothing when the document has no
/// <c>nodes</c> array.
/// </summary>
/// <param name="root">The glTF document root object.</param>
/// <exception cref="FormatException">A node matrix has fewer than 16 elements, a child
/// index does not address an existing node, or the children lists form a cycle
/// (including a node listing itself).</exception>
private void ReadNodes(JsonElement root)
{
    if (!root.TryGetProperty("nodes", out var array) || array.ValueKind != JsonValueKind.Array)
        return;
    Span<float> m = stackalloc float[16]; // matrix scratch (outside the loop: CA2014)
    foreach (var n in array.EnumerateArray())
    {
        var node = new GltfNode
        {
            Name = n.TryGetProperty("name", out var name) ? name.GetString() : null,
            HasMesh = n.TryGetProperty("mesh", out _),
        };
        if (n.TryGetProperty("children", out var children) && children.ValueKind == JsonValueKind.Array)
        {
            var list = new List<int>();
            foreach (var c in children.EnumerateArray())
                list.Add(c.GetInt32());
            node.Children = list.ToArray();
        }
        if (n.TryGetProperty("matrix", out var matrix) && matrix.ValueKind == JsonValueKind.Array)
        {
            // Column-major 16 floats; the element order maps 1:1 onto System.Numerics'
            // row-vector matrices (translation in elements 12..14 either way).
            int i = 0;
            foreach (var v in matrix.EnumerateArray())
            {
                if (i >= 16)
                    break; // extra elements are ignored
                m[i++] = v.GetSingle();
            }
            if (i < 16)
                throw new FormatException("glTF: node matrix has fewer than 16 elements.");
            var local = new Matrix4x4(
                m[0], m[1], m[2], m[3],
                m[4], m[5], m[6], m[7],
                m[8], m[9], m[10], m[11],
                m[12], m[13], m[14], m[15]);
            if (Matrix4x4.Decompose(local, out var scale, out var rot, out var pos))
            {
                node.Translation = pos;
                node.Rotation = rot;
                node.Scale = scale;
            }
            else
            {
                node.Translation = local.Translation; // degenerate: keep position at least
            }
        }
        else
        {
            node.Translation = ReadVec3(n, "translation", Vector3.Zero);
            node.Scale = ReadVec3(n, "scale", Vector3.One);
            if (n.TryGetProperty("rotation", out var r) && r.ValueKind == JsonValueKind.Array
                && r.GetArrayLength() >= 4)
            {
                node.Rotation = new Quaternion(
                    r[0].GetSingle(), r[1].GetSingle(), r[2].GetSingle(), r[3].GetSingle());
            }
        }
        Nodes.Add(node);
    }
    // Resolve parents (per spec a node is referenced by at most one other node's children;
    // when a file violates that, the first referencing node wins).
    for (int i = 0; i < Nodes.Count; i++)
    {
        foreach (var child in Nodes[i].Children)
        {
            if (child < 0 || child >= Nodes.Count)
                throw new FormatException($"glTF: node {i} references nonexistent child {child}.");
            if (Nodes[child].Parent < 0)
                Nodes[child].Parent = i;
        }
    }
    // Reject cyclic hierarchies. The children lists come straight from the file, so a
    // malformed or hostile document can make a node its own ancestor (node 0 -> node 1
    // -> node 0, or a node listing itself); any later walk along Parent or Children
    // would then never terminate. Iterative three-colour depth-first search over the
    // children edges: O(nodes + child references), and no recursion so deep chains
    // cannot overflow the call stack. All child indices were range-checked above.
    var state = new byte[Nodes.Count]; // 0 = unvisited, 1 = on the current path, 2 = finished
    var pending = new Stack<(int Node, int NextChild)>();
    for (int start = 0; start < Nodes.Count; start++)
    {
        if (state[start] != 0)
            continue;
        state[start] = 1;
        pending.Push((start, 0));
        while (pending.Count > 0)
        {
            var (current, next) = pending.Pop();
            var kids = Nodes[current].Children;
            if (next >= kids.Length)
            {
                state[current] = 2; // every descendant explored
                continue;
            }
            pending.Push((current, next + 1)); // resume with the next sibling later
            int kid = kids[next];
            if (state[kid] == 1)
                throw new FormatException(
                    $"glTF: node hierarchy contains a cycle through node {kid}.");
            if (state[kid] == 0)
            {
                state[kid] = 1;
                pending.Push((kid, 0));
            }
        }
    }
}
/// <summary>
/// Reads a three-component vector from an array-valued property.
/// </summary>
/// <param name="element">The JSON object holding the property.</param>
/// <param name="property">Name of the property to read.</param>
/// <param name="fallback">Returned when the property is missing, is not an array, or
/// has fewer than three elements.</param>
/// <returns>The first three array elements as x, y, z, or <paramref name="fallback"/>.</returns>
private static Vector3 ReadVec3(JsonElement element, string property, Vector3 fallback)
{
    bool usable = element.TryGetProperty(property, out var components)
        && components.ValueKind == JsonValueKind.Array
        && components.GetArrayLength() >= 3;
    if (!usable)
        return fallback;

    float x = components[0].GetSingle();
    float y = components[1].GetSingle();
    float z = components[2].GetSingle();
    return new Vector3(x, y, z);
}
/// <summary>
/// Collects the joint node indices of every skin into <see cref="SkinJoints"/>. Skins
/// without a <c>joints</c> array are skipped, and indices that do not address an
/// existing node are dropped. Does nothing when the document has no <c>skins</c> array.
/// </summary>
/// <param name="root">The glTF document root object.</param>
private void ReadSkins(JsonElement root)
{
    bool hasSkins = root.TryGetProperty("skins", out var skins)
        && skins.ValueKind == JsonValueKind.Array;
    if (!hasSkins)
        return;

    int nodeCount = Nodes.Count;
    foreach (var skin in skins.EnumerateArray())
    {
        if (skin.TryGetProperty("joints", out var jointList)
            && jointList.ValueKind == JsonValueKind.Array)
        {
            foreach (var joint in jointList.EnumerateArray())
            {
                int nodeIndex = joint.GetInt32();
                if (nodeIndex < 0 || nodeIndex >= nodeCount)
                    continue; // out of range: not a usable joint
                SkinJoints.Add(nodeIndex);
            }
        }
    }
}
// ================================================================== animations
/// <summary>
/// Decodes every entry of <c>animations</c> into <see cref="Animations"/>. Only channels
/// targeting a node's <c>rotation</c> or <c>translation</c> are kept; scale/weights
/// channels, channels without a plain node/path target, channels whose node index is out
/// of range, and channels whose sampler is empty or under-filled are skipped. An
/// animation without <c>channels</c> or <c>samplers</c> is recorded with no channels.
/// </summary>
/// <param name="root">The glTF document root object.</param>
/// <param name="buffers">Resolved buffer bytes, indexed like the file's <c>buffers</c>.</param>
/// <exception cref="FormatException">An animation, channel, target or sampler has the
/// wrong JSON type, a channel references a nonexistent sampler, or a sampler accessor
/// cannot be decoded.</exception>
private void ReadAnimations(JsonElement root, List<byte[]> buffers)
{
    if (!root.TryGetProperty("animations", out var array) || array.ValueKind != JsonValueKind.Array)
        return;
    root.TryGetProperty("accessors", out var accessors);
    root.TryGetProperty("bufferViews", out var views);
    foreach (var a in array.EnumerateArray())
    {
        // JsonElement accessors throw InvalidOperationException on a wrong value kind;
        // validate kinds up front so malformed input is reported as FormatException.
        if (a.ValueKind != JsonValueKind.Object)
            throw new FormatException("glTF: animation entry is not a JSON object.");
        var animation = new GltfAnimation
        {
            Name = a.TryGetProperty("name", out var name) ? StringOrNull(name, "animation name") : null,
        };
        if (!a.TryGetProperty("channels", out var channels) || !a.TryGetProperty("samplers", out var samplers))
        {
            Animations.Add(animation);
            continue;
        }
        if (channels.ValueKind != JsonValueKind.Array || samplers.ValueKind != JsonValueKind.Array)
            throw new FormatException("glTF: animation 'channels' and 'samplers' must be arrays.");
        foreach (var channel in channels.EnumerateArray())
        {
            if (channel.ValueKind != JsonValueKind.Object)
                throw new FormatException("glTF: animation channel is not a JSON object.");
            if (!channel.TryGetProperty("target", out var target))
                continue;
            if (target.ValueKind != JsonValueKind.Object)
                throw new FormatException("glTF: animation channel target is not a JSON object.");
            if (!target.TryGetProperty("node", out var nodeProp)
                || !target.TryGetProperty("path", out var pathProp))
                continue; // extension targets (e.g. KHR_animation_pointer) are ignored
            var path = StringOrNull(pathProp, "animation channel target path");
            if (path is not ("rotation" or "translation"))
                continue; // scale / weights channels are ignored by design
            int node = ToInt32(nodeProp, "animation channel target node");
            if (node < 0 || node >= Nodes.Count)
                continue;
            int samplerIndex = channel.TryGetProperty("sampler", out var s)
                ? ToInt32(s, "animation channel sampler")
                : -1;
            if (samplerIndex < 0 || samplerIndex >= samplers.GetArrayLength())
                throw new FormatException("glTF: animation channel references a nonexistent sampler.");
            var sampler = samplers[samplerIndex];
            if (sampler.ValueKind != JsonValueKind.Object)
                throw new FormatException("glTF: animation sampler is not a JSON object.");
            var interpolation = sampler.TryGetProperty("interpolation", out var interp)
                ? StringOrNull(interp, "animation sampler interpolation") ?? "LINEAR"
                : "LINEAR";
            bool isRotation = path == "rotation";
            int comps = isRotation ? 4 : 3;
            var times = ReadAccessor(accessors, views, buffers,
                RequiredInt(sampler, "input", "animation sampler"), 1, normalizedAllowed: false);
            var values = ReadAccessor(accessors, views, buffers,
                RequiredInt(sampler, "output", "animation sampler"), comps, normalizedAllowed: isRotation);
            int elementsPerKey = interpolation == "CUBICSPLINE" ? 3 : 1;
            // 64-bit product: keys * up to 12 floats can exceed int.MaxValue for very
            // large samplers, and a wrapped 32-bit result would let an under-filled
            // output accessor pass this check.
            long requiredValues = (long)times.Length * elementsPerKey * comps;
            if (times.Length == 0 || values.Length < requiredValues)
                continue; // empty or under-filled sampler: nothing usable
            animation.Channels.Add(new GltfChannel
            {
                NodeIndex = node,
                IsRotation = isRotation,
                Times = times,
                Values = values,
                Interpolation = interpolation,
            });
        }
        Animations.Add(animation);
    }

    // Returns the string (or null for JSON null); any other kind is a malformed file.
    static string? StringOrNull(JsonElement value, string what)
    {
        if (value.ValueKind is JsonValueKind.String or JsonValueKind.Null)
            return value.GetString();
        throw new FormatException($"glTF: {what} is not a string.");
    }

    // Returns the value as int32; non-numbers and non-int32 numbers are a malformed file.
    static int ToInt32(JsonElement value, string what)
    {
        if (value.ValueKind == JsonValueKind.Number && value.TryGetInt32(out int parsed))
            return parsed;
        throw new FormatException($"glTF: {what} is not a 32-bit integer.");
    }
}
/// <summary>
/// Reads a mandatory 32-bit integer property.
/// </summary>
/// <param name="element">The JSON value expected to be an object holding the property.</param>
/// <param name="property">Name of the property to read.</param>
/// <param name="context">Short description of <paramref name="element"/> used in error
/// messages (e.g. "accessor").</param>
/// <returns>The property value.</returns>
/// <exception cref="FormatException"><paramref name="element"/> is not an object, the
/// property is missing, or its value is not a number representable as an int32.</exception>
private static int RequiredInt(JsonElement element, string property, string context)
{
    // TryGetProperty throws InvalidOperationException on a non-object, so check the kind
    // first; a non-object cannot carry the property and is reported as missing.
    if (element.ValueKind != JsonValueKind.Object || !element.TryGetProperty(property, out var v))
        throw new FormatException($"glTF: {context} is missing '{property}'.");
    // GetInt32 would throw InvalidOperationException for a non-number; keep every
    // malformed-value case under FormatException and name the offending property.
    if (v.ValueKind != JsonValueKind.Number || !v.TryGetInt32(out int value))
        throw new FormatException($"glTF: {context} property '{property}' is not a 32-bit integer.");
    return value;
}
// ================================================================== accessors
/// <summary>
/// Reads one accessor and returns its contents as a flat float array of
/// <c>count * components</c> values, element after element. Float components are copied
/// as-is; signed/unsigned 8- and 16-bit components are accepted only when the accessor
/// is flagged <c>normalized</c> and <paramref name="normalizedAllowed"/> is set (rotation
/// outputs), and are converted with the spec's normalization rules. Accessor and
/// bufferView byte offsets and an explicit <c>byteStride</c> are honored. An accessor
/// without a bufferView yields zeros. Sparse accessors are rejected.
/// </summary>
/// <param name="accessors">The document's <c>accessors</c> value (may be undefined).</param>
/// <param name="views">The document's <c>bufferViews</c> value (may be undefined).</param>
/// <param name="buffers">Resolved buffer bytes, indexed like the file's <c>buffers</c>.</param>
/// <param name="accessorIndex">Index of the accessor to decode.</param>
/// <param name="expectedComps">Component count the caller requires (1, 3 or 4).</param>
/// <param name="normalizedAllowed">Whether normalized integer components are acceptable.</param>
/// <returns>The decoded values.</returns>
/// <exception cref="FormatException">The accessor, bufferView or buffer does not exist,
/// the layout is unsupported or mismatched, the count is negative or implausible, or the
/// data range does not fit inside the buffer.</exception>
private static float[] ReadAccessor(
    JsonElement accessors, JsonElement views, List<byte[]> buffers,
    int accessorIndex, int expectedComps, bool normalizedAllowed)
{
    const int FloatType = 5126;

    bool accessorExists = accessors.ValueKind == JsonValueKind.Array
        && accessorIndex >= 0
        && accessorIndex < accessors.GetArrayLength();
    if (!accessorExists)
        throw new FormatException($"glTF: accessor {accessorIndex} does not exist.");

    JsonElement accessor = accessors[accessorIndex];
    if (accessor.TryGetProperty("sparse", out _))
        throw new FormatException("glTF: sparse accessors are not supported.");

    // ---- element shape ----
    string? typeName = null;
    if (accessor.TryGetProperty("type", out var typeProp))
        typeName = typeProp.GetString();

    int comps;
    switch (typeName)
    {
        case "SCALAR":
            comps = 1;
            break;
        case "VEC3":
            comps = 3;
            break;
        case "VEC4":
            comps = 4;
            break;
        default:
            throw new FormatException($"glTF: unsupported accessor type '{typeName}'.");
    }
    if (comps != expectedComps)
        throw new FormatException($"glTF: accessor {accessorIndex} is {typeName}, expected {expectedComps} component(s).");

    int count = RequiredInt(accessor, "count", "accessor");
    int componentType = RequiredInt(accessor, "componentType", "accessor");
    bool normalized = accessor.TryGetProperty("normalized", out var normalizedProp)
        && normalizedProp.GetBoolean();

    // The count comes from the file and is untrusted: reject a negative value before
    // anything is sized by it (a negative array length would surface as a
    // non-FormatException failure).
    if (count < 0)
        throw new FormatException($"glTF: accessor {accessorIndex} has a negative count ({count}).");

    int compSize;
    switch (componentType)
    {
        case 5126: // FLOAT
            compSize = 4;
            break;
        case 5120: // BYTE
        case 5121: // UNSIGNED_BYTE
            compSize = 1;
            break;
        case 5122: // SHORT
        case 5123: // UNSIGNED_SHORT
            compSize = 2;
            break;
        default:
            throw new FormatException($"glTF: unsupported accessor componentType {componentType}.");
    }

    bool integerAccepted = normalized && normalizedAllowed;
    if (componentType != FloatType && !integerAccepted)
        throw new FormatException($"glTF: accessor {accessorIndex} must be float (or a normalized integer rotation output).");

    int elementSize = comps * compSize;

    // ---- no bufferView: zero-filled accessor ----
    if (!accessor.TryGetProperty("bufferView", out var viewIndexProp))
    {
        // Nothing backs the count here, so bound it by what the file's decoded buffers
        // could hold, with a floor of 65536 elements so small zero-filled accessors
        // still work in documents that carry no buffers, and a ceiling that keeps
        // count * comps representable as an int.
        long totalBufferBytes = 0;
        for (int b = 0; b < buffers.Count; b++)
            totalBufferBytes += buffers[b].Length;

        long backedElements = Math.Max(totalBufferBytes / elementSize, 65536);
        long capacity = Math.Min(backedElements, int.MaxValue / comps);
        if (count > capacity)
            throw new FormatException($"glTF: accessor {accessorIndex} count {count} exceeds what the file's buffers could back (malformed or hostile file).");
        return new float[checked(count * comps)];
    }

    // ---- locate the backing bytes ----
    int viewIndex = viewIndexProp.GetInt32();
    bool viewExists = views.ValueKind == JsonValueKind.Array
        && viewIndex >= 0
        && viewIndex < views.GetArrayLength();
    if (!viewExists)
        throw new FormatException($"glTF: bufferView {viewIndex} does not exist.");

    JsonElement view = views[viewIndex];
    int bufferIndex = RequiredInt(view, "buffer", "bufferView");
    if (bufferIndex < 0 || bufferIndex >= buffers.Count)
        throw new FormatException($"glTF: buffer {bufferIndex} does not exist.");
    byte[] source = buffers[bufferIndex];

    int viewOffset = 0;
    if (view.TryGetProperty("byteOffset", out var viewOffsetProp))
        viewOffset = viewOffsetProp.GetInt32();
    int accessorOffset = 0;
    if (accessor.TryGetProperty("byteOffset", out var accessorOffsetProp))
        accessorOffset = accessorOffsetProp.GetInt32();
    int stride = elementSize;
    if (view.TryGetProperty("byteStride", out var strideProp))
        stride = strideProp.GetInt32();
    if (stride < elementSize)
        throw new FormatException("glTF: bufferView byteStride is smaller than the element size.");

    // Range check in 64-bit arithmetic before allocating: the whole strided range must
    // lie inside the buffer, which also bounds count (and so the allocation) by the
    // buffer's real size.
    long start = (long)viewOffset + accessorOffset;
    long end = start + (long)(count - 1) * stride + elementSize;
    bool outOfRange = start < 0 || end > source.Length;
    if (count > 0 && outOfRange)
        throw new FormatException($"glTF: accessor {accessorIndex} reads past the end of its buffer (truncated file?).");

    // ---- decode ----
    var decoded = new float[checked(count * comps)];
    int write = 0;
    for (int element = 0; element < count; element++)
    {
        int elementStart = (int)(start + (long)element * stride);
        for (int c = 0; c < comps; c++)
            decoded[write++] = DecodeComponent(source, elementStart + c * compSize, componentType);
    }
    return decoded;

    // Converts one stored component to float; integer kinds use the normalized mapping
    // (signed kinds are clamped at -1).
    static float DecodeComponent(byte[] bytes, int at, int kind)
    {
        switch (kind)
        {
            case 5126:
                return BitConverter.ToSingle(bytes, at);
            case 5120:
                return MathF.Max((sbyte)bytes[at] / 127f, -1f);
            case 5121:
                return bytes[at] / 255f;
            case 5122:
                return MathF.Max(BitConverter.ToInt16(bytes, at) / 32767f, -1f);
            default:
                return BitConverter.ToUInt16(bytes, at) / 65535f;
        }
    }
}
}