Code/AutoRig/Dl/Tensor.cs

A minimal CPU float32 tensor class used by the project's DL solvers. Implements row-major storage, basic shape validation, 2D matrix multiply, elementwise ops, reductions, softmax, transpose (with a cached Transposed view), concatenation, slicing and some utilities.

Native Interop
using System.Text;

namespace AutoRig.Dl;

/// <summary>
/// A minimal float32 CPU tensor (row-major) with exactly the operations the DL
/// solvers need. All operations allocate their results; shapes are validated with
/// FormatException. Deliberately not a general framework (SAME-port philosophy:
/// implement what the supported architectures use, nothing more).
/// </summary>
public sealed class Tensor
{
    public float[] Data { get; }
    public int[] Shape { get; }

    public int Count => Data.Length;

    Tensor( float[] data, int[] shape )
    {
        Data = data;
        Shape = shape;
    }

    public static Tensor Zeros( params int[] shape )
        => new( new float[Product( shape )], (int[])shape.CloneShape() );

    public static Tensor From( float[] data, params int[] shape )
    {
        ArgumentNullException.ThrowIfNull( data );
        if ( data.Length != Product( shape ) )
            throw new FormatException(
                $"Data length {data.Length} does not match shape [{string.Join( ",", shape )}]." );
        return new Tensor( data, (int[])shape.CloneShape() );
    }

    public float this[params int[] idx]
    {
        get => Data[Offset( idx )];
        set => Data[Offset( idx )] = value;
    }

    int Offset( int[] idx )
    {
        if ( idx.Length != Shape.Length )
            throw new FormatException( $"Index rank {idx.Length} != tensor rank {Shape.Length}." );
        var offset = 0;
        for ( var d = 0; d < idx.Length; d++ )
        {
            if ( idx[d] < 0 || idx[d] >= Shape[d] )
                throw new FormatException( $"Index {idx[d]} out of range for dim {d} (size {Shape[d]})." );
            offset = offset * Shape[d] + idx[d];
        }
        return offset;
    }

    void Require2D( string op )
    {
        if ( Shape.Length != 2 )
            throw new FormatException( $"{op} requires a 2D tensor, got rank {Shape.Length}." );
    }

    /// <summary>2D matrix product.</summary>
    public Tensor MatMul( Tensor other )
    {
        Require2D( "MatMul" );
        other.Require2D( "MatMul" );
        if ( Shape[1] != other.Shape[0] )
            throw new FormatException(
                $"MatMul shape mismatch: {Shape[0]}x{Shape[1]} · {other.Shape[0]}x{other.Shape[1]}." );

        int rows = Shape[0], inner = Shape[1], cols = other.Shape[1];
        var result = new float[rows * cols];
        void Row( int r )
        {
            var rowBase = r * inner;
            var outBase = r * cols;
            for ( var k = 0; k < inner; k++ )
            {
                var a = Data[rowBase + k];
                if ( a == 0f )
                    continue;
                var otherBase = k * cols;
                for ( var c = 0; c < cols; c++ )
                    result[outBase + c] += a * other.Data[otherBase + c];
            }
        }

        // Rows are independent — parallelize when the work amortizes the overhead
        // (the perceiver's 27-minute encode was 99.8% single-threaded matmul).
        if ( (long)rows * inner * cols >= 1 << 20 && rows > 1 )
            Concurrency.For( 0, rows, Row );
        else
            for ( var r = 0; r < rows; r++ )
                Row( r );
        return new Tensor( result, new[] { rows, cols } );
    }

    /// <summary>Elementwise add; a 1D right side broadcasts across the last dimension.</summary>
    public Tensor Add( Tensor other )
    {
        if ( other.Shape.Length == 1 && Shape[^1] == other.Shape[0] )
        {
            var broadcast = new float[Data.Length];
            var lastDim = other.Shape[0];
            for ( var i = 0; i < Data.Length; i++ )
                broadcast[i] = Data[i] + other.Data[i % lastDim];
            return new Tensor( broadcast, (int[])Shape.CloneShape() );
        }
        if ( !Shape.SequenceEqual( other.Shape ) )
            throw new FormatException(
                $"Add shape mismatch: [{string.Join( ",", Shape )}] vs [{string.Join( ",", other.Shape )}]." );
        var result = new float[Data.Length];
        for ( var i = 0; i < Data.Length; i++ )
            result[i] = Data[i] + other.Data[i];
        return new Tensor( result, (int[])Shape.CloneShape() );
    }

    public Tensor Mul( float scalar )
    {
        var result = new float[Data.Length];
        for ( var i = 0; i < Data.Length; i++ )
            result[i] = Data[i] * scalar;
        return new Tensor( result, (int[])Shape.CloneShape() );
    }

    public Tensor Relu() => Map( x => x > 0f ? x : 0f );

    public Tensor Sigmoid() => Map( x => 1f / (1f + MathF.Exp( -x )) );

    public Tensor Tanh() => Map( MathF.Tanh );

    Tensor Map( Func<float, float> f )
    {
        var result = new float[Data.Length];
        for ( var i = 0; i < Data.Length; i++ )
            result[i] = f( Data[i] );
        return new Tensor( result, (int[])Shape.CloneShape() );
    }

    /// <summary>Row-wise (axis 1) or column-wise (axis 0) softmax of a 2D tensor.</summary>
    public Tensor Softmax( int axis )
    {
        Require2D( "Softmax" );
        if ( axis != 1 && axis != 0 )
            throw new FormatException( $"Softmax axis {axis} unsupported (0 or 1)." );
        var t = axis == 1 ? this : Transpose();
        int rows = t.Shape[0], cols = t.Shape[1];
        var result = new float[rows * cols];
        for ( var r = 0; r < rows; r++ )
        {
            var baseIndex = r * cols;
            var max = float.MinValue;
            for ( var c = 0; c < cols; c++ )
                max = MathF.Max( max, t.Data[baseIndex + c] );
            float sum = 0;
            for ( var c = 0; c < cols; c++ )
            {
                var e = MathF.Exp( t.Data[baseIndex + c] - max );
                result[baseIndex + c] = e;
                sum += e;
            }
            for ( var c = 0; c < cols; c++ )
                result[baseIndex + c] /= sum;
        }
        var soft = new Tensor( result, new[] { rows, cols } );
        return axis == 1 ? soft : soft.Transpose();
    }

    Tensor _transposedCache;

    /// <summary>Cached transpose — for WEIGHTS used repeatedly as `x·Wᵀ` (a fresh
    /// Transpose per projection call dominated decode time). Do not mutate Data
    /// after first use.</summary>
    public Tensor Transposed => _transposedCache ??= Transpose();

    public Tensor Transpose()
    {
        Require2D( "Transpose" );
        int rows = Shape[0], cols = Shape[1];
        var result = new float[Data.Length];
        for ( var r = 0; r < rows; r++ )
            for ( var c = 0; c < cols; c++ )
                result[c * rows + r] = Data[r * cols + c];
        return new Tensor( result, new[] { cols, rows } );
    }

    /// <summary>Tiles a [1, C] row into [times, C] (torch.repeat_interleave on rows).</summary>
    public Tensor RepeatRows( int times )
    {
        Require2D( "RepeatRows" );
        if ( Shape[0] != 1 )
            throw new FormatException( $"RepeatRows needs a single row, got {Shape[0]}." );
        var cols = Shape[1];
        var result = new float[times * cols];
        for ( var r = 0; r < times; r++ )
            Array.Copy( Data, 0, result, r * cols, cols );
        return new Tensor( result, new[] { times, cols } );
    }

    /// <summary>Concatenates two 2D tensors along an axis.</summary>
    public Tensor Concat( Tensor other, int axis )
    {
        Require2D( "Concat" );
        other.Require2D( "Concat" );
        if ( axis == 1 )
        {
            if ( Shape[0] != other.Shape[0] )
                throw new FormatException( $"Concat rows differ: {Shape[0]} vs {other.Shape[0]}." );
            int rows = Shape[0], c0 = Shape[1], c1 = other.Shape[1];
            var result = new float[rows * (c0 + c1)];
            for ( var r = 0; r < rows; r++ )
            {
                Array.Copy( Data, r * c0, result, r * (c0 + c1), c0 );
                Array.Copy( other.Data, r * c1, result, r * (c0 + c1) + c0, c1 );
            }
            return new Tensor( result, new[] { rows, c0 + c1 } );
        }
        if ( axis == 0 )
        {
            if ( Shape[1] != other.Shape[1] )
                throw new FormatException( $"Concat cols differ: {Shape[1]} vs {other.Shape[1]}." );
            var result = new float[Data.Length + other.Data.Length];
            Array.Copy( Data, result, Data.Length );
            Array.Copy( other.Data, 0, result, Data.Length, other.Data.Length );
            return new Tensor( result, new[] { Shape[0] + other.Shape[0], Shape[1] } );
        }
        throw new FormatException( $"Concat axis {axis} unsupported." );
    }

    /// <summary>Slices a 2D tensor along an axis.</summary>
    public Tensor Slice( int axis, int start, int length )
    {
        Require2D( "Slice" );
        if ( start < 0 || length <= 0 || start + length > Shape[axis] )
            throw new FormatException( $"Slice [{start},{start + length}) out of range for dim {axis} (size {Shape[axis]})." );
        int rows = Shape[0], cols = Shape[1];
        if ( axis == 0 )
        {
            var result = new float[length * cols];
            Array.Copy( Data, start * cols, result, 0, length * cols );
            return new Tensor( result, new[] { length, cols } );
        }
        var sliced = new float[rows * length];
        for ( var r = 0; r < rows; r++ )
            Array.Copy( Data, r * cols + start, sliced, r * length, length );
        return new Tensor( sliced, new[] { rows, length } );
    }

    /// <summary>Max over an axis of a 2D tensor (keeps a leading 1-dim).</summary>
    public Tensor Max( int axis ) => Reduce( axis, MathF.Max, float.MinValue, mean: false );

    /// <summary>Mean over an axis of a 2D tensor (keeps a leading 1-dim).</summary>
    public Tensor Mean( int axis ) => Reduce( axis, ( a, b ) => a + b, 0f, mean: true );

    Tensor Reduce( int axis, Func<float, float, float> combine, float seed, bool mean )
    {
        Require2D( "Reduce" );
        int rows = Shape[0], cols = Shape[1];
        if ( axis == 0 )
        {
            var result = new float[cols];
            for ( var c = 0; c < cols; c++ )
            {
                var acc = seed;
                for ( var r = 0; r < rows; r++ )
                    acc = combine( acc, Data[r * cols + c] );
                result[c] = mean ? acc / rows : acc;
            }
            return new Tensor( result, new[] { 1, cols } );
        }
        if ( axis == 1 )
        {
            var result = new float[rows];
            for ( var r = 0; r < rows; r++ )
            {
                var acc = seed;
                for ( var c = 0; c < cols; c++ )
                    acc = combine( acc, Data[r * cols + c] );
                result[r] = mean ? acc / cols : acc;
            }
            return new Tensor( result, new[] { rows, 1 } );
        }
        throw new FormatException( $"Reduce axis {axis} unsupported." );
    }

    static long ProductLong( int[] shape )
    {
        long p = 1;
        foreach ( var d in shape )
        {
            if ( d <= 0 )
                throw new FormatException( $"Non-positive dimension in shape [{string.Join( ",", shape )}]." );
            p *= d;
        }
        return p;
    }

    static int Product( int[] shape )
    {
        var p = ProductLong( shape );
        if ( p > int.MaxValue )
            throw new FormatException( $"Shape too large: [{string.Join( ",", shape )}]." );
        return (int)p;
    }

    public override string ToString()
    {
        var sb = new StringBuilder( "Tensor[" );
        sb.Append( string.Join( ",", Shape ) ).Append( ']' );
        return sb.ToString();
    }
}

/// <summary>Whitelist-safe array copy helper (Array.Clone is banned in s&box).</summary>
internal static class ShapeExtensions
{
    public static int[] CloneShape( this int[] shape )
    {
        var copy = new int[shape.Length];
        Array.Copy( shape, copy, shape.Length );
        return copy;
    }
}