AutoRig/Dl/Tensor.cs

A small CPU float32 tensor type used by the project's deep learning code. Implements creation, indexing, 2D matrix multiply, elementwise ops (add, mul), activation functions, softmax, transpose with cached transposed view, slicing/concatenation/reduce utilities and some shape helpers.

Native Interop
using System.Text;

namespace AutoRig.Dl;

/// <summary>
/// A minimal float32 CPU tensor (row-major) with exactly the operations the DL
/// solvers need. All operations allocate their results; shapes are validated with
/// FormatException. Deliberately not a general framework (SAME-port philosophy:
/// implement what the supported architectures use, nothing more).
/// </summary>
public sealed class Tensor
{
    public float[] Data { get; }
    public int[] Shape { get; }

    public int Count => Data.Length;

    Tensor( float[] data, int[] shape )
    {
        Data = data;
        Shape = shape;
    }

    public static Tensor Zeros( params int[] shape )
        => new( new float[Product( shape )], (int[])shape.CloneShape() );

    public static Tensor From( float[] data, params int[] shape )
    {
        ArgumentNullException.ThrowIfNull( data );
        if ( data.Length != Product( shape ) )
            throw new FormatException(
                $"Data length {data.Length} does not match shape [{string.Join( ",", shape )}]." );
        return new Tensor( data, (int[])shape.CloneShape() );
    }

    public float this[params int[] idx]
    {
        get => Data[Offset( idx )];
        set => Data[Offset( idx )] = value;
    }

    int Offset( int[] idx )
    {
        if ( idx.Length != Shape.Length )
            throw new FormatException( $"Index rank {idx.Length} != tensor rank {Shape.Length}." );
        var offset = 0;
        for ( var d = 0; d < idx.Length; d++ )
        {
            if ( idx[d] < 0 || idx[d] >= Shape[d] )
                throw new FormatException( $"Index {idx[d]} out of range for dim {d} (size {Shape[d]})." );
            offset = offset * Shape[d] + idx[d];
        }
        return offset;
    }

    void Require2D( string op )
    {
        if ( Shape.Length != 2 )
            throw new FormatException( $"{op} requires a 2D tensor, got rank {Shape.Length}." );
    }

    /// <summary>2D matrix product.</summary>
    public Tensor MatMul( Tensor other )
    {
        Require2D( "MatMul" );
        other.Require2D( "MatMul" );
        if ( Shape[1] != other.Shape[0] )
            throw new FormatException(
                $"MatMul shape mismatch: {Shape[0]}x{Shape[1]} · {other.Shape[0]}x{other.Shape[1]}." );

        int rows = Shape[0], inner = Shape[1], cols = other.Shape[1];
        var result = new float[rows * cols];
        void Row( int r )
        {
            var rowBase = r * inner;
            var outBase = r * cols;
            for ( var k = 0; k < inner; k++ )
            {
                var a = Data[rowBase + k];
                if ( a == 0f )
                    continue;
                var otherBase = k * cols;
                for ( var c = 0; c < cols; c++ )
                    result[outBase + c] += a * other.Data[otherBase + c];
            }
        }

        // Rows are independent — parallelize when the work amortizes the overhead
        // (the perceiver's 27-minute encode was 99.8% single-threaded matmul).
        if ( (long)rows * inner * cols >= 1 << 20 && rows > 1 )
            Concurrency.For( 0, rows, Row );
        else
            for ( var r = 0; r < rows; r++ )
                Row( r );
        return new Tensor( result, new[] { rows, cols } );
    }

    /// <summary>Elementwise add; a 1D right side broadcasts across the last dimension.</summary>
    public Tensor Add( Tensor other )
    {
        if ( other.Shape.Length == 1 && Shape[^1] == other.Shape[0] )
        {
            var broadcast = new float[Data.Length];
            var lastDim = other.Shape[0];
            for ( var i = 0; i < Data.Length; i++ )
                broadcast[i] = Data[i] + other.Data[i % lastDim];
            return new Tensor( broadcast, (int[])Shape.CloneShape() );
        }
        if ( !Shape.SequenceEqual( other.Shape ) )
            throw new FormatException(
                $"Add shape mismatch: [{string.Join( ",", Shape )}] vs [{string.Join( ",", other.Shape )}]." );
        var result = new float[Data.Length];
        for ( var i = 0; i < Data.Length; i++ )
            result[i] = Data[i] + other.Data[i];
        return new Tensor( result, (int[])Shape.CloneShape() );
    }

    public Tensor Mul( float scalar )
    {
        var result = new float[Data.Length];
        for ( var i = 0; i < Data.Length; i++ )
            result[i] = Data[i] * scalar;
        return new Tensor( result, (int[])Shape.CloneShape() );
    }

    public Tensor Relu() => Map( x => x > 0f ? x : 0f );

    public Tensor Sigmoid() => Map( x => 1f / (1f + MathF.Exp( -x )) );

    public Tensor Tanh() => Map( MathF.Tanh );

    Tensor Map( Func<float, float> f )
    {
        var result = new float[Data.Length];
        for ( var i = 0; i < Data.Length; i++ )
            result[i] = f( Data[i] );
        return new Tensor( result, (int[])Shape.CloneShape() );
    }

    /// <summary>Row-wise (axis 1) or column-wise (axis 0) softmax of a 2D tensor.</summary>
    public Tensor Softmax( int axis )
    {
        Require2D( "Softmax" );
        if ( axis != 1 && axis != 0 )
            throw new FormatException( $"Softmax axis {axis} unsupported (0 or 1)." );
        var t = axis == 1 ? this : Transpose();
        int rows = t.Shape[0], cols = t.Shape[1];
        var result = new float[rows * cols];
        for ( var r = 0; r < rows; r++ )
        {
            var baseIndex = r * cols;
            var max = float.MinValue;
            for ( var c = 0; c < cols; c++ )
                max = MathF.Max( max, t.Data[baseIndex + c] );
            float sum = 0;
            for ( var c = 0; c < cols; c++ )
            {
                var e = MathF.Exp( t.Data[baseIndex + c] - max );
                result[baseIndex + c] = e;
                sum += e;
            }
            for ( var c = 0; c < cols; c++ )
                result[baseIndex + c] /= sum;
        }
        var soft = new Tensor( result, new[] { rows, cols } );
        return axis == 1 ? soft : soft.Transpose();
    }

    Tensor _transposedCache;

    /// <summary>Cached transpose — for WEIGHTS used repeatedly as `x·Wᵀ` (a fresh
    /// Transpose per projection call dominated decode time). Do not mutate Data
    /// after first use.</summary>
    public Tensor Transposed => _transposedCache ??= Transpose();

    public Tensor Transpose()
    {
        Require2D( "Transpose" );
        int rows = Shape[0], cols = Shape[1];
        var result = new float[Data.Length];
        for ( var r = 0; r < rows; r++ )
            for ( var c = 0; c < cols; c++ )
                result[c * rows + r] = Data[r * cols + c];
        return new Tensor( result, new[] { cols, rows } );
    }

    /// <summary>Tiles a [1, C] row into [times, C] (torch.repeat_interleave on rows).</summary>
    public Tensor RepeatRows( int times )
    {
        Require2D( "RepeatRows" );
        if ( Shape[0] != 1 )
            throw new FormatException( $"RepeatRows needs a single row, got {Shape[0]}." );
        var cols = Shape[1];
        var result = new float[times * cols];
        for ( var r = 0; r < times; r++ )
            Array.Copy( Data, 0, result, r * cols, cols );
        return new Tensor( result, new[] { times, cols } );
    }

    /// <summary>Concatenates two 2D tensors along an axis.</summary>
    public Tensor Concat( Tensor other, int axis )
    {
        Require2D( "Concat" );
        other.Require2D( "Concat" );
        if ( axis == 1 )
        {
            if ( Shape[0] != other.Shape[0] )
                throw new FormatException( $"Concat rows differ: {Shape[0]} vs {other.Shape[0]}." );
            int rows = Shape[0], c0 = Shape[1], c1 = other.Shape[1];
            var result = new float[rows * (c0 + c1)];
            for ( var r = 0; r < rows; r++ )
            {
                Array.Copy( Data, r * c0, result, r * (c0 + c1), c0 );
                Array.Copy( other.Data, r * c1, result, r * (c0 + c1) + c0, c1 );
            }
            return new Tensor( result, new[] { rows, c0 + c1 } );
        }
        if ( axis == 0 )
        {
            if ( Shape[1] != other.Shape[1] )
                throw new FormatException( $"Concat cols differ: {Shape[1]} vs {other.Shape[1]}." );
            var result = new float[Data.Length + other.Data.Length];
            Array.Copy( Data, result, Data.Length );
            Array.Copy( other.Data, 0, result, Data.Length, other.Data.Length );
            return new Tensor( result, new[] { Shape[0] + other.Shape[0], Shape[1] } );
        }
        throw new FormatException( $"Concat axis {axis} unsupported." );
    }

    /// <summary>Slices a 2D tensor along an axis.</summary>
    public Tensor Slice( int axis, int start, int length )
    {
        Require2D( "Slice" );
        if ( start < 0 || length <= 0 || start + length > Shape[axis] )
            throw new FormatException( $"Slice [{start},{start + length}) out of range for dim {axis} (size {Shape[axis]})." );
        int rows = Shape[0], cols = Shape[1];
        if ( axis == 0 )
        {
            var result = new float[length * cols];
            Array.Copy( Data, start * cols, result, 0, length * cols );
            return new Tensor( result, new[] { length, cols } );
        }
        var sliced = new float[rows * length];
        for ( var r = 0; r < rows; r++ )
            Array.Copy( Data, r * cols + start, sliced, r * length, length );
        return new Tensor( sliced, new[] { rows, length } );
    }

    /// <summary>Max over an axis of a 2D tensor (keeps a leading 1-dim).</summary>
    public Tensor Max( int axis ) => Reduce( axis, MathF.Max, float.MinValue, mean: false );

    /// <summary>Mean over an axis of a 2D tensor (keeps a leading 1-dim).</summary>
    public Tensor Mean( int axis ) => Reduce( axis, ( a, b ) => a + b, 0f, mean: true );

    Tensor Reduce( int axis, Func<float, float, float> combine, float seed, bool mean )
    {
        Require2D( "Reduce" );
        int rows = Shape[0], cols = Shape[1];
        if ( axis == 0 )
        {
            var result = new float[cols];
            for ( var c = 0; c < cols; c++ )
            {
                var acc = seed;
                for ( var r = 0; r < rows; r++ )
                    acc = combine( acc, Data[r * cols + c] );
                result[c] = mean ? acc / rows : acc;
            }
            return new Tensor( result, new[] { 1, cols } );
        }
        if ( axis == 1 )
        {
            var result = new float[rows];
            for ( var r = 0; r < rows; r++ )
            {
                var acc = seed;
                for ( var c = 0; c < cols; c++ )
                    acc = combine( acc, Data[r * cols + c] );
                result[r] = mean ? acc / cols : acc;
            }
            return new Tensor( result, new[] { rows, 1 } );
        }
        throw new FormatException( $"Reduce axis {axis} unsupported." );
    }

    static long ProductLong( int[] shape )
    {
        long p = 1;
        foreach ( var d in shape )
        {
            if ( d <= 0 )
                throw new FormatException( $"Non-positive dimension in shape [{string.Join( ",", shape )}]." );
            p *= d;
        }
        return p;
    }

    static int Product( int[] shape )
    {
        var p = ProductLong( shape );
        if ( p > int.MaxValue )
            throw new FormatException( $"Shape too large: [{string.Join( ",", shape )}]." );
        return (int)p;
    }

    public override string ToString()
    {
        var sb = new StringBuilder( "Tensor[" );
        sb.Append( string.Join( ",", Shape ) ).Append( ']' );
        return sb.ToString();
    }
}

/// <summary>Whitelist-safe array copy helper (Array.Clone is banned in s&box).</summary>
internal static class ShapeExtensions
{
    public static int[] CloneShape( this int[] shape )
    {
        var copy = new int[shape.Length];
        Array.Copy( shape, copy, shape.Length );
        return copy;
    }
}