Code/AutoRig/Dl/Nn/ConvOps.cs

Utility class implementing convolutional and spatial neural-network operations on channel-first float[] tensors. Provides Conv2d, Conv3d, conv-transpose block upsample, pooling, upsample, group/instance normalization, activations, and 2D/3D grid sampling with explicit loops and Concurrency.For parallelism.

Native Interop
namespace AutoRig.Dl.Nn;

using AutoRig.Dl;

/// <summary>
/// Convolutional / spatial ops for the PartField port (torch semantics,
/// channels-first float[] layouts). Everything whitelist-safe; hot loops go
/// through <see cref="Concurrency.For"/>.
/// </summary>
public static class ConvOps
{
    /// <summary>Conv2d, stride 1. x: [C,H,W], w: [O,C,K,K], padding P → [O,H,W].</summary>
    public static float[] Conv2d( float[] x, int c, int h, int w,
        Tensor weight, Tensor bias, int padding )
    {
        var o = weight.Shape[0];
        var k = weight.Shape[2];
        var result = new float[o * h * w];
        Concurrency.For( 0, o, oc =>
        {
            var wBase = oc * c * k * k;
            for ( var y = 0; y < h; y++ )
                for ( var x0 = 0; x0 < w; x0++ )
                {
                    var sum = bias?.Data[oc] ?? 0f;
                    for ( var ic = 0; ic < c; ic++ )
                        for ( var ky = 0; ky < k; ky++ )
                        {
                            var sy = y + ky - padding;
                            if ( sy < 0 || sy >= h )
                                continue;
                            var rowBase = ic * h * w + sy * w;
                            var wRow = wBase + ic * k * k + ky * k;
                            for ( var kx = 0; kx < k; kx++ )
                            {
                                var sx = x0 + kx - padding;
                                if ( sx < 0 || sx >= w )
                                    continue;
                                sum += x[rowBase + sx] * weight.Data[wRow + kx];
                            }
                        }
                    result[oc * h * w + y * w + x0] = sum;
                }
        } );
        return result;
    }

    /// <summary>Conv3d, stride 1. x: [C,D,H,W], w: [O,C,K,K,K] → [O,D,H,W].</summary>
    public static float[] Conv3d( float[] x, int c, int d, int h, int w,
        Tensor weight, Tensor bias, int padding )
    {
        var o = weight.Shape[0];
        var k = weight.Shape[2];
        var result = new float[o * d * h * w];
        Concurrency.For( 0, o, oc =>
        {
            var wBase = oc * c * k * k * k;
            for ( var z = 0; z < d; z++ )
                for ( var y = 0; y < h; y++ )
                    for ( var x0 = 0; x0 < w; x0++ )
                    {
                        var sum = bias?.Data[oc] ?? 0f;
                        for ( var ic = 0; ic < c; ic++ )
                            for ( var kz = 0; kz < k; kz++ )
                            {
                                var sz = z + kz - padding;
                                if ( sz < 0 || sz >= d )
                                    continue;
                                for ( var ky = 0; ky < k; ky++ )
                                {
                                    var sy = y + ky - padding;
                                    if ( sy < 0 || sy >= h )
                                        continue;
                                    var rowBase = ic * d * h * w + sz * h * w + sy * w;
                                    var wRow = wBase + ic * k * k * k + kz * k * k + ky * k;
                                    for ( var kx = 0; kx < k; kx++ )
                                    {
                                        var sx = x0 + kx - padding;
                                        if ( sx < 0 || sx >= w )
                                            continue;
                                        sum += x[rowBase + sx] * weight.Data[wRow + kx];
                                    }
                                }
                            }
                        result[oc * d * h * w + z * h * w + y * w + x0] = sum;
                    }
        } );
        return result;
    }

    /// <summary>ConvTranspose2d with kernel == stride (the LRM upsampler case):
    /// every input pixel paints a disjoint K×K block. w: [C,O,K,K] → [O,H·K,W·K].</summary>
    public static float[] ConvTranspose2dBlock( float[] x, int c, int h, int w,
        Tensor weight, Tensor bias )
    {
        var o = weight.Shape[1];
        var k = weight.Shape[2];
        var oh = h * k;
        var ow = w * k;
        var result = new float[o * oh * ow];
        Concurrency.For( 0, o, oc =>
        {
            for ( var y = 0; y < h; y++ )
                for ( var x0 = 0; x0 < w; x0++ )
                {
                    for ( var ky = 0; ky < k; ky++ )
                        for ( var kx = 0; kx < k; kx++ )
                        {
                            var sum = 0f;
                            for ( var ic = 0; ic < c; ic++ )
                                sum += x[ic * h * w + y * w + x0]
                                    * weight.Data[ic * o * k * k + oc * k * k + ky * k + kx];
                            result[oc * oh * ow + (y * k + ky) * ow + (x0 * k + kx)] =
                                sum + (bias?.Data[oc] ?? 0f);
                        }
                }
        } );
        return result;
    }

    /// <summary>2× max pool. [C,H,W] → [C,H/2,W/2].</summary>
    public static float[] MaxPool2( float[] x, int c, int h, int w )
    {
        var oh = h / 2;
        var ow = w / 2;
        var result = new float[c * oh * ow];
        Concurrency.For( 0, c, ic =>
        {
            for ( var y = 0; y < oh; y++ )
                for ( var x0 = 0; x0 < ow; x0++ )
                {
                    var at = ic * h * w + y * 2 * w + x0 * 2;
                    var m = x[at];
                    m = MathF.Max( m, x[at + 1] );
                    m = MathF.Max( m, x[at + w] );
                    m = MathF.Max( m, x[at + w + 1] );
                    result[ic * oh * ow + y * ow + x0] = m;
                }
        } );
        return result;
    }

    /// <summary>2× average pool. [C,H,W] → [C,H/2,W/2].</summary>
    public static float[] AvgPool2( float[] x, int c, int h, int w )
    {
        var oh = h / 2;
        var ow = w / 2;
        var result = new float[c * oh * ow];
        Concurrency.For( 0, c, ic =>
        {
            for ( var y = 0; y < oh; y++ )
                for ( var x0 = 0; x0 < ow; x0++ )
                {
                    var at = ic * h * w + y * 2 * w + x0 * 2;
                    result[ic * oh * ow + y * ow + x0] =
                        (x[at] + x[at + 1] + x[at + w] + x[at + w + 1]) * 0.25f;
                }
        } );
        return result;
    }

    /// <summary>Nearest-neighbor 2× upsample. [C,H,W] → [C,2H,2W].</summary>
    public static float[] UpsampleNearest2( float[] x, int c, int h, int w )
    {
        var oh = h * 2;
        var ow = w * 2;
        var result = new float[c * oh * ow];
        Concurrency.For( 0, c, ic =>
        {
            for ( var y = 0; y < oh; y++ )
                for ( var x0 = 0; x0 < ow; x0++ )
                    result[ic * oh * ow + y * ow + x0] = x[ic * h * w + (y / 2) * w + (x0 / 2)];
        } );
        return result;
    }

    /// <summary>GroupNorm over [C,*spatial] (single item), torch semantics.</summary>
    public static void GroupNorm( float[] x, int c, int spatial,
        int groups, Tensor gamma, Tensor beta, float eps = 1e-6f )
    {
        var perGroup = c / groups;
        Concurrency.For( 0, groups, g =>
        {
            var start = g * perGroup * spatial;
            var count = perGroup * spatial;
            double mean = 0;
            for ( var i = 0; i < count; i++ )
                mean += x[start + i];
            mean /= count;
            double variance = 0;
            for ( var i = 0; i < count; i++ )
            {
                var d = x[start + i] - mean;
                variance += d * d;
            }
            variance /= count;
            var inv = 1.0 / Math.Sqrt( variance + eps );
            for ( var ch = 0; ch < perGroup; ch++ )
            {
                var channel = g * perGroup + ch;
                var scale = (float)inv * (gamma?.Data[channel] ?? 1f);
                var shift = beta?.Data[channel] ?? 0f;
                var at = channel * spatial;
                for ( var i = 0; i < spatial; i++ )
                    x[at + i] = (float)((x[at + i] - mean) * scale) + shift;
            }
        } );
    }

    /// <summary>InstanceNorm (no affine): per channel over the spatial dims.</summary>
    public static void InstanceNorm( float[] x, int c, int spatial, float eps )
        => GroupNorm( x, c, spatial, c, null, null, eps );

    public static void LeakyRelu( float[] x, float slope = 0.1f )
    {
        for ( var i = 0; i < x.Length; i++ )
            if ( x[i] < 0f )
                x[i] *= slope;
    }

    public static void Swish( float[] x )
    {
        for ( var i = 0; i < x.Length; i++ )
            x[i] = x[i] / (1f + MathF.Exp( -x[i] ));
    }

    /// <summary>torch grid_sample 2D bilinear, padding border. Plane [C,H,W],
    /// samples (u,v) in [-1,1] where u indexes W. Returns [N,C].</summary>
    public static float[] GridSample2d( float[] plane, int c, int h, int w,
        float[] u, float[] v, bool alignCorners )
    {
        var n = u.Length;
        var result = new float[n * c];
        Concurrency.For( 0, n, i =>
        {
            var fx = Unnormalize( u[i], w, alignCorners );
            var fy = Unnormalize( v[i], h, alignCorners );
            var x0 = (int)MathF.Floor( fx );
            var y0 = (int)MathF.Floor( fy );
            var tx = fx - x0;
            var ty = fy - y0;
            int Cx( int q ) => Math.Clamp( q, 0, w - 1 );
            int Cy( int q ) => Math.Clamp( q, 0, h - 1 );
            var (x0c, x1c, y0c, y1c) = (Cx( x0 ), Cx( x0 + 1 ), Cy( y0 ), Cy( y0 + 1 ));
            for ( var ch = 0; ch < c; ch++ )
            {
                var b = ch * h * w;
                var top = plane[b + y0c * w + x0c] * (1 - tx) + plane[b + y0c * w + x1c] * tx;
                var bottom = plane[b + y1c * w + x0c] * (1 - tx) + plane[b + y1c * w + x1c] * tx;
                result[i * c + ch] = top * (1 - ty) + bottom * ty;
            }
        } );
        return result;
    }

    /// <summary>torch grid_sample 3D trilinear, border, align_corners=false.
    /// Volume [C,D,H,W]; grid (x,y,z) where x indexes W, y→H, z→D. Returns [N,C].</summary>
    public static float[] GridSample3d( float[] volume, int c, int d, int h, int w,
        float[] gx, float[] gy, float[] gz )
    {
        var n = gx.Length;
        var result = new float[n * c];
        Concurrency.For( 0, n, i =>
        {
            var fx = Unnormalize( gx[i], w, alignCorners: false );
            var fy = Unnormalize( gy[i], h, alignCorners: false );
            var fz = Unnormalize( gz[i], d, alignCorners: false );
            var x0 = (int)MathF.Floor( fx );
            var y0 = (int)MathF.Floor( fy );
            var z0 = (int)MathF.Floor( fz );
            var (tx, ty, tz) = (fx - x0, fy - y0, fz - z0);
            int C( int q, int hi ) => Math.Clamp( q, 0, hi - 1 );
            var (xa, xb) = (C( x0, w ), C( x0 + 1, w ));
            var (ya, yb) = (C( y0, h ), C( y0 + 1, h ));
            var (za, zb) = (C( z0, d ), C( z0 + 1, d ));
            for ( var ch = 0; ch < c; ch++ )
            {
                var b = ch * d * h * w;
                float At( int z, int y, int x ) => volume[b + z * h * w + y * w + x];
                var c00 = At( za, ya, xa ) * (1 - tx) + At( za, ya, xb ) * tx;
                var c01 = At( za, yb, xa ) * (1 - tx) + At( za, yb, xb ) * tx;
                var c10 = At( zb, ya, xa ) * (1 - tx) + At( zb, ya, xb ) * tx;
                var c11 = At( zb, yb, xa ) * (1 - tx) + At( zb, yb, xb ) * tx;
                var c0 = c00 * (1 - ty) + c01 * ty;
                var c1 = c10 * (1 - ty) + c11 * ty;
                result[i * c + ch] = c0 * (1 - tz) + c1 * tz;
            }
        } );
        return result;
    }

    static float Unnormalize( float coord, int size, bool alignCorners )
        => alignCorners
            ? (coord + 1f) / 2f * (size - 1)
            : ((coord + 1f) * size - 1f) / 2f;
}