Numeric tensor operations for convolutional and spatial layers used by an AutoRig neural-net inference port. Implements Conv2d/Conv3d, transpose-block conv, pooling, upsampling, group/instance normalization, activations, and 2D/3D grid sampling with channels-first float[] buffers and simple parallelization via Concurrency.For.
namespace AutoRig.Dl.Nn;
using AutoRig.Dl;
/// <summary>
/// Convolutional / spatial ops for the PartField port (torch semantics,
/// channels-first float[] layouts). Everything whitelist-safe; hot loops go
/// through <see cref="Concurrency.For"/>.
/// </summary>
public static class ConvOps
{
/// <summary>Conv2d, stride 1. x: [C,H,W], w: [O,C,K,K], padding P → [O,H,W].</summary>
public static float[] Conv2d( float[] x, int c, int h, int w,
Tensor weight, Tensor bias, int padding )
{
var o = weight.Shape[0];
var k = weight.Shape[2];
var result = new float[o * h * w];
Concurrency.For( 0, o, oc =>
{
var wBase = oc * c * k * k;
for ( var y = 0; y < h; y++ )
for ( var x0 = 0; x0 < w; x0++ )
{
var sum = bias?.Data[oc] ?? 0f;
for ( var ic = 0; ic < c; ic++ )
for ( var ky = 0; ky < k; ky++ )
{
var sy = y + ky - padding;
if ( sy < 0 || sy >= h )
continue;
var rowBase = ic * h * w + sy * w;
var wRow = wBase + ic * k * k + ky * k;
for ( var kx = 0; kx < k; kx++ )
{
var sx = x0 + kx - padding;
if ( sx < 0 || sx >= w )
continue;
sum += x[rowBase + sx] * weight.Data[wRow + kx];
}
}
result[oc * h * w + y * w + x0] = sum;
}
} );
return result;
}
/// <summary>Conv3d, stride 1. x: [C,D,H,W], w: [O,C,K,K,K] → [O,D,H,W].</summary>
public static float[] Conv3d( float[] x, int c, int d, int h, int w,
Tensor weight, Tensor bias, int padding )
{
var o = weight.Shape[0];
var k = weight.Shape[2];
var result = new float[o * d * h * w];
Concurrency.For( 0, o, oc =>
{
var wBase = oc * c * k * k * k;
for ( var z = 0; z < d; z++ )
for ( var y = 0; y < h; y++ )
for ( var x0 = 0; x0 < w; x0++ )
{
var sum = bias?.Data[oc] ?? 0f;
for ( var ic = 0; ic < c; ic++ )
for ( var kz = 0; kz < k; kz++ )
{
var sz = z + kz - padding;
if ( sz < 0 || sz >= d )
continue;
for ( var ky = 0; ky < k; ky++ )
{
var sy = y + ky - padding;
if ( sy < 0 || sy >= h )
continue;
var rowBase = ic * d * h * w + sz * h * w + sy * w;
var wRow = wBase + ic * k * k * k + kz * k * k + ky * k;
for ( var kx = 0; kx < k; kx++ )
{
var sx = x0 + kx - padding;
if ( sx < 0 || sx >= w )
continue;
sum += x[rowBase + sx] * weight.Data[wRow + kx];
}
}
}
result[oc * d * h * w + z * h * w + y * w + x0] = sum;
}
} );
return result;
}
/// <summary>ConvTranspose2d with kernel == stride (the LRM upsampler case):
/// every input pixel paints a disjoint K×K block. w: [C,O,K,K] → [O,H·K,W·K].</summary>
public static float[] ConvTranspose2dBlock( float[] x, int c, int h, int w,
Tensor weight, Tensor bias )
{
var o = weight.Shape[1];
var k = weight.Shape[2];
var oh = h * k;
var ow = w * k;
var result = new float[o * oh * ow];
Concurrency.For( 0, o, oc =>
{
for ( var y = 0; y < h; y++ )
for ( var x0 = 0; x0 < w; x0++ )
{
for ( var ky = 0; ky < k; ky++ )
for ( var kx = 0; kx < k; kx++ )
{
var sum = 0f;
for ( var ic = 0; ic < c; ic++ )
sum += x[ic * h * w + y * w + x0]
* weight.Data[ic * o * k * k + oc * k * k + ky * k + kx];
result[oc * oh * ow + (y * k + ky) * ow + (x0 * k + kx)] =
sum + (bias?.Data[oc] ?? 0f);
}
}
} );
return result;
}
/// <summary>2× max pool. [C,H,W] → [C,H/2,W/2].</summary>
public static float[] MaxPool2( float[] x, int c, int h, int w )
{
var oh = h / 2;
var ow = w / 2;
var result = new float[c * oh * ow];
Concurrency.For( 0, c, ic =>
{
for ( var y = 0; y < oh; y++ )
for ( var x0 = 0; x0 < ow; x0++ )
{
var at = ic * h * w + y * 2 * w + x0 * 2;
var m = x[at];
m = MathF.Max( m, x[at + 1] );
m = MathF.Max( m, x[at + w] );
m = MathF.Max( m, x[at + w + 1] );
result[ic * oh * ow + y * ow + x0] = m;
}
} );
return result;
}
/// <summary>2× average pool. [C,H,W] → [C,H/2,W/2].</summary>
public static float[] AvgPool2( float[] x, int c, int h, int w )
{
var oh = h / 2;
var ow = w / 2;
var result = new float[c * oh * ow];
Concurrency.For( 0, c, ic =>
{
for ( var y = 0; y < oh; y++ )
for ( var x0 = 0; x0 < ow; x0++ )
{
var at = ic * h * w + y * 2 * w + x0 * 2;
result[ic * oh * ow + y * ow + x0] =
(x[at] + x[at + 1] + x[at + w] + x[at + w + 1]) * 0.25f;
}
} );
return result;
}
/// <summary>Nearest-neighbor 2× upsample. [C,H,W] → [C,2H,2W].</summary>
public static float[] UpsampleNearest2( float[] x, int c, int h, int w )
{
var oh = h * 2;
var ow = w * 2;
var result = new float[c * oh * ow];
Concurrency.For( 0, c, ic =>
{
for ( var y = 0; y < oh; y++ )
for ( var x0 = 0; x0 < ow; x0++ )
result[ic * oh * ow + y * ow + x0] = x[ic * h * w + (y / 2) * w + (x0 / 2)];
} );
return result;
}
/// <summary>GroupNorm over [C,*spatial] (single item), torch semantics.</summary>
public static void GroupNorm( float[] x, int c, int spatial,
int groups, Tensor gamma, Tensor beta, float eps = 1e-6f )
{
var perGroup = c / groups;
Concurrency.For( 0, groups, g =>
{
var start = g * perGroup * spatial;
var count = perGroup * spatial;
double mean = 0;
for ( var i = 0; i < count; i++ )
mean += x[start + i];
mean /= count;
double variance = 0;
for ( var i = 0; i < count; i++ )
{
var d = x[start + i] - mean;
variance += d * d;
}
variance /= count;
var inv = 1.0 / Math.Sqrt( variance + eps );
for ( var ch = 0; ch < perGroup; ch++ )
{
var channel = g * perGroup + ch;
var scale = (float)inv * (gamma?.Data[channel] ?? 1f);
var shift = beta?.Data[channel] ?? 0f;
var at = channel * spatial;
for ( var i = 0; i < spatial; i++ )
x[at + i] = (float)((x[at + i] - mean) * scale) + shift;
}
} );
}
/// <summary>InstanceNorm (no affine): per channel over the spatial dims.</summary>
public static void InstanceNorm( float[] x, int c, int spatial, float eps )
=> GroupNorm( x, c, spatial, c, null, null, eps );
public static void LeakyRelu( float[] x, float slope = 0.1f )
{
for ( var i = 0; i < x.Length; i++ )
if ( x[i] < 0f )
x[i] *= slope;
}
public static void Swish( float[] x )
{
for ( var i = 0; i < x.Length; i++ )
x[i] = x[i] / (1f + MathF.Exp( -x[i] ));
}
/// <summary>torch grid_sample 2D bilinear, padding border. Plane [C,H,W],
/// samples (u,v) in [-1,1] where u indexes W. Returns [N,C].</summary>
public static float[] GridSample2d( float[] plane, int c, int h, int w,
float[] u, float[] v, bool alignCorners )
{
var n = u.Length;
var result = new float[n * c];
Concurrency.For( 0, n, i =>
{
var fx = Unnormalize( u[i], w, alignCorners );
var fy = Unnormalize( v[i], h, alignCorners );
var x0 = (int)MathF.Floor( fx );
var y0 = (int)MathF.Floor( fy );
var tx = fx - x0;
var ty = fy - y0;
int Cx( int q ) => Math.Clamp( q, 0, w - 1 );
int Cy( int q ) => Math.Clamp( q, 0, h - 1 );
var (x0c, x1c, y0c, y1c) = (Cx( x0 ), Cx( x0 + 1 ), Cy( y0 ), Cy( y0 + 1 ));
for ( var ch = 0; ch < c; ch++ )
{
var b = ch * h * w;
var top = plane[b + y0c * w + x0c] * (1 - tx) + plane[b + y0c * w + x1c] * tx;
var bottom = plane[b + y1c * w + x0c] * (1 - tx) + plane[b + y1c * w + x1c] * tx;
result[i * c + ch] = top * (1 - ty) + bottom * ty;
}
} );
return result;
}
/// <summary>torch grid_sample 3D trilinear, border, align_corners=false.
/// Volume [C,D,H,W]; grid (x,y,z) where x indexes W, y→H, z→D. Returns [N,C].</summary>
public static float[] GridSample3d( float[] volume, int c, int d, int h, int w,
float[] gx, float[] gy, float[] gz )
{
var n = gx.Length;
var result = new float[n * c];
Concurrency.For( 0, n, i =>
{
var fx = Unnormalize( gx[i], w, alignCorners: false );
var fy = Unnormalize( gy[i], h, alignCorners: false );
var fz = Unnormalize( gz[i], d, alignCorners: false );
var x0 = (int)MathF.Floor( fx );
var y0 = (int)MathF.Floor( fy );
var z0 = (int)MathF.Floor( fz );
var (tx, ty, tz) = (fx - x0, fy - y0, fz - z0);
int C( int q, int hi ) => Math.Clamp( q, 0, hi - 1 );
var (xa, xb) = (C( x0, w ), C( x0 + 1, w ));
var (ya, yb) = (C( y0, h ), C( y0 + 1, h ));
var (za, zb) = (C( z0, d ), C( z0 + 1, d ));
for ( var ch = 0; ch < c; ch++ )
{
var b = ch * d * h * w;
float At( int z, int y, int x ) => volume[b + z * h * w + y * w + x];
var c00 = At( za, ya, xa ) * (1 - tx) + At( za, ya, xb ) * tx;
var c01 = At( za, yb, xa ) * (1 - tx) + At( za, yb, xb ) * tx;
var c10 = At( zb, ya, xa ) * (1 - tx) + At( zb, ya, xb ) * tx;
var c11 = At( zb, yb, xa ) * (1 - tx) + At( zb, yb, xb ) * tx;
var c0 = c00 * (1 - ty) + c01 * ty;
var c1 = c10 * (1 - ty) + c11 * ty;
result[i * c + ch] = c0 * (1 - tz) + c1 * tz;
}
} );
return result;
}
static float Unnormalize( float coord, int size, bool alignCorners )
=> alignCorners
? (coord + 1f) / 2f * (size - 1)
: ((coord + 1f) * size - 1f) / 2f;
}