// Emulator/GbaVideo.Rendering.cs
using System.Runtime.InteropServices;
using System.Threading;
using Sandbox.Rendering;
namespace sGBA;
public partial class GbaVideo
{
/// <summary>
/// Per-scanline snapshot of the PPU registers, filled by <c>CaptureScanline</c> and uploaded
/// to the GPU as the "ScanlineStates" buffer. Most fields pack two 16-bit registers into one
/// 32-bit word (first register in the low half, second in the high half).
/// </summary>
/// <remarks>
/// The layout is sequential; the field order presumably has to match the struct declared in
/// the compute shaders (TODO confirm against the shader source), so do not reorder fields.
/// </remarks>
[StructLayout( LayoutKind.Sequential )]
public struct ScanlineState
{
// DispCnt in bits 0-15, Mosaic in bits 16-31.
public uint DispCntMosaic;
// BgCnt[0] (low half) | BgCnt[1] (high half).
public uint BgCnt01;
// BgCnt[2] (low half) | BgCnt[3] (high half).
public uint BgCnt23;
// Scroll offsets per background, packed by PackOffset: horizontal in bits 0-8, vertical in bits 16-24.
public uint BgOffset0, BgOffset1, BgOffset2, BgOffset3;
// BG2 affine parameters and reference point, copied from BgPA[0] / BgPC[0] / BgX[0] / BgY[0].
public int Bg2PA, Bg2PC;
public int Bg2X, Bg2Y;
// BG3 affine parameters and reference point, copied from BgPA[1] / BgPC[1] / BgX[1] / BgY[1].
public int Bg3PA, Bg3PC;
public int Bg3X, Bg3Y;
// BldCnt (low half) | BldAlpha (high half).
public uint BldCntAlpha;
// BldY (low half) | Win0H (high half).
public uint BldYWin0H;
// Win0V (low half) | Win1H (high half).
public uint Win0VWin1H;
// Win1V (low half) | WinIn (high half).
public uint Win1VWinIn;
// WinOut in the low half; the high half is left as zero padding.
public uint WinOutPad;
// Copy of the renderer's _firstAffine value at capture time.
public int FirstAffine;
// Bit i is set when BG i is enabled in DispCnt and this line is at or past _enabledAtY[i].
public uint EnabledAtYMask;
// Sprite batch for this line: start offset in bits 0-15, sprite count in bits 16-31.
public uint OamState;
}
/// <summary>
/// One decoded OAM entry in the form consumed by the OBJ compute shader. Entries are produced
/// by <c>CleanOam</c> and uploaded as the "Sprites" buffer.
/// </summary>
/// <remarks>
/// The layout is sequential; the field order presumably mirrors the shader-side struct
/// (TODO confirm), so do not reorder fields.
/// </remarks>
[StructLayout( LayoutKind.Sequential )]
public struct GpuSprite
{
// Signed screen position (OAM coordinates wrapped to negative values by CleanOam).
public int X, Y;
// Sprite size in pixels as returned by GetSpriteSize.
public int Width, Height;
// On-screen bounding size: twice Width/Height for double-size affine sprites, otherwise equal to them.
public int RenderWidth, RenderHeight;
// Base address of the tile data as computed by CleanOam (aligned down in 2D mapping mode).
public uint CharBase;
// Tile index inside the 2D character map row; zero in 1D mapping mode.
public uint Tile;
// Tiles to advance per sprite tile row (depends on mapping mode and colour depth).
public uint Stride;
// Palette bank taken from attr2 bits 12-15.
public uint Palette;
// Bit flags: 1 = 8bpp, 2 = flip H, 4 = flip V, 8 = affine, 16 = double size, 64 = mosaic,
// 128 = OBJ mode 1, 256 = OBJ mode 2; priority is stored in bits 10-11.
public uint Flags;
// Affine matrix read from OAM for affine sprites; otherwise +/-256 on the diagonal encoding the flips.
public int PA, PB, PC, PD;
// Value computed by CleanOam from the sprite width and X position; appears to be a
// per-sprite rendering cost used by the shader (TODO confirm).
public int Cycles;
}
/// <summary>
/// Immutable pairing of a texture with the size vector describing it, handed from one
/// post-process pass to the next.
/// </summary>
private readonly struct FrameSource
{
	/// <summary>Texture that the next pass reads from.</summary>
	public Texture Texture { get; }

	/// <summary>Size vector for <see cref="Texture"/> (see <c>CreateSizeVector</c>).</summary>
	public Vector4 Size { get; }

	public FrameSource( Texture texture, Vector4 size ) => (Texture, Size) = (texture, size);
}
/// <summary>Integer upscale factor; set by CreateGpuResources and clamped to at least 1.</summary>
public int GpuScale { get; private set; }
/// <summary>Scaled RGBA8888 target written by the final colour pass.</summary>
public Texture OutputTexture { get; private set; }
/// <summary>Command list ("GBA PPU") that UploadAndBuildCommandList re-records each frame.</summary>
public CommandList RenderCommandList { get; private set; }
/// <summary>True once CreateGpuResources has finished; cleared at the start of DisposeGpuResources.</summary>
public bool GpuReady { get; private set; }
/// <summary>When true, DispatchOutputPasses inserts the response-time and LCD grid passes.</summary>
public bool ReproduceClassicFeel { get; private set; } = true;
// Output resolution: native screen size multiplied by GpuScale.
private int _scaledWidth;
private int _scaledHeight;
// Number of previous native frames kept for the response-time pass.
private const int HistoryFrameCount = 7;
// Value bound to the "response_time" shader attribute.
private const float ResponseTimeStrength = 0.333f;
// GPU-side copies of the per-frame snapshot data.
private GpuBuffer<ScanlineState> _gpuScanlines;
private GpuBuffer<GpuSprite> _gpuSprites;
private GpuBuffer<uint> _gpuVram;
private GpuBuffer<uint> _gpuPalette;
// Native-resolution intermediate layers written by the BG, OBJ and window passes.
private Texture _bg0Tex, _bg1Tex, _bg2Tex, _bg3Tex;
private Texture _objColorTex, _objFlagsTex, _windowTex;
// Composited native frame plus the outputs of the two optional "classic" passes.
private Texture _nativeFrameTex, _classicResponseTex, _classicLcdTex;
// Two native-size previews written alternately; _previewWriteSlot is the next one to be written.
private Texture[] _suspendPreviewTex;
private int _previewWriteSlot;
// Ring of previous native frames (see _historyHead / _historyValidCount).
private Texture[] _originalHistoryTex;
private ComputeShader _csBgMode0, _csBgMode2, _csBgMode3, _csBgMode4, _csBgMode5;
private ComputeShader _csObj, _csWindow, _csFinalize;
private ComputeShader _csResponseTime, _csLcdGridV2, _csGbaColor;
// CPU-side snapshots, one set per frame slot (FrameSlotCount slots).
private ScanlineState[][] _scanlineFrames;
private uint[][] _paletteFrames;
private GpuSprite[][] _spriteFrames;
// Number of sprite entries used in each slot (batch offset + size of the last batch).
private int[] _frameOamTotal;
private uint[][] _vramFrames;
private const int FrameSlotCount = 3;
// Slot rotation: the producer fills _writeSlot, CommitFrame swaps it with _readySlot, and
// UploadAndBuildCommandList swaps _readySlot with _readSlot before uploading.
private int _writeSlot;
private int _readSlot = -1;
private int _readySlot = -1;
// Set to 1 by CommitFrame and reset to 0 by UploadAndBuildCommandList (via Interlocked.Exchange).
private int _frameReady;
// True once at least one committed frame has been picked up for rendering.
private bool _hasFrame;
// Up to MaxOamBatches sprite batches of 128 entries each can be recorded per frame.
private const int MaxOamBatches = 8;
private const int MaxOamEntries = 128 * MaxOamBatches;
// Per-slot copies of _oldCharBase / _oldCharBaseFirstY, bound as "OldCharBase2" / "OldCharBase3".
private uint[][] _frameOldCharBase;
private int[][] _frameOldCharBaseFirstY;
// Index of the newest entry in _originalHistoryTex, or -1 while the history is empty.
private int _historyHead = -1;
// Number of history textures holding valid data (capped at HistoryFrameCount).
private int _historyValidCount;
// Count of frames built with the classic passes since the last history reset.
private int _frameCount;
/// <summary>
/// Switches the classic-LCD post-processing on or off and discards the frame history
/// accumulated so far.
/// </summary>
/// <param name="reproduceClassicFeel">True to enable the response-time and LCD grid passes.</param>
public void SetReproduceClassicFeel( bool reproduceClassicFeel )
{
	// The history reset does not depend on the flag, so it can be done first.
	ResetOriginalHistory();
	ReproduceClassicFeel = reproduceClassicFeel;
}
/// <summary>
/// Marks the frame-history ring as empty and restarts the classic frame counter.
/// </summary>
private void ResetOriginalHistory()
{
	_frameCount = _historyValidCount = 0;
	_historyHead = -1; // -1 means "no history entry written yet"
}
/// <summary>
/// (Re)initialises the GPU renderer: tears down any existing state, allocates the CPU frame
/// snapshots and then creates the GPU resources.
/// </summary>
/// <param name="scale">Requested integer upscale factor; values below 1 are treated as 1.</param>
public void InitGpu( int scale = 1 )
{
	// Full teardown first (same sequence as DisposeGpu).
	DisposeGpuResources();
	ClearFrameSnapshots();

	// Snapshots must exist before CreateGpuResources flips GpuReady to true.
	CreateFrameSnapshots();
	CreateGpuResources( scale );
}
/// <summary>
/// Releases all GPU resources and drops the CPU-side frame snapshots.
/// </summary>
public void DisposeGpu()
{
// GPU resources go first: DisposeGpuResources clears GpuReady before the snapshot arrays are nulled.
DisposeGpuResources();
ClearFrameSnapshots();
}
/// <summary>
/// Allocates the CPU-side snapshot arrays for every frame slot and resets the slot rotation
/// (write = 0, ready = 1, read = 2, no frame pending).
/// </summary>
private void CreateFrameSnapshots()
{
	int visibleLines = GbaConstants.VisibleLines;

	var scanlines = new ScanlineState[FrameSlotCount][];
	var palettes = new uint[FrameSlotCount][];
	var sprites = new GpuSprite[FrameSlotCount][];
	var vram = new uint[FrameSlotCount][];
	var oldCharBase = new uint[FrameSlotCount][];
	var oldCharBaseFirstY = new int[FrameSlotCount][];

	for ( int slot = 0; slot < FrameSlotCount; slot++ )
	{
		scanlines[slot] = new ScanlineState[visibleLines];
		palettes[slot] = new uint[256 * visibleLines]; // 256 palette words captured per line
		sprites[slot] = new GpuSprite[MaxOamEntries];
		vram[slot] = new uint[96 * 1024 / 4]; // 96 KiB of VRAM as 32-bit words
		oldCharBase[slot] = new uint[2];
		oldCharBaseFirstY[slot] = new int[2];
	}

	_scanlineFrames = scanlines;
	_paletteFrames = palettes;
	_spriteFrames = sprites;
	_frameOamTotal = new int[FrameSlotCount];
	_vramFrames = vram;
	_frameOldCharBase = oldCharBase;
	_frameOldCharBaseFirstY = oldCharBaseFirstY;

	_writeSlot = 0;
	_readySlot = 1;
	_readSlot = 2;
	_frameReady = 0;
	_hasFrame = false;
}
/// <summary>
/// Drops every CPU-side snapshot array and returns the slot bookkeeping to its
/// "nothing allocated" state.
/// </summary>
private void ClearFrameSnapshots()
{
	// Slot bookkeeping: no ready/read slot and no pending frame.
	_hasFrame = false;
	_writeSlot = _frameReady = 0;
	_readySlot = _readSlot = -1;

	// Release the snapshot storage.
	_frameOldCharBaseFirstY = null;
	_frameOldCharBase = null;
	_vramFrames = null;
	_frameOamTotal = null;
	_spriteFrames = null;
	_paletteFrames = null;
	_scanlineFrames = null;
}
/// <summary>
/// Creates the GPU buffers, render targets, compute shaders and command list, then marks the
/// renderer as ready.
/// </summary>
/// <param name="scale">Requested upscale factor; clamped to a minimum of 1.</param>
private void CreateGpuResources( int scale )
{
	int visibleLines = GbaConstants.VisibleLines;

	GpuScale = Math.Max( 1, scale );
	_scaledWidth = GbaConstants.ScreenWidth * GpuScale;
	_scaledHeight = GbaConstants.ScreenHeight * GpuScale;

	// Structured buffers mirroring the CPU frame snapshots.
	_gpuScanlines = new GpuBuffer<ScanlineState>( visibleLines );
	_gpuSprites = new GpuBuffer<GpuSprite>( MaxOamEntries );
	_gpuVram = new GpuBuffer<uint>( 96 * 1024 / 4 );
	_gpuPalette = new GpuBuffer<uint>( 256 * visibleLines );

	// Native-resolution layer targets.
	_bg0Tex = CreateNativeColorRT( ImageFormat.RGBA8888, gpuOnly: true );
	_bg1Tex = CreateNativeColorRT( ImageFormat.RGBA8888, gpuOnly: true );
	_bg2Tex = CreateNativeColorRT( ImageFormat.RGBA8888, gpuOnly: true );
	_bg3Tex = CreateNativeColorRT( ImageFormat.RGBA8888, gpuOnly: true );
	_objColorTex = CreateNativeColorRT( ImageFormat.RGBA8888, gpuOnly: true );
	_objFlagsTex = CreateNativeUintRT( gpuOnly: true );
	_windowTex = CreateNativeUintRT( gpuOnly: true );

	// Composited frame and the classic post-process chain (half-float precision).
	const ImageFormat halfFloat = ImageFormat.RGBA16161616F;
	_nativeFrameTex = CreateNativeColorRT( halfFloat );
	_classicResponseTex = CreateNativeColorRT( halfFloat, gpuOnly: true );
	_classicLcdTex = CreateScaledColorRT( halfFloat, gpuOnly: true );

	var history = new Texture[HistoryFrameCount];
	for ( int slot = 0; slot < history.Length; slot++ )
	{
		history[slot] = CreateNativeColorRT( halfFloat, gpuOnly: true );
	}
	_originalHistoryTex = history;

	OutputTexture = CreateScaledColorRT( ImageFormat.RGBA8888, gpuOnly: true );

	// The preview targets are not GPU-only because they are read back for screenshots.
	_suspendPreviewTex =
	[
		CreateNativeColorRT( ImageFormat.RGBA8888 ),
		CreateNativeColorRT( ImageFormat.RGBA8888 ),
	];
	_previewWriteSlot = 0;

	_csBgMode0 = new ComputeShader( "shaders/gba_bg_mode0.shader" );
	_csBgMode2 = new ComputeShader( "shaders/gba_bg_mode2.shader" );
	_csBgMode3 = new ComputeShader( "shaders/gba_bg_mode3.shader" );
	_csBgMode4 = new ComputeShader( "shaders/gba_bg_mode4.shader" );
	_csBgMode5 = new ComputeShader( "shaders/gba_bg_mode5.shader" );
	_csObj = new ComputeShader( "shaders/gba_obj.shader" );
	_csWindow = new ComputeShader( "shaders/gba_window.shader" );
	_csFinalize = new ComputeShader( "shaders/gba_finalize.shader" );
	_csResponseTime = new ComputeShader( "shaders/postprocess/motionblur/response_time.shader" );
	_csLcdGridV2 = new ComputeShader( "shaders/postprocess/handheld/lcd_cgwg/lcd_grid_v2.shader" );
	_csGbaColor = new ComputeShader( "shaders/postprocess/handheld/color/gba_color.shader" );

	ResetOriginalHistory();
	RenderCommandList = new CommandList( "GBA PPU" );

	// Set last so the capture/upload paths never see partially created resources.
	GpuReady = true;
}
/// <summary>
/// Releases every GPU resource owned by the renderer and clears the references to them.
/// </summary>
/// <remarks>
/// <see cref="GpuReady"/> is cleared first so the capture and upload paths bail out before
/// anything is disposed. The method may be called repeatedly: every disposed resource
/// reference, including the structured buffers, is reset to null afterwards.
/// </remarks>
private void DisposeGpuResources()
{
	GpuReady = false;

	_gpuScanlines?.Dispose();
	_gpuSprites?.Dispose();
	_gpuVram?.Dispose();
	_gpuPalette?.Dispose();

	_bg0Tex?.Dispose();
	_bg1Tex?.Dispose();
	_bg2Tex?.Dispose();
	_bg3Tex?.Dispose();
	_objColorTex?.Dispose();
	_objFlagsTex?.Dispose();
	_windowTex?.Dispose();
	_nativeFrameTex?.Dispose();
	_classicResponseTex?.Dispose();
	_classicLcdTex?.Dispose();

	if ( _originalHistoryTex != null )
	{
		foreach ( Texture history in _originalHistoryTex )
		{
			history?.Dispose();
		}
	}

	OutputTexture?.Dispose();

	if ( _suspendPreviewTex != null )
	{
		foreach ( Texture preview in _suspendPreviewTex )
		{
			preview?.Dispose();
		}
	}

	// Drop the buffer references as well. Leaving them set would dispose the same buffers
	// again on the next call and would keep the null guards in UploadAndBuildCommandList
	// from ever detecting a torn-down renderer.
	_gpuScanlines = null;
	_gpuSprites = null;
	_gpuVram = null;
	_gpuPalette = null;

	_bg0Tex = null;
	_bg1Tex = null;
	_bg2Tex = null;
	_bg3Tex = null;
	_objColorTex = null;
	_objFlagsTex = null;
	_windowTex = null;
	_nativeFrameTex = null;
	_classicResponseTex = null;
	_classicLcdTex = null;
	_originalHistoryTex = null;
	OutputTexture = null;
	_suspendPreviewTex = null;
	_previewWriteSlot = 0;
	RenderCommandList = null;
}
/// <summary>
/// Builds a UAV-bindable render target of the given size and format.
/// </summary>
/// <param name="width">Texture width in pixels.</param>
/// <param name="height">Texture height in pixels.</param>
/// <param name="format">Pixel format of the target.</param>
/// <param name="gpuOnly">When true, the target is additionally created with GPU-only usage.</param>
/// <returns>The newly created texture.</returns>
private Texture CreateRenderTarget( int width, int height, ImageFormat format, bool gpuOnly = false )
{
	TextureBuilder target = Texture.CreateRenderTarget()
		.WithSize( width, height )
		.WithFormat( format )
		.WithUAVBinding();

	return (gpuOnly ? target.WithGPUOnlyUsage() : target).Create();
}
/// <summary>Creates a colour render target at the native GBA screen resolution.</summary>
private Texture CreateNativeColorRT( ImageFormat format = ImageFormat.RGBA8888, bool gpuOnly = false )
{
	return CreateRenderTarget( GbaConstants.ScreenWidth, GbaConstants.ScreenHeight, format, gpuOnly );
}

/// <summary>Creates a colour render target at the upscaled output resolution.</summary>
private Texture CreateScaledColorRT( ImageFormat format = ImageFormat.RGBA8888, bool gpuOnly = false )
{
	return CreateRenderTarget( _scaledWidth, _scaledHeight, format, gpuOnly );
}

/// <summary>Creates a native-resolution R32_UINT render target (used for flag/mask layers).</summary>
private Texture CreateNativeUintRT( bool gpuOnly = false )
{
	return CreateRenderTarget( GbaConstants.ScreenWidth, GbaConstants.ScreenHeight, ImageFormat.R32_UINT, gpuOnly );
}
/// <summary>
/// Records the PPU register state, palette RAM and current sprite batch for scanline
/// <paramref name="y"/> into the frame slot currently being written.
/// </summary>
/// <param name="y">Scanline index; used directly as an index into the per-slot arrays,
/// so the caller must keep it inside the visible range.</param>
private void CaptureScanline( int y )
{
if ( !GpuReady ) return;
var scanlines = _scanlineFrames[_writeSlot];
ref var s = ref scanlines[y];
// Pairs of 16-bit registers are packed low-half | high-half into single 32-bit words.
s.DispCntMosaic = DispCnt | ((uint)Mosaic << 16);
s.BgCnt01 = BgCnt[0] | ((uint)BgCnt[1] << 16);
s.BgCnt23 = BgCnt[2] | ((uint)BgCnt[3] << 16);
s.BgOffset0 = PackOffset( BgHOfs[0], BgVOfs[0] );
s.BgOffset1 = PackOffset( BgHOfs[1], BgVOfs[1] );
s.BgOffset2 = PackOffset( BgHOfs[2], BgVOfs[2] );
s.BgOffset3 = PackOffset( BgHOfs[3], BgVOfs[3] );
// Affine state: index 0 of the affine arrays is BG2, index 1 is BG3.
s.Bg2PA = BgPA[0];
s.Bg2PC = BgPC[0];
s.Bg2X = BgX[0];
s.Bg2Y = BgY[0];
s.Bg3PA = BgPA[1];
s.Bg3PC = BgPC[1];
s.Bg3X = BgX[1];
s.Bg3Y = BgY[1];
s.BldCntAlpha = BldCnt | ((uint)BldAlpha << 16);
s.BldYWin0H = BldY | ((uint)Win0H << 16);
s.Win0VWin1H = Win0V | ((uint)Win1H << 16);
s.Win1VWinIn = Win1V | ((uint)WinIn << 16);
s.WinOutPad = WinOut;
s.FirstAffine = _firstAffine;
// A background counts as enabled on this line only if its DISPCNT enable bit (bits 8-11)
// is set and the line is at or past the line recorded in _enabledAtY for that background.
uint enabledMask = 0;
for ( int i = 0; i < 4; i++ )
{
if ( (DispCnt & (0x100 << i)) != 0 && y >= _enabledAtY[i] )
enabledMask |= (1u << i);
}
s.EnabledAtYMask = enabledMask;
// Copy 1024 bytes of palette RAM into this line's 256-word region of the slot's palette snapshot.
var ram = Gba.Memory.PaletteRam;
var palDst = _paletteFrames[_writeSlot];
int baseIdx = y * 256;
Buffer.BlockCopy( ram, 0, palDst, baseIdx * 4, 1024 );
// When OAM changed since the last capture, decode it into a new batch placed right after
// the current one. If there is no room left for another full 128-entry batch the change
// is dropped and this line keeps referencing the previous batch.
if ( _oamDirty )
{
int newOffset = _oamBatchOffset + _oamMax;
if ( newOffset + 128 <= MaxOamEntries )
{
_oamBatchOffset = newOffset;
_oamMax = CleanOam( newOffset );
}
_oamDirty = false;
}
// Batch start offset in the low half, sprite count in the high half.
s.OamState = (uint)_oamBatchOffset | ((uint)_oamMax << 16);
}
/// <summary>
/// Decodes the 128 OAM entries into <see cref="GpuSprite"/> records, writing the visible ones
/// into the current write slot starting at <paramref name="offset"/>.
/// </summary>
/// <param name="offset">Index of the first entry to write in the slot's sprite array. The
/// caller must leave room for up to 128 entries after it.</param>
/// <returns>Number of sprites written (0 when the GPU renderer is not ready).</returns>
private int CleanOam( int offset )
{
if ( !GpuReady ) return 0;
var sprites = _spriteFrames[_writeSlot];
byte[] oam = Gba.Memory.Oam;
// DISPCNT bit 6 selects 1D character mapping; bits 0-2 hold the BG mode.
bool mapping1D = (DispCnt & 0x40) != 0;
int bgMode = DispCnt & 7;
int count = 0;
for ( int i = 0; i < 128 && count < 128; i++ )
{
// Each OAM entry is 8 bytes: attr0, attr1, attr2 (little-endian 16-bit) followed by
// a 16-bit slot that holds the affine parameters read further down.
int off = i * 8;
ushort attr0 = (ushort)(oam[off] | (oam[off + 1] << 8));
ushort attr1 = (ushort)(oam[off + 2] | (oam[off + 3] << 8));
ushort attr2 = (ushort)(oam[off + 4] | (oam[off + 5] << 8));
// attr0 bits 10-11: OBJ mode; value 3 is skipped.
int objMode = (attr0 >> 10) & 3;
if ( objMode == 3 ) continue;
// attr0 bit 8 = affine; bit 9 = double-size when affine, otherwise the sprite is skipped.
bool isAffine = (attr0 & 0x100) != 0;
bool doubleSize = isAffine && (attr0 & 0x200) != 0;
if ( !isAffine && (attr0 & 0x200) != 0 ) continue;
int shape = (attr0 >> 14) & 3;
int sizeParam = (attr1 >> 14) & 3;
GetSpriteSize( shape, sizeParam, out int w, out int h );
// Wrap coordinates that lie past the screen edge to negative positions.
int sprY = attr0 & 0xFF;
if ( sprY >= 160 ) sprY -= 256;
int sprX = attr1 & 0x1FF;
if ( sprX >= 240 ) sprX -= 512;
int tileNum = attr2 & 0x3FF;
bool is8bpp = (attr0 & 0x2000) != 0;
// In BG modes 3-5 sprites using tiles below 512 are skipped.
if ( bgMode >= 3 && tileNum < 512 ) continue;
// In 2D mapping, 8bpp tile numbers have their lowest bit cleared.
int align = is8bpp && !mapping1D ? 1 : 0;
uint charBase = (uint)((0x10000 >> 1) + ((tileNum & ~align) * 0x10));
uint tile = 0;
if ( !mapping1D )
{
// 2D mapping: split the address into a row-aligned base plus a tile index within the row.
if ( is8bpp )
tile = (charBase >> 5) & 0xFu;
else
tile = (charBase >> 4) & 0x1Fu;
charBase &= ~0x1FFu;
}
// 1D mapping advances by the sprite's own width in tiles; 2D mapping uses a fixed row of
// 32 tiles (16 for 8bpp).
uint stride = mapping1D ? (uint)(w >> 3) : (uint)(0x20 >> (is8bpp ? 1 : 0));
// Fields are written in place. If the sprite is rejected below (before count++),
// the next accepted sprite overwrites this entry.
ref var spr = ref sprites[offset + count];
spr.X = sprX;
spr.Y = sprY;
spr.Width = w;
spr.Height = h;
spr.RenderWidth = doubleSize ? w * 2 : w;
spr.RenderHeight = doubleSize ? h * 2 : h;
spr.CharBase = charBase;
spr.Tile = tile;
spr.Stride = stride;
spr.Palette = (uint)((attr2 >> 12) & 0xF);
// Flip bits are only honoured for non-affine sprites.
bool flipH = !isAffine && (attr1 & 0x1000) != 0;
bool flipV = !isAffine && (attr1 & 0x2000) != 0;
bool isMosaic = (attr0 & 0x1000) != 0;
int priority = (attr2 >> 10) & 3;
// Flag bits: 1 = 8bpp, 2 = flip H, 4 = flip V, 8 = affine, 16 = double size, 64 = mosaic,
// 128 = OBJ mode 1, 256 = OBJ mode 2, bits 10-11 = priority.
spr.Flags = 0;
if ( is8bpp ) spr.Flags |= 1u;
if ( flipH ) spr.Flags |= 2u;
if ( flipV ) spr.Flags |= 4u;
if ( isAffine ) spr.Flags |= 8u;
if ( doubleSize ) spr.Flags |= 16u;
if ( isMosaic ) spr.Flags |= 64u;
if ( objMode == 1 ) spr.Flags |= 128u;
if ( objMode == 2 ) spr.Flags |= 256u;
spr.Flags |= (uint)(priority << 10);
// Cycle figure derived from the sprite width, reduced when the sprite starts left of the
// screen. Presumably an estimate of the hardware's per-line OBJ rendering cost (TODO confirm).
int cycles;
if ( isAffine )
{
int renderW = doubleSize ? w * 2 : w;
cycles = 8 + renderW * 2;
if ( sprX < 0 )
cycles += sprX;
}
else
{
cycles = w - 2;
if ( sprX < 0 )
{
// Non-affine sprite lying entirely to the left of the screen: drop it.
if ( sprX + w < 0 ) continue;
cycles += sprX >> 1;
}
}
spr.Cycles = cycles;
if ( isAffine )
{
// attr1 bits 9-13 select the affine parameter group. Each group spans 32 bytes of OAM,
// with PA/PB/PC/PD stored in the fourth 16-bit slot of four consecutive entries.
int affIdx = (attr1 >> 9) & 0x1F;
int paOff = affIdx * 32 + 6;
int pbOff = affIdx * 32 + 14;
int pcOff = affIdx * 32 + 22;
int pdOff = affIdx * 32 + 30;
spr.PA = (short)(oam[paOff] | (oam[paOff + 1] << 8));
spr.PB = (short)(oam[pbOff] | (oam[pbOff + 1] << 8));
spr.PC = (short)(oam[pcOff] | (oam[pcOff + 1] << 8));
spr.PD = (short)(oam[pdOff] | (oam[pdOff + 1] << 8));
}
else
{
// Non-affine: identity matrix in 8.8 fixed point (256 = 1.0), negated on flipped axes.
spr.PA = flipH ? -256 : 256;
spr.PB = 0;
spr.PC = 0;
spr.PD = flipV ? -256 : 256;
}
count++;
}
return count;
}
/// <summary>
/// Copies the whole of VRAM into the VRAM snapshot of the frame slot being written.
/// </summary>
private void SnapshotVram()
{
	if ( !GpuReady )
	{
		return;
	}

	var source = Gba.Memory.Vram;
	uint[] destination = _vramFrames[_writeSlot];
	Buffer.BlockCopy( source, 0, destination, 0, source.Length );
}
/// <summary>
/// Finalises the frame slot being written and publishes it for
/// <c>UploadAndBuildCommandList</c> to pick up.
/// </summary>
private void CommitFrame()
{
if ( !GpuReady ) return;
// Store the remaining per-frame values in the slot before it is handed over.
Array.Copy( _oldCharBase, _frameOldCharBase[_writeSlot], 2 );
Array.Copy( _oldCharBaseFirstY, _frameOldCharBaseFirstY[_writeSlot], 2 );
_frameOamTotal[_writeSlot] = _oamBatchOffset + _oamMax;
// Swap the finished slot into _readySlot and continue writing into whatever slot was
// parked there. The ready flag is raised only after the swap.
_writeSlot = Interlocked.Exchange( ref _readySlot, _writeSlot );
Interlocked.Exchange( ref _frameReady, 1 );
}
/// <summary>
/// Records the window-mask compute pass, which writes into the window texture.
/// </summary>
/// <param name="cmd">Command list being recorded.</param>
/// <param name="circle0">Circle detected for window 0 (zero vector when none).</param>
/// <param name="circle1">Circle detected for window 1 (zero vector when none).</param>
private void DispatchWindowPass( CommandList cmd, Vector3 circle0, Vector3 circle1 )
{
	var attributes = cmd.Attributes;
	attributes.Set( "OutputMask", _windowTex );
	attributes.Set( "Circle0", circle0 );
	attributes.Set( "Circle1", circle1 );

	cmd.DispatchCompute( _csWindow, GbaConstants.ScreenWidth, GbaConstants.ScreenHeight, 1 );
	cmd.UavBarrier( _windowTex );
}
/// <summary>
/// Records the sprite (OBJ) compute pass, which produces the OBJ colour and flag layers.
/// </summary>
/// <param name="cmd">Command list being recorded.</param>
private void DispatchObjPass( CommandList cmd )
{
	var attributes = cmd.Attributes;
	attributes.Set( "Sprites", _gpuSprites );
	attributes.Set( "OutputColor", _objColorTex );
	attributes.Set( "OutputFlags", _objFlagsTex );
	attributes.Set( "WindowTex", _windowTex );

	cmd.DispatchCompute( _csObj, GbaConstants.ScreenWidth, GbaConstants.ScreenHeight, 1 );

	// Barriers on everything the pass was given access to, in the original order.
	cmd.UavBarrier( _objColorTex );
	cmd.UavBarrier( _objFlagsTex );
	cmd.UavBarrier( _windowTex );
}
/// <summary>
/// Records the post-processing chain that turns the composited native frame into the scaled
/// output texture and a native-size preview.
/// </summary>
/// <param name="cmd">Command list being recorded.</param>
private void DispatchOutputPasses( CommandList cmd )
{
	Vector4 nativeSize = CreateSizeVector( GbaConstants.ScreenWidth, GbaConstants.ScreenHeight );
	Vector4 scaledSize = CreateSizeVector( _scaledWidth, _scaledHeight );
	var nativeFrame = new FrameSource( _nativeFrameTex, nativeSize );

	FrameSource displaySource;
	FrameSource previewSource;
	if ( ReproduceClassicFeel )
	{
		// Classic chain: response-time blend first, then the LCD grid on top of it.
		// The preview uses the response output, i.e. without the LCD grid.
		previewSource = DispatchClassicResponsePass( cmd, nativeFrame, nativeSize );
		displaySource = DispatchClassicLcdPass( cmd, previewSource, scaledSize );
		_frameCount++;
	}
	else
	{
		displaySource = nativeFrame;
		previewSource = nativeFrame;
	}

	DispatchGbaColorPass( cmd, displaySource, OutputTexture, scaledSize, _scaledWidth, _scaledHeight );
	cmd.UavBarrier( OutputTexture );

	int previewSlot = _previewWriteSlot;
	Texture previewTarget = _suspendPreviewTex[previewSlot];
	DispatchGbaColorPass( cmd, previewSource, previewTarget, nativeSize, GbaConstants.ScreenWidth, GbaConstants.ScreenHeight );
	cmd.UavBarrier( previewTarget );

	// Alternate between the two preview textures.
	_previewWriteSlot = previewSlot ^ 1;
}
/// <summary>
/// Records the response-time pass, which reads the current frame plus the stored history
/// frames and writes the classic response texture.
/// </summary>
/// <param name="cmd">Command list being recorded.</param>
/// <param name="source">Frame to process.</param>
/// <param name="nativeSize">Size vector for the native resolution.</param>
/// <returns>The response texture paired with the native size.</returns>
private FrameSource DispatchClassicResponsePass( CommandList cmd, FrameSource source, Vector4 nativeSize )
{
	Texture input = source.Texture;
	Texture output = _classicResponseTex;

	var attributes = cmd.Attributes;
	attributes.Set( "SourceSize", source.Size );
	attributes.Set( "OriginalSize", nativeSize );
	attributes.Set( "OutputSize", nativeSize );
	attributes.Set( "Source", input );
	BindOriginalHistoryTextures( cmd, input );
	attributes.Set( "OutputTex", output );
	attributes.Set( "response_time", ResponseTimeStrength );

	cmd.DispatchCompute( _csResponseTime, GbaConstants.ScreenWidth, GbaConstants.ScreenHeight, 1 );
	cmd.UavBarrier( output );

	return new FrameSource( output, nativeSize );
}
/// <summary>
/// Records the LCD grid pass, which writes the scaled classic LCD texture.
/// </summary>
/// <param name="cmd">Command list being recorded.</param>
/// <param name="source">Frame to process.</param>
/// <param name="scaledSize">Size vector for the scaled output resolution.</param>
/// <returns>The LCD texture paired with the scaled size.</returns>
private FrameSource DispatchClassicLcdPass( CommandList cmd, FrameSource source, Vector4 scaledSize )
{
	var attributes = cmd.Attributes;
	Vector4 originalSize = CreateSizeVector( GbaConstants.ScreenWidth, GbaConstants.ScreenHeight );

	attributes.Set( "SourceSize", source.Size );
	attributes.Set( "OriginalSize", originalSize );
	attributes.Set( "OutputSize", scaledSize );
	attributes.Set( "Source", source.Texture );
	attributes.Set( "OutputTex", _classicLcdTex );

	// Sub-pixel colours: an identity mapping (R -> red, G -> green, B -> blue).
	attributes.Set( "RSUBPIX_R", 1.0f );
	attributes.Set( "RSUBPIX_G", 0.0f );
	attributes.Set( "RSUBPIX_B", 0.0f );

	attributes.Set( "GSUBPIX_R", 0.0f );
	attributes.Set( "GSUBPIX_G", 1.0f );
	attributes.Set( "GSUBPIX_B", 0.0f );

	attributes.Set( "BSUBPIX_R", 0.0f );
	attributes.Set( "BSUBPIX_G", 0.0f );
	attributes.Set( "BSUBPIX_B", 1.0f );

	// Remaining shader parameters.
	attributes.Set( "gain", 1.0f );
	attributes.Set( "gamma", 3.0f );
	attributes.Set( "blacklevel", 0.05f );
	attributes.Set( "ambient", 0.0f );
	attributes.Set( "BGR", 0.0f );

	cmd.DispatchCompute( _csLcdGridV2, _scaledWidth, _scaledHeight, 1 );
	cmd.UavBarrier( _classicLcdTex );

	return new FrameSource( _classicLcdTex, scaledSize );
}
/// <summary>
/// Records the GBA colour-correction pass from <paramref name="source"/> into
/// <paramref name="output"/>. No barrier is issued here; the caller adds it.
/// </summary>
/// <param name="cmd">Command list being recorded.</param>
/// <param name="source">Frame to read.</param>
/// <param name="output">Texture to write.</param>
/// <param name="outputSize">Size vector describing <paramref name="output"/>.</param>
/// <param name="dispatchWidth">Dispatch width passed to the compute shader.</param>
/// <param name="dispatchHeight">Dispatch height passed to the compute shader.</param>
private void DispatchGbaColorPass( CommandList cmd, FrameSource source, Texture output, Vector4 outputSize, int dispatchWidth, int dispatchHeight )
{
	var attributes = cmd.Attributes;
	Vector4 originalSize = CreateSizeVector( GbaConstants.ScreenWidth, GbaConstants.ScreenHeight );

	attributes.Set( "SourceSize", source.Size );
	attributes.Set( "OriginalSize", originalSize );
	attributes.Set( "OutputSize", outputSize );
	attributes.Set( "Source", source.Texture );
	attributes.Set( "OutputTex", output );
	attributes.Set( "mode", 1.0f );
	attributes.Set( "darken_screen", 0.8f );

	cmd.DispatchCompute( _csGbaColor, dispatchWidth, dispatchHeight, 1 );
}
/// <summary>
/// Copies the current native frame texture into the next slot of the history ring.
/// </summary>
/// <remarks>
/// Does nothing when classic mode is off, when no classic frame has been built since the
/// last reset, or when the textures involved are missing.
/// </remarks>
private void UpdateOriginalHistory()
{
	if ( !ReproduceClassicFeel )
	{
		return;
	}

	if ( _frameCount <= 0 )
	{
		return;
	}

	if ( _nativeFrameTex == null || _originalHistoryTex == null )
	{
		return;
	}

	int slot = (_historyHead + 1) % HistoryFrameCount;
	Texture target = _originalHistoryTex[slot];
	if ( target == null )
	{
		return;
	}

	Graphics.CopyTexture( _nativeFrameTex, target );
	_historyHead = slot;

	if ( _historyValidCount < HistoryFrameCount )
	{
		_historyValidCount++;
	}
}
/// <summary>
/// Binds the seven history inputs ("OriginalHistory1" to "OriginalHistory7") of the
/// response-time shader, oldest last.
/// </summary>
/// <param name="cmd">Command list being recorded.</param>
/// <param name="currentSource">Texture bound for any age that has no stored frame yet.</param>
private void BindOriginalHistoryTextures( CommandList cmd, Texture currentSource )
{
	var attributes = cmd.Attributes;
	for ( int age = 1; age <= HistoryFrameCount; age++ )
	{
		string attributeName = "OriginalHistory" + age;
		attributes.Set( attributeName, GetOriginalHistoryTexture( age, currentSource ) );
	}
}
/// <summary>
/// Looks up the history texture that is <paramref name="age"/> frames old (1 = newest).
/// </summary>
/// <param name="age">Age of the requested frame.</param>
/// <param name="currentSource">Fallback returned when that age is not available.</param>
/// <returns>The stored history texture, or <paramref name="currentSource"/>.</returns>
private Texture GetOriginalHistoryTexture( int age, Texture currentSource )
{
	bool unavailable = _originalHistoryTex == null
		|| _historyHead < 0
		|| age > _historyValidCount;
	if ( unavailable )
	{
		return currentSource;
	}

	// Walk backwards from the newest entry, wrapping around the start of the ring.
	int slot = _historyHead - age + 1;
	for ( ; slot < 0; slot += HistoryFrameCount )
	{
	}

	return _originalHistoryTex[slot];
}
/// <summary>
/// Builds a size vector of the form (width, height, 1/width, 1/height).
/// </summary>
private static Vector4 CreateSizeVector( int width, int height )
{
	float inverseWidth = 1.0f / width;
	float inverseHeight = 1.0f / height;
	return new Vector4( width, height, inverseWidth, inverseHeight );
}
/// <summary>
/// Picks up the most recently committed frame, uploads its snapshot data to the GPU buffers
/// and re-records <see cref="RenderCommandList"/> with every pass needed to draw it.
/// </summary>
/// <returns>
/// True when the command list was rebuilt; false when the renderer is not ready or no frame
/// has been committed yet.
/// </returns>
public bool UploadAndBuildCommandList()
{
if ( !GpuReady || RenderCommandList == null || OutputTexture == null )
return false;
if ( _scanlineFrames == null || _paletteFrames == null || _spriteFrames == null || _frameOamTotal == null || _vramFrames == null )
return false;
if ( _gpuScanlines == null || _gpuSprites == null || _gpuVram == null || _gpuPalette == null )
return false;
// Consume the "frame ready" flag. When a new frame was committed, swap it into the read slot
// and hand the old read slot back for reuse.
if ( Interlocked.Exchange( ref _frameReady, 0 ) == 1 )
{
_readSlot = Interlocked.Exchange( ref _readySlot, _readSlot );
_hasFrame = true;
}
// Without a new frame the previously read slot is rendered again.
if ( !_hasFrame ) return false;
int slot = _readSlot;
if ( slot < 0 ) return false;
var scanlines = _scanlineFrames[slot];
_gpuScanlines.SetData( scanlines );
int totalSprites = _frameOamTotal[slot];
// At least one element is always uploaded, even when the frame recorded no sprites.
_gpuSprites.SetData( _spriteFrames[slot].AsSpan( 0, Math.Max( 1, totalSprites ) ) );
_gpuVram.SetData( _vramFrames[slot] );
_gpuPalette.SetData( _paletteFrames[slot] );
// Bit n of modesMask is set when BG mode n was active on at least one scanline.
uint modesMask = 0;
for ( int y = 0; y < GbaConstants.VisibleLines; y++ )
modesMask |= 1u << (int)(scanlines[y].DispCntMosaic & 7);
DetectCircle( scanlines, 0, out var circle0 );
DetectCircle( scanlines, 1, out var circle1 );
var cmd = RenderCommandList;
// The history copy is issued immediately via Graphics.CopyTexture rather than recorded
// into the command list, and it happens before the list is re-recorded.
UpdateOriginalHistory();
cmd.Reset();
cmd.Attributes.Set( "ScanlineStates", _gpuScanlines );
cmd.Attributes.Set( "Vram", _gpuVram );
cmd.Attributes.Set( "Palette", _gpuPalette );
cmd.Attributes.Set( "Scale", 1 );
DispatchWindowPass( cmd, circle0, circle1 );
DispatchObjPass( cmd );
cmd.Attributes.Set( "OldCharBase2", new Vector2( _frameOldCharBase[slot][0], _frameOldCharBaseFirstY[slot][0] ) );
cmd.Attributes.Set( "OldCharBase3", new Vector2( _frameOldCharBase[slot][1], _frameOldCharBaseFirstY[slot][1] ) );
Texture[] bgTex = [_bg0Tex, _bg1Tex, _bg2Tex, _bg3Tex];
// One or more passes per background, depending on which BG modes occurred this frame.
for ( int bg = 0; bg < 4; bg++ )
{
cmd.Attributes.Set( "BgIndex", bg );
cmd.Attributes.Set( "OutputTex", bgTex[bg] );
var shaders = GetBgShaders( bg, modesMask );
bool first = true;
foreach ( var shader in shaders )
{
// Only the first pass for a background is flagged as the base pass.
cmd.Attributes.Set( "IsBasePass", first ? 1 : 0 );
cmd.DispatchCompute( shader, GbaConstants.ScreenWidth, GbaConstants.ScreenHeight, 1 );
// Several passes write the same texture, so they are separated by a barrier.
if ( shaders.Count > 1 )
cmd.UavBarrier( bgTex[bg] );
first = false;
}
}
cmd.UavBarrier( _bg0Tex );
cmd.UavBarrier( _bg1Tex );
cmd.UavBarrier( _bg2Tex );
cmd.UavBarrier( _bg3Tex );
// Finalize pass: combines the BG, OBJ and window layers into the native frame texture.
cmd.Attributes.Set( "Bg0Tex", _bg0Tex );
cmd.Attributes.Set( "Bg1Tex", _bg1Tex );
cmd.Attributes.Set( "Bg2Tex", _bg2Tex );
cmd.Attributes.Set( "Bg3Tex", _bg3Tex );
cmd.Attributes.Set( "ObjColorTex", _objColorTex );
cmd.Attributes.Set( "ObjFlagsTex", _objFlagsTex );
cmd.Attributes.Set( "WindowTex", _windowTex );
cmd.Attributes.Set( "OutputTex", _nativeFrameTex );
cmd.DispatchCompute( _csFinalize, GbaConstants.ScreenWidth, GbaConstants.ScreenHeight, 1 );
cmd.UavBarrier( _nativeFrameTex );
DispatchOutputPasses( cmd );
return true;
}
/// <summary>
/// Selects the compute shaders needed to render background <paramref name="bg"/> for the BG
/// modes that occurred during the frame.
/// </summary>
/// <param name="bg">Background index (0-3); other values yield an empty list.</param>
/// <param name="modesMask">Bit n set when BG mode n was used on at least one scanline.</param>
/// <returns>Shaders in dispatch order; empty when the background is unused in those modes.</returns>
private List<ComputeShader> GetBgShaders( int bg, uint modesMask )
{
	bool UsesAnyMode( uint modes ) => (modesMask & modes) != 0;

	var shaders = new List<ComputeShader>( 2 );

	if ( bg == 0 || bg == 1 )
	{
		// BG0/BG1: text shader in modes 0 and 1.
		if ( UsesAnyMode( 0b11 ) )
		{
			shaders.Add( _csBgMode0 );
		}
	}
	else if ( bg == 2 )
	{
		// BG2: text in mode 0, affine in modes 1-2, bitmap shaders in modes 3-5.
		if ( UsesAnyMode( 0b001 ) )
		{
			shaders.Add( _csBgMode0 );
		}
		if ( UsesAnyMode( 0b110 ) )
		{
			shaders.Add( _csBgMode2 );
		}
		if ( UsesAnyMode( 0b001000 ) )
		{
			shaders.Add( _csBgMode3 );
		}
		if ( UsesAnyMode( 0b010000 ) )
		{
			shaders.Add( _csBgMode4 );
		}
		if ( UsesAnyMode( 0b100000 ) )
		{
			shaders.Add( _csBgMode5 );
		}
	}
	else if ( bg == 3 )
	{
		// BG3: text in mode 0, affine in mode 2.
		if ( UsesAnyMode( 0b001 ) )
		{
			shaders.Add( _csBgMode0 );
		}
		if ( UsesAnyMode( 0b100 ) )
		{
			shaders.Add( _csBgMode2 );
		}
	}

	return shaders;
}
/// <summary>
/// Checks whether the per-scanline bounds of a window trace out a circle over the frame and,
/// if so, returns its parameters.
/// </summary>
/// <remarks>
/// The first loop estimates centre and radius from how the window edges move from line to
/// line; the second loop verifies every line against that circle with a one-pixel tolerance.
/// Presumably this lets the window shader draw a smooth circle when upscaling (TODO confirm
/// against the window shader).
/// </remarks>
/// <param name="states">Scanline snapshots of the frame (one per visible line).</param>
/// <param name="window">0 to inspect window 0; any other value inspects window 1.</param>
/// <param name="result">(centerX, centerY, radius - 0.499) when a circle was detected,
/// otherwise <c>Vector3.Zero</c>.</param>
private void DetectCircle( ScanlineState[] states, int window, out Vector3 result )
{
result = Vector3.Zero;
// Detection is skipped entirely at 1x scale.
if ( GpuScale < 2 ) return;
int firstY = 0;
int lastStartX = 0;
int lastEndX = 0;
int startX = 0;
int endX = 0;
int circleFirstY = -1;
float centerX = -1;
float centerY = -1;
float radius = 0;
bool invalid = false;
// Pass 1: derive centre and radius from the way the window edges change between lines.
for ( int y = firstY; y < GbaConstants.VisibleLines; y++ )
{
ref var s = ref states[y];
// Unpack the horizontal/vertical bound registers of the selected window.
uint winH, winV;
if ( window == 0 )
{
winH = (s.BldYWin0H >> 16) & 0xFFFFu;
winV = s.Win0VWin1H & 0xFFFFu;
}
else
{
winH = (s.Win0VWin1H >> 16) & 0xFFFFu;
winV = s.Win1VWinIn & 0xFFFFu;
}
lastStartX = startX;
lastEndX = endX;
// High byte = start coordinate, low byte = end coordinate.
startX = (int)((winH >> 8) & 0xFF);
endX = (int)(winH & 0xFF);
int startY = (int)((winV >> 8) & 0xFF);
int endY = (int)(winV & 0xFF);
if ( startX == endX || y < startY || y >= endY )
{
// Window not active on this line. If a run was being tracked, its vertical centre is
// the midpoint between its first line and this line.
if ( circleFirstY >= 0 )
{
centerY = (circleFirstY + y) / 2.0f;
circleFirstY = -1;
}
continue;
}
// Need a non-empty extent on the previous line to compare against.
if ( lastEndX - lastStartX <= 0 ) continue;
if ( startX >= 240 ) { invalid = true; break; }
// Both edges must move by the same amount (within one pixel) for the shape to be symmetric.
int startDiff = lastStartX - startX;
int endDiff = endX - lastEndX;
if ( startDiff - endDiff < -1 || startDiff - endDiff > 1 )
{
invalid = true; break;
}
if ( startX < lastStartX )
{
// Still widening: keep updating the horizontal centre. Widening again after the
// radius has been fixed means the shape is not a single circle.
centerX = (startX + endX) / 2.0f;
if ( radius > 0 ) { invalid = true; break; }
}
else if ( startX > lastStartX && radius <= 0 )
{
// First narrowing line: the previous line was the widest, so half its width is the radius.
radius = (lastEndX - lastStartX) / 2.0f;
}
// Remember where the run started (the previous line, if the window was active there too).
if ( circleFirstY < 0 && y - 1 >= startY && y - 1 < endY )
{
circleFirstY = y - 1;
}
}
if ( radius <= 0 ) invalid = true;
if ( centerX < 0 ) invalid = true;
if ( centerY < 0 ) invalid = true;
// Pass 2: verify every scanline against the estimated circle.
for ( int y = firstY; y < GbaConstants.VisibleLines && !invalid; y++ )
{
ref var s = ref states[y];
uint winH, winV;
if ( window == 0 )
{
winH = (s.BldYWin0H >> 16) & 0xFFFFu;
winV = s.Win0VWin1H & 0xFFFFu;
}
else
{
winH = (s.Win0VWin1H >> 16) & 0xFFFFu;
winV = s.Win1VWinIn & 0xFFFFu;
}
int sx = (int)((winH >> 8) & 0xFF);
int ex = (int)(winH & 0xFF);
int sy = (int)((winV >> 8) & 0xFF);
int ey = (int)(winV & 0xFF);
bool xActive = sx < ex;
bool yActive = y >= sy && y < ey;
if ( xActive && yActive )
{
// An active line farther than one radius from the centre cannot belong to the circle.
if ( centerY - y > radius ) { invalid = true; break; }
if ( y - centerY > radius ) { invalid = true; break; }
// Half chord width expected at this distance from the centre line.
float cosine = MathF.Abs( y - centerY );
float sine = MathF.Sqrt( radius * radius - cosine * cosine );
if ( MathF.Abs( centerX - sine - sx ) <= 1 && MathF.Abs( centerX + sine - ex ) <= 1 )
continue;
// Also accept the chord of the line one step farther from the centre.
if ( radius >= cosine + 1 )
{
float sine2 = MathF.Sqrt( radius * radius - (cosine + 1) * (cosine + 1) );
if ( MathF.Abs( centerX - sine2 - sx ) <= 1 && MathF.Abs( centerX + sine2 - ex ) <= 1 )
continue;
}
invalid = true;
}
else if ( centerY - y < radius && y - centerY < radius )
{
// An inactive line strictly inside the circle's vertical span breaks the shape.
invalid = true;
}
}
if ( !invalid )
result = new Vector3( centerX, centerY, radius - 0.499f );
}
/// <summary>
/// Reads back a native-resolution preview texture synchronously as tightly packed RGBA bytes.
/// </summary>
/// <remarks>
/// The texture read is the one at <c>_previewWriteSlot</c>, i.e. the preview that the next
/// recorded frame will overwrite rather than the one most recently recorded.
/// </remarks>
/// <returns>
/// A buffer of ScreenWidth * ScreenHeight * 4 bytes (zero-filled where no pixel data was
/// available), or null when the preview textures do not exist.
/// </returns>
public byte[] CaptureScreenshot()
{
	if ( _suspendPreviewTex == null )
	{
		return null;
	}

	var pixels = _suspendPreviewTex[_previewWriteSlot].GetPixels();
	var bytes = new byte[GbaConstants.ScreenWidth * GbaConstants.ScreenHeight * 4];

	// The buffer length is a multiple of four, so it holds exactly Length / 4 whole pixels.
	int pixelCount = Math.Min( pixels.Length, bytes.Length / 4 );
	int cursor = 0;
	for ( int p = 0; p < pixelCount; p++ )
	{
		var pixel = pixels[p];
		bytes[cursor++] = pixel.r;
		bytes[cursor++] = pixel.g;
		bytes[cursor++] = pixel.b;
		bytes[cursor++] = pixel.a;
	}

	return bytes;
}
/// <summary>
/// Asynchronous variant of <c>CaptureScreenshot</c>: requests an RGBA8888 read-back of the
/// preview texture and passes the bytes to <paramref name="callback"/>.
/// </summary>
/// <param name="callback">
/// Receives a ScreenWidth * ScreenHeight * 4 byte buffer, or null when the preview textures
/// do not exist. May itself be null, in which case the result is discarded.
/// </param>
public void CaptureScreenshotAsync( Action<byte[]> callback )
{
	if ( _suspendPreviewTex == null )
	{
		callback?.Invoke( null );
		return;
	}

	Texture preview = _suspendPreviewTex[_previewWriteSlot];
	var region = (0, 0, GbaConstants.ScreenWidth, GbaConstants.ScreenHeight);

	preview.GetPixelsAsync<byte>( span =>
	{
		// Always hand back a full-size buffer; copy only as much as was delivered.
		var bytes = new byte[GbaConstants.ScreenWidth * GbaConstants.ScreenHeight * 4];
		int count = Math.Min( span.Length, bytes.Length );
		span.Slice( 0, count ).CopyTo( bytes );
		callback?.Invoke( bytes );
	}, ImageFormat.RGBA8888, region );
}
/// <summary>
/// Packs a pair of scroll offsets into one word: the low 9 bits of <paramref name="h"/> in
/// bits 0-8 and the low 9 bits of <paramref name="v"/> in bits 16-24.
/// </summary>
private static uint PackOffset( short h, short v )
{
	int horizontal = h & 0x1FF;
	int vertical = v & 0x1FF;
	return (uint)horizontal | ((uint)vertical << 16);
}
/// <summary>
/// Maps an OAM shape/size pair to the sprite dimensions in pixels.
/// </summary>
/// <param name="shape">0 = square, 1 = wide, 2 = tall; any other value yields 8x8.</param>
/// <param name="size">Size selector; for wide and tall shapes any value other than 0-2 selects the largest size.</param>
/// <param name="w">Resulting width in pixels.</param>
/// <param name="h">Resulting height in pixels.</param>
private static void GetSpriteSize( int shape, int size, out int w, out int h )
{
	(w, h) = (shape, size) switch
	{
		// Square: 8, 16, 32, 64 for sizes 0-3.
		(0, _) => (8 << size, 8 << size),

		// Wide.
		(1, 0) => (16, 8),
		(1, 1) => (32, 8),
		(1, 2) => (32, 16),
		(1, _) => (64, 32),

		// Tall.
		(2, 0) => (8, 16),
		(2, 1) => (8, 32),
		(2, 2) => (16, 32),
		(2, _) => (32, 64),

		_ => (8, 8),
	};
}
}