// SceneGaussianSplatSystem.cs
using Sandbox.Engine.Settings;
using Sandbox.Rendering;
using System.Runtime.InteropServices;
namespace Sandbox;
/// <summary>
/// A single-instance-per-scene-world renderer that collects all <see cref="SceneGaussianSplatObject"/> instances and draws them in one unified pipeline.
/// <br />
/// cull → sort → billboard → draw
/// <br />
/// This ensures correct global sort order across all splat clouds and avoids redundant GPU work (one sort pass instead of N).
/// </summary>
public class SceneGaussianSplatSystem : SceneCustomObject
{
// Per-SceneWorld singleton map, avoids creating duplicate systems
private static readonly Dictionary<SceneWorld, SceneGaussianSplatSystem> _instances = new();
/// <summary>
/// Returns the splat system cached for <paramref name="sceneWorld"/>, or constructs and caches a new one.
/// Every call first sweeps the cache: any entry whose world or system fails <c>IsValid()</c> is removed,
/// its GPU resources are released and its pending disposals are force-flushed.
/// </summary>
/// <param name="sceneWorld">Scene world that owns the system; also the cache key.</param>
/// <returns>The cached system (after flushing its expired disposals) or a freshly created one.</returns>
public static SceneGaussianSplatSystem GetOrCreate( SceneWorld sceneWorld )
{
	// Pass 1: gather dead keys. The dictionary cannot be modified while it is being
	// enumerated, and the list is only allocated when something is actually stale.
	List<SceneWorld> deadWorlds = null;
	foreach ( var entry in _instances )
	{
		if ( entry.Key.IsValid() && entry.Value.IsValid() )
			continue;

		deadWorlds ??= new List<SceneWorld>();
		deadWorlds.Add( entry.Key );
	}

	// Pass 2: drop each dead entry and free what its system still holds
	// (eg. systems left over from previous play sessions).
	int deadCount = deadWorlds?.Count ?? 0;
	for ( int i = 0; i < deadCount; i++ )
	{
		if ( !_instances.Remove( deadWorlds[i], out var orphan ) )
			continue;

		orphan.ReleaseGpuResources();
		orphan.ForceFlushAllDisposals();
	}

	// Cache miss (or an invalid cached system): build a new system and remember it.
	if ( !_instances.TryGetValue( sceneWorld, out var system ) || !system.IsValid() )
	{
		system = new SceneGaussianSplatSystem( sceneWorld );
		_instances[sceneWorld] = system;
		return system;
	}

	// Cache hit: use this main-thread call to flush any expired disposals.
	system.FlushPendingDisposals();
	return system;
}
// Compute shaders, shared across all instances (static).
// Each is loaded by shader name; _chunkLodShader and _cullShader are dispatched from RenderPipeline.
private static readonly ComputeShader _cullShader = new( "gaussian_splat_cull_cs" );
private static readonly ComputeShader _argsShader = new( "gaussian_splat_args_cs" );
private static readonly ComputeShader _depthShader = new( "gaussian_splat_depth_cs" );
private static readonly ComputeShader _sortPrefixShader = new( "gaussian_splat_sort_prefix_cs" );
private static readonly ComputeShader _sortScatterShader = new( "gaussian_splat_sort_scatter_cs" );
private static readonly ComputeShader _computeShader = new( "gaussian_splat_cs" );
private static readonly ComputeShader _chunkLodShader = new( "gaussian_splat_chunk_lod_cs" );
// Material built from the "gaussian_splat" shader.
// NOTE(review): presumably used for the final draw call — its use is not visible in this part of the file.
private static readonly Material _defaultMaterial = Material.FromShader( "gaussian_splat" );
// All-zero array with the same element count (65536) as _histogramBuffer.
// NOTE(review): presumably uploaded to clear the sort histogram — confirm against the sort passes.
private static readonly uint[] ZeroHistogram = new uint[65536];
// Single zero uploaded into _visibleCountBuffer to reset the visible-splat counter before the cull dispatch.
private static readonly uint[] ZeroCount = new uint[1];
/// <summary>
/// Stable slot allocation for splat objects. Each slot has a fixed buffer offset that
/// never moves, so adding/removing objects doesn't shift existing data. Dead slots
/// (Object=null) keep their buffer range reserved — the cull shader skips them via
/// the inactive flag in ObjectDataGpu. This eliminates the O(total_scene_splats)
/// full re-merge that previously occurred on every object add/remove.
/// </summary>
private readonly List<ObjectSlot> _slots = new();
private readonly Dictionary<SceneGaussianSplatObject, int> _objectSlotIndex = new();
/// <summary>
/// Total splat buffer positions allocated across all slots (including dead/free ones).
/// This is the effective "used length" of the splat buffers.
/// </summary>
private int _allocatedSplatCount;
/// <summary>
/// One entry of the stable slot table: a fixed range of the shared splat buffers plus its current occupant.
/// Stored by value in <c>_slots</c>, so changes must be written back to the list after editing a copy.
/// </summary>
private struct ObjectSlot
{
/// <summary>Current occupant of the slot; set to null when the slot is freed.</summary>
public SceneGaussianSplatObject Object;
/// <summary>Index of the slot's first splat in the shared splat buffers / scratch arrays. Assigned when the slot is appended.</summary>
public int BufferOffset;
/// <summary>Number of splat positions reserved for this slot. A free slot is only reused by an object with exactly this SplatCount.</summary>
public int AllocatedCount;
/// <summary>True once the occupant has been released; the buffer range stays reserved.</summary>
public bool IsFree;
/// <summary>
/// References to the data arrays last uploaded into this slot's scratch range.
/// When a new occupant has the same references (shared cache), the scratch already
/// contains correct data and the upload can be skipped entirely.
/// </summary>
public SceneGaussianSplatObject.SplatPosition[] LastPositionData;
public SceneGaussianSplatObject.SplatData[] LastSplatData;
}
// Unified GPU buffers, sized to hold the merged splat data from all objects
private GpuBuffer<SceneGaussianSplatObject.SplatPosition> _positionBuffer;
private GpuBuffer<SceneGaussianSplatObject.SplatData> _splatDataBuffer;
private GpuBuffer<uint> _objectIdBuffer; // Per-splat object index
private GpuBuffer<ObjectDataGpu> _objectDataBuffer; // Per-object transform + params combined
// Chunked LOD buffers
private GpuBuffer<uint> _chunkIdBuffer; // Per-splat chunk index (uint16 packed into uint for structured buffer)
private GpuBuffer<ChunkDataGpu> _chunkDataBuffer; // Per-chunk center + flags
private GpuBuffer<float> _chunkLodBuffer; // Per-chunk LOD factor (written by chunk LOD compute shader)
private int _chunkCapacity;
private int _chunkIdCapacity;
private int _totalChunkCount; // Total chunks across all objects this frame
private GpuBuffer<SplatVertex> _vertexBuffer;
private GpuBuffer<uint> _indexBuffer;
private GpuBuffer<uint> _sortKeysBuffer;
private GpuBuffer<uint> _sortValuesBuffer;
private GpuBuffer<uint> _histogramBuffer;
private GpuBuffer<uint> _visibleIndicesBuffer;
private GpuBuffer<uint> _visibleCountBuffer;
private GpuBuffer<GpuBuffer.IndirectDispatchArguments> _indirectDispatchBuffer;
private GpuBuffer<GpuBuffer.IndirectDrawIndexedArguments> _indirectDrawBuffer;
private GpuBuffer<GaussianSplatVolume.VolumeGpuData> _volumeBuffer; // Add/Subtract volume descriptors
private GpuBuffer<GaussianSplatVolume.VolumeGpuData> _colorVolumeBuffer; // Color volume descriptors (separate to avoid 32-bit mask limit)
private int _splatCapacity;
private int _vertexCapacity;
private int _indexCapacity;
private int _sortCapacity;
private int _objectCapacity;
private int _volumeCapacity;
private int _colorVolumeCapacity;
/// <summary>
/// Registered boolean volumes that control splat visibility.
/// </summary>
private readonly List<GaussianSplatVolume> _volumes = new();
/// <summary>
/// Maps unique tag strings to bit indices (0..31) for GPU-side volume↔object matching.
/// Rebuilt every frame from the tags referenced by active volumes. This lets us support
/// unlimited volumes — the only limit is 32 unique tag strings, which is very generous.
/// </summary>
private readonly Dictionary<string, int> _tagBitRegistry = new();
/// <summary>
/// Shared render attributes passed to all compute shader dispatches and the final draw call.
/// Rebuilt each frame in <see cref="RenderPipeline"/>.
/// </summary>
private readonly RenderAttributes _sharedAttrs = new();
/// <summary>
/// Set when buffer reallocation or compaction requires a full re-upload of all
/// scratch data. Does NOT trigger a re-merge — scratch arrays are always valid.
/// </summary>
private bool _layoutDirty;
/// <summary>
/// Counts down the number of frames to skip compute after buffer reallocation or large
/// data re-uploads. Multiple frames are skipped because the Vulkan pipeline may have
/// 2-3 frames of GPU work in-flight — a single skip isn't enough to drain the pipeline
/// before issuing heavy compute dispatches against new buffers.
/// </summary>
private int _skipComputeFrames;
private int _framesWithoutObjects;
/// <summary>
/// Buffers queued for disposal with frame-delay to avoid destroying resources still
/// referenced by in-flight GPU commands. Each entry tracks the frame it was queued on;
/// actual disposal happens only after <see cref="DisposalDelayFrames"/> frames have passed.
/// </summary>
private readonly List<(IDisposable Resource, int QueuedFrame)> _pendingDisposals = new();
private int _frameCounter;
/// <summary>
/// Number of frames to wait before disposing GPU buffers. Must be >= the Vulkan
/// frame sync latency (typically 2-3) to ensure the GPU has fully drained all
/// commands referencing the old buffer before it is destroyed.
/// </summary>
private const int DisposalDelayFrames = 4;
/// <summary>
/// Reusable scratch buffers for merging per-object data before a single GPU upload.
/// Avoids relying on GpuBuffer.SetData offset parameter (which may not work correctly)
/// and avoids allocating multi-MB arrays on every frame inside the render callback.
/// </summary>
private SceneGaussianSplatObject.SplatPosition[] _positionScratch = Array.Empty<SceneGaussianSplatObject.SplatPosition>();
private SceneGaussianSplatObject.SplatData[] _dataScratch = Array.Empty<SceneGaussianSplatObject.SplatData>();
private uint[] _objectIdScratch = Array.Empty<uint>();
/// <summary>
/// Reusable scratch arrays for per-frame GPU uploads, avoiding allocations every frame.
/// </summary>
private ObjectDataGpu[] _objectDataScratch = Array.Empty<ObjectDataGpu>();
private GaussianSplatVolume.VolumeGpuData[] _volumeScratch = Array.Empty<GaussianSplatVolume.VolumeGpuData>();
private readonly List<ChunkDataGpu> _chunkDataScratch = new();
private GaussianSplatVolume.VolumeGpuData[] _colorVolumeScratch = Array.Empty<GaussianSplatVolume.VolumeGpuData>();
private GaussianSplatVolume[] _activeVolumesScratch = Array.Empty<GaussianSplatVolume>();
private GaussianSplatVolume[] _activeColorVolumesScratch = Array.Empty<GaussianSplatVolume>();
/// <summary>
/// GPU-side vertex output, must match the compute shader's SplatVertex and the VS VertexInput.
/// Element type of <c>_vertexBuffer</c>, which is sized at 4 vertices per splat.
/// Sequential layout: do not reorder, add or remove fields without updating the shaders.
/// </summary>
[StructLayout( LayoutKind.Sequential )]
private struct SplatVertex
{
// The attributes bind each field to the corresponding vertex-input semantic.
[VertexLayout.Position] public Vector3 Position;
[VertexLayout.Normal] public Vector3 Normal;
[VertexLayout.Color] public Color Color;
[VertexLayout.TexCoord] public Vector2 TexCoord;
}
/// <summary>
/// Per-object data combining transform matrix and rendering parameters into one struct.
/// Merged into a single GPU buffer to stay within the 16 storage buffer limit.
/// Must match the shader's ObjectData struct layout exactly.
/// One element is uploaded per slot (including free slots), indexed by slot index.
/// </summary>
[StructLayout( LayoutKind.Sequential )]
private struct ObjectDataGpu
{
/// <summary>
/// Object transform, built as scale * rotation * translation and then transposed before upload.
/// </summary>
public Matrix Transform;
/// <summary>Copied from the object's SplatSize.</summary>
public float SplatSize;
/// <summary>
/// Bit flags as built in RenderPipeline:
/// 1 = object has covariance data,
/// 2 = lighting enabled (ReceiveLighting and has covariance),
/// 4 = inactive (object not active, or a dead/free slot) — the cull shader skips these,
/// 8 = receives shadows while lighting is disabled.
/// </summary>
public uint Flags;
/// <summary>
/// Bitmask of which tag groups this object belongs to (bit N = has tag N).
/// Volumes carry their own include/exclude group bitmasks and the GPU
/// evaluates the match per-volume, removing the 32-volume limit.
/// </summary>
public uint TagBits;
/// <summary>
/// Per-object tint color packed as RGBA8 (matches shader UnpackColor layout).
/// </summary>
public uint TintColor;
/// <summary>
/// Maximum LOD distance in world units. 0 = LOD disabled for this object.
/// </summary>
public float LODMaxDistance;
/// <summary>
/// Shadow tint color packed as RGBA8. Only used when Flags bit 3 (ReceiveShadows) is set.
/// Lerps toward this color in shadowed regions.
/// </summary>
public uint ShadowTintColor;
// Unused padding, never written by the CPU side.
// NOTE(review): presumably keeps the following Vector4s 16-byte aligned — confirm against the shader struct.
public float _lodPad1;
public float _lodPad2;
/// <summary>
/// 8-sample LOD curve LUT: fraction of splats to keep at evenly spaced distances.
/// Packed as two float4s for efficient GPU access.
/// </summary>
public Vector4 LODCurveLow; // samples 0..3
public Vector4 LODCurveHigh; // samples 4..7
}
/// <summary>
/// GPU-side per-chunk data for the chunk LOD compute shader.
/// One element is built per chunk of every live object that has chunked LOD enabled.
/// Sequential layout: field order is significant.
/// </summary>
[StructLayout( LayoutKind.Sequential )]
private struct ChunkDataGpu
{
/// <summary>Local-space center of this chunk.</summary>
public Vector3 Center;
/// <summary>Object index this chunk belongs to (for transform lookup). Set to the owning slot index.</summary>
public uint ObjectId;
/// <summary>Packed flags: bit 0 = exempt from LOD.</summary>
public uint Flags;
// Unused padding, never written by the CPU side.
// NOTE(review): presumably pads the struct to 32 bytes to match the shader layout — confirm.
public float _pad0;
public float _pad1;
public float _pad2;
}
/// <summary>
/// Creates the system inside <paramref name="sceneWorld"/> and flags it as a translucent, non-opaque scene object.
/// Prefer <see cref="GetOrCreate"/>, which keeps a single system per scene world.
/// </summary>
/// <param name="sceneWorld">Scene world passed to the <see cref="SceneCustomObject"/> base constructor.</param>
public SceneGaussianSplatSystem( SceneWorld sceneWorld ) : base( sceneWorld )
{
// NOTE(review): presumably these flags route the object to the translucent render pass — confirm against the engine.
Flags.IsOpaque = false;
Flags.IsTranslucent = true;
}
/// <summary>
/// Starts tracking a splat object so this system renders it.
/// No buffer slot is reserved here: the object is recorded with a "no slot" marker and
/// receives a stable slot later, once it reports a non-zero splat count. Existing data never shifts.
/// </summary>
/// <param name="obj">Object to track. Registering an already-tracked object is a no-op.</param>
public void Register( SceneGaussianSplatObject obj )
{
	FlushPendingDisposals();

	// -1 is the sentinel for "registered, slot not assigned yet". TryAdd leaves an
	// existing mapping alone, so re-registering never discards an assigned slot.
	_objectSlotIndex.TryAdd( obj, -1 );
}
/// <summary>
/// Stops tracking a splat object.
/// Its slot (if it had one) is flagged free rather than removed, so no buffer data moves;
/// free slots are uploaded with the inactive flag and skipped by the cull shader.
/// </summary>
/// <param name="obj">Object to stop tracking. Unknown objects are ignored.</param>
public void Unregister( SceneGaussianSplatObject obj )
{
	FlushPendingDisposals();

	// Not tracked at all: nothing to release.
	if ( !_objectSlotIndex.Remove( obj, out int slotIdx ) )
		return;

	// Tracked but never given a slot (sentinel -1), or an out-of-range index.
	if ( slotIdx < 0 || slotIdx >= _slots.Count )
		return;

	// ObjectSlot is a struct: edit a copy and write it back.
	// _layoutDirty is deliberately left untouched — the data stays in the buffers and
	// the slot's ObjectDataGpu is written with Flags=4 on the next frame.
	var freed = _slots[slotIdx];
	freed.Object = null;
	freed.IsFree = true;
	_slots[slotIdx] = freed;
}
/// <summary>
/// Requests a full re-upload of all scratch data on the next frame.
/// Intended for use after buffer reallocation or compaction; ordinary add/remove does not need it.
/// </summary>
public void MarkLayoutDirty() => _layoutDirty = true;
/// <summary>
/// Grows the GPU buffers ahead of time so that spawning more splats later does not reallocate.
/// Call this during loading screens before instantiating splat prefabs at runtime.
/// If existing splat buffers have to be replaced, compute is skipped for
/// <see cref="DisposalDelayFrames"/> frames; nothing is skipped when the buffers are already large enough.
/// </summary>
/// <param name="additionalSplats">Number of splats expected on top of what is currently allocated.</param>
public void ReserveCapacity( int additionalSplats )
	// Room is also requested for one more object than there are slots today.
	=> EnsureBufferCapacity( _allocatedSplatCount + additionalSplats, _slots.Count + 1 );
/// <summary>
/// Gives <paramref name="obj"/> a stable range in the shared splat buffers.
/// The first free slot whose reserved size equals the object's splat count is recycled
/// (common for pooled objects that all load the same file); otherwise a new slot is
/// appended after everything allocated so far. Either way the object is flagged for upload.
/// </summary>
/// <param name="obj">Object whose data has become available.</param>
private void AssignSlot( SceneGaussianSplatObject obj )
{
	int splatCount = obj.SplatCount;

	// Exact-size match only: a recycled slot wastes no buffer space.
	int reusable = _slots.FindIndex( s => s.IsFree && s.AllocatedCount == splatCount );

	if ( reusable >= 0 )
	{
		// ObjectSlot is a struct: edit a copy and write it back.
		var recycled = _slots[reusable];
		recycled.Object = obj;
		recycled.IsFree = false;
		_slots[reusable] = recycled;
		_objectSlotIndex[obj] = reusable;
	}
	else
	{
		// Nothing to recycle — the new slot starts where the allocated range currently ends.
		_slots.Add( new ObjectSlot
		{
			Object = obj,
			BufferOffset = _allocatedSplatCount,
			AllocatedCount = splatCount,
			IsFree = false
		} );
		_objectSlotIndex[obj] = _slots.Count - 1;
		_allocatedSplatCount += splatCount;
	}

	// Force the object's data to be uploaded into its slot.
	obj.DataChanged = true;
}
/// <summary>
/// Adds a volume to the set this system evaluates each frame. A volume that is already registered is not added twice.
/// </summary>
/// <param name="vol">Volume to register.</param>
public void RegisterVolume( GaussianSplatVolume vol )
{
	if ( _volumes.Contains( vol ) )
		return;

	_volumes.Add( vol );
}
/// <summary>
/// Removes a volume from this system. Volumes that were never registered are ignored.
/// </summary>
/// <param name="vol">Volume to remove.</param>
public void UnregisterVolume( GaussianSplatVolume vol ) => _volumes.Remove( vol );
/// <summary>
/// Sum of the current splat counts of every slot that is occupied by a valid object.
/// Free slots and slots whose object is null or invalid contribute nothing.
/// </summary>
public int TotalSplatCount
{
	get
	{
		int sum = 0;
		for ( int i = 0; i < _slots.Count; i++ )
		{
			var occupant = _slots[i].Object;
			if ( _slots[i].IsFree || occupant == null || !occupant.IsValid() )
				continue;

			sum += occupant.SplatCount;
		}
		return sum;
	}
}
/// <summary>
/// Makes sure every GPU buffer used by the pipeline exists and is large enough.
/// Buffers that are too small are queued for deferred disposal and replaced by new, empty
/// buffers with headroom; buffers that are already large enough are left untouched.
/// </summary>
/// <param name="totalSplats">Number of splat positions the splat, vertex, index and sort buffers must hold.</param>
/// <param name="objectCount">Number of per-object entries the object data buffer must hold.</param>
private void EnsureBufferCapacity( int totalSplats, int objectCount )
{
	// Grow by 50% beyond what is required so small increases don't reallocate again.
	static int WithHeadroom( int required ) => (int)(required * 1.5);

	var structured = GpuBuffer.UsageFlags.Structured;
	var indirectArgs = GpuBuffer.UsageFlags.Structured | GpuBuffer.UsageFlags.IndirectDrawArguments;

	// Per-splat inputs (position, data, object id) plus the visible-index list.
	// Replacing them discards all uploaded splat data.
	if ( _splatCapacity < totalSplats )
	{
		// Must be sampled before the old buffer is handed to DeferDispose.
		// A first-time allocation has no old buffers in flight, so no cooldown is needed.
		bool replacingLiveBuffers = _positionBuffer.IsValid();

		DeferDispose( _positionBuffer );
		DeferDispose( _splatDataBuffer );
		DeferDispose( _objectIdBuffer );
		DeferDispose( _visibleIndicesBuffer );

		_splatCapacity = WithHeadroom( totalSplats );
		_positionBuffer = new GpuBuffer<SceneGaussianSplatObject.SplatPosition>( _splatCapacity, structured );
		_splatDataBuffer = new GpuBuffer<SceneGaussianSplatObject.SplatData>( _splatCapacity, structured );
		_objectIdBuffer = new GpuBuffer<uint>( _splatCapacity, structured );
		_visibleIndicesBuffer = new GpuBuffer<uint>( _splatCapacity, structured );

		// Skip compute for several frames so the GPU can drain in-flight work
		// that still references the old buffers.
		if ( replacingLiveBuffers )
			_skipComputeFrames = Math.Max( _skipComputeFrames, DisposalDelayFrames );
	}

	// Per-object transform + parameters: doubled on growth, never fewer than 8 entries.
	if ( _objectCapacity < objectCount )
	{
		DeferDispose( _objectDataBuffer );
		_objectCapacity = Math.Max( objectCount * 2, 8 );
		_objectDataBuffer = new GpuBuffer<ObjectDataGpu>( _objectCapacity, structured );
	}

	// Billboard output: 4 vertices per splat.
	int vertexDemand = totalSplats * 4;
	if ( _vertexCapacity < vertexDemand )
	{
		DeferDispose( _vertexBuffer );
		_vertexCapacity = WithHeadroom( vertexDemand );
		_vertexBuffer = new GpuBuffer<SplatVertex>( _vertexCapacity, GpuBuffer.UsageFlags.Vertex | structured );
	}

	// Index buffer: 6 indices per splat in a fixed quad pattern,
	// [i*4, i*4+1, i*4+2, i*4+2, i*4+1, i*4+3] for quad i. The pattern never changes
	// frame-to-frame, so it is generated once per allocation and the billboard shader
	// does not write indices.
	int indexDemand = totalSplats * 6;
	if ( _indexCapacity < indexDemand )
	{
		DeferDispose( _indexBuffer );
		_indexCapacity = WithHeadroom( indexDemand );
		_indexBuffer = new GpuBuffer<uint>( _indexCapacity, GpuBuffer.UsageFlags.Index | structured );
		GenerateStaticIndexBuffer();
	}

	// Sort key/value pair, one element per splat.
	if ( _sortCapacity < totalSplats )
	{
		DeferDispose( _sortKeysBuffer );
		DeferDispose( _sortValuesBuffer );
		_sortCapacity = WithHeadroom( totalSplats );
		_sortKeysBuffer = new GpuBuffer<uint>( _sortCapacity, structured );
		_sortValuesBuffer = new GpuBuffer<uint>( _sortCapacity, structured );
	}

	// Fixed-size buffers: created on first use and never resized.
	if ( !_histogramBuffer.IsValid() )
		_histogramBuffer = new GpuBuffer<uint>( 65536, structured );

	if ( !_visibleCountBuffer.IsValid() )
		_visibleCountBuffer = new GpuBuffer<uint>( 1, structured );

	if ( !_indirectDispatchBuffer.IsValid() )
		_indirectDispatchBuffer = new GpuBuffer<GpuBuffer.IndirectDispatchArguments>( 2, indirectArgs );

	if ( !_indirectDrawBuffer.IsValid() )
		_indirectDrawBuffer = new GpuBuffer<GpuBuffer.IndirectDrawIndexedArguments>( 1, indirectArgs );
}
/// <summary>
/// Clears the tag registry and refills it from the include/exclude tags of every valid, active volume.
/// Bit indices are handed out in encounter order, so assignments stay compact and follow runtime volume changes.
/// </summary>
/// <param name="volumes">Volumes to scan; invalid or inactive ones are skipped.</param>
private void RebuildTagRegistry( List<GaussianSplatVolume> volumes )
{
	_tagBitRegistry.Clear();

	for ( int i = 0; i < volumes.Count; i++ )
	{
		var volume = volumes[i];
		if ( volume.IsValid() && volume.Active )
		{
			RegisterTags( volume.IncludeTags );
			RegisterTags( volume.ExcludeTags );
		}
	}
}
/// <summary>
/// Assigns the next unused bit index to each tag in <paramref name="tags"/> that is not yet in the registry.
/// Stops as soon as the registry holds 32 tags; any remaining tags are left unregistered.
/// </summary>
/// <param name="tags">Tags to register; null is ignored.</param>
private void RegisterTags( TagSet tags )
{
	if ( tags is null )
		return;

	foreach ( var tag in tags.TryGetAll() )
	{
		// The next free bit equals the number of tags registered so far.
		int nextBit = _tagBitRegistry.Count;
		if ( nextBit >= 32 )
			return; // Hard limit: 32 unique tags fit in one uint mask

		_tagBitRegistry.TryAdd( tag, nextBit );
	}
}
/// <summary>
/// Converts a tag set into a bitmask using the current tag registry: bit N is set when the set
/// contains the tag registered at index N. Tags missing from the registry contribute nothing.
/// </summary>
/// <param name="tags">Tags to encode; null yields 0.</param>
/// <returns>The combined bitmask.</returns>
private uint ComputeTagBits( TagSet tags )
{
	uint mask = 0;
	if ( tags is null )
		return mask;

	foreach ( var tag in tags.TryGetAll() )
	{
		if ( !_tagBitRegistry.TryGetValue( tag, out int bit ) )
			continue;

		mask |= 1u << bit;
	}

	return mask;
}
private void RenderPipeline()
{
// Assign slots to any objects that have loaded data but don't have a slot yet.
// Also count live objects and detect lighting needs.
int liveSlotCount = 0;
bool anyLighting = false;
// Collect objects that need slot (re)assignment — can't modify dictionary during iteration
List<SceneGaussianSplatObject> needsSlot = null;
foreach ( var kvp in _objectSlotIndex )
{
var obj = kvp.Key;
if ( obj == null || !obj.IsValid() || obj.SplatCount == 0 )
continue;
int slotIdx = kvp.Value;
if ( slotIdx < 0 )
{
// No slot yet — needs assignment
needsSlot ??= new();
needsSlot.Add( obj );
}
else if ( slotIdx < _slots.Count && _slots[slotIdx].AllocatedCount != obj.SplatCount )
{
// SplatCount changed — free old slot and reassign
var oldSlot = _slots[slotIdx];
oldSlot.Object = null;
oldSlot.IsFree = true;
_slots[slotIdx] = oldSlot;
needsSlot ??= new();
needsSlot.Add( obj );
}
liveSlotCount++;
if ( obj.ReceiveLighting && obj.HasCovariance )
anyLighting = true;
}
// Assign slots outside the dictionary iteration
if ( needsSlot != null )
{
foreach ( var obj in needsSlot )
AssignSlot( obj );
}
if ( _allocatedSplatCount == 0 || liveSlotCount == 0 )
return;
EnsureBufferCapacity( _allocatedSplatCount, _slots.Count );
// Validate all buffers
if ( !_positionBuffer.IsValid() || !_splatDataBuffer.IsValid() || !_objectIdBuffer.IsValid() )
return;
if ( !_vertexBuffer.IsValid() || !_indexBuffer.IsValid() )
return;
if ( !_sortKeysBuffer.IsValid() || !_sortValuesBuffer.IsValid() || !_histogramBuffer.IsValid() )
return;
if ( !_visibleIndicesBuffer.IsValid() || !_visibleCountBuffer.IsValid() )
return;
if ( !_indirectDispatchBuffer.IsValid() || !_indirectDrawBuffer.IsValid() )
return;
if ( !_objectDataBuffer.IsValid() )
return;
// Reuse pooled array for per-object GPU data (one entry per slot, including dead)
int slotCount = _slots.Count;
if ( _objectDataScratch.Length < slotCount )
_objectDataScratch = new ObjectDataGpu[Math.Max( slotCount * 2, 8 )];
var objectDataArray = _objectDataScratch;
// Prune dead volumes, then split active volumes into visibility (Add/Subtract)
// and Color lists. Color volumes are stored in a separate GPU buffer so they
// don't consume bits in the tag-group bitmask, allowing unlimited Color volumes.
for ( int i = _volumes.Count - 1; i >= 0; i-- )
{
if ( !_volumes[i].IsValid() )
_volumes.RemoveAt( i );
}
int visibilityVolumeCount = 0;
int colorVolumeCount = 0;
int addVolumeCount = 0;
foreach ( var vol in _volumes )
{
if ( !vol.Active ) continue;
if ( vol.Mode == SplatVolumeMode.Color )
{
colorVolumeCount++;
}
else
{
visibilityVolumeCount++;
if ( vol.Mode == SplatVolumeMode.Add )
addVolumeCount++;
}
}
// Reuse pooled arrays for active volume lists to avoid per-frame allocation
if ( _activeVolumesScratch.Length < visibilityVolumeCount )
_activeVolumesScratch = new GaussianSplatVolume[Math.Max( visibilityVolumeCount * 2, 8 )];
if ( _activeColorVolumesScratch.Length < colorVolumeCount )
_activeColorVolumesScratch = new GaussianSplatVolume[Math.Max( colorVolumeCount * 2, 8 )];
var activeVolumes = visibilityVolumeCount > 0 ? _activeVolumesScratch : null;
var activeColorVolumes = colorVolumeCount > 0 ? _activeColorVolumesScratch : null;
if ( visibilityVolumeCount > 0 || colorVolumeCount > 0 )
{
int vi = 0, ci = 0;
foreach ( var vol in _volumes )
{
if ( !vol.Active ) continue;
if ( vol.Mode == SplatVolumeMode.Color )
activeColorVolumes[ci++] = vol;
else
activeVolumes[vi++] = vol;
}
}
// Rebuild the tag bit registry from all active volumes' include/exclude tags.
// This maps each unique tag string to a bit index (0..31) so the GPU can
// evaluate volume↔object matching without a per-volume bitmask limit.
RebuildTagRegistry( _volumes );
// Incremental splat data upload. Scratch arrays are maintained as a persistent
// mirror of GPU buffer contents. Only objects with DataChanged get their range
// patched — no full re-merge of all objects on every add/remove.
bool forceUpload = _layoutDirty || _skipComputeFrames > 0;
_layoutDirty = false;
// Only skip compute during buffer reallocation cooldown.
bool skipCompute = _skipComputeFrames > 0;
if ( _skipComputeFrames > 0 )
_skipComputeFrames--;
// Grow scratch arrays to match allocated buffer size.
// Array.Resize preserves existing data — critical because only objects with
// DataChanged get re-patched; existing objects' data must survive the resize.
if ( _positionScratch.Length < _allocatedSplatCount )
Array.Resize( ref _positionScratch, _allocatedSplatCount );
if ( _dataScratch.Length < _allocatedSplatCount )
Array.Resize( ref _dataScratch, _allocatedSplatCount );
if ( _objectIdScratch.Length < _allocatedSplatCount )
Array.Resize( ref _objectIdScratch, _allocatedSplatCount );
// Patch only objects whose data changed into their stable scratch positions
bool anyPatched = false;
for ( int i = 0; i < _slots.Count; i++ )
{
var slot = _slots[i];
if ( slot.IsFree || slot.Object == null || !slot.Object.IsValid() )
continue;
var obj = slot.Object;
if ( !obj.DataChanged && !forceUpload )
continue;
int count = obj.SplatCount;
int offset = slot.BufferOffset;
// Skip upload if this slot already has the correct data in scratch.
// Blood particles (and other pooled objects) all share the same cached arrays,
// so when a slot is reused by the same .sog file, the data is already there.
if ( obj.PositionData == slot.LastPositionData
&& obj.SplatDataArray == slot.LastSplatData
&& !forceUpload )
{
obj.DataChanged = false;
continue;
}
Array.Copy( obj.PositionData, 0, _positionScratch, offset, count );
Array.Copy( obj.SplatDataArray, 0, _dataScratch, offset, count );
Array.Fill( _objectIdScratch, (uint)i, offset, count );
// Track what's in scratch so future occupants can skip upload
slot.LastPositionData = obj.PositionData;
slot.LastSplatData = obj.SplatDataArray;
_slots[i] = slot;
obj.DataChanged = false;
anyPatched = true;
}
// Upload splat buffers only if something actually changed
if ( anyPatched || forceUpload )
{
_positionBuffer.SetData( _positionScratch.AsSpan( 0, _allocatedSplatCount ) );
_splatDataBuffer.SetData( _dataScratch.AsSpan( 0, _allocatedSplatCount ) );
_objectIdBuffer.SetData( _objectIdScratch.AsSpan( 0, _allocatedSplatCount ) );
}
// Upload chunk data only when splat data actually changed — chunk assignments
// are stable per-object and don't need rebuilding in steady state.
if ( anyPatched || forceUpload )
{
bool anyChunkedLOD = false;
_chunkDataScratch.Clear();
for ( int i = 0; i < _slots.Count; i++ )
{
var slot = _slots[i];
if ( slot.IsFree || slot.Object == null || !slot.Object.IsValid() )
continue;
var obj = slot.Object;
if ( obj.EnableChunkedLOD && obj.ChunkIds is not null && obj.ChunkCount > 0 )
{
anyChunkedLOD = true;
for ( int c = 0; c < obj.ChunkCount; c++ )
{
_chunkDataScratch.Add( new ChunkDataGpu
{
Center = obj.ChunkCenters[c],
ObjectId = (uint)i,
Flags = obj.ChunkExempt[c] ? 1u : 0u
} );
}
}
}
_totalChunkCount = _chunkDataScratch.Count;
if ( anyChunkedLOD && _totalChunkCount > 0 )
{
UploadChunkData( _allocatedSplatCount, _chunkDataScratch );
}
else
{
_totalChunkCount = 0;
}
}
// Build ObjectDataGpu for ALL slots (live get real data, dead get inactive flag).
// Indexed by slot index — matches the objectId stored per-splat in _objectIdScratch.
for ( int i = 0; i < _slots.Count; i++ )
{
var slot = _slots[i];
if ( slot.IsFree || slot.Object == null || !slot.Object.IsValid() || slot.Object.SplatCount == 0 )
{
// Dead/free slot: cull shader will early-exit on Flags bit 2
objectDataArray[i] = new ObjectDataGpu { Flags = 4u };
continue;
}
var obj = slot.Object;
var tx = obj.Transform;
var matrix = (Matrix.CreateScale( new Vector3( tx.Scale ) )
* Matrix.CreateRotation( tx.Rotation )
* Matrix.CreateTranslation( tx.Position )).Transpose();
uint tagBits = ComputeTagBits( obj.Tags );
var lodSamples = obj.LODCurveSamples;
objectDataArray[i] = new ObjectDataGpu
{
Transform = matrix,
SplatSize = obj.SplatSize,
Flags = (obj.HasCovariance ? 1u : 0u)
| (obj.ReceiveLighting && obj.HasCovariance ? 2u : 0u)
| (!obj.IsActive ? 4u : 0u)
| (obj.ReceiveShadows && !obj.ReceiveLighting ? 8u : 0u),
TagBits = tagBits,
TintColor = PackColorRGBA8( obj.Tint ),
LODMaxDistance = obj.LODMaxDistance,
ShadowTintColor = PackColorRGBA8( obj.ShadowTint ),
LODCurveLow = new Vector4( lodSamples[0], lodSamples[1], lodSamples[2], lodSamples[3] ),
LODCurveHigh = new Vector4( lodSamples[4], lodSamples[5], lodSamples[6], lodSamples[7] )
};
}
// Upload per-object data every frame (objects may move)
_objectDataBuffer.SetData( objectDataArray.AsSpan( 0, slotCount ) );
// Upload Add/Subtract volume data (volumes may move/change every frame).
// Each volume's GroupFilter is populated here from the tag registry so that
// the GPU can evaluate include/exclude tag matching per-volume.
if ( visibilityVolumeCount > 0 )
{
if ( visibilityVolumeCount > _volumeCapacity )
{
DeferDispose( _volumeBuffer );
_volumeCapacity = Math.Max( visibilityVolumeCount * 2, 8 );
_volumeBuffer = new GpuBuffer<GaussianSplatVolume.VolumeGpuData>( _volumeCapacity, GpuBuffer.UsageFlags.Structured );
}
if ( _volumeScratch.Length < visibilityVolumeCount )
_volumeScratch = new GaussianSplatVolume.VolumeGpuData[Math.Max( visibilityVolumeCount * 2, 8 )];
var volumeArray = _volumeScratch;
for ( int i = 0; i < visibilityVolumeCount; i++ )
{
var vol = activeVolumes[i];
var data = vol.BuildGpuData();
data.GroupFilter = new Vector4(
BitConverter.UInt32BitsToSingle( ComputeTagBits( vol.IncludeTags ) ),
BitConverter.UInt32BitsToSingle( ComputeTagBits( vol.ExcludeTags ) ),
0f, 0f
);
volumeArray[i] = data;
}
_volumeBuffer.SetData( volumeArray.AsSpan( 0, visibilityVolumeCount ) );
}
// Upload Color volume data into a separate buffer (unlimited count).
// Color volumes also carry GroupFilter for tag-based filtering.
if ( colorVolumeCount > 0 )
{
if ( colorVolumeCount > _colorVolumeCapacity )
{
DeferDispose( _colorVolumeBuffer );
_colorVolumeCapacity = Math.Max( colorVolumeCount * 2, 8 );
_colorVolumeBuffer = new GpuBuffer<GaussianSplatVolume.VolumeGpuData>( _colorVolumeCapacity, GpuBuffer.UsageFlags.Structured );
}
if ( _colorVolumeScratch.Length < colorVolumeCount )
_colorVolumeScratch = new GaussianSplatVolume.VolumeGpuData[Math.Max( colorVolumeCount * 2, 8 )];
var colorArray = _colorVolumeScratch;
for ( int i = 0; i < colorVolumeCount; i++ )
{
var vol = activeColorVolumes[i];
var data = vol.BuildGpuData();
data.GroupFilter = new Vector4(
BitConverter.UInt32BitsToSingle( ComputeTagBits( vol.IncludeTags ) ),
BitConverter.UInt32BitsToSingle( ComputeTagBits( vol.ExcludeTags ) ),
0f, 0f
);
colorArray[i] = data;
}
_colorVolumeBuffer.SetData( colorArray.AsSpan( 0, colorVolumeCount ) );
}
// Skip the compute pipeline when buffers were just (re)allocated or a large data
// re-upload occurred. Stacking massive CPU→GPU transfers with compute dispatches
// in the same frame exceeds the Vulkan fence timeout (250ms) and causes swap chain
// present stalls. The pipeline will resume once the cooldown expires.
if ( skipCompute )
return;
// Map video quality settings to shader attributes.
// High = current visual fidelity, lower settings trade quality for performance.
var (splatDensity, shadowQuality, lowPassFilter) = GetQualitySettings();
// Build shared render attributes for all compute dispatches
_sharedAttrs.Set( "SplatDensity", splatDensity );
_sharedAttrs.Set( "SplatShadowQuality", shadowQuality );
_sharedAttrs.Set( "SplatLowPassFilter", lowPassFilter );
_sharedAttrs.Set( "SplatPositions", (GpuBuffer)_positionBuffer );
_sharedAttrs.Set( "SplatObjectIds", (GpuBuffer)_objectIdBuffer );
_sharedAttrs.Set( "ObjectData", (GpuBuffer)_objectDataBuffer );
_sharedAttrs.Set( "VisibleIndices", (GpuBuffer)_visibleIndicesBuffer );
_sharedAttrs.Set( "VisibleCount", (GpuBuffer)_visibleCountBuffer );
_sharedAttrs.Set( "SplatCount", _allocatedSplatCount );
_sharedAttrs.Set( "TotalSplatCount", _allocatedSplatCount );
_sharedAttrs.Set( "SortKeys", (GpuBuffer)_sortKeysBuffer );
_sharedAttrs.Set( "Histogram", (GpuBuffer)_histogramBuffer );
_sharedAttrs.Set( "IndirectDispatch", (GpuBuffer)_indirectDispatchBuffer );
_sharedAttrs.Set( "IndirectDraw", (GpuBuffer)_indirectDrawBuffer );
_sharedAttrs.Set( "SortOutput", (GpuBuffer)_sortValuesBuffer );
_sharedAttrs.Set( "SortIndices", (GpuBuffer)_sortValuesBuffer );
_sharedAttrs.Set( "SplatDataBuffer", (GpuBuffer)_splatDataBuffer );
_sharedAttrs.Set( "VertexBuffer", (GpuBuffer)_vertexBuffer );
// Volume attributes — always set counts (shaders check VolumeCount == 0 to skip)
_sharedAttrs.Set( "VolumeCount", visibilityVolumeCount );
_sharedAttrs.Set( "AddVolumeCount", addVolumeCount );
if ( visibilityVolumeCount > 0 && _volumeBuffer.IsValid() )
_sharedAttrs.Set( "Volumes", (GpuBuffer)_volumeBuffer );
// Color volume attributes — separate buffer, no bitmask, unlimited count
_sharedAttrs.Set( "ColorVolumeCount", colorVolumeCount );
if ( colorVolumeCount > 0 && _colorVolumeBuffer.IsValid() )
_sharedAttrs.Set( "ColorVolumes", (GpuBuffer)_colorVolumeBuffer );
// Chunked LOD attributes
bool hasChunkedLOD = _totalChunkCount > 0 && _chunkIdBuffer.IsValid() && _chunkDataBuffer.IsValid() && _chunkLodBuffer.IsValid();
_sharedAttrs.Set( "ChunkCount", hasChunkedLOD ? _totalChunkCount : 0 );
if ( hasChunkedLOD )
{
_sharedAttrs.Set( "ChunkIds", (GpuBuffer)_chunkIdBuffer );
_sharedAttrs.Set( "ChunkData", (GpuBuffer)_chunkDataBuffer );
_sharedAttrs.Set( "ChunkLOD", (GpuBuffer)_chunkLodBuffer );
}
// --- Phase -1: Compute per-chunk LOD factors (if any objects use chunked LOD) ---
if ( hasChunkedLOD )
{
_chunkLodShader.DispatchWithAttributes( _sharedAttrs, _totalChunkCount, 1, 1 );
Graphics.ResourceBarrierTransition( (GpuBuffer)_chunkLodBuffer, ResourceState.UnorderedAccess, ResourceState.UnorderedAccess );
}
// --- Phase 0: Frustum cull all splats globally ---
// Clear VisibleCount via resource barrier + SetData. This is a 4-byte upload
// so the implicit sync cost is negligible.
Graphics.ResourceBarrierTransition( (GpuBuffer)_visibleCountBuffer, ResourceState.UnorderedAccess, ResourceState.CopyDestination );
_visibleCountBuffer.SetData( ZeroCount.AsSpan() );
Graphics.ResourceBarrierTransition( (GpuBuffer)_visibleCountBuffer, ResourceState.CopyDestination, ResourceState.UnorderedAccess );
_cullShader.DispatchWithAttributes( _sharedAttrs, _allocatedSplatCount, 1, 1 );
// UAV barriers
Graphics.ResourceBarrierTransition( (GpuBuffer)_visibleIndicesBuffer, ResourceState.UnorderedAccess, ResourceState.UnorderedAccess );
Graphics.ResourceBarrierTransition( (GpuBuffer)_visibleCountBuffer, ResourceState.UnorderedAccess, ResourceState.UnorderedAccess );
// --- Phase 0.5: Setup indirect args ---
_argsShader.DispatchWithAttributes( _sharedAttrs, 1, 1, 1 );
Graphics.ResourceBarrierTransition( (GpuBuffer)_indirectDispatchBuffer, ResourceState.UnorderedAccess, ResourceState.UnorderedAccess );
Graphics.ResourceBarrierTransition( (GpuBuffer)_indirectDrawBuffer, ResourceState.UnorderedAccess, ResourceState.UnorderedAccess );
// --- Counting sort (depth + prefix + scatter) ---
// Clear histogram via barrier + SetData. The barrier ensures the GPU is done reading
// the histogram from the previous frame's scatter pass before we overwrite it.
Graphics.ResourceBarrierTransition( (GpuBuffer)_histogramBuffer, ResourceState.UnorderedAccess, ResourceState.CopyDestination );
_histogramBuffer.SetData( ZeroHistogram.AsSpan() );
Graphics.ResourceBarrierTransition( (GpuBuffer)_histogramBuffer, ResourceState.CopyDestination, ResourceState.UnorderedAccess );
_depthShader.DispatchIndirectWithAttributes( _sharedAttrs, (GpuBuffer)_indirectDispatchBuffer, 0 );
Graphics.ResourceBarrierTransition( (GpuBuffer)_sortKeysBuffer, ResourceState.UnorderedAccess, ResourceState.UnorderedAccess );
Graphics.ResourceBarrierTransition( (GpuBuffer)_histogramBuffer, ResourceState.UnorderedAccess, ResourceState.UnorderedAccess );
_sortPrefixShader.DispatchWithAttributes( _sharedAttrs, 256, 1, 1 );
Graphics.ResourceBarrierTransition( (GpuBuffer)_histogramBuffer, ResourceState.UnorderedAccess, ResourceState.UnorderedAccess );
_sortScatterShader.DispatchIndirectWithAttributes( _sharedAttrs, (GpuBuffer)_indirectDispatchBuffer, 0 );
Graphics.ResourceBarrierTransition( (GpuBuffer)_sortValuesBuffer, ResourceState.UnorderedAccess, ResourceState.UnorderedAccess );
// --- Phase 4: Billboard compute ---
// Set up per-object lighting data (writes light cluster/shadow info to attributes)
if ( anyLighting )
{
Graphics.SetupLighting( this, _sharedAttrs );
}
// Dispatch for ALL splats — the shader writes degenerate zero-area quads for non-visible
// slots so the direct draw doesn't render stale/garbage vertices. Index buffer is static
// (pre-generated), so the shader only writes vertex data.
_computeShader.DispatchWithAttributes( _sharedAttrs, _allocatedSplatCount, 1, 1 );
// Transition vertex buffer for draw
Graphics.ResourceBarrierTransition( (GpuBuffer)_vertexBuffer, ResourceState.UnorderedAccess, ResourceState.VertexOrIndexBuffer );
// Draw all splats — non-visible slots have degenerate (zero-area) quads that the GPU
// culls for free at triangle setup. The pre-generated index buffer avoids per-frame writes.
Graphics.Draw( _vertexBuffer, (GpuBuffer)_indexBuffer, _defaultMaterial, 0, _allocatedSplatCount * 6, Attributes );
}
/// <summary>
/// Per-frame render entry point. Advances the disposal frame counter, frees the slots of
/// objects that are no longer valid, and then either runs the render pipeline or, when
/// nothing is left to draw, counts idle frames and releases GPU resources once the idle
/// threshold has been exceeded.
/// </summary>
public override void RenderSceneObject()
{
	base.RenderSceneObject();

	// Advance the counter that deferred disposals are aged against.
	// Nothing is disposed here: this method runs on the render thread, while
	// GpuBuffer.Dispose needs the main thread, so the real disposal is driven from
	// Register/Unregister/GetOrCreate instead.
	_frameCounter++;

	// Gather registered objects that became invalid without being unregistered
	// (eg. GC'd components). Collected first so the dictionary is not modified mid-enumeration.
	List<SceneGaussianSplatObject> orphaned = null;
	foreach ( var entry in _objectSlotIndex )
	{
		if ( entry.Key.IsValid() )
			continue;

		orphaned ??= new();
		orphaned.Add( entry.Key );
	}

	if ( orphaned is not null )
	{
		foreach ( var orphan in orphaned )
		{
			if ( !_objectSlotIndex.TryGetValue( orphan, out int index ) )
				continue;

			_objectSlotIndex.Remove( orphan );

			// Entries without a usable slot index have nothing to free.
			if ( index < 0 || index >= _slots.Count )
				continue;

			// Flag the slot as reusable in place; the buffer layout stays untouched.
			var freed = _slots[index];
			freed.Object = null;
			freed.IsFree = true;
			_slots[index] = freed;
		}
	}

	if ( HasLiveObjects() )
	{
		_framesWithoutObjects = 0;
		RenderPipeline();
		return;
	}

	// Idle: after a short grace period drop the GPU buffers so large transient allocations
	// are not carried around once objects were deleted/disabled in editor workflows.
	_framesWithoutObjects++;
	if ( _framesWithoutObjects > 30 )
		ReleaseGpuResources();

	return;

	// True when an occupied slot holds a valid object, or a registered, valid object
	// with splats is still awaiting its first slot assignment.
	bool HasLiveObjects()
	{
		foreach ( var slot in _slots )
		{
			if ( !slot.IsFree && slot.Object != null && slot.Object.IsValid() )
				return true;
		}

		foreach ( var entry in _objectSlotIndex )
		{
			if ( entry.Key.IsValid() && entry.Key.SplatCount > 0 )
				return true;
		}

		return false;
	}
}
/// <summary>
/// Tear the system down: unregister it from the per-SceneWorld singleton map, forget all
/// slots and registrations, queue every GPU buffer for disposal and then dispose the whole
/// queue immediately.
/// </summary>
public void DestroySystem()
{
	// Locate the map entry that points at this instance, then remove it once the
	// enumeration is over.
	SceneWorld ownerWorld = null;
	bool isRegistered = false;
	foreach ( var entry in _instances )
	{
		if ( entry.Value == this )
		{
			ownerWorld = entry.Key;
			isRegistered = true;
			break;
		}
	}

	if ( isRegistered )
	{
		_instances.Remove( ownerWorld );
	}

	// Drop CPU-side bookkeeping.
	_slots.Clear();
	_objectSlotIndex.Clear();
	_allocatedSplatCount = 0;

	// Queue all buffers, then flush the queue without waiting for the frame delay.
	ReleaseGpuResources();
	ForceFlushAllDisposals();
}
/// <summary>
/// Queue every GPU buffer for deferred disposal and reset all capacity counters, scratch
/// arrays and slot bookkeeping to their empty state. Calling it again is harmless because
/// already-cleared (null) buffers are ignored by <see cref="DeferDispose"/>.
/// </summary>
private void ReleaseGpuResources()
{
	// Core splat data
	Retire( ref _positionBuffer );
	Retire( ref _splatDataBuffer );
	Retire( ref _objectIdBuffer );
	Retire( ref _objectDataBuffer );

	// Draw geometry
	Retire( ref _vertexBuffer );
	Retire( ref _indexBuffer );

	// Sort / cull / indirect-argument working buffers
	Retire( ref _sortKeysBuffer );
	Retire( ref _sortValuesBuffer );
	Retire( ref _histogramBuffer );
	Retire( ref _visibleIndicesBuffer );
	Retire( ref _visibleCountBuffer );
	Retire( ref _indirectDispatchBuffer );
	Retire( ref _indirectDrawBuffer );

	// Volume buffers
	Retire( ref _volumeBuffer );
	Retire( ref _colorVolumeBuffer );

	// Chunked LOD buffers
	Retire( ref _chunkIdBuffer );
	Retire( ref _chunkDataBuffer );
	Retire( ref _chunkLodBuffer );

	// With no buffers left, every tracked capacity goes back to zero.
	_splatCapacity = 0;
	_vertexCapacity = 0;
	_indexCapacity = 0;
	_sortCapacity = 0;
	_objectCapacity = 0;
	_volumeCapacity = 0;
	_colorVolumeCapacity = 0;
	_chunkCapacity = 0;
	_chunkIdCapacity = 0;
	_totalChunkCount = 0;

	// Let go of the CPU-side staging arrays as well.
	_objectIdScratch = Array.Empty<uint>();
	_positionScratch = Array.Empty<SceneGaussianSplatObject.SplatPosition>();
	_dataScratch = Array.Empty<SceneGaussianSplatObject.SplatData>();

	// Forget slot layout and registrations.
	_slots.Clear();
	_objectSlotIndex.Clear();
	_allocatedSplatCount = 0;

	// Hands one buffer to the deferred-disposal queue and clears the field that held it.
	void Retire<T>( ref T buffer ) where T : IDisposable
	{
		DeferDispose( buffer );
		buffer = default;
	}
}
/// <summary>
/// Populate the index buffer with a fixed two-triangle pattern per quad slot:
/// [v, v+1, v+2, v+2, v+1, v+3] where v = quad * 4. The array is sized to
/// <c>_indexCapacity</c>; any trailing entries beyond the last whole quad stay zero.
/// Intended to run once when the index buffer is allocated/grown, so the billboard shader
/// never has to write indices per frame.
/// </summary>
private void GenerateStaticIndexBuffer()
{
	var indexData = new uint[_indexCapacity];
	int quads = _indexCapacity / 6;

	int cursor = 0;
	uint firstVertex = 0;
	for ( int quad = 0; quad < quads; quad++, firstVertex += 4 )
	{
		// First triangle
		indexData[cursor++] = firstVertex;
		indexData[cursor++] = firstVertex + 1;
		indexData[cursor++] = firstVertex + 2;

		// Second triangle, sharing the (v+2, v+1) edge
		indexData[cursor++] = firstVertex + 2;
		indexData[cursor++] = firstVertex + 1;
		indexData[cursor++] = firstVertex + 3;
	}

	_indexBuffer.SetData( indexData.AsSpan() );
}
/// <summary>
/// Upload per-splat chunk IDs and per-chunk data buffers for chunked LOD.
/// Grows the chunk ID buffer and the chunk data/LOD buffers when the requested sizes exceed
/// the current capacities, then rebuilds and uploads the merged chunk ID array followed by
/// the chunk data.
/// </summary>
/// <param name="totalSplats">Number of splat entries the merged chunk ID array must cover.</param>
/// <param name="chunkDataList">Per-chunk data to upload; a null list is treated as empty.</param>
private void UploadChunkData( int totalSplats, List<ChunkDataGpu> chunkDataList )
{
	// Grow chunk ID buffer if needed
	if ( totalSplats > _chunkIdCapacity )
	{
		DeferDispose( _chunkIdBuffer );
		_chunkIdCapacity = (int)(totalSplats * 1.5);
		_chunkIdBuffer = new GpuBuffer<uint>( _chunkIdCapacity, GpuBuffer.UsageFlags.Structured );
	}

	// Grow chunk data/LOD buffers if needed
	int chunkCount = chunkDataList?.Count ?? 0;
	if ( chunkCount > _chunkCapacity )
	{
		DeferDispose( _chunkDataBuffer );
		DeferDispose( _chunkLodBuffer );
		_chunkCapacity = Math.Max( (int)(chunkCount * 1.5), 64 );
		_chunkDataBuffer = new GpuBuffer<ChunkDataGpu>( _chunkCapacity, GpuBuffer.UsageFlags.Structured );
		_chunkLodBuffer = new GpuBuffer<float>( _chunkCapacity, GpuBuffer.UsageFlags.Structured );
	}

	// Only upload chunk IDs when there is something to upload. With zero splats and no
	// previous allocation the buffer has never been created, so calling SetData on it
	// would dereference a null buffer.
	if ( totalSplats > 0 )
	{
		// Build merged chunk ID array — each splat gets a global chunk index.
		// Objects without chunked LOD keep chunk ID = 0xFFFFFFFF (sentinel for "use per-object LOD").
		var chunkIds = new uint[totalSplats];
		Array.Fill( chunkIds, 0xFFFFFFFFu );

		int globalChunkOffset = 0;
		foreach ( var slot in _slots )
		{
			if ( slot.IsFree || slot.Object == null || !slot.Object.IsValid() || slot.Object.SplatCount == 0 )
				continue;

			var obj = slot.Object;
			if ( !obj.EnableChunkedLOD || obj.ChunkIds is null || obj.ChunkCount <= 0 )
				continue;

			// Read the per-object values once instead of on every loop iteration.
			var localIds = obj.ChunkIds;
			var splatCount = obj.SplatCount;
			int offset = slot.BufferOffset;

			for ( int i = 0; i < splatCount; i++ )
			{
				// Object-local chunk index → global chunk index.
				chunkIds[offset + i] = (uint)(globalChunkOffset + localIds[i]);
			}

			globalChunkOffset += obj.ChunkCount;
		}

		_chunkIdBuffer.SetData( chunkIds.AsSpan( 0, totalSplats ) );
	}

	// Same reasoning for the chunk data: an empty list means there is no data to send and
	// possibly no buffer to send it to.
	if ( chunkCount > 0 )
	{
		_chunkDataBuffer.SetData( chunkDataList.ToArray().AsSpan() );
	}
}
/// <summary>
/// Pack a Color into a uint matching the shader's UnpackColor layout (RGBA8):
/// red in bits 0-7, green in bits 8-15, blue in bits 16-23, alpha in bits 24-31.
/// Each channel is clamped to [0, 1] and rounded to the nearest 8-bit value.
/// </summary>
/// <param name="c">Color to pack.</param>
/// <returns>The four quantized channels combined into a single 32-bit value.</returns>
private static uint PackColorRGBA8( Color c )
{
	return Quantize( c.r ) | (Quantize( c.g ) << 8) | (Quantize( c.b ) << 16) | (Quantize( c.a ) << 24);

	// Round to nearest instead of truncating. Truncation biases every channel downwards:
	// a value that originated from an 8-bit source (b / 255f) can land just below b after
	// the multiply and would come back as b - 1, and everything in [254/255, 1) maps to 254.
	// The clamp keeps the result at or below 255.5, so the cast never exceeds 255.
	static uint Quantize( float channel ) => (uint)(channel.Clamp( 0f, 1f ) * 255f + 0.5f);
}
/// <summary>
/// Map the game's video quality settings to shader-consumable values.
/// At High settings this returns (1.0, 2, 1) — identical to pre-quality behavior.
/// Reads ConVars directly so it works in both standalone and editor contexts.
/// A ConVar whose value cannot be parsed as an integer is treated as its High default
/// rather than throwing.
/// </summary>
/// <returns>
/// SplatDensity (0.5, 0.75 or 1.0), ShadowQuality (0 = none, 1 = sun only, 2 = all lights)
/// and LowPassFilter (0 = off, 1 = on).
/// </returns>
private static (float SplatDensity, int ShadowQuality, int LowPassFilter) GetQualitySettings()
{
	// TextureQuality → splat density (fewer splats at lower detail)
	// r_max_anisotropy: 1=Low, 2=Medium, 4=High (set by quality_profiles.json)
	float density = ReadIntConVar( "r_max_anisotropy", 4 ) switch
	{
		<= 1 => 0.5f,
		<= 2 => 0.75f,
		_ => 1.0f
	};

	// ShadowQuality → per-splat shadow evaluation
	// r.shadows.quality: 1=Low, 2=Medium, 3=High (set by quality_profiles.json)
	int shadows = ReadIntConVar( "r.shadows.quality", 3 ) switch
	{
		<= 1 => 0, // Low: no per-splat shadows
		2 => 1, // Medium: sun shadow only
		_ => 2 // High: all light shadows
	};

	// AntiAliasing → low-pass filter (prevents sub-pixel flicker)
	// When MSAA is disabled or AA quality is minimal, skip the low-pass filter.
	// r_ao_quality serves as a proxy: 1=Low postprocess, meaning minimal AA too.
	int lowPass = ReadIntConVar( "r_ao_quality", 3 ) <= 1 ? 0 : 1;

	return (density, shadows, lowPass);

	// Reads a ConVar as an integer. Parsing is culture-invariant and non-throwing: ConVar
	// values are free-form strings, and this runs as part of rendering, so a malformed
	// value (eg. "high", "2.5", empty) must not raise FormatException every frame.
	static int ReadIntConVar( string name, int fallback )
	{
		var invariant = System.Globalization.CultureInfo.InvariantCulture;
		string raw = ConsoleSystem.GetValue( name, fallback.ToString( invariant ) );

		return int.TryParse( raw, System.Globalization.NumberStyles.Integer, invariant, out int parsed )
			? parsed
			: fallback;
	}
}
/// <summary>
/// Add a resource to the pending-disposal queue, stamped with the current frame counter.
/// The stamp lets <see cref="FlushPendingDisposals"/> hold the resource back until
/// <see cref="DisposalDelayFrames"/> frames have elapsed, giving the GPU time to finish any
/// in-flight commands that still reference it. Null resources are ignored.
/// </summary>
private void DeferDispose( IDisposable disposable )
{
	// Nothing to queue for buffers that were never allocated or were already released.
	if ( disposable is null )
		return;

	_pendingDisposals.Add( (disposable, _frameCounter) );
}
/// <summary>
/// Dispose the queued resources that have waited at least
/// <see cref="DisposalDelayFrames"/> frames and drop them from the queue; younger entries
/// are left in place for a later flush.
/// Must be called from the main thread. Called from Register/Unregister/GetOrCreate
/// and from <see cref="GaussianSplatRenderer.OnPreRender"/> to ensure regular flushing
/// even when no objects are registering/unregistering.
/// </summary>
internal void FlushPendingDisposals()
{
	// Scan from the back so RemoveAt never shifts an entry that has not been visited yet.
	// An empty queue simply skips the loop.
	int index = _pendingDisposals.Count;
	while ( --index >= 0 )
	{
		var (pending, enqueuedFrame) = _pendingDisposals[index];

		// Still inside the safety window — the GPU may be using it.
		if ( _frameCounter - enqueuedFrame < DisposalDelayFrames )
			continue;

		pending.Dispose();
		_pendingDisposals.RemoveAt( index );
	}
}
/// <summary>
/// Dispose every queued resource right away, regardless of how long it has been waiting,
/// and empty the queue.
/// Only safe during full system teardown when no GPU work is in-flight.
/// </summary>
private void ForceFlushAllDisposals()
{
	// Oldest entry first, same order in which the resources were queued.
	for ( int i = 0; i < _pendingDisposals.Count; i++ )
	{
		var (pending, _) = _pendingDisposals[i];
		pending.Dispose();
	}

	_pendingDisposals.Clear();
}
}