// SharpTalkSpeaker.cs
using System;
using System.Threading.Tasks;
using SharpTalk;
namespace Sandbox;
/// <summary>
/// Component that turns text into speech with a SharpTalk <see cref="TtsEngine"/> and streams the
/// synthesized samples through a <see cref="SoundStream"/> parented to this GameObject.
/// Phoneme timing is reported through <see cref="OnPhoneme"/> while the sound plays.
/// </summary>
public sealed class SharpTalkSpeaker : Component
{
    /// <summary>Text spoken automatically right after the engine is initialized; ignored when blank.</summary>
    [Property] public string SpeakOnStart { get; set; } = "";

    /// <summary>Value copied into the voice's <c>Rate</c> field.</summary>
    [Property, Range( 40, 600 )] public int Rate { get; set; } = 200;

    /// <summary>Value copied into the voice's <c>PitchHz</c> field.</summary>
    [Property, Range( 40, 500 )] public int PitchHz { get; set; } = 122;

    /// <summary>Value copied into the voice's <c>VGain</c> field.</summary>
    [Property, Range( 0, 100 )] public int VoiceVolume { get; set; } = 80;

    /// <summary>Volume passed to <c>SoundStream.Play</c> when playback starts.</summary>
    [Property, Range( 0f, 2f )] public float AudioVolume { get; set; } = 1f;

    /// <summary>Fired on the main thread as each phoneme starts playing.</summary>
    public event Action<PhonemeEvent> OnPhoneme;

    TtsEngine _engine;
    SoundHandle _handle;
    bool _speaking;
    PhonemeEvent[] _phonemeEvents = Array.Empty<PhonemeEvent>();
    int _nextPhonemeIndex;

    // Bumped by every Stop() (and therefore by every Speak()). An in-flight Speak() compares the
    // value it captured against this field to detect that it was cancelled or replaced.
    int _speakGeneration;

    /// <summary>
    /// True from the start of a <see cref="Speak"/> call until all of its samples have been queued,
    /// or until <see cref="Stop"/> is called.
    /// </summary>
    public bool IsSpeaking => _speaking;

    /// <summary>Initializes the engine, logging (rather than propagating) any failure.</summary>
    protected override void OnStart()
    {
        try { InitEngine(); }
        catch ( Exception e ) { Log.Error( $"SharpTalkSpeaker: OnStart threw — {e}" ); }
    }

    /// <summary>Stops playback and cancels any in-flight <see cref="Speak"/> when the component goes away.</summary>
    protected override void OnDestroy()
    {
        Stop();
    }

    /// <summary>
    /// Loads the lexicon and symbol tables from the mounted file system, builds the engine with the
    /// baseline voice plus this component's settings, and speaks <see cref="SpeakOnStart"/> if set.
    /// Logs an error and leaves the engine unset when either data file is empty.
    /// </summary>
    void InitEngine()
    {
        var dict = FileSystem.Mounted.ReadAllBytes( "sharptalk/english_lex.bin" ).ToArray();
        var symbols = FileSystem.Mounted.ReadAllBytes( "sharptalk/symbols.bin" ).ToArray();
        if ( dict.Length == 0 ) { Log.Error( "SharpTalkSpeaker: english_lex.bin not found or empty" ); return; }
        if ( symbols.Length == 0 ) { Log.Error( "SharpTalkSpeaker: symbols.bin not found or empty" ); return; }

        var voice = VoiceData.BaselineVoice;
        voice.Rate = (short)Rate;
        voice.PitchHz = (short)PitchHz;
        voice.VGain = (short)VoiceVolume;

        _engine = new TtsEngine( voice, dict, symbols );
        Log.Info( $"SharpTalkSpeaker: engine initialized (dict={dict.Length}b, symbols={symbols.Length}b)" );

        if ( !string.IsNullOrWhiteSpace( SpeakOnStart ) )
            _ = Speak( SpeakOnStart );
    }

    /// <summary>
    /// Stops whatever is currently playing, synthesizes <paramref name="text"/> on a worker thread and
    /// streams the result. The returned task completes once every sample has been queued, or earlier
    /// if the utterance is cancelled by <see cref="Stop"/> or replaced by another <see cref="Speak"/>.
    /// </summary>
    /// <param name="text">Text to speak. Null or blank text only stops the current utterance.</param>
    public async Task Speak( string text )
    {
        if ( _engine is null ) { Log.Error( "SharpTalkSpeaker: Speak() called but engine is null — was OnStart run?" ); return; }

        Stop();
        if ( string.IsNullOrWhiteSpace( text ) ) return;

        // Captured after Stop() so that any later Stop()/Speak() makes this call stale.
        int generation = _speakGeneration;
        _speaking = true;
        Log.Info( $"SharpTalkSpeaker: synthesizing \"{text}\"" );

        try
        {
            short[] samples;
            PhonemeEvent[] events;
            try
            {
                (samples, events) = await GameTask.RunInThreadAsync( () => _engine.SpeakWithEvents( text ) );
            }
            catch ( Exception e )
            {
                Log.Error( $"SharpTalkSpeaker: synthesis threw — {e}" );
                return;
            }

            // Stopped or replaced while synthesizing: discard the result instead of playing it.
            if ( generation != _speakGeneration ) return;

            samples ??= Array.Empty<short>();
            events ??= Array.Empty<PhonemeEvent>();

            _phonemeEvents = events;
            _nextPhonemeIndex = 0;

            // Tracked as int: |short.MinValue| is 32768, which does not fit in a short.
            int peak = 0;
            foreach ( short s in samples ) peak = Math.Max( peak, Math.Abs( (int)s ) );
            Log.Info( $"SharpTalkSpeaker: got {samples.Length} samples, {events.Length} phoneme events, peak={peak}, streaming…" );

            await StreamSamples( samples, generation );
        }
        finally
        {
            // Only the current utterance may clear the flag; a stale call must not
            // mark a newer utterance as finished.
            if ( generation == _speakGeneration ) _speaking = false;
        }
    }

    /// <summary>
    /// Starts a sound stream on this GameObject and feeds it <paramref name="samples"/> as buffer
    /// space becomes available. Returns early, without logging completion, once
    /// <paramref name="generation"/> no longer matches the current utterance.
    /// </summary>
    async Task StreamSamples( short[] samples, int generation )
    {
        using var stream = new SoundStream( TtsEngine.SampleRate, 1 );
        _handle = stream.Play( AudioVolume, 1f );
        _handle.SpacialBlend = 0f;
        _handle.DistanceAttenuation = false;
        _handle.Occlusion = false;
        _handle.SetParent( GameObject );
        _handle.FollowParent = true;
        _handle.Update();

        int offset = 0;
        while ( offset < samples.Length )
        {
            // Without this check a stopped utterance would keep writing, or wait
            // indefinitely for buffer space on a stream that is no longer playing.
            if ( generation != _speakGeneration ) return;

            int space = stream.MaxWriteSampleCount - stream.QueuedSampleCount;
            if ( space <= 0 )
            {
                await GameTask.Delay( 5 );
                continue;
            }

            int count = Math.Min( space, samples.Length - offset );
            stream.WriteData( samples.AsSpan( offset, count ) );
            offset += count;
        }

        stream.Close();
        Log.Info( "SharpTalkSpeaker: done" );
    }

    /// <summary>Raises <see cref="OnPhoneme"/> for every phoneme whose start time the playing sound has reached.</summary>
    protected override void OnUpdate()
    {
        if ( _nextPhonemeIndex >= _phonemeEvents.Length ) return;
        if ( _handle is null || !_handle.IsValid ) return;

        float t = _handle.ElapsedTime;
        while ( _nextPhonemeIndex < _phonemeEvents.Length && _phonemeEvents[_nextPhonemeIndex].TimeSeconds <= t )
        {
            // Advance the cursor outside the ?. call: a null-conditional invoke skips evaluating its
            // arguments, so an inline ++ would never run (and this loop never end) with no subscribers.
            var phoneme = _phonemeEvents[_nextPhonemeIndex++];
            OnPhoneme?.Invoke( phoneme );
        }
    }

    /// <summary>
    /// Stops the playing sound immediately, cancels any in-flight <see cref="Speak"/> and discards
    /// phoneme events that have not fired yet.
    /// </summary>
    public void Stop()
    {
        _speakGeneration++;

        if ( _handle != null && _handle.IsValid && _handle.IsPlaying )
            _handle.Stop( 0f );

        _phonemeEvents = Array.Empty<PhonemeEvent>();
        _nextPhonemeIndex = 0;
        _speaking = false;
    }

    /// <summary>
    /// Switches the engine to the whisper or baseline voice, re-applying this component's
    /// <see cref="Rate"/>, <see cref="PitchHz"/> and <see cref="VoiceVolume"/>. No-op without an engine.
    /// </summary>
    /// <param name="whisper">True selects <c>VoiceData.WhisperVoice</c>; false selects <c>VoiceData.BaselineVoice</c>.</param>
    public void SetVoice( bool whisper )
    {
        if ( _engine is null ) return;

        var voice = whisper ? VoiceData.WhisperVoice : VoiceData.BaselineVoice;
        voice.Rate = (short)Rate;
        voice.PitchHz = (short)PitchHz;
        voice.VGain = (short)VoiceVolume;
        _engine.Voice = voice;
    }

    /// <summary>
    /// Copies the current <see cref="Rate"/>, <see cref="PitchHz"/> and <see cref="VoiceVolume"/> onto
    /// the engine's existing voice, leaving its other fields untouched. No-op without an engine.
    /// </summary>
    public void ApplyVoice()
    {
        if ( _engine is null ) return;

        var v = _engine.Voice;
        v.Rate = (short)Rate;
        v.PitchHz = (short)PitchHz;
        v.VGain = (short)VoiceVolume;
        _engine.Voice = v;
    }
}