// FormantSynth.cs
#nullable enable
using System;

namespace SharpTalk
{

    public class FormantSynth
    {
        /// <summary>Upper clamp applied to bandwidth arguments by the coefficient calculators.</summary>
        public const int KMaxBandWidth = 1225;
        /// <summary>Shift count used by the fixed-point multiplies and filter updates in this class.</summary>
        public const int KPrecision = 13;
        /// <summary>Wrap length of the noise index; <c>KNoiseLen - 1</c> is used as the wrap mask.</summary>
        public const int KNoiseLen = 2048;
        /// <summary>Fixed-point 1.0 at <see cref="KPrecision"/> fractional bits (0x2000 is 2 to the 13th).</summary>
        public const int KOnePtOh = 0x2000;
        /// <summary>Gain folded into the parallel-branch input coefficients and into the breath gain.</summary>
        public const int KNoiseGain = 3200;
        /// <summary>Samples written per SynthesizeFrame call: KSampFrameLen / 2 loop passes, two samples each.</summary>
        public const int KSampFrameLen = 112;

        // Filter coefficients: one (A, B, C) triple per second-order section.
        // In the pole sections A scales the input sample, B the first delay tap
        // and C the second delay tap (see the filter updates in SynthesizeFrame).
        //   1..4     : cascade sections, run in that order
        //   4p, 5, 6 : parallel-branch sections; their A is pre-scaled by KNoiseGain
        //   NZ / NP  : nasal zero and nasal pole
        // The parallel branches 2 and 3 reuse Bcoeff2/Ccoeff2 and Bcoeff3/Ccoeff3.
        private short Acoeff1, Bcoeff1, Ccoeff1;
        private short Acoeff2, Bcoeff2, Ccoeff2;
        private short Acoeff3, Bcoeff3, Ccoeff3;
        private short Acoeff4, Bcoeff4, Ccoeff4;
        private short Acoeff4p, Bcoeff4p, Ccoeff4p;
        private short Acoeff5, Bcoeff5, Ccoeff5;
        private short Acoeff6, Bcoeff6, Ccoeff6;
        private short AcoeffNZ, BcoeffNZ, CcoeffNZ;
        private short AcoeffNP, BcoeffNP, CcoeffNP;

        // IIR delay taps: Na* is the most recent stored sample, Nb* the one before it.
        // For the pole sections these are previous outputs; for the nasal zero
        // (NaNZ/NbNZ) they are previous inputs.
        //   1..4     : cascade sections
        //   5, 6     : parallel branches 5 and 6
        //   2a..4a   : parallel branches 2..4
        private short Na1, Nb1;
        private short Na2, Nb2;
        private short Na3, Nb3;
        private short Na4, Nb4;
        private short Na5, Nb5;
        private short Na6, Nb6;
        private short Na2a, Nb2a;
        private short Na3a, Nb3a;
        private short Na4a, Nb4a;
        private short NaNZ, NbNZ;
        private short NaNP, NbNP;

        // Parallel bank input gains. amp2..amp6 are frame.A2..A6 shifted left by
        // (KPrecision - 5); ab is frame.AB multiplied by speechVolume. A value of
        // zero disables the corresponding branch for the frame.
        private short amp2, amp3, amp4, amp5, amp6, ab;

        // State
        // Glottal phase accumulator, kept to 24 bits; bits 16..23 index the 256-entry waveform.
        private int glotIndex;
        // Read position shared by the noise tables; wraps at KNoiseLen.
        private int noiseIndex;
        // Previous clamped output sample, used to interpolate the in-between output sample.
        private short lastnSamp;
        // Amplitude ramp accumulator with 16 fractional bits.
        private long curAmp_Full;
        // Voicing amplitude applied to the current sample.
        private short curAmp;
        // Memory of the high-frequency emphasis stage.
        private short lastSample;
        // Per-sample ramp increment: one eighth of the change in amplitude between frames.
        private long ampStep;
        // Amplitude target of the previous frame (Av shifted left by 16).
        private long lastAmp;

        // Glottal excitation
        // Phase increment per synthesized sample for the main oscillator (derived from frame.F0).
        private int glotInc;
        // Phase increment and phase accumulator of the second (chorus) oscillator.
        private int glotInc1;
        private int glotIndex1;
        // One-period waveforms for the main and chorus oscillators; filled by InvDFT.
        private short[] voiceWaveform = new short[256];
        private short[] voiceWaveform1 = new short[256];
        /// <summary>
        /// Pitch offset added to the frame's F0 for the second oscillator. When non-zero,
        /// the harmonic source averages both oscillators; zero disables the second one.
        /// </summary>
        public short VoiceChorus { get; set; }
        /// <summary>
        /// Voiced source selector. <see cref="KUseHarm"/> reads the InvDFT waveform; any
        /// other value reads <see cref="SampleWave"/>.
        /// </summary>
        public int GlotType { get; set; } = KUseHarm;
        /// <summary>
        /// Unsigned 8-bit samples (128 is subtracted on read) used as the voiced source
        /// when <see cref="GlotType"/> is not <see cref="KUseHarm"/>. Null yields a zero pulse.
        /// Indexed by bits 16..23 of <see cref="SampleIndex"/>, i.e. 0..255.
        /// </summary>
        public byte[]? SampleWave { get; set; }
        /// <summary>Phase increment added to <see cref="SampleIndex"/> per synthesized sample.</summary>
        public int SampleInc { get; set; }
        /// <summary>24-bit phase accumulator for <see cref="SampleWave"/>; updated by SynthesizeFrame.</summary>
        public int SampleIndex { get; set; }

        /// <summary>GlotType value: use the harmonic waveform built by InvDFT.</summary>
        public const int KUseHarm = 0;
        // NOTE(review): SynthesizeFrame treats every non-KUseHarm value the same way;
        // no difference between KUseSnd and KUseSyncSnd is visible in this class.
        public const int KUseSnd = 1;
        public const int KUseSyncSnd = 2;

        // Noise excitation
        // Unsigned 8-bit tables (128 is subtracted on read), indexed by noiseIndex.
        // noiseWave feeds the parallel bank; bandNoise is added to the cascade input
        // scaled by Af. hpNoise is not read anywhere in this class.
        private byte[] noiseWave = Tables.NoiseWave;
        private byte[] bandNoise = Tables.BandNoise;
        private byte[] hpNoise = Tables.HPNoise;

        // Gain
        // Per-frame voicing (Av) and band-noise (Af) amplitudes after scaling by speechVolume.
        private short Av, Af;
        // Gain applied to SampleWave pulses. NOTE(review): never assigned in this class,
        // so it stays 0 and the sampled source multiplies out to zero.
        private short wavesampleGain;
        // Gain applied to noiseWave before it enters the parallel bank; set by SetVoice.
        private short voiceNoiseGain;
        // Reverb settings. NOTE(review): none of these three is read in this class.
        private short reverbDepth;
        private int reverbDelay;
        private bool addReverb;
        // Enables the high-frequency emphasis stage in SynthesizeFrame.
        private bool hfEmph = true;

        // Multiplier applied to frame.Av, frame.Af and frame.AB.
        private short speechVolume = 256;
        // Copy of voiceNoiseGain recorded by SetVoice.
        private short setNoiseGain = 3200;
        // Offsets added to frame.F1..F3 before the cascade coefficients are computed.
        // NOTE(review): never assigned in this class.
        private short voiceF1Gain, voiceF2Gain, voiceF3Gain;
        // Offset added to frame.FNZ for the nasal zero. NOTE(review): never assigned in this class.
        private short nasalAmt;
        // Nasal pole pitch (HzToPitch of SetVoice's nasal_Base). A frame whose FNZ
        // equals this value bypasses the nasal zero/pole pair.
        private short fNP;
        // Nasal bandwidth, used for both the nasal pole and the nasal zero.
        private short bNP;
        // Breath gain: SetVoice's aGain as a percentage of KNoiseGain.
        private short breathGain;
        // Breath noise is injected only while (glotIndex >> 16) exceeds this value.
        private short breathCycle;
        // Noise table used for breath; set to Tables.BandNoise by the constructor.
        private byte[] breathWave;
        // NOTE(review): not read in this class; Calc_Pole_Coefficients has its own
        // parameter of the same name.
        private short voiceMinBW = 50;

        // Parallel F-bank params (from voiceData)
        // Pitch (HzToPitch output) and bandwidth of parallel sections 4, 5 and 6.
        private short f4_Par;
        private short bw4_Par;
        private short f5_Par;
        private short bw5_Par;
        private short f6_Par;
        private short bw6_Par;

        // Pitch and bandwidth of the fourth cascade section.
        private short voice_F4_Freq;
        private short voice_F4_BW;

        /// <summary>
        /// Creates a synthesizer with a 4096-entry delay buffer, the band noise table
        /// as the breath source, and the eight tap positions preset.
        /// </summary>
        public FormantSynth()
        {
            const int delayLength = 4096;
            maxRvbDelay = delayLength;
            delayBuffer = new short[delayLength];
            breathWave = Tables.BandNoise;

            // Preset tap positions, copied into the existing tap array in order.
            short[] presetTaps = { 404, 1058, 1362, 2318, 2909, 3723, 4030, 4096 };
            for (int tap = 0; tap < presetTaps.Length; tap++)
            {
                tapBuffer[tap] = presetTaps[tap];
            }
        }

        /// <summary>
        /// Loads the per-voice settings: noise and breath gains, the fourth cascade
        /// formant, the three parallel formants and the nasal pole. Frequencies are
        /// converted with <see cref="HzToPitch"/>; bandwidths are stored as given.
        /// The fixed filter coefficients are recomputed before returning.
        /// </summary>
        /// <param name="nGain">Noise gain as a percentage (100 maps to fixed-point 1.0).</param>
        /// <param name="bit16">When true the noise gain is further scaled by 0xCCCC / 65536.</param>
        /// <param name="f4_Freq">Frequency of the fourth cascade formant.</param>
        /// <param name="f4_BW">Bandwidth of the fourth cascade formant.</param>
        /// <param name="f4p_Freq">Frequency of parallel formant 4.</param>
        /// <param name="bw4p_BW">Bandwidth of parallel formant 4.</param>
        /// <param name="f5p_Freq">Frequency of parallel formant 5.</param>
        /// <param name="bw5p_BW">Bandwidth of parallel formant 5.</param>
        /// <param name="f6p_Freq">Frequency of parallel formant 6.</param>
        /// <param name="bw6p_BW">Bandwidth of parallel formant 6.</param>
        /// <param name="nasal_Base">Frequency of the nasal pole.</param>
        /// <param name="nasal_BW">Bandwidth shared by the nasal pole and zero.</param>
        /// <param name="aGain">Breath gain as a percentage of <see cref="KNoiseGain"/>.</param>
        /// <param name="aCycle">Glottal cycle position above which breath noise is injected.</param>
        public void SetVoice(short nGain, bool bit16, short f4_Freq, short f4_BW, short f4p_Freq, short bw4p_BW, short f5p_Freq, short bw5p_BW, short f6p_Freq, short bw6p_BW, short nasal_Base, short nasal_BW, short aGain = 0, short aCycle = 192)
        {
            // Breath source.
            int breathScaled = aGain * KNoiseGain;
            breathGain = (short)(breathScaled / 100);
            breathCycle = aCycle;

            // Noise gain: percentage to fixed point, with the optional extra scaling.
            short noiseGain = (short)MRatio(nGain, 100, KPrecision);
            if (bit16)
            {
                noiseGain = (short)MMul2(noiseGain, 0xCCCC, 16);
            }
            voiceNoiseGain = noiseGain;
            setNoiseGain = noiseGain;

            // Fourth cascade formant.
            voice_F4_Freq = HzToPitch(f4_Freq);
            voice_F4_BW = f4_BW;

            // Parallel formants 4 to 6.
            f4_Par = HzToPitch(f4p_Freq);
            bw4_Par = bw4p_BW;
            f5_Par = HzToPitch(f5p_Freq);
            bw5_Par = bw5p_BW;
            f6_Par = HzToPitch(f6p_Freq);
            bw6_Par = bw6p_BW;

            // Nasal pole.
            fNP = HzToPitch(nasal_Base);
            bNP = nasal_BW;

            InitFixedFormants();
        }

        /// <summary>
        /// Recomputes the coefficients that do not change from frame to frame: the
        /// fourth cascade formant, parallel formants 4 to 6 (whose input coefficient
        /// is additionally scaled by <see cref="KNoiseGain"/>) and the nasal pole.
        /// </summary>
        private void InitFixedFormants()
        {
            // Folds the noise gain into a parallel section's input coefficient.
            static short ApplyNoiseGain(short inputCoeff) => (short)MMul2(inputCoeff, KNoiseGain, KPrecision);

            // Cascade F4.
            Calc_Pole_Coefficients(out Acoeff4, out Bcoeff4, out Ccoeff4, voice_F4_Freq, voice_F4_BW);

            // Parallel F4.
            Calc_Pole_Coefficients(out Acoeff4p, out Bcoeff4p, out Ccoeff4p, f4_Par, bw4_Par);
            Acoeff4p = ApplyNoiseGain(Acoeff4p);

            // Parallel F5.
            Calc_Pole_Coefficients(out Acoeff5, out Bcoeff5, out Ccoeff5, f5_Par, bw5_Par);
            Acoeff5 = ApplyNoiseGain(Acoeff5);

            // Parallel F6.
            Calc_Pole_Coefficients(out Acoeff6, out Bcoeff6, out Ccoeff6, f6_Par, bw6_Par);
            Acoeff6 = ApplyNoiseGain(Acoeff6);

            // Nasal pole.
            Calc_Pole_Coefficients(out AcoeffNP, out BcoeffNP, out CcoeffNP, fNP, bNP);
        }

        // Reverb state
        // NOTE(review): these fields are initialised by the constructor, but no code
        // in this class reads them afterwards.
        // Number of entries in tapBuffer.
        private const int KNumOfTaps = 8;
        // Tap positions; the constructor fills them with values from 404 up to 4096.
        private short[] tapBuffer = new short[KNumOfTaps];
        // Delay line of maxRvbDelay entries, allocated by the constructor.
        private short[] delayBuffer;
        // Length of delayBuffer (4096).
        private int maxRvbDelay;
        // Presumably the current position in delayBuffer - TODO confirm.
        private int delay_Index;
        // Presumably the previous reverb output - TODO confirm.
        private long lastRevbSample;

        /// <summary>
        /// Fixed-point multiply: forms the 64-bit product of <paramref name="x"/> and
        /// <paramref name="y"/>, shifts it right arithmetically by <paramref name="s"/>
        /// bits and truncates the result to 32 bits.
        /// </summary>
        private static int MMul2(long x, long y, int s)
        {
            long product = x * y;
            long scaled = product >> s;
            return (int)scaled;
        }

        /// <summary>
        /// Fixed-point ratio: shifts <paramref name="x"/> left by <paramref name="s"/>
        /// bits, divides by <paramref name="y"/> and truncates the quotient to 32 bits.
        /// Throws <see cref="DivideByZeroException"/> when <paramref name="y"/> is zero.
        /// </summary>
        private static int MRatio(long x, long y, int s)
        {
            long numerator = x << s;
            long quotient = numerator / y;
            return (int)quotient;
        }

        /// <summary>
        /// Drops <paramref name="s"/> fractional bits from <paramref name="x"/> with an
        /// arithmetic right shift and truncates the result to 32 bits.
        /// </summary>
        private static int MUnScale(long x, int s)
        {
            long shifted = x >> s;
            return (int)shifted;
        }

        /// <summary>
        /// Shifts <paramref name="x"/> right arithmetically by <paramref name="s"/> bits
        /// and truncates the result to 16 bits. <paramref name="y"/> is accepted but not
        /// used: the divide is carried out purely by the shift.
        /// </summary>
        private static short MDiv(int x, int y, int s)
        {
            int shifted = x >> s;
            return (short)shifted;
        }

        /// <summary>
        /// Computes the three coefficients of a two-pole resonator section from a
        /// pitch value and a bandwidth, using the lookup tables in <c>Tables</c>.
        /// </summary>
        /// <param name="Acoeff">Input gain, chosen so that A + B + C equals <see cref="KOnePtOh"/>.</param>
        /// <param name="Bcoeff">First-tap coefficient: the bandwidth table entry scaled by the cosine table entry.</param>
        /// <param name="Ccoeff">Second-tap coefficient, read directly from the bandwidth table.</param>
        /// <param name="pitch">
        /// Centre pitch; clamped so that <c>pitch - 256</c> is a valid index into the cosine table.
        /// </param>
        /// <param name="bandWidth">
        /// Bandwidth; clamped to at most <see cref="KMaxBandWidth"/> and at least
        /// <paramref name="voiceMinBW"/>. The tables start at 50 and advance in steps of 5.
        /// </param>
        /// <param name="voiceMinBW">Lower bandwidth clamp. Values below 50 resolve to the first table entry.</param>
        public void Calc_Pole_Coefficients(out short Acoeff, out short Bcoeff, out short Ccoeff, short pitch, short bandWidth, int voiceMinBW = 50)
        {
            if (bandWidth > KMaxBandWidth) bandWidth = (short)KMaxBandWidth;
            if (bandWidth < voiceMinBW) bandWidth = (short)voiceMinBW;
            if (pitch < 256) pitch = 256;
            if (pitch >= 256 + Tables.CosTbl.Length) pitch = (short)(256 + Tables.CosTbl.Length - 1);

            int bwIndex = (bandWidth - 50) / 5;
            // A caller-supplied voiceMinBW below the table base of 50 used to produce a
            // negative index here; fall back to the narrowest tabulated bandwidth instead.
            if (bwIndex < 0) bwIndex = 0;

            Ccoeff = Tables.CcoeffTbl[bwIndex];
            short cosVal = Tables.CosTbl[pitch - 256];
            Bcoeff = (short)MMul2(Tables.BcoeffTbl[bwIndex], cosVal, KPrecision - 1);
            Acoeff = (short)(KOnePtOh - Bcoeff - Ccoeff);
        }

        /// <summary>
        /// Computes the three coefficients of a two-zero (anti-resonator) section. The
        /// table lookups are the same as in <see cref="Calc_Pole_Coefficients"/>, but B
        /// and C are negated before A is formed.
        /// </summary>
        /// <param name="Acoeff">Set to <see cref="KOnePtOh"/> plus the negated B and C.</param>
        /// <param name="Bcoeff">Negated first-tap coefficient.</param>
        /// <param name="Ccoeff">Negated second-tap coefficient.</param>
        /// <param name="pitch">
        /// Centre pitch; clamped so that <c>pitch - 256</c> is a valid index into the cosine table.
        /// </param>
        /// <param name="bandWidth">
        /// Bandwidth; clamped to the range 50 to <see cref="KMaxBandWidth"/>. The tables
        /// start at 50 and advance in steps of 5.
        /// </param>
        public void Calc_Zero_Coefficients(out short Acoeff, out short Bcoeff, out short Ccoeff, short pitch, short bandWidth)
        {
            // Bandwidth represented by table index 0.
            const short tableBaseBandWidth = 50;

            if (bandWidth > KMaxBandWidth) bandWidth = (short)KMaxBandWidth;
            // Without this lower clamp a bandwidth of 45 or less (including the default
            // of 0 before SetVoice has run) produced a negative table index.
            if (bandWidth < tableBaseBandWidth) bandWidth = tableBaseBandWidth;
            if (pitch < 256) pitch = 256;
            if (pitch >= 256 + Tables.CosTbl.Length) pitch = (short)(256 + Tables.CosTbl.Length - 1);

            int bwIndex = (bandWidth - tableBaseBandWidth) / 5;
            Ccoeff = Tables.CcoeffTbl[bwIndex];
            short cosVal = Tables.CosTbl[pitch - 256];
            Bcoeff = (short)MMul2(Tables.BcoeffTbl[bwIndex], cosVal, KPrecision - 1);

            // Flip the signs of both tap coefficients, then derive A from the flipped values.
            Bcoeff = (short)(-Bcoeff);
            Ccoeff = (short)(-Ccoeff);
            Acoeff = (short)(KOnePtOh + Bcoeff + Ccoeff);
        }

        /// <summary>
        /// Rebuilds the two 256-sample glottal waveforms by summing 48 harmonics read
        /// from the sine table. The second waveform is then rescaled so that its peak
        /// magnitude matches the peak of the first.
        /// </summary>
        /// <param name="vWave">
        /// Harmonic amplitudes for the main waveform (at least 48 entries), or null to
        /// silence both waveforms.
        /// </param>
        /// <param name="vWave1">
        /// Harmonic amplitudes for the chorus waveform (at least 48 entries), or null to
        /// silence both waveforms.
        /// </param>
        /// <param name="vGain">Overall gain; 200 corresponds to a factor of 1.0.</param>
        /// <exception cref="ArgumentException">A non-null spectrum has fewer than 48 entries.</exception>
        public void InvDFT(short[]? vWave, short[]? vWave1, short vGain)
        {
            const int harmonicCount = 48;

            if (vWave == null || vWave1 == null)
            {
                Array.Clear(voiceWaveform, 0, voiceWaveform.Length);
                Array.Clear(voiceWaveform1, 0, voiceWaveform1.Length);
                return;
            }

            // Reject short spectra before touching the waveforms, rather than failing
            // part-way through the accumulation below.
            if (vWave.Length < harmonicCount)
            {
                throw new ArgumentException("At least 48 harmonic amplitudes are required.", nameof(vWave));
            }
            if (vWave1.Length < harmonicCount)
            {
                throw new ArgumentException("At least 48 harmonic amplitudes are required.", nameof(vWave1));
            }

            // vGain / 200 with 16 fractional bits.
            int voiceWaveGain = MRatio(vGain, 200, 16);

            Array.Clear(voiceWaveform, 0, voiceWaveform.Length);
            Array.Clear(voiceWaveform1, 0, voiceWaveform1.Length);

            for (int i = 0; i < harmonicCount; i++)
            {
                short amp = (short)MMul2(vWave[i], voiceWaveGain, 16);
                short amp1 = (short)MMul2(vWave1[i], voiceWaveGain, 16);

                // Harmonic i steps through the sine table i entries per output sample,
                // wrapping at 256; harmonic 0 therefore reads entry 0 for every sample.
                int sIndex = 0;
                for (int j = 0; j < 256; j++)
                {
                    short sine = Tables.SineWave15[sIndex];
                    voiceWaveform[j] = (short)(voiceWaveform[j] + (short)MMul2(amp, sine, 16));
                    voiceWaveform1[j] = (short)(voiceWaveform1[j] + (short)MMul2(amp1, sine, 16));
                    sIndex = (sIndex + i) & 0xFF;
                }
            }

            // Peak magnitudes. The samples are widened to int first because
            // Math.Abs(short) throws OverflowException for -32768, a value the
            // wrapping 16-bit accumulation above can produce.
            int max = 0;
            int max1 = 0;
            for (int j = 0; j < 256; j++)
            {
                int magnitude = Math.Abs((int)voiceWaveform[j]);
                int magnitude1 = Math.Abs((int)voiceWaveform1[j]);
                if (magnitude > max) max = magnitude;
                if (magnitude1 > max1) max1 = magnitude1;
            }

            // Scale the chorus waveform by max / max1 (16 fractional bits).
            if (max1 > 0)
            {
                int max2 = MRatio(max, max1, 16);
                for (int j = 0; j < 256; j++)
                {
                    voiceWaveform1[j] = (short)MMul2(voiceWaveform1[j], max2, 16);
                }
            }
        }

        /// <summary>
        /// Synthesizes one frame and writes exactly <see cref="KSampFrameLen"/> samples
        /// to <paramref name="outputBuffer"/> starting at <paramref name="offset"/>.
        /// The loop runs KSampFrameLen / 2 times; each pass produces one filtered sample
        /// and writes it preceded by the midpoint between it and the previous sample.
        /// </summary>
        /// <param name="frame">Control parameters for this frame.</param>
        /// <param name="outputBuffer">Destination for the output samples.</param>
        /// <param name="offset">Index of the first sample to write.</param>
        /// <exception cref="ArgumentNullException"><paramref name="outputBuffer"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The buffer cannot hold <see cref="KSampFrameLen"/> samples starting at <paramref name="offset"/>.
        /// </exception>
        public void SynthesizeFrame(Frame frame, short[] outputBuffer, int offset)
        {
            // Validate up front: every loop pass writes two samples, so an undersized
            // buffer would otherwise fail mid-frame after the filter state had changed.
            if (outputBuffer == null) throw new ArgumentNullException(nameof(outputBuffer));
            if (offset < 0 || offset > outputBuffer.Length - KSampFrameLen)
            {
                throw new ArgumentOutOfRangeException(nameof(offset), "outputBuffer must have room for KSampFrameLen samples starting at offset.");
            }

            // Coming out of silence (no voicing and no band noise on the previous
            // frame): restart the oscillators and clear the cascade and nasal filter memory.
            if ((curAmp == 0) && (Af == 0))
            {
                glotIndex = 0;
                glotIndex1 = 0;
                Na1 = Nb1 = Na2 = Nb2 = Na3 = Nb3 = Na4 = Nb4 = 0;
                NaNP = NbNP = NaNZ = NbNZ = 0;
                lastAmp = 0;
            }

            // Cascade formants 1-3 change every frame; formant 4 is fixed by SetVoice.
            Calc_Pole_Coefficients(out Acoeff1, out Bcoeff1, out Ccoeff1, (short)(frame.F1 + voiceF1Gain), frame.Bw1);
            Calc_Pole_Coefficients(out Acoeff2, out Bcoeff2, out Ccoeff2, (short)(frame.F2 + voiceF2Gain), frame.Bw2);
            Calc_Pole_Coefficients(out Acoeff3, out Bcoeff3, out Ccoeff3, (short)(frame.F3 + voiceF3Gain), frame.Bw3);

            // Nasal zero/pole pair. It is bypassed when the frame's zero sits on the
            // nasal pole, and also when the zero's A coefficient is 0: the ratio below
            // would divide by zero in that case.
            bool noNasal;
            int nGain = 0;
            if (frame.FNZ != fNP)
            {
                Calc_Zero_Coefficients(out AcoeffNZ, out BcoeffNZ, out CcoeffNZ, (short)(frame.FNZ + nasalAmt), bNP);
                if (AcoeffNZ != 0)
                {
                    noNasal = false;
                    // Gain applied after the zero: pole A over zero A, 16 fractional bits.
                    nGain = MRatio(AcoeffNP, AcoeffNZ, 16);
                }
                else
                {
                    noNasal = true;
                }
            }
            else
            {
                noNasal = true;
            }

            // Frame amplitudes scaled by the speech volume. ampBank records whether any
            // noise-driven path is active this frame.
            bool ampBank = false;
            short rawAv = frame.Av;
            Av = (short)(rawAv * speechVolume);
            Af = (short)((frame.Af * speechVolume) << 2);
            ab = (short)(frame.AB * speechVolume);
            if (Af > 0 || ab > 0) ampBank = true;
            short totalBreathGain = (short)MMul2(breathGain, Av, KPrecision);

            // Parallel branches: an active branch gets its input coefficient scaled by
            // the branch amplitude; an inactive branch has its filter memory cleared.
            short Acoeff2q = 0, Acoeff3q = 0, Acoeff4q = 0, Acoeff5q = 0, Acoeff6q = 0;

            if (frame.A2 > 0) { amp2 = (short)(frame.A2 << (KPrecision - 5)); Acoeff2q = (short)MMul2(Acoeff2, amp2, KPrecision); ampBank = true; }
            else { amp2 = 0; Nb2a = 0; Na2a = 0; }
            if (frame.A3 > 0) { amp3 = (short)(frame.A3 << (KPrecision - 5)); Acoeff3q = (short)MMul2(Acoeff3, amp3, KPrecision); ampBank = true; }
            else { amp3 = 0; Nb3a = 0; Na3a = 0; }
            if (frame.A4 > 0) { amp4 = (short)(frame.A4 << (KPrecision - 5)); Acoeff4q = (short)MMul2(Acoeff4p, amp4, KPrecision); ampBank = true; }
            else { amp4 = 0; Nb4a = 0; Na4a = 0; }
            if (frame.A5 > 0) { amp5 = (short)(frame.A5 << (KPrecision - 5)); Acoeff5q = (short)MMul2(Acoeff5, amp5, KPrecision); ampBank = true; }
            else { amp5 = 0; Nb5 = 0; Na5 = 0; }
            if (frame.A6 > 0) { amp6 = (short)(frame.A6 << (KPrecision - 5)); Acoeff6q = (short)MMul2(Acoeff6, amp6, KPrecision); ampBank = true; }
            else { amp6 = 0; Nb6 = 0; Na6 = 0; }

            // Oscillator increments: the low byte of F0 selects the table entry and the
            // high byte sets how far that entry is shifted down.
            glotInc = Tables.TopOctave[frame.F0 & 0xFF] >> (3 - (frame.F0 >> 8));
            if (VoiceChorus != 0)
            {
                int curF0Pitch = frame.F0 + VoiceChorus;
                if (curF0Pitch < 0) curF0Pitch = 0;
                glotInc1 = Tables.TopOctave[curF0Pitch & 0xFF] >> (3 - (curF0Pitch >> 8));
            }

            // Ramp the voicing amplitude from the previous frame's target to this
            // frame's target over the first 8 loop passes.
            const int kAmpStepRes = 16;
            ampStep = (((long)Av << kAmpStepRes) - lastAmp) >> 3;
            curAmp_Full = lastAmp;
            lastAmp = ((long)Av << kAmpStepRes);
            int local_ampCtr = 0;

            for (int sampCtr = (KSampFrameLen / 2) - 1; sampCtr >= 0; --sampCtr)
            {
                if (local_ampCtr < 8) { curAmp_Full += ampStep; curAmp = (short)(curAmp_Full >> kAmpStepRes); local_ampCtr++; }
                else { curAmp = Av; }

                int sourceC = 0, SampV = 0, sourceP = 0, SampAB = 0, Samp2 = 0, Samp3 = 0, Samp4 = 0, Samp5 = 0, Samp6 = 0;

                if (curAmp > 0 || ampBank || totalBreathGain > 0)
                {
                    noiseIndex = (noiseIndex + 1) & (KNoiseLen - 1);
                    if (curAmp > 0)
                    {
                        // Voiced source: harmonic waveform (optionally averaged with
                        // the chorus oscillator) or the sampled waveform.
                        short vPulse;
                        if (GlotType == KUseHarm)
                        {
                            glotIndex = (glotInc + glotIndex) & 0xFFFFFF;
                            vPulse = voiceWaveform[glotIndex >> 16];
                            if (VoiceChorus != 0)
                            {
                                glotIndex1 = (glotInc1 + glotIndex1) & 0xFFFFFF;
                                vPulse = MDiv(vPulse + voiceWaveform1[glotIndex1 >> 16], 2, 1);
                            }
                        }
                        else
                        {
                            glotIndex = (glotInc + glotIndex) & 0xFFFFFF;
                            if (SampleWave != null)
                            {
                                SampleIndex = (SampleInc + SampleIndex) & 0xFFFFFF;
                                vPulse = (short)(SampleWave[SampleIndex >> 16] - 128);
                                vPulse = (short)MMul2(vPulse, wavesampleGain, KPrecision);
                            }
                            else vPulse = 0;
                        }
                        sourceC = MMul2(vPulse, curAmp, KPrecision);
                    }
                    else
                    {
                        // No voicing, but still advance glotIndex for breathCycle gating
                        if (totalBreathGain > 0) glotIndex = (glotInc + glotIndex) & 0xFFFFFF;
                        else { lastnSamp = 0; glotIndex = 0; glotIndex1 = 0; }
                        sourceC = 0;
                    }

                    // Breath (aspiration) source — injected when cycle position exceeds breathCycle
                    if (totalBreathGain > 0 && (glotIndex >> 16) > breathCycle)
                        sourceC += MMul2((short)(breathWave[noiseIndex] - 128), totalBreathGain, KPrecision - 2);

                    if (curAmp > 0 || Af > 0 || totalBreathGain > 0)
                    {
                        // Cascade path: add band noise, apply the optional nasal
                        // zero/pole pair, then run formants 1 to 4 in series.
                        sourceC += MMul2((short)(bandNoise[noiseIndex] - 128), Af, KPrecision);
                        if (noNasal) SampV = sourceC;
                        else
                        {
                            SampV = sourceC + MUnScale(((long)BcoeffNZ * NaNZ) + ((long)CcoeffNZ * NbNZ), KPrecision);
                            NbNZ = NaNZ; NaNZ = (short)sourceC;
                            SampV = MMul2(SampV, nGain, 16);
                            SampV = SampV + MUnScale(((long)BcoeffNP * NaNP) + ((long)CcoeffNP * NbNP), KPrecision);
                            NbNP = NaNP; NaNP = (short)SampV;
                        }

                        SampV = MUnScale(((long)Acoeff1 * SampV) + ((long)Bcoeff1 * Na1) + ((long)Ccoeff1 * Nb1), KPrecision);
                        Nb1 = Na1; Na1 = (short)SampV;
                        SampV = MUnScale(((long)Acoeff2 * SampV) + ((long)Bcoeff2 * Na2) + ((long)Ccoeff2 * Nb2), KPrecision);
                        Nb2 = Na2; Na2 = (short)SampV;
                        SampV = MUnScale(((long)Acoeff3 * SampV) + ((long)Bcoeff3 * Na3) + ((long)Ccoeff3 * Nb3), KPrecision);
                        Nb3 = Na3; Na3 = (short)SampV;
                        SampV = MUnScale(((long)Acoeff4 * SampV) + ((long)Bcoeff4 * Na4) + ((long)Ccoeff4 * Nb4), KPrecision);
                        Nb4 = Na4; Na4 = (short)SampV;
                    }

                    // Parallel path: one noise sample feeds the bypass and every active branch.
                    sourceP = MMul2((short)(noiseWave[noiseIndex] - 128), voiceNoiseGain, KPrecision);
                    if (ab > 0) SampAB = MMul2(sourceP, ab, KPrecision - 1);
                    if (amp2 > 0) { Samp2 = MUnScale(((long)Acoeff2q * sourceP) + ((long)Bcoeff2 * Na2a) + ((long)Ccoeff2 * Nb2a), KPrecision); Nb2a = Na2a; Na2a = (short)Samp2; }
                    if (amp3 > 0) { Samp3 = MUnScale(((long)Acoeff3q * sourceP) + ((long)Bcoeff3 * Na3a) + ((long)Ccoeff3 * Nb3a), KPrecision); Nb3a = Na3a; Na3a = (short)Samp3; }
                    if (amp4 > 0) { Samp4 = MUnScale(((long)Acoeff4q * sourceP) + ((long)Bcoeff4p * Na4a) + ((long)Ccoeff4p * Nb4a), KPrecision); Nb4a = Na4a; Na4a = (short)Samp4; }
                    if (amp5 > 0) { Samp5 = MUnScale(((long)Acoeff5q * sourceP) + ((long)Bcoeff5 * Na5) + ((long)Ccoeff5 * Nb5), KPrecision); Nb5 = Na5; Na5 = (short)Samp5; }
                    if (amp6 > 0) { Samp6 = MUnScale(((long)Acoeff6q * sourceP) + ((long)Bcoeff6 * Na6) + ((long)Ccoeff6 * Nb6), KPrecision); Nb6 = Na6; Na6 = (short)Samp6; }

                    // Mix the cascade output with the parallel branches, which are
                    // combined with alternating signs.
                    int nSamp = SampV + (SampAB - Samp3 + Samp4 - Samp5 + Samp6 - Samp2);
                    if (hfEmph)
                    {
                        // High-frequency emphasis: boost by a quarter, subtract three
                        // quarters of the previous boosted sample, add half of the current one.
                        nSamp += (nSamp >> 2);
                        int tSamp = nSamp - (lastSample - (lastSample >> 2));
                        lastSample = (short)nSamp;
                        nSamp = tSamp + (nSamp >> 1);
                    }

                    // Clamp, then emit the midpoint between the previous and current
                    // sample followed by the current sample, both shifted left by 2.
                    if (nSamp > 8191) nSamp = 8191; else if (nSamp < -8191) nSamp = -8191;
                    outputBuffer[offset++] = (short)((((nSamp - lastnSamp) >> 1) + lastnSamp) << 2);
                    outputBuffer[offset++] = (short)(nSamp << 2);
                    lastnSamp = (short)nSamp;
                }
                else
                {
                    // Nothing is active: write silence and keep the oscillators reset.
                    lastnSamp = 0; glotIndex = 0; glotIndex1 = 0;
                    outputBuffer[offset++] = 0; outputBuffer[offset++] = 0;
                }
            }
        }

        /// <summary>
        /// Converts a frequency to the pitch encoding used by this class: an octave
        /// number in the high byte plus a value from the log table for the position
        /// within that octave. Returns 0 for a frequency of zero or less.
        /// </summary>
        /// <param name="hz">Frequency to convert.</param>
        /// <returns>The encoded pitch.</returns>
        public static short HzToPitch(short hz)
        {
            if (hz <= 0)
            {
                return 0;
            }

            // Octave band k holds frequencies below 100 << k; band 6 takes everything
            // from 3200 upwards.
            int octave = 0;
            while (octave < 6 && hz >= (100 << octave))
            {
                octave++;
            }

            // Shift the frequency into the unshifted band (band 3, nominally 400..799).
            int shift = 3 - octave;
            int normalized = shift >= 0 ? hz << shift : hz >> -shift;

            // Map the offset above 400 to a log table index and clamp it to the table.
            const int ratioK = 2621;
            int tableIndex = ((normalized - 400) * ratioK) >> 11;
            int lastIndex = Tables.logOf2Tbl.Length - 1;
            if (tableIndex < 0)
            {
                tableIndex = 0;
            }
            if (tableIndex > lastIndex)
            {
                tableIndex = lastIndex;
            }

            return (short)(Tables.logOf2Tbl[tableIndex] + (octave << 8));
        }

        /// <summary>
        /// Converts an encoded pitch back to a frequency: bits 8..11 select the octave
        /// table entry, the low byte selects the exponent table entry, and their
        /// product is shifted right by 15.
        /// </summary>
        /// <param name="pitch">Encoded pitch.</param>
        /// <returns>The frequency, truncated to 16 bits.</returns>
        public static short PitchToHz(short pitch)
        {
            int octaveSlot = (pitch & 0xF00) >> 8;
            int fractionSlot = pitch & 0xFF;

            var octaveBase = Tables.OctFreqTbl[octaveSlot];
            var fractionScale = Tables.ExpOf2Tbl[fractionSlot];

            int freq = (octaveBase * fractionScale) >> 15;
            return (short)freq;
        }
    }

    /// <summary>
    /// Control parameters for one synthesis frame, as consumed by
    /// <see cref="FormantSynth.SynthesizeFrame"/>.
    /// </summary>
    public struct Frame
    {
        /// <summary>Voicing amplitude; multiplied by the speech volume and applied to the glottal pulse.</summary>
        public short Av;
        /// <summary>Band-noise amplitude; multiplied by the speech volume, shifted left by 2, and added to the cascade input.</summary>
        public short Af;
        /// <summary>Pitch of the voiced source: the low byte selects an increment table entry, the high byte its shift.</summary>
        public short F0;
        /// <summary>Pitch of cascade formant 1, passed to Calc_Pole_Coefficients together with <see cref="Bw1"/>.</summary>
        public short F1;
        /// <summary>Pitch of cascade formant 2, passed to Calc_Pole_Coefficients together with <see cref="Bw2"/>.</summary>
        public short F2;
        /// <summary>Pitch of cascade formant 3, passed to Calc_Pole_Coefficients together with <see cref="Bw3"/>.</summary>
        public short F3;
        /// <summary>Amplitude of parallel branch 2; zero or less disables the branch.</summary>
        public short A2;
        /// <summary>Amplitude of parallel branch 3; zero or less disables the branch.</summary>
        public short A3;
        /// <summary>Amplitude of parallel branch 4; zero or less disables the branch.</summary>
        public short A4;
        /// <summary>Amplitude of parallel branch 5; zero or less disables the branch.</summary>
        public short A5;
        /// <summary>Amplitude of parallel branch 6; zero or less disables the branch.</summary>
        public short A6;
        /// <summary>Pitch of the nasal zero; when equal to the voice's nasal pole pitch the nasal pair is bypassed.</summary>
        public short FNZ;
        /// <summary>Bypass amplitude; multiplied by the speech volume and applied to the unfiltered parallel noise.</summary>
        public short AB;
        /// <summary>Bandwidth of cascade formant 1.</summary>
        public short Bw1;
        /// <summary>Bandwidth of cascade formant 2.</summary>
        public short Bw2;
        /// <summary>Bandwidth of cascade formant 3.</summary>
        public short Bw3;
        // NOTE(review): PhonEdge and Marker are not read by FormantSynth; their meaning
        // is defined by code outside this file.
        public short PhonEdge;
        public long Marker;
    }
}  // namespace