AudioProcessor.Phonemes.cs
#nullable enable
using System;

namespace SharpTalk
{

    public sealed partial class AudioProcessor
    {
        // LoadPhonemes

        private void LoadPhonemes(PhonemeToken[] tokens)
        {
            // Slot 0 is the initial SIL (already filled by ClearBuffers)
            _phonBuf1InIndex = 1;

            foreach (var tok in tokens)
            {
                if (_phonBuf1InIndex >= kPhonBuf_Red_Zone) break;
                _phonBuf1[_phonBuf1InIndex] = tok.Phon;
                _phonCtrlBuf1[_phonBuf1InIndex] = tok.Ctrl;
                _userPitchBuf1[_phonBuf1InIndex] = tok.UserPitch;
                _userDurBuf1[_phonBuf1InIndex] = tok.UserDur == 0 ? (short)kDur_One : tok.UserDur;
                _userNoteBuf1[_phonBuf1InIndex] = tok.UserNote;
                _userRateBuf1[_phonBuf1InIndex] = tok.UserRate;
                if ((tok.Ctrl & kSingingDuration) != 0) _singing = true;
                _phonBuf1InIndex++;
            }

            // Add trailing boundary SIL, but only if the last phoneme isn't already a
            // terminal SIL (e.g. sentence-final comma from FrontEnd). If one is already
            // there, upgrade its boundary type to match the sentence-ending punctuation.
            if (_phonBuf1InIndex < kPhonBuf_Red_Zone)
            {
                int bndType = _endPunctuation switch
                {
                    _Period_ => kBND_Decl,
                    _Quest_ => kBND_Quest,
                    _Exclam_ => kBND_Emph,
                    _ => kBND_Pause,
                };
                int lastIdx = _phonBuf1InIndex - 1;
                bool lastIsSilBoundary = lastIdx >= 1 &&
                    _phonBuf1[lastIdx] == _SIL_ &&
                    (_phonCtrlBuf1[lastIdx] & kTerm_Bound) != 0;

                if (lastIsSilBoundary)
                {
                    // Replace the existing boundary type with the sentence-final type
                    _phonCtrlBuf1[lastIdx] = (_phonCtrlBuf1[lastIdx] & ~kSilenceTypeField)
                        | ((long)bndType << kSilenceTypeShift);
                }
                else
                {
                    _phonCtrlBuf1[_phonBuf1InIndex] |= kTerm_Bound;
                    _phonCtrlBuf1[_phonBuf1InIndex] |= ((long)bndType << kSilenceTypeShift);
                    // _phonBuf1[index] is already _SIL_ from ClearBuffers
                    _phonBuf1InIndex++;
                }
            }

            // Auto-compute kWord_Initial_Consonant for each word.
            // Any boundary bit (kWord_Start, kTerm_Bound, kVerb_Start, kPrep_Start) resets
            // wordInitial=true. kWord_Start is on the first real phoneme of a word, so we
            // must NOT skip it — it may itself be a word-initial consonant. SIL phonemes
            // (sentence boundaries with kTerm_Bound) are skipped since they're not consonants.
            bool wordInitial = true;
            for (int i = 1; i < _phonBuf1InIndex; i++)
            {
                long ctrl = _phonCtrlBuf1[i];
                if ((ctrl & kBoundryTypeField) != 0) wordInitial = true;
                if (_phonBuf1[i] == _SIL_) continue;
                uint flags = GetPhonFlags1(i);
                if (wordInitial)
                {
                    if ((flags & kVowelF) != 0)
                        wordInitial = false;
                    else
                        _phonCtrlBuf1[i] |= kWord_Initial_Consonant;
                }
            }
        }

        // Flag_PhonBuf_1

        private void FlagPhonBuf1()
        {
            _isCompoundNoun = false;
            for (_scanIndex = 0; _scanIndex < _phonBuf1InIndex; _scanIndex++)
            {
                long ctrl = _phonCtrlBuf1[_scanIndex];
                if ((ctrl & kCompoundNoun) != 0) _isCompoundNoun = true;
                else if ((ctrl & kBoundryTypeField) != 0) _isCompoundNoun = false;

                uint phonFlags = GetPhonFlags1(_scanIndex);
                if ((phonFlags & kVowelF) != 0)
                    MarkSyllable();

                MarkBoundry();
            }
            MarkSyllableStart();
        }

        private void MarkSyllable()
        {
            long order = 0;

            // scan backward for another vowel in same word
            for (int idx = _scanIndex - 1; idx > 0; idx--)
            {
                long syl = _phonCtrlBuf1[idx] & kSyllableTypeField;
                if (syl >= kWord_End) break;
                uint flags = GetPhonFlags1(idx);
                if ((flags & kVowelF) != 0) { order = kLast_Syllable_In_Word; break; }
            }

            // scan forward for another vowel in same word
            for (int idx = _scanIndex + 1; idx < _phonBuf1InIndex; idx++)
            {
                long bnd = _phonCtrlBuf1[idx] & kBoundryTypeField;
                uint flags = GetPhonFlags1(idx);
                if (bnd != 0)
                {
                    _phonCtrlBuf1[_scanIndex] |= order;
                    break;
                }
                if ((flags & kVowelF) != 0)
                {
                    if (order == kLast_Syllable_In_Word)
                        order = kMid_Syllable_In_Word;
                    else if (order == 0)
                        order = kFirst_Syllable_In_Word;
                }
            }
        }

        private void MarkBoundry()
        {
            for (int idx = _scanIndex + 1; idx < _phonBuf1InIndex; idx++)
            {
                long bnd = _phonCtrlBuf1[idx] & kBoundryTypeField;
                uint flags = GetPhonFlags1(idx);
                if (bnd != 0)
                {
                    long boundType = 0;
                    if ((bnd & kTerm_Bound) != 0) boundType |= kTerm_End | kWord_End;
                    if ((bnd & kPrep_Start) != 0) boundType |= kPrep_End | kWord_End;
                    if ((bnd & kVerb_Start) != 0) boundType |= kVerb_End | kWord_End;
                    if ((bnd & kWord_Start) != 0) boundType |= kWord_End;
                    _phonCtrlBuf1[_scanIndex] |= boundType;
                    break;
                }
                if ((flags & kVowelF) != 0) break;
            }
        }

        private void MarkSyllableStart()
        {
            int syllIdx = 0;
            int idx = 0;
            while (idx < _phonBuf1InIndex)
            {
                while (idx < _phonBuf1InIndex && _phonBuf1[idx] == _SIL_)
                { syllIdx++; idx++; }
                if (idx >= _phonBuf1InIndex) break;

                uint flags = GetPhonFlags1(idx);
                if ((flags & kVowelF) != 0)
                {
                    _phonCtrlBuf1[syllIdx] |= kSyllable_Start;
                    long syllOrder = _phonCtrlBuf1[idx] & kSyllableOrderField;
                    if (syllOrder == 0 || syllOrder == kLast_Syllable_In_Word)
                    {
                        idx = FindNextWordBound(idx);
                        syllIdx = idx;
                    }
                    else
                    {
                        // scan forward to next vowel counting consonants
                        int dist = -1;
                        int startIdx = idx;
                        do
                        {
                            idx++;
                            dist++;
                            if (idx >= _phonBuf1InIndex) goto SYLL_DONE;
                        } while ((GetPhonFlags1(idx) & kVowelF) == 0);

                        if (dist == 0)
                        {
                            syllIdx = idx;
                        }
                        else if (dist == 1)
                        {
                            idx--; syllIdx = idx;
                        }
                        else if (dist == 2)
                        {
                            short p2 = _phonBuf1[idx - 1];
                            short p1 = _phonBuf1[idx - 2];
                            if (IfConsonantCluster(p1, p2)) idx -= 2;
                            else idx--;
                            syllIdx = idx;
                        }
                        else if (dist == 3)
                        {
                            short p2 = _phonBuf1[idx - 1];
                            short p1 = _phonBuf1[idx - 2];
                            if (IfConsonantCluster(p1, p2))
                            {
                                if (_phonBuf1[idx - 3] == _s_) idx -= 3;
                                else idx -= 2;
                            }
                            else idx--;
                            syllIdx = idx;
                        }
                        else
                        {
                            short p1 = _phonBuf1[idx - dist];
                            short p2 = _phonBuf1[idx - dist + 1];
                            if (IfConsonantCluster(p1, p2)) idx -= dist - 2;
                            else idx -= dist >> 1;
                            syllIdx = idx;
                        }
                    }
                }
                else
                    idx++;
            }
        SYLL_DONE:;
        }

        private int FindNextWordBound(int index)
        {
            for (int i = index + 1; i < _phonBuf1InIndex; i++)
            {
                if ((_phonCtrlBuf1[i] & (kBoundryTypeField | kWord_Start)) != 0)
                    return i;
            }
            return _phonBuf1InIndex;
        }

        private static bool IfConsonantCluster(short c1, short c2) => (c1, c2) switch
        {
            (_f_, _r_) or (_f_, _l_) => true,
            (_v_, _r_) or (_v_, _l_) => true,
            (_TH_, _r_) or (_TH_, _w_) => true,
            (_s_, _w_) or (_s_, _l_) or (_s_, _p_) or (_s_, _t_) or (_s_, _k_)
                or (_s_, _m_) or (_s_, _n_) or (_s_, _f_) => true,
            (_SH_, _w_) or (_SH_, _l_) or (_SH_, _p_) or (_SH_, _t_)
                or (_SH_, _r_) or (_SH_, _m_) or (_SH_, _n_) => true,
            (_p_, _r_) or (_p_, _l_) => true,
            (_b_, _r_) or (_b_, _l_) => true,
            (_t_, _r_) or (_t_, _w_) => true,
            (_d_, _r_) or (_d_, _w_) => true,
            (_k_, _r_) or (_k_, _l_) or (_k_, _w_) => true,
            (_g_, _r_) or (_g_, _l_) or (_g_, _w_) => true,
            _ => false,
        };

        // Fill_Phon_Buf_2

        private void FillPhonBuf2()
        {
            _phonBuf2InIndex = 0;
            short lastStoredPhon = _SIL_;
            short lastUserPitch = 0;

            for (int outIdx = 0; outIdx < _phonBuf1InIndex; outIdx++)
            {
                short curPhon = _phonBuf1[outIdx];
                long curCtrl = _phonCtrlBuf1[outIdx];
                uint curFlags = Tables.PhonFlags2[curPhon];

                // next
                short nextPhon, next2Phon, next3Phon;
                long nextCtrl, next2Ctrl;
                if (outIdx < _phonBuf1InIndex - 1)
                { nextPhon = _phonBuf1[outIdx + 1]; nextCtrl = _phonCtrlBuf1[outIdx + 1]; }
                else { nextPhon = _SIL_; nextCtrl = 0; }
                if (outIdx < _phonBuf1InIndex - 2)
                { next2Phon = _phonBuf1[outIdx + 2]; next2Ctrl = _phonCtrlBuf1[outIdx + 2]; }
                else { next2Phon = _SIL_; next2Ctrl = 0; }
                next3Phon = outIdx < _phonBuf1InIndex - 3 ? _phonBuf1[outIdx + 3] : _SIL_;

                uint nextFlags = Tables.PhonFlags2[nextPhon];
                uint next2Flags = Tables.PhonFlags2[next2Phon];

                // prev
                short prevPhon, prev2Phon, prev3Phon;
                long prevCtrl;
                if (outIdx > 0) { prevPhon = _phonBuf1[outIdx - 1]; prevCtrl = _phonCtrlBuf1[outIdx - 1]; }
                else { prevPhon = _SIL_; prevCtrl = 0; }
                prev2Phon = outIdx > 1 ? _phonBuf1[outIdx - 2] : _SIL_;
                prev3Phon = outIdx > 2 ? _phonBuf1[outIdx - 3] : _SIL_;

                uint prevFlags = Tables.PhonFlags2[prevPhon];
                uint prev2Flags = Tables.PhonFlags2[prev2Phon];
                uint prev3Flags = Tables.PhonFlags2[prev3Phon];

                if (_phonBuf2InIndex == 0) lastStoredPhon = _SIL_;
                else lastStoredPhon = _phonBuf2[_phonBuf2InIndex - 1];
                uint lastPhonFlags = Tables.PhonFlags2[lastStoredPhon];

                short userPitch = _userPitchBuf1[outIdx];
                short userDur = _userDurBuf1[outIdx];
                short userNote = _userNoteBuf1[outIdx];
                short userRate = _userRateBuf1[outIdx];

                short targetPhon = curPhon;
                bool delFwd = false;
                bool insertGlot = false;

                // EN rule
                if (curPhon == _n_ && prevPhon == _IX_)
                {
                    if ((prev2Flags & kPlosFricF) != 0 && prev2Phon != _b_ && prev2Phon != _g_)
                    {
                        if (!(prev2Phon == _d_ && (prev3Flags & kVowelF) != 0))
                        {
                            _phonBuf2[_phonBuf2InIndex - 1] = _EN_;
                            delFwd = true;
                        }
                    }
                }

                // EL rule
                if (curPhon == _l_ && (curCtrl & (kPrimOrEmphStress | kWord_Initial_Consonant)) == 0)
                {
                    if (prevPhon == _AX_ || prevPhon == _UH_)
                    {
                        _phonBuf2[_phonBuf2InIndex - 1] = _EL_;
                        delFwd = true;
                        goto STUFF_BUFF;
                    }
                }

                // LX / RX rules
                if ((curCtrl & (kPrimOrEmphStress | kWord_Initial_Consonant)) == 0 &&
                    (prevFlags & kVowel1F) != 0)
                {
                    if (curPhon == _l_)
                    {
                        targetPhon = _LX_;
                    }
                    else if (curPhon == _r_)
                    {
                        targetPhon = _RX_;
                        switch (prevPhon)
                        {
                            case _UW_:
                            case _UH_:
                                _phonBuf2[_phonBuf2InIndex - 1] = _UR_; delFwd = true; break;
                            case _AO_:
                            case _OW_:
                                _phonBuf2[_phonBuf2InIndex - 1] = _OR_; delFwd = true; break;
                            case _AA_:
                                _phonBuf2[_phonBuf2InIndex - 1] = _AR_; delFwd = true; break;
                            case _AH_:
                            case _AX_:
                                _phonBuf2[_phonBuf2InIndex - 1] = _ER_; delFwd = true; break;
                            case _IH_:
                            case _IY_:
                                _phonBuf2[_phonBuf2InIndex - 1] = _IR_; delFwd = true; break;
                            case _AE_:
                            case _EH_:
                            case _EY_:
                                _phonBuf2[_phonBuf2InIndex - 1] = _XR_; delFwd = true; break;
                        }
                    }
                }

                // yUW -> YU rule
                if ((prevCtrl & kWord_Initial_Consonant) != 0 && prevPhon == _y_ &&
                    curPhon == _UW_ && nextPhon != _r_ &&
                    (curCtrl & kSyllableTypeField) >= kWord_End)
                {
                    _phonBuf2[_phonBuf2InIndex - 1] = _YU_;
                    _phonCtrlBuf2[_phonBuf2InIndex - 1] = curCtrl;
                    delFwd = true;
                }

                // DHAH -> DHIY rule
                if ((nextFlags & kVowelF) != 0 && curPhon == _AH_ &&
                    (curCtrl & kSyllableTypeField) != 0 && prevPhon == _DH_ &&
                    (prevCtrl & kWord_Initial_Consonant) != 0 &&
                    (nextCtrl & kPrimOrEmphStress) != 0)
                {
                    targetPhon = _IY_;
                }

                // EHnd -> AEnd rule
                if (curPhon == _SIL_ && nextPhon == _EH_ && next2Phon == _n_ &&
                    next3Phon == _d_ && (nextCtrl & kPrimOrEmphStress) != 0)
                {
                    _phonBuf1[outIdx + 1] = _AE_;
                    nextPhon = _AE_;
                    nextFlags = Tables.PhonFlags2[_AE_];
                }

                // Glottal insertion
                if ((curFlags & kVowelF) != 0 && (nextFlags & kVowelF) != 0 &&
                    (nextCtrl & kPrimOrEmphStress) != 0 && (curCtrl & kWord_End) != 0)
                {
                    insertGlot = true;
                }

                // Dental/affricate y-slur: d -> JH before YU/y (unstressed)
                if ((nextPhon == _YU_ || nextPhon == _y_) && (nextCtrl & kPrimOrEmphStress) == 0)
                {
                    if (curPhon == _d_) { targetPhon = _JH_; goto STUFF_BUFF; }
                }

                // t rules
                if (curPhon == _t_)
                {
                    // tUH -> tUW
                    if (nextPhon == _UW_ && (nextCtrl & kSyllableTypeField) >= kWord_End &&
                        (curCtrl & kPrimOrEmphStress) == 0 &&
                        (next2Phon == _SIL_ || (Tables.PhonFlags2[next2Phon] & kVowelF) != 0))
                    {
                        _phonBuf1[outIdx + 1] = _UW_;
                    }
                    else
                    {
                        // Glottalize t before l or DH
                        if (nextPhon == _l_ || nextPhon == _DH_)
                        {
                            goto SUB_T_GLOT;
                        }
                        // At word end before sonorant/h
                        else if ((curCtrl & kSyllableTypeField) >= kWord_End)
                        {
                            if (((nextFlags & kSonorConsonF) != 0 && nextPhon != _EN_) || nextPhon == _h_)
                                goto SUB_T_GLOT;
                        }
                        else if (nextPhon == _EN_ || (nextPhon == _IX_ && next2Phon == _n_))
                        {
                            goto SUB_T_GLOT;
                        }
                        goto SKIP_T_GLOT;
                    SUB_T_GLOT:
                        targetPhon = (lastPhonFlags & kSonorantF) != 0 ? _TX_ : _d_;
                        goto STUFF_BUFF;
                    SKIP_T_GLOT:;
                    }
                }

                // Dental flap DX rules (d or t before vowel, after sonorant non-nasal)
                if (curPhon == _d_ || curPhon == _t_)
                {
                    // Don't flap before syllabic n
                    if (nextPhon == _IX_ && next2Phon == _n_)
                    {
                        if (curPhon == _t_) goto SKIP_FLAP;
                        if ((prevFlags & kVowelF) == 0) goto SKIP_FLAP;
                    }

                    if ((nextFlags & kVowelF) != 0 &&
                        (lastPhonFlags & kSonorantF) != 0 && (lastPhonFlags & kNasalF) == 0)
                    {
                        if ((nextCtrl & kWord_Start) != 0)
                        {
                            targetPhon = _DX_;
                        }
                        else if ((curCtrl & kPrimOrEmphStress) == 0)
                        {
                            if ((curCtrl & kWord_Initial_Consonant) != 0)
                            {
                                if (nextPhon == _AX_ || nextPhon == _IX_ || nextPhon == _UH_)
                                    targetPhon = _DX_;
                            }
                            else if (curPhon == _t_)
                            {
                                // T flap rules
                                if (nextPhon == _OW_)
                                {
                                    if ((_phonCtrlBuf2[_phonBuf2InIndex - 1] & kStressField) != 0 &&
                                        (next2Phon != _r_ || (next2Ctrl & kWord_Initial_Consonant) != 0))
                                        targetPhon = _DX_;
                                }
                                else if ((nextPhon == _AH_ || nextPhon == _AX_) &&
                                         next2Phon == _r_ && (nextCtrl & kPrimaryStress) == 0)
                                {
                                    if ((curCtrl & kWord_Initial_Consonant) == 0 && (nextCtrl & kPrimaryStress) == 0)
                                        targetPhon = _DX_;
                                }
                                else if (nextPhon == _ER_)
                                {
                                    if ((curCtrl & kWord_Initial_Consonant) == 0) targetPhon = _DX_;
                                }
                                else if ((nextPhon == _AX_ || nextPhon == _IY_ || nextPhon == _IX_ || nextPhon == _EL_) &&
                                         (next2Phon != _r_ || (next2Ctrl & kWord_Initial_Consonant) != 0) &&
                                         (nextCtrl & kPrimaryStress) == 0)
                                {
                                    targetPhon = _DX_;
                                }
                            }
                            else // curPhon == _d_
                            {
                                if (nextPhon == _OW_)
                                {
                                    if ((_phonCtrlBuf2[_phonBuf2InIndex - 1] & kStressField) != 0)
                                        targetPhon = _DX_;
                                }
                                else if (nextPhon == _AX_ || nextPhon == _IY_ || nextPhon == _IX_ ||
                                         nextPhon == _EL_ || nextPhon == _ER_ || nextPhon == _IH_ ||
                                         nextPhon == _AH_ || nextPhon == _AA_)
                                {
                                    targetPhon = _DX_;
                                }
                            }
                        }
                    }
                }
            SKIP_FLAP:

                // DH rules
                if (curPhon == _DH_ && (curCtrl & kPrimaryStress) == 0)
                {
                    switch (lastStoredPhon)
                    {
                        case _t_:
                        case _TX_:
                        case _d_:
                            targetPhon = _DD_; break;
                        case _n_:
                            targetPhon = _n_; break;
                    }
                }

            STUFF_BUFF:
                if (!delFwd)
                {
                    _phonBuf2[_phonBuf2InIndex] = targetPhon;
                    _phonCtrlBuf2[_phonBuf2InIndex] = curCtrl;
                    _userPitchBuf2[_phonBuf2InIndex] = (short)(userPitch + lastUserPitch);
                    _userDurBuf2[_phonBuf2InIndex] = userDur;
                    _userNoteBuf2[_phonBuf2InIndex] = userNote;
                    _userRateBuf2[_phonBuf2InIndex] = userRate;

                    if (_phonBuf2InIndex < kPhonBuf_Red_Zone)
                        _phonBuf2InIndex++;

                    if (insertGlot)
                    {
                        _phonBuf2[_phonBuf2InIndex] = _QX_;
                        _phonCtrlBuf2[_phonBuf2InIndex] = 0;
                        _userPitchBuf2[_phonBuf2InIndex] = _userPitchBuf2[_phonBuf2InIndex - 1];
                        _userDurBuf2[_phonBuf2InIndex] = kDur_One;
                        _userNoteBuf2[_phonBuf2InIndex] = 0;
                        _userRateBuf2[_phonBuf2InIndex] = 0;
                        if (_phonBuf2InIndex < kPhonBuf_Red_Zone)
                            _phonBuf2InIndex++;
                    }
                }
                else
                {
                    _userPitchBuf2[_phonBuf2InIndex - 1] += userPitch;
                    if (userDur != kDur_One)
                        _userDurBuf2[_phonBuf2InIndex - 1] = userDur;
                    if (userRate != 0)
                        _userRateBuf2[_phonBuf2InIndex - 1] = userRate;
                    if ((curCtrl & kSyllable_Start) != 0)
                        _phonCtrlBuf1[outIdx + 1] |= kSyllable_Start;
                }

                lastUserPitch += userPitch;
            }
        }
    }
}  // namespace