TextCommands.cs
#nullable enable
using System;
using System.Collections.Generic;
using static SharpTalk.AudioProcessor;

namespace SharpTalk
{

    public static class EmbeddedCmd
    {
        // note N → internal pitch value
        static short NoteToInternalPitch(int dtNote)
        {
            if (dtNote <= 0) return 0;
            short midiQ8 = (short)((dtNote + 42) << 8);
            if (midiQ8 < 0x1F59) return 0;
            midiQ8 -= 0x1F59;
            return (short)((((long)midiQ8 * 0x1555L) + 0x8000L) >> 16);
        }

        static short MapPhoneme(string p) => p switch
        {
            "iy" => _IY_,
            "ih" => _IH_,
            "eh" => _EH_,
            "ae" => _AE_,
            "aa" => _AA_,
            "ah" => _AH_,
            "ao" => _AO_,
            "uh" => _UH_,
            "ax" => _AX_,
            "er" => _ER_,
            "ey" => _EY_,
            "ay" => _AY_,
            "oy" => _OY_,
            "aw" => _AW_,
            "ow" => _OW_,
            "uw" => _UW_,
            "yu" => _YU_,
            "ix" => _IX_,
            "ir" => _IR_,
            "xr" => _XR_,
            "ar" => _AR_,
            "or" => _OR_,
            "ur" => _UR_,
            "el" => _EL_,
            "en" => _EN_,
            "rr" => _RX_,
            "hx" => _h_,
            "nx" => _n_,
            "dx" => _DX_,
            "zh" => _ZH_,
            "sh" => _SH_,
            "th" => _TH_,
            "dh" => _DH_,
            "ch" => _CH_,
            "jh" => _JH_,
            "ng" => _NG_,
            "wh" => _w_,
            "b" => _b_,
            "d" => _d_,
            "f" => _f_,
            "g" => _g_,
            "h" => _h_,
            "k" => _k_,
            "l" => _l_,
            "m" => _m_,
            "n" => _n_,
            "p" => _p_,
            "r" => _r_,
            "s" => _s_,
            "t" => _t_,
            "v" => _v_,
            "w" => _w_,
            "y" => _y_,
            "z" => _z_,
            "q" => _QX_,
            "_" => _SIL_,
            _ => -1,
        };

        public readonly struct VoiceCommand
        {
            public enum Kind { Rate, Pitch, Volume }
            public readonly Kind Type;
            public readonly int Value;
            public VoiceCommand(Kind type, int value) { Type = type; Value = value; }
        }

        public readonly struct Segment
        {
            public readonly string? PlainText;
            public readonly List<PhonemeToken>? Singing;
            public readonly VoiceCommand? Cmd;
            public bool IsSinging => Singing != null;
            public bool IsCommand => Cmd != null;
            public Segment(string text) { PlainText = text; Singing = null; Cmd = null; }
            public Segment(List<PhonemeToken> s) { PlainText = null; Singing = s; Cmd = null; }
            public Segment(VoiceCommand cmd) { PlainText = null; Singing = null; Cmd = cmd; }
        }

        public static List<Segment> ParseSegments(string text)
        {
            var segments = new List<Segment>();
            if (!text.Contains('['))
            {
                if (text.Length > 0) segments.Add(new Segment(text));
                return segments;
            }

            var plain = new System.Text.StringBuilder();
            bool inSingMode = false;
            int i = 0;

            void FlushPlain()
            {
                if (plain.Length > 0) { segments.Add(new Segment(plain.ToString())); plain.Clear(); }
            }

            while (i < text.Length)
            {
                if (text[i] != '[') { plain.Append(text[i++]); continue; }

                i++; // consume '['
                if (i >= text.Length) break;

                if (text[i] == ':')
                {
                    // [:command arg] — parse mode-switch and voice-param commands
                    i++;
                    int cmdStart = i;
                    while (i < text.Length && text[i] != ' ' && text[i] != ']') i++;
                    string cmd = text[cmdStart..i].ToLowerInvariant();

                    // capture optional integer argument
                    while (i < text.Length && text[i] == ' ') i++;
                    int argStart = i;
                    while (i < text.Length && text[i] != ']') i++;
                    string argStr = text[argStart..i].Trim();
                    if (i < text.Length) i++; // consume ']'

                    if (cmd == "sing") inSingMode = true;
                    else if (cmd == "talk" || cmd == "stop") inSingMode = false;
                    else if (int.TryParse(argStr, out int argVal))
                    {
                        VoiceCommand.Kind? kind = cmd switch
                        {
                            "rate" => VoiceCommand.Kind.Rate,
                            "pitch" => VoiceCommand.Kind.Pitch,
                            "volume" => VoiceCommand.Kind.Volume,
                            _ => null,
                        };
                        if (kind is { } k)
                        {
                            FlushPlain();
                            segments.Add(new Segment(new VoiceCommand(k, argVal)));
                        }
                    }
                    continue;
                }

                // Phoneme block [phoneme<dur,note> ...]
                var blockSing = new List<PhonemeToken>();
                bool firstPhon = true;
                short lastPitch = 0; // inherited by trailing consonants with no <note>

                while (i < text.Length && text[i] != ']')
                {
                    while (i < text.Length && text[i] == ' ') i++;
                    if (i >= text.Length || text[i] == ']') break;

                    if (text[i] == '_' || char.IsLetter(text[i]))
                    {
                        // Collect all phonemes up to '<', ']', or ' '
                        // e.g. "dey<600,24>" → [d, ey] with dur=600 note=24
                        var group = new List<short>();
                        while (i < text.Length && text[i] != '<' && text[i] != ']' && text[i] != ' ')
                        {
                            if (text[i] == '_') { group.Add(_SIL_); i++; continue; }
                            bool matched2 = false;
                            if (i + 1 < text.Length && char.IsLetter(text[i + 1]))
                            {
                                string two = string.Concat(text[i], text[i + 1]).ToLowerInvariant();
                                short op2 = MapPhoneme(two);
                                if (op2 >= 0) { group.Add(op2); i += 2; matched2 = true; }
                            }
                            if (!matched2)
                            {
                                string one = text[i].ToString().ToLowerInvariant();
                                short op1 = MapPhoneme(one);
                                group.Add(op1 >= 0 ? op1 : _SIL_);
                                i++;
                            }
                        }

                        int dur = 0, note = 0;
                        bool hasNote = false;
                        if (i < text.Length && text[i] == '<')
                        {
                            hasNote = true;
                            i++;
                            while (i < text.Length && char.IsDigit(text[i]))
                                dur = dur * 10 + (text[i++] - '0');
                            if (i < text.Length && text[i] == ',')
                            {
                                i++;
                                while (i < text.Length && char.IsDigit(text[i]))
                                    note = note * 10 + (text[i++] - '0');
                            }
                            while (i < text.Length && text[i] != '>' && text[i] != ']') i++;
                            if (i < text.Length && text[i] == '>') i++;
                        }

                        // In [:sing] text mode, skip bare phonemes with no note/dur.
                        // In an explicit block, always include them (trailing consonants, etc.)
                        if (!hasNote && !inSingMode && blockSing.Count == 0) continue;

                        short pitch = hasNote
                            ? (note > 37 ? (short)-note : NoteToInternalPitch(note))
                            : lastPitch;
                        if (hasNote) lastPitch = pitch;

                        // Leading consonants use natural timing (no kSingingDuration);
                        // only the last phoneme in a noted group gets the explicit duration.
                        for (int gi = 0; gi < group.Count; gi++)
                        {
                            bool isLast = gi == group.Count - 1;
                            long ctrl = kWord_Start | kContent_Word;
                            if (hasNote && isLast) ctrl |= kSingingDuration;
                            if (firstPhon) { firstPhon = false; }
                            else ctrl &= ~(kWord_Start | kContent_Word);
                            blockSing.Add(new PhonemeToken
                            {
                                Phon = group[gi],
                                Ctrl = ctrl,
                                UserDur = hasNote && isLast ? (short)dur : (short)0,
                                UserNote = pitch,
                            });
                        }
                    }
                    else { i++; }
                }
                if (i < text.Length && text[i] == ']') i++;

                if (blockSing.Count > 0)
                {
                    FlushPlain();
                    segments.Add(new Segment(blockSing));
                }
            }

            FlushPlain();
            return segments;
        }

        public static string Parse(string text, out List<PhonemeToken>? singingTokens)
        {
            singingTokens = null;
            var segments = ParseSegments(text);

            var plain = new System.Text.StringBuilder();
            List<PhonemeToken>? sing = null;

            foreach (var seg in segments)
            {
                if (seg.IsSinging)
                {
                    sing ??= new List<PhonemeToken>();
                    sing.AddRange(seg.Singing!);
                }
                else if (!seg.IsCommand)
                {
                    plain.Append(seg.PlainText);
                }
            }

            singingTokens = sing;
            return plain.ToString();
        }

        public static string StripCommands(string text)
        {
            var result = Parse(text, out _);
            return result;
        }
    }
}  // namespace