// Code/UstConverter.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace SharpTalk.WebUi
{
    /// <summary>
    /// Result of a UST conversion, as returned by <c>UstConverter.Convert</c>.
    /// </summary>
    /// <param name="Klattsch">The generated Klattsch text (space-separated tokens).</param>
    /// <param name="Diagnostics">The diagnostics text built alongside the conversion.</param>
    public record UstConvertResult(string Klattsch, string Diagnostics);

    /// <summary>
    /// One note read from a UST note section. Instances are populated through
    /// object initializers and are immutable afterwards.
    /// </summary>
    internal sealed class UstNote
    {
        /// <summary>Value of the <c>Lyric</c> field; empty when not set.</summary>
        public string Lyric { get; init; } = string.Empty;

        /// <summary>Value of the <c>NoteNum</c> field (used as a MIDI note number by the converter).</summary>
        public int NoteNum { get; init; }

        /// <summary>Value of the <c>Length</c> field, in ticks (the converter assumes 480 ticks per beat).</summary>
        public int Length { get; init; }

        /// <summary>Value of the <c>Intensity</c> field; defaults to 100.</summary>
        public int Intensity { get; init; } = 100;

        /// <summary>Second <c>;</c>-separated component of the <c>PBS</c> field, or 0 when absent.</summary>
        public double PbsValue { get; init; }

        /// <summary>Comma-separated values of the <c>PBY</c> field; empty when absent.</summary>
        public List<double> Pby { get; init; } = new List<double>();
    }

    public static class UstConverter
    {
        // Lookup tables populated by the static constructor. The tuple arrays are
        // stored sorted by key length, longest first.
        private static readonly (string Kana, string Romaji)[] s_hiragana;
        private static readonly (string Kana, string Romaji)[] s_katakana;
        private static readonly (string Romaji, string[])[] s_romaji;
        private static readonly (string Ipa, string[] Codes)[] s_ipa;
        private static readonly (string Sym, string Ipa)[] s_xsampa;
        // ARPAbet symbol -> internal phoneme codes (keys compared case-insensitively).
        private static readonly Dictionary<string, string[]> s_arpa;
        // Same keys as s_arpa, but mapped onto the Japanese ("jp_*") vowel set.
        private static readonly Dictionary<string, string[]> s_arpa_jp;
        // Internal phoneme code -> Klattsch token.
        private static readonly Dictionary<string, string> s_phonToKlattsch;

        // Pitch-class names indexed by (MIDI note % 12).
        private static readonly string[] s_noteNames = { "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B" };
        // Lyrics treated as rests (see IsRest).
        private static readonly HashSet<string> s_restLyrics = new(StringComparer.Ordinal) { "R", "r", "-", "_" };
        // Lyrics that extend the previous note instead of starting a new one (see IsExtension).
        private static readonly HashSet<string> s_extLyrics  = new(StringComparer.Ordinal) { "+", "*", "↑", "↓" };
        // Section names that denote a note: '#' followed by hex digits, e.g. "#0000".
        private static readonly Regex s_noteSection    = new(@"^#[0-9A-Fa-f]+$", RegexOptions.Compiled);
        // A single vowel, '-' or 'n', then a space, then the actual lyric (captured in group 1).
        private static readonly Regex s_vcvPrefix      = new(@"^[aeiouAEIOU\-n] (.+)$", RegexOptions.Compiled);
        // Leading/trailing runs of decoration characters to strip before notation detection.
        private static readonly Regex s_trimDecorators = new(@"^[-+*↑↓\[\]{}]+|[-+*↑↓\[\]{}]+$", RegexOptions.Compiled);
        // IPA primary/secondary stress marks.
        // NOTE(review): not referenced in the visible part of the file — presumably used further down.
        private static readonly Regex s_stressMarkers  = new(@"[ˈˌ]", RegexOptions.Compiled);

        // Characters whose presence makes DetectNotation count a lyric as IPA.
        private static readonly string[] s_ipaIndicators = {
            "ə","ɛ","ɪ","ɔ","ʃ","ʒ","θ","ð","ŋ","ɑ","æ","ʌ","ɜ","ɯ","ɾ","ɸ","ç","ɕ","ʑ","ɹ"
        };

        // Pitch offsets with an absolute value below this are ignored by Convert.
        private const double PitchCentsThreshold = 20.0;

        // Phoneme codes grouped under display labels.
        // NOTE(review): not referenced in the visible part of the file — presumably consumed by the diagnostics builder; confirm.
        private static readonly (string Label, string[] Members)[] s_phonCategories = {
            ("JP vowels",  new[] { "jp_aa","jp_iy","jp_uw","jp_eh","jp_ow" }),
            ("EN vowels",  new[] { "iy","ih","eh","ae","aa","ao","ah","uh","uw","er","ey","ay","oy","aw","ow","ax","ix" }),
            ("Stops",      new[] { "p","b","t","d","k","g","dx" }),
            ("Fricatives", new[] { "f","v","th","dh","s","z","sh","zh","hh" }),
            ("Affricates", new[] { "ch","jh" }),
            ("Nasals",     new[] { "m","n","ng" }),
            ("Sonorants",  new[] { "w","y","r","l" }),
        };

        /// <summary>
        /// Builds every static lookup table used by the converter.
        /// </summary>
        /// <remarks>
        /// The tuple tables (kana, romaji, X-SAMPA, IPA) are stored sorted by key length,
        /// longest first — presumably so that a first-match scan prefers multi-character
        /// keys (e.g. "きゃ") over their single-character prefixes. The sort is stable, so
        /// entries of equal length keep their declaration order.
        /// </remarks>
        static UstConverter()
        {
            var hiraganaRaw = new (string, string)[] {
                ("いぇ","ye"),("うぃ","wi"),("うぇ","we"),("うぉ","wo"),
                ("きゃ","kya"),("きゅ","kyu"),("きょ","kyo"),("ぎゃ","gya"),("ぎゅ","gyu"),("ぎょ","gyo"),
                ("しぇ","she"),("しゃ","sha"),("しゅ","shu"),("しょ","sho"),
                ("じぇ","je"),("じゃ","ja"),("じゅ","ju"),("じょ","jo"),
                ("ちぇ","che"),("ちゃ","cha"),("ちゅ","chu"),("ちょ","cho"),
                ("つぁ","tsa"),("つぃ","tsi"),("つぇ","tse"),("つぉ","tso"),
                ("てぃ","thi"),("でぃ","dhi"),("とゅ","thu"),("どゅ","dhu"),
                ("にゃ","nya"),("にゅ","nyu"),("にょ","nyo"),
                ("ひゃ","hya"),("ひゅ","hyu"),("ひょ","hyo"),
                ("びゃ","bya"),("びゅ","byu"),("びょ","byo"),
                ("ぴゃ","pya"),("ぴゅ","pyu"),("ぴょ","pyo"),
                ("ふぁ","fa"),("ふぃ","fi"),("ふぇ","fe"),("ふぉ","fo"),
                ("みゃ","mya"),("みゅ","myu"),("みょ","myo"),
                ("りゃ","rya"),("りゅ","ryu"),("りょ","ryo"),
                ("ゔぁ","va"),("ゔぃ","vi"),("ゔぇ","ve"),("ゔぉ","vo"),
                ("ぁ","a"),("あ","a"),("ぃ","i"),("い","i"),("ぅ","u"),("う","u"),
                ("ぇ","e"),("え","e"),("ぉ","o"),("お","o"),
                ("か","ka"),("が","ga"),("き","ki"),("ぎ","gi"),
                ("く","ku"),("ぐ","gu"),("け","ke"),("げ","ge"),("こ","ko"),("ご","go"),
                ("さ","sa"),("ざ","za"),("し","shi"),("じ","ji"),
                ("す","su"),("ず","zu"),("せ","se"),("ぜ","ze"),("そ","so"),("ぞ","zo"),
                ("た","ta"),("だ","da"),("ち","chi"),("ぢ","di"),
                ("っ","q"),("つ","tsu"),("づ","du"),("て","te"),("で","de"),
                ("と","to"),("ど","do"),
                ("な","na"),("に","ni"),("ぬ","nu"),("ね","ne"),("の","no"),
                ("は","ha"),("ば","ba"),("ぱ","pa"),("ひ","hi"),("び","bi"),("ぴ","pi"),
                ("ふ","fu"),("ぶ","bu"),("ぷ","pu"),
                ("へ","he"),("べ","be"),("ぺ","pe"),("ほ","ho"),("ぼ","bo"),("ぽ","po"),
                ("ま","ma"),("み","mi"),("む","mu"),("め","me"),("も","mo"),
                ("ゃ","ya"),("や","ya"),("ゅ","yu"),("ゆ","yu"),("ょ","yo"),("よ","yo"),
                ("ら","ra"),("り","ri"),("る","ru"),("れ","re"),("ろ","ro"),
                ("わ","wa"),("ゐ","wi"),("ゑ","we"),("を","wo"),("ん","n"),("ゔ","vu"),
            };
            s_hiragana = hiraganaRaw.OrderByDescending(x => x.Item1.Length).ToArray();

            var katakanaRaw = new (string, string)[] {
                ("イェ","ye"),("ウィ","wi"),("ウェ","we"),("ウォ","wo"),
                ("キャ","kya"),("キュ","kyu"),("キョ","kyo"),("ギャ","gya"),("ギュ","gyu"),("ギョ","gyo"),
                ("シェ","she"),("シャ","sha"),("シュ","shu"),("ショ","sho"),
                ("ジェ","je"),("ジャ","ja"),("ジュ","ju"),("ジョ","jo"),
                ("チェ","che"),("チャ","cha"),("チュ","chu"),("チョ","cho"),
                // The small vowel must be katakana (ァ); a hiragana ぁ here would never
                // match real katakana input and "ツァ" would fall apart into "tsu" + "a".
                ("ツァ","tsa"),("ツィ","tsi"),("ツェ","tse"),("ツォ","tso"),
                ("ティ","thi"),("ディ","dhi"),("トゥ","thu"),("ドゥ","dhu"),
                ("ニャ","nya"),("ニュ","nyu"),("ニョ","nyo"),
                ("ヒャ","hya"),("ヒュ","hyu"),("ヒョ","hyo"),
                ("ビャ","bya"),("ビュ","byu"),("ビョ","byo"),
                ("ピャ","pya"),("ピュ","pyu"),("ピョ","pyo"),
                ("ファ","fa"),("フィ","fi"),("フェ","fe"),("フォ","fo"),
                ("ミャ","mya"),("ミュ","myu"),("ミョ","myo"),
                ("リャ","rya"),("リュ","ryu"),("リョ","ryo"),
                ("ヴァ","va"),("ヴィ","vi"),("ヴェ","ve"),("ヴォ","vo"),
                ("ァ","a"),("ア","a"),("ィ","i"),("イ","i"),("ゥ","u"),("ウ","u"),
                ("ェ","e"),("エ","e"),("ォ","o"),("オ","o"),
                ("カ","ka"),("ガ","ga"),("キ","ki"),("ギ","gi"),
                ("ク","ku"),("グ","gu"),("ケ","ke"),("ゲ","ge"),("コ","ko"),("ゴ","go"),
                ("サ","sa"),("ザ","za"),("シ","shi"),("ジ","ji"),
                ("ス","su"),("ズ","zu"),("セ","se"),("ゼ","ze"),("ソ","so"),("ゾ","zo"),
                ("タ","ta"),("ダ","da"),("チ","chi"),("ヂ","di"),
                ("ッ","q"),("ツ","tsu"),("ヅ","du"),("テ","te"),("デ","de"),
                ("ト","to"),("ド","do"),
                ("ナ","na"),("ニ","ni"),("ヌ","nu"),("ネ","ne"),("ノ","no"),
                ("ハ","ha"),("バ","ba"),("パ","pa"),("ヒ","hi"),("ビ","bi"),("ピ","pi"),
                ("フ","fu"),("ブ","bu"),("プ","pu"),
                ("ヘ","he"),("ベ","be"),("ペ","pe"),("ホ","ho"),("ボ","bo"),("ポ","po"),
                ("マ","ma"),("ミ","mi"),("ム","mu"),("メ","me"),("モ","mo"),
                ("ャ","ya"),("ヤ","ya"),("ュ","yu"),("ユ","yu"),("ョ","yo"),("ヨ","yo"),
                ("ラ","ra"),("リ","ri"),("ル","ru"),("レ","re"),("ロ","ro"),
                ("ワ","wa"),("ヰ","wi"),("ヱ","we"),("ヲ","wo"),("ン","n"),("ヴ","vu"),
            };
            s_katakana = katakanaRaw.OrderByDescending(x => x.Item1.Length).ToArray();

            // Helper for building JP phoneme arrays: consonants followed by vowel code
            static string[] Cv(string vowelCode, params string[] cons) {
                var r = new string[cons.Length + 1];
                for (int k = 0; k < cons.Length; k++) r[k] = cons[k];
                r[cons.Length] = vowelCode;
                return r;
            }
            const string JA = "jp_aa", JI = "jp_iy", JU = "jp_uw", JE = "jp_eh", JO = "jp_ow";

            // Romaji syllable -> phoneme codes. "q" (small tsu) maps to no phonemes.
            var romajiRaw = new (string, string[])[] {
                ("a",Cv(JA)),("i",Cv(JI)),("u",Cv(JU)),("e",Cv(JE)),("o",Cv(JO)),
                ("ka",Cv(JA,"k")),("ki",Cv(JI,"k")),("ku",Cv(JU,"k")),("ke",Cv(JE,"k")),("ko",Cv(JO,"k")),
                ("kya",Cv(JA,"k","y")),("kyu",Cv(JU,"k","y")),("kyo",Cv(JO,"k","y")),
                ("ga",Cv(JA,"g")),("gi",Cv(JI,"g")),("gu",Cv(JU,"g")),("ge",Cv(JE,"g")),("go",Cv(JO,"g")),
                ("gya",Cv(JA,"g","y")),("gyu",Cv(JU,"g","y")),("gyo",Cv(JO,"g","y")),
                ("sa",Cv(JA,"s")),("si",Cv(JI,"sh")),("su",Cv(JU,"s")),("se",Cv(JE,"s")),("so",Cv(JO,"s")),
                ("sha",Cv(JA,"sh")),("shi",Cv(JI,"sh")),("shu",Cv(JU,"sh")),("she",Cv(JE,"sh")),("sho",Cv(JO,"sh")),
                ("sya",Cv(JA,"sh")),("syu",Cv(JU,"sh")),("syo",Cv(JO,"sh")),
                ("za",Cv(JA,"z")),("zi",Cv(JI,"zh")),("zu",Cv(JU,"z")),("ze",Cv(JE,"z")),("zo",Cv(JO,"z")),
                ("ja",Cv(JA,"jh")),("ji",Cv(JI,"jh")),("ju",Cv(JU,"jh")),("je",Cv(JE,"jh")),("jo",Cv(JO,"jh")),
                ("jya",Cv(JA,"jh","y")),("jyu",Cv(JU,"jh","y")),("jyo",Cv(JO,"jh","y")),
                ("ta",Cv(JA,"t")),("ti",Cv(JI,"ch")),("tu",Cv(JU,"t","s")),("te",Cv(JE,"t")),("to",Cv(JO,"t")),
                ("cha",Cv(JA,"ch")),("chi",Cv(JI,"ch")),("chu",Cv(JU,"ch")),("che",Cv(JE,"ch")),("cho",Cv(JO,"ch")),
                ("tya",Cv(JA,"ch","y")),("tyu",Cv(JU,"ch","y")),("tyo",Cv(JO,"ch","y")),
                ("tsa",Cv(JA,"t","s")),("tsi",Cv(JI,"t","s")),("tsu",Cv(JU,"t","s")),("tse",Cv(JE,"t","s")),("tso",Cv(JO,"t","s")),
                ("thi",Cv(JI,"t")),("thu",Cv(JU,"t")),
                ("da",Cv(JA,"d")),("di",Cv(JI,"jh")),("du",Cv(JU,"d","z")),("de",Cv(JE,"d")),("do",Cv(JO,"d")),
                ("dhi",Cv(JI,"d")),("dhu",Cv(JU,"d")),
                ("na",Cv(JA,"n")),("ni",Cv(JI,"n")),("nu",Cv(JU,"n")),("ne",Cv(JE,"n")),("no",Cv(JO,"n")),
                ("nya",Cv(JA,"n","y")),("nyu",Cv(JU,"n","y")),("nyo",Cv(JO,"n","y")),
                ("ha",Cv(JA,"hh")),("hi",Cv(JI,"hh")),("fu",Cv(JU,"f")),("he",Cv(JE,"hh")),("ho",Cv(JO,"hh")),
                ("hya",Cv(JA,"hh","y")),("hyu",Cv(JU,"hh","y")),("hyo",Cv(JO,"hh","y")),
                ("fa",Cv(JA,"f")),("fi",Cv(JI,"f")),("fe",Cv(JE,"f")),("fo",Cv(JO,"f")),
                ("ba",Cv(JA,"b")),("bi",Cv(JI,"b")),("bu",Cv(JU,"b")),("be",Cv(JE,"b")),("bo",Cv(JO,"b")),
                ("bya",Cv(JA,"b","y")),("byu",Cv(JU,"b","y")),("byo",Cv(JO,"b","y")),
                ("pa",Cv(JA,"p")),("pi",Cv(JI,"p")),("pu",Cv(JU,"p")),("pe",Cv(JE,"p")),("po",Cv(JO,"p")),
                ("pya",Cv(JA,"p","y")),("pyu",Cv(JU,"p","y")),("pyo",Cv(JO,"p","y")),
                ("ma",Cv(JA,"m")),("mi",Cv(JI,"m")),("mu",Cv(JU,"m")),("me",Cv(JE,"m")),("mo",Cv(JO,"m")),
                ("mya",Cv(JA,"m","y")),("myu",Cv(JU,"m","y")),("myo",Cv(JO,"m","y")),
                ("ya",Cv(JA,"y")),("yu",Cv(JU,"y")),("yo",Cv(JO,"y")),("ye",Cv(JE,"y")),
                ("ra",Cv(JA,"dx")),("ri",Cv(JI,"dx")),("ru",Cv(JU,"dx")),("re",Cv(JE,"dx")),("ro",Cv(JO,"dx")),
                ("rya",Cv(JA,"dx","y")),("ryu",Cv(JU,"dx","y")),("ryo",Cv(JO,"dx","y")),
                ("wa",Cv(JA,"w")),("wi",Cv(JI,"w")),("we",Cv(JE,"w")),("wo",Cv(JO,"w")),
                ("va",Cv(JA,"v")),("vi",Cv(JI,"v")),("vu",Cv(JU,"v")),("ve",Cv(JE,"v")),("vo",Cv(JO,"v")),
                ("n",  new[] { "n" }),
                ("q",  Array.Empty<string>()),
            };
            s_romaji = romajiRaw.OrderByDescending(x => x.Item1.Length).ToArray();

            // X-SAMPA symbol -> IPA. "=\" is the palatal click (ǂ); "|\|\" is the
            // lateral click (ǁ), so the two must not share a target.
            var xsampaRaw = new (string, string)[] {
                ("|\\|\\","ǁ"),("!\\","ǃ"),(":\\","ˑ"),("<\\","ʢ"),("=\\","ǂ"),(">\\","ʡ"),("?\\","ʕ"),
                ("B\\","ʙ"),("G\\","ɢ"),("H\\","ʜ"),("L\\","ʟ"),("O\\","ʘ"),
                ("d\\`","ɖ"),("dZ","dʒ"),("d\\","ɖ"),("dz\\","dʑ"),("g\\","ɢ"),
                ("h\\","ɦ"),("j\\","ʝ"),("l\\`","ɭ"),("l\\","ɭ"),("n\\`","ɳ"),("n\\","ɳ"),
                ("p\\","ɸ"),("r\\`","ɻ"),("r\\","ɹ"),("s\\`","ʂ"),("s\\","ɕ"),
                ("tS","tʃ"),("t\\`","ʈ"),("t\\","ʈ"),("ts\\","tɕ"),
                ("x\\","ɧ"),("z\\`","ʐ"),("z\\","ʑ"),("|\\","ǀ"),
                ("&","æ"),("'","ˈ"),(",","ˌ"),("0","ɒ"),("1","ɨ"),("2","ø"),("3","ɜ"),
                ("4","ɾ"),("5","ɫ"),("6","ɐ"),("7","ɤ"),("8","ɵ"),("9","œ"),(":","ː"),
                ("=","̩"),("?","ʔ"),("@","ə"),("A","ɑ"),("B","β"),("C","ç"),("D","ð"),
                ("E","ɛ"),("F","ɱ"),("G","ɣ"),("H","ɥ"),("I","ɪ"),("J","ɲ"),("K","ɬ"),
                ("L","ʎ"),("M","ɯ"),("N","ŋ"),("O","ɔ"),("P","ʋ"),("Q","ɒ"),("R","ʁ"),
                ("S","ʃ"),("T","θ"),("U","ʊ"),("V","ʌ"),("W","ʍ"),("X","χ"),("Y","ʏ"),
                ("Z","ʒ"),("^","̯"),("_",""),
            };
            s_xsampa = xsampaRaw.OrderByDescending(x => x.Item1.Length).ToArray();

            // IPA symbol -> phoneme codes. Stress, length and secondary-articulation
            // marks map to an empty array (i.e. they produce no phoneme).
            var ipaRaw = new (string, string[])[] {
                ("aɪ",new[]{"ay"}),("aʊ",new[]{"aw"}),("eɪ",new[]{"ey"}),("oʊ",new[]{"ow"}),("əʊ",new[]{"ow"}),
                ("ɔɪ",new[]{"oy"}),("iː",new[]{"iy"}),("uː",new[]{"uw"}),("aː",new[]{"aa"}),("ɑː",new[]{"aa"}),
                ("ɜː",new[]{"er"}),("ɔː",new[]{"ao"}),("eː",new[]{"eh"}),("oː",new[]{"ow"}),
                ("ɪə",new[]{"ih","r"}),("eə",new[]{"eh","r"}),("ʊə",new[]{"uh","r"}),("ɔə",new[]{"ao","r"}),
                ("t͡ʃ",new[]{"ch"}),("d͡ʒ",new[]{"jh"}),("t͡s",new[]{"t","s"}),("d͡z",new[]{"jh"}),
                ("d͡ʑ",new[]{"jh"}),("t͡ɕ",new[]{"ch"}),
                ("tʃ",new[]{"ch"}),("dʒ",new[]{"jh"}),("ts",new[]{"t","s"}),("dz",new[]{"jh"}),
                ("dʑ",new[]{"jh"}),("tɕ",new[]{"ch"}),("ɖʐ",new[]{"jh"}),("ʈʂ",new[]{"ch"}),
                ("l̩",new[]{"el"}),("m̩",new[]{"m"}),("n̩",new[]{"en"}),
                ("ŋ",new[]{"ng"}),("ɴ",new[]{"ng"}),
                ("i",new[]{"iy"}),("ɪ",new[]{"ih"}),("e",new[]{"eh"}),("ɛ",new[]{"eh"}),("æ",new[]{"ae"}),
                ("a",new[]{"aa"}),("ɑ",new[]{"aa"}),("ɒ",new[]{"aa"}),
                ("ɔ",new[]{"ao"}),("ʌ",new[]{"ah"}),("ɐ",new[]{"ah"}),
                ("ʊ",new[]{"uh"}),("u",new[]{"uw"}),
                ("ə",new[]{"ah"}),("ɘ",new[]{"ah"}),("ɵ",new[]{"ah"}),
                ("ɜ",new[]{"er"}),("ɝ",new[]{"er"}),("ɚ",new[]{"er"}),
                ("ɨ",new[]{"ih"}),("ɯ",new[]{"uw"}),
                ("ø",new[]{"er"}),("œ",new[]{"er"}),("y",new[]{"iy"}),
                ("w",new[]{"w"}),("ɥ",new[]{"w"}),("ʍ",new[]{"w"}),
                ("j",new[]{"y"}),("ʝ",new[]{"y"}),
                ("ɹ",new[]{"r"}),("r",new[]{"r"}),("ɻ",new[]{"r"}),("ʀ",new[]{"r"}),("ʁ",new[]{"r"}),
                ("ɾ",new[]{"dx"}),("ɽ",new[]{"dx"}),
                ("l",new[]{"l"}),("ɫ",new[]{"l"}),("ʎ",new[]{"l"}),("ɭ",new[]{"l"}),("ɺ",new[]{"l"}),
                ("m",new[]{"m"}),("ɱ",new[]{"m"}),
                ("n",new[]{"n"}),("ɳ",new[]{"n"}),("ɲ",new[]{"n"}),
                ("h",new[]{"hh"}),("ɦ",new[]{"hh"}),("ɸ",new[]{"f"}),("ħ",new[]{"hh"}),
                ("f",new[]{"f"}),("v",new[]{"v"}),("ʋ",new[]{"v"}),
                ("θ",new[]{"th"}),("ð",new[]{"dh"}),
                ("s",new[]{"s"}),("z",new[]{"z"}),
                ("ʂ",new[]{"sh"}),("ʃ",new[]{"sh"}),("ɕ",new[]{"sh"}),
                ("ʐ",new[]{"zh"}),("ʒ",new[]{"zh"}),("ʑ",new[]{"zh"}),
                ("ç",new[]{"sh"}),("χ",new[]{"k"}),("ɣ",new[]{"g"}),
                ("β",new[]{"b"}),("x",new[]{"k"}),
                ("p",new[]{"p"}),("b",new[]{"b"}),("ɓ",new[]{"b"}),("ʙ",new[]{"b"}),
                ("t",new[]{"t"}),("d",new[]{"d"}),("ɗ",new[]{"d"}),("ɖ",new[]{"d"}),
                ("c",new[]{"t"}),("ɟ",new[]{"d"}),
                ("k",new[]{"k"}),("g",new[]{"g"}),("ɡ",new[]{"g"}),("ɠ",new[]{"g"}),("ɢ",new[]{"g"}),
                ("q",new[]{"k"}),
                ("ʔ",Array.Empty<string>()),
                ("ˈ",Array.Empty<string>()),("ˌ",Array.Empty<string>()),("ː",Array.Empty<string>()),
                ("ˑ",Array.Empty<string>()),("̃",Array.Empty<string>()),("̩",Array.Empty<string>()),
                ("̯",Array.Empty<string>()),("ʰ",Array.Empty<string>()),("ʲ",Array.Empty<string>()),
                ("ʷ",Array.Empty<string>()),("ˠ",Array.Empty<string>()),("ˤ",Array.Empty<string>()),
            };
            s_ipa = ipaRaw.OrderByDescending(x => x.Item1.Length).ToArray();

            // ARPAbet -> English phoneme codes (case-insensitive keys).
            var arpaRaw = new (string, string[])[] {
                ("AA", new[]{"aa"}), ("AE", new[]{"ae"}), ("AH", new[]{"ah"}), ("AO", new[]{"ao"}),
                ("AW", new[]{"aw"}), ("AY", new[]{"ay"}), ("EH", new[]{"eh"}), ("ER", new[]{"er"}),
                ("EY", new[]{"ey"}), ("IH", new[]{"ih"}), ("IY", new[]{"iy"}), ("OW", new[]{"ow"}),
                ("OY", new[]{"oy"}), ("UH", new[]{"uh"}), ("UW", new[]{"uw"}),
                ("B",  new[]{"b"}),  ("CH", new[]{"ch"}), ("D",  new[]{"d"}),  ("DH", new[]{"dh"}),
                ("F",  new[]{"f"}),  ("G",  new[]{"g"}),  ("HH", new[]{"hh"}), ("JH", new[]{"jh"}),
                ("K",  new[]{"k"}),  ("L",  new[]{"l"}),  ("M",  new[]{"m"}),  ("N",  new[]{"n"}),
                ("NG", new[]{"ng"}), ("P",  new[]{"p"}),  ("R",  new[]{"r"}),  ("S",  new[]{"s"}),
                ("SH", new[]{"sh"}), ("T",  new[]{"t"}),  ("TH", new[]{"th"}), ("V",  new[]{"v"}),
                ("W",  new[]{"w"}),  ("Y",  new[]{"y"}),  ("Z",  new[]{"z"}),  ("ZH", new[]{"zh"}),
                ("AX", new[]{"ax"}), ("IX", new[]{"ix"}), ("DX", new[]{"dx"}),
                ("A",  new[]{"aa"}), ("I",  new[]{"iy"}), ("U",  new[]{"uw"}), ("E",  new[]{"eh"}), ("O",  new[]{"ow"}),
                ("Q",  new[]{"_"}),  ("SIL", Array.Empty<string>()), ("SP", Array.Empty<string>()),
            };
            s_arpa = arpaRaw.ToDictionary(x => x.Item1, x => x.Item2, StringComparer.OrdinalIgnoreCase);

            // ARPAbet -> Japanese-flavoured phoneme codes (vowels folded onto the five jp_* vowels).
            var arpaJpRaw = new (string, string[])[] {
                ("AA", new[]{"jp_aa"}), ("AE", new[]{"jp_aa"}), ("AH", new[]{"jp_aa"}), ("AO", new[]{"jp_ow"}),
                ("AW", new[]{"jp_aa","jp_uw"}), ("AY", new[]{"jp_aa","jp_iy"}), ("EH", new[]{"jp_eh"}), ("ER", new[]{"jp_eh"}),
                ("EY", new[]{"jp_eh","jp_iy"}), ("IH", new[]{"jp_iy"}), ("IY", new[]{"jp_iy"}), ("OW", new[]{"jp_ow"}),
                ("OY", new[]{"jp_ow","jp_iy"}), ("UH", new[]{"jp_uw"}), ("UW", new[]{"jp_uw"}),
                ("B",  new[]{"b"}),  ("CH", new[]{"ch"}), ("D",  new[]{"d"}),  ("DH", new[]{"z"}),
                ("F",  new[]{"f"}),  ("G",  new[]{"g"}),  ("HH", new[]{"hh"}), ("JH", new[]{"jh"}),
                ("K",  new[]{"k"}),  ("L",  new[]{"dx"}), ("M",  new[]{"m"}),  ("N",  new[]{"n"}),
                ("NG", new[]{"n"}),  ("P",  new[]{"p"}),  ("R",  new[]{"dx"}), ("S",  new[]{"s"}),
                ("SH", new[]{"sh"}), ("T",  new[]{"t"}),  ("TH", new[]{"s"}),  ("V",  new[]{"v"}),
                ("W",  new[]{"w"}),  ("Y",  new[]{"y"}),  ("Z",  new[]{"z"}),  ("ZH", new[]{"sh"}),
                ("AX", new[]{"jp_aa"}), ("IX", new[]{"jp_iy"}), ("DX", new[]{"dx"}),
                ("A",  new[]{"jp_aa"}), ("I",  new[]{"jp_iy"}), ("U",  new[]{"jp_uw"}), ("E",  new[]{"jp_eh"}), ("O",  new[]{"jp_ow"}),
                ("Q",  new[]{"_"}),  ("SIL", Array.Empty<string>()), ("SP", Array.Empty<string>()),
            };
            s_arpa_jp = arpaJpRaw.ToDictionary(x => x.Item1, x => x.Item2, StringComparer.OrdinalIgnoreCase);

            // Internal phoneme code -> Klattsch token.
            s_phonToKlattsch = new Dictionary<string, string>(StringComparer.Ordinal) {
                {"jp_aa","A"},{"jp_iy","I"},{"jp_uw","U"},{"jp_eh","E"},{"jp_ow","O"},
                {"iy","IY"},{"ih","IH"},{"eh","EH"},{"ae","AE"},
                {"aa","AA"},{"ah","AH"},{"ao","AO"},{"uh","UH"},
                {"ax","AX"},{"er","ER"},{"ey","EY"},{"ay","AY"},
                {"oy","OY"},{"aw","AW"},{"ow","OW"},{"uw","UW"},{"ix","IX"},
                {"w","W"},{"y","Y"},{"r","R"},{"l","L"},
                {"m","M"},{"n","N"},{"ng","NG"},
                {"hh","HH"},{"f","F"},{"v","V"},{"th","TH"},{"dh","DH"},
                {"s","S"},{"z","Z"},{"sh","SH"},{"zh","ZH"},
                {"p","P"},{"b","B"},{"t","T"},{"d","D"},{"dx","DX"},{"k","K"},{"g","G"},
                {"ch","CH"},{"jh","JH"},{"_","_"},
            };
        }

        /// <summary>
        /// Converts UST text into Klattsch text plus a diagnostics report.
        /// </summary>
        /// <param name="ustText">Contents of the UST file.</param>
        /// <param name="language">
        /// Notation of the lyrics; <c>"auto"</c>, <c>"auto-detect"</c> or an empty value
        /// triggers detection from the lyrics themselves.
        /// </param>
        /// <param name="noteOffset">Value added to every note number before pitch conversion.</param>
        /// <param name="compatBank">
        /// Bank name emitted as a <c>[bank=…]</c> prefix, <c>null</c> for none, or
        /// <c>"auto"</c> to select <c>ja-mokhtari-2000</c> for Japanese notations and none otherwise.
        /// </param>
        /// <returns>The Klattsch text and the diagnostics text.</returns>
        public static UstConvertResult Convert(
            string ustText,
            string language = "auto",
            int noteOffset = 0,
            string? compatBank = null)
        {
            bool pickBankAutomatically = string.Equals(compatBank, "auto", StringComparison.Ordinal);

            double tempo = 120.0;
            var unknownLyrics = new HashSet<string>(StringComparer.Ordinal);
            var notes = ParseUst(ustText, ref tempo, out string notationType, language);

            bool needsDetection = string.IsNullOrEmpty(notationType)
                || notationType == "auto"
                || notationType == "auto-detect";
            if (needsDetection)
            {
                notationType = DetectNotation(notes.Select(n => n.Lyric).ToList());
            }

            if (pickBankAutomatically)
            {
                bool japaneseLike = notationType == "japanese" || notationType == "arpa_jp";
                compatBank = japaneseLike ? "ja-mokhtari-2000" : null;
            }

            var parts = new List<string>();
            var phonemeCounts = new Dictionary<string, int>(StringComparer.Ordinal);
            string? lastNoteName = null;
            int lastRate = -1;

            for (int index = 0; index < notes.Count;)
            {
                var head = notes[index];
                int durMs = TicksToMs(head.Length, tempo);

                // Absorb the extension notes that follow: their time is added to this
                // note, and the last non-empty PBY list among them wins.
                List<double> bendPoints = head.Pby;
                int next = index + 1;
                for (; next < notes.Count && IsExtension(notes[next].Lyric); next++)
                {
                    var extension = notes[next];
                    durMs += TicksToMs(extension.Length, tempo);
                    if (extension.Pby.Count > 0)
                    {
                        bendPoints = extension.Pby;
                    }
                }
                index = next; // advance now so every `continue` below moves on

                if (IsRest(head.Lyric))
                {
                    if (durMs > 0)
                    {
                        parts.Add($"p={durMs}");
                    }
                    continue;
                }

                var phonemes = LyricToPhonemes(head.Lyric, notationType, unknownLyrics);
                if (phonemes is null)
                {
                    // Unknown lyric: nothing is emitted for this note.
                    continue;
                }
                if (phonemes.Length == 0)
                {
                    if (durMs > 0)
                    {
                        parts.Add($"p={durMs}");
                    }
                    continue;
                }

                var tokens = new List<string>(phonemes.Length);
                foreach (var phoneme in phonemes)
                {
                    phonemeCounts[phoneme] = phonemeCounts.GetValueOrDefault(phoneme) + 1;
                    tokens.Add(MapPhoneme(phoneme, compatBank));
                }

                int shiftedMidi = head.NoteNum + noteOffset;
                string noteName = MidiToNoteName(shiftedMidi);
                double baseHz = MidiToHz(shiftedMidi);

                // Pitch and rate are only re-emitted when they change.
                if (noteName != lastNoteName)
                {
                    parts.Add($"b={noteName}");
                    lastNoteName = noteName;
                }
                if (durMs != lastRate)
                {
                    parts.Add($"r={durMs}");
                    lastRate = durMs;
                }

                // Starting offset (PBS) decorates the first token, in parentheses.
                if (Math.Abs(head.PbsValue) >= PitchCentsThreshold)
                {
                    string startMod = PitchModifier(baseHz, head.PbsValue, transient: true);
                    if (startMod.Length > 0)
                    {
                        tokens[0] += startMod;
                    }
                }

                // Final PBY value decorates the last token.
                double endCents = bendPoints.Count > 0 ? bendPoints[^1] : 0.0;
                if (Math.Abs(endCents) >= PitchCentsThreshold)
                {
                    string endMod = PitchModifier(baseHz, endCents, transient: false);
                    if (endMod.Length > 0)
                    {
                        tokens[^1] += endMod;
                    }
                }

                parts.Add(tokens.Count == 1
                    ? tokens[0]
                    : "( " + string.Join(" ", tokens) + " )");
            }

            string bankPrefix = compatBank is null ? "" : $"[bank={compatBank}] ";
            string klattschOutput = bankPrefix + string.Join(" ", parts);
            string diagnostics = BuildDiagnostics(notationType, tempo, phonemeCounts, unknownLyrics);
            return new UstConvertResult(klattschOutput, diagnostics);
        }

        /// <summary>
        /// Decodes raw UST file bytes to text and converts the result.
        /// </summary>
        /// <param name="data">Raw bytes of the UST file.</param>
        /// <param name="language">Passed through to <c>Convert</c>.</param>
        /// <param name="noteOffset">Passed through to <c>Convert</c>.</param>
        /// <param name="compatBank">Passed through to <c>Convert</c>.</param>
        /// <returns>The result of <c>Convert</c> on the decoded text.</returns>
        public static UstConvertResult ConvertFromBytes(
            byte[] data,
            string language = "auto",
            int noteOffset = 0,
            string? compatBank = null)
            => Convert(DecodeUst(data), language, noteOffset, compatBank);

        /// <summary>
        /// Decodes raw UST bytes into text, choosing between UTF-8 and Shift-JIS.
        /// </summary>
        /// <remarks>
        /// Strict UTF-8 is tried first. UTF-8 is self-validating, and Shift-JIS text that
        /// contains any double-byte character is almost never valid UTF-8, whereas UTF-8
        /// text very often decodes as Shift-JIS without error (yielding mojibake). Pure
        /// ASCII files decode identically either way. A leading UTF-8 byte-order mark is
        /// dropped so that it cannot hide the first <c>[section]</c> header from the parser.
        /// </remarks>
        /// <param name="data">Raw file contents.</param>
        /// <returns>The decoded text; Latin-1 is used as a last resort so decoding never fails.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
        private static string DecodeUst(byte[] data)
        {
            if (data is null) throw new ArgumentNullException(nameof(data));

            bool hasUtf8Bom = data.Length >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF;
            int start = hasUtf8Bom ? 3 : 0;

            try
            {
                var strictUtf8 = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
                return strictUtf8.GetString(data, start, data.Length - start);
            }
            catch (DecoderFallbackException)
            {
                // Not valid UTF-8: fall through to the legacy Japanese code pages.
            }

            // shift-jis and windows-31j need CodePagesEncodingProvider; when it is not
            // registered GetEncoding throws and the name is simply skipped.
            foreach (var name in new[] { "shift-jis", "windows-31j" })
            {
                Encoding legacy;
                try
                {
                    legacy = Encoding.GetEncoding(name, EncoderFallback.ExceptionFallback, DecoderFallback.ExceptionFallback);
                }
                catch (ArgumentException) { continue; }
                catch (NotSupportedException) { continue; }

                try
                {
                    return legacy.GetString(data);
                }
                catch (DecoderFallbackException)
                {
                    // Bytes are not valid in this code page; try the next one.
                }
            }

            // Latin-1 maps every byte to a character, so this cannot throw.
            return Encoding.Latin1.GetString(data);
        }

        /// <summary>
        /// Parses UST text into notes, picking up the project tempo on the way.
        /// </summary>
        /// <remarks>
        /// A note is produced for every section whose name matches <c>s_noteSection</c> and
        /// that contains a <c>Lyric</c> key. Fields are collected per section and discarded
        /// whenever a new section starts, so values from settings sections or from
        /// lyric-less sections cannot leak into a later note.
        /// </remarks>
        /// <param name="text">UST file contents.</param>
        /// <param name="tempo">
        /// Overwritten with the <c>Tempo</c> value of the <c>#SETTING</c> section when that value
        /// is a finite, positive number; left untouched otherwise.
        /// </param>
        /// <param name="notationType">
        /// Receives <paramref name="language"/>, or the detected notation when
        /// <paramref name="language"/> is empty or <c>"auto"</c>.
        /// </param>
        /// <param name="language">Requested notation.</param>
        /// <returns>The notes in file order.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
        private static List<UstNote> ParseUst(string text, ref double tempo, out string notationType, string language)
        {
            if (text is null) throw new ArgumentNullException(nameof(text));

            var notes = new List<UstNote>();
            var allLyrics = new List<string>();
            var current = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
            string? section = null;

            // Turns the fields collected for the section being closed into a note (when it
            // is a note section with a lyric) and always resets the field buffer.
            void CloseSection()
            {
                if (section is not null && s_noteSection.IsMatch(section) &&
                    current.TryGetValue("Lyric", out var lyric))
                {
                    notes.Add(BuildNote(current));
                    allLyrics.Add(lyric);
                }
                current.Clear();
            }

            // Normalize line endings
            text = text.Replace("\r\n", "\n").Replace("\r", "\n");

            foreach (var rawLine in text.Split('\n'))
            {
                var line = rawLine.Trim();
                if (line.Length == 0) continue;

                if (line.StartsWith('[') && line.EndsWith(']'))
                {
                    CloseSection();
                    section = line[1..^1];
                    continue;
                }

                int eq = line.IndexOf('=');
                if (eq < 0 || section is null) continue;

                string key = line[..eq].Trim();
                string val = line[(eq + 1)..].Trim();

                bool isProjectTempo =
                    string.Equals(section, "#SETTING", StringComparison.OrdinalIgnoreCase) &&
                    string.Equals(key, "Tempo", StringComparison.OrdinalIgnoreCase);
                if (!isProjectTempo)
                {
                    current[key] = val;
                    continue;
                }

                // Accept a decimal comma as well ("120,00"), and reject values that would
                // make the tick-to-millisecond division meaningless (0, negative, NaN, ±∞).
                if (double.TryParse(val.Replace(',', '.'), System.Globalization.NumberStyles.Float,
                        System.Globalization.CultureInfo.InvariantCulture, out double t) &&
                    double.IsFinite(t) && t > 0.0)
                {
                    tempo = t;
                }
            }

            // The file may end without a closing section header.
            CloseSection();

            notationType = (string.IsNullOrEmpty(language) || language == "auto")
                ? DetectNotation(allLyrics) : language;
            return notes;
        }

        /// <summary>
        /// Builds a <see cref="UstNote"/> from the key/value pairs of one note section.
        /// </summary>
        /// <remarks>
        /// All numbers are parsed with the invariant culture, because a UST file is
        /// machine-written data and must parse the same on every host locale.
        /// </remarks>
        /// <param name="fields">Fields of the section; only read, never modified.</param>
        /// <returns>
        /// The note. Missing or unparsable values fall back to: lyric "", note number 60,
        /// length 480, intensity 100, PBS value 0, and 0 for an individual PBY entry.
        /// </returns>
        private static UstNote BuildNote(Dictionary<string, string> fields)
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Reads an integer field, falling back when it is absent or malformed.
            int ReadInt(string key, int fallback) =>
                fields.TryGetValue(key, out var raw) &&
                int.TryParse(raw, System.Globalization.NumberStyles.Integer, invariant, out int parsed)
                    ? parsed
                    : fallback;

            // PBS is "<first>;<second>"; only the second component is kept.
            double pbsValue = 0.0;
            if (fields.TryGetValue("PBS", out var pbsRaw) && pbsRaw.Contains(';'))
            {
                var pbsParts = pbsRaw.Split(';');
                if (pbsParts.Length > 1 &&
                    double.TryParse(pbsParts[1], System.Globalization.NumberStyles.Float, invariant, out double pv))
                {
                    pbsValue = pv;
                }
            }

            // PBY is a comma-separated list; entries that do not parse count as 0.
            var pby = new List<double>();
            if (fields.TryGetValue("PBY", out var pbyRaw) && pbyRaw.Length > 0)
            {
                foreach (var tok in pbyRaw.Split(','))
                {
                    pby.Add(double.TryParse(tok.Trim(), System.Globalization.NumberStyles.Float,
                        invariant, out double d) ? d : 0.0);
                }
            }

            fields.TryGetValue("Lyric", out var lyric);

            return new UstNote
            {
                Lyric = lyric ?? "",
                NoteNum = ReadInt("NoteNum", 60),
                Length = ReadInt("Length", 480),
                Intensity = ReadInt("Intensity", 100),
                PbsValue = pbsValue,
                Pby = pby,
            };
        }

        /// <summary>
        /// Removes a leading "&lt;vowel, '-' or 'n'&gt;&lt;space&gt;" prefix from a lyric.
        /// </summary>
        /// <param name="lyric">Lyric to inspect.</param>
        /// <returns>The part after the prefix, or <paramref name="lyric"/> unchanged when there is no prefix.</returns>
        private static string StripVcvPrefix(string lyric)
        {
            Match match = s_vcvPrefix.Match(lyric);
            if (!match.Success)
            {
                return lyric;
            }
            return match.Groups[1].Value;
        }

        /// <summary>
        /// Tells whether a lyric denotes a rest: after trimming and removing a VCV prefix
        /// it is either empty or one of the entries in <c>s_restLyrics</c>.
        /// </summary>
        private static bool IsRest(string lyric)
        {
            string core = StripVcvPrefix(lyric.Trim());
            if (core.Length == 0)
            {
                return true;
            }
            return s_restLyrics.Contains(core);
        }

        /// <summary>
        /// Tells whether the trimmed lyric is one of the extension markers in <c>s_extLyrics</c>.
        /// </summary>
        private static bool IsExtension(string lyric)
        {
            string trimmed = lyric.Trim();
            return s_extLyrics.Contains(trimmed);
        }

        /// <summary>
        /// Guesses the notation of a set of lyrics by majority vote.
        /// </summary>
        /// <remarks>
        /// Each distinct, non-rest lyric (trimmed, VCV prefix and decorations removed) casts
        /// at most one vote, tested in this order: Japanese (contains kana or is a known
        /// romaji syllable), ARPAbet (every space-separated token, minus a trailing 0/1/2,
        /// is a known symbol), X-SAMPA (indicator character, mixed case, or one of the
        /// digraphs dh/th/zh/sh/ch/ng), IPA (contains an indicator character), English
        /// (more than two ASCII letters with at least 30% vowels). Ties are resolved in the
        /// order japanese, arpa, xsampa, english, ipa.
        /// </remarks>
        /// <param name="lyrics">Raw lyrics.</param>
        /// <returns>One of "japanese", "arpa", "xsampa", "english" or "ipa" ("ipa" when nothing votes).</returns>
        private static string DetectNotation(List<string> lyrics)
        {
            var distinct = new HashSet<string>(StringComparer.Ordinal);
            foreach (var raw in lyrics)
            {
                string candidate = StripVcvPrefix(raw.Trim());
                candidate = s_trimDecorators.Replace(candidate, "").Trim();
                if (candidate.Length == 0 || s_restLyrics.Contains(candidate))
                {
                    continue;
                }
                distinct.Add(candidate);
            }
            if (distinct.Count == 0)
            {
                return "ipa";
            }

            var xsIndicators = new HashSet<char>("@&3690124578");
            var romajiKeys = new HashSet<string>(s_romaji.Select(x => x.Romaji), StringComparer.OrdinalIgnoreCase);

            static bool ContainsKana(string s)
            {
                foreach (char ch in s)
                {
                    if ((ch >= '぀' && ch <= 'ゟ') || (ch >= '゠' && ch <= 'ヿ'))
                    {
                        return true;
                    }
                }
                return false;
            }

            static bool IsAsciiLetter(char ch) => ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z';

            int japaneseVotes = 0, xsampaVotes = 0, englishVotes = 0, ipaVotes = 0, arpaVotes = 0;

            foreach (var item in distinct)
            {
                if (ContainsKana(item) || romajiKeys.Contains(item.ToLowerInvariant()))
                {
                    japaneseVotes++;
                    continue;
                }

                var tokens = item.Split(' ', StringSplitOptions.RemoveEmptyEntries);
                bool allArpa = tokens.Length > 0 &&
                    tokens.All(tok => s_arpa.ContainsKey(Regex.Replace(tok, "[012]$","").ToUpperInvariant()));
                if (allArpa)
                {
                    arpaVotes++;
                    continue;
                }

                bool looksXsampa =
                    item.Any(ch => xsIndicators.Contains(ch)) ||
                    (item.Any(char.IsUpper) && item.Any(char.IsLower)) ||
                    item.Contains("dh") || item.Contains("th") || item.Contains("zh") ||
                    item.Contains("sh") || item.Contains("ch") || item.Contains("ng");
                if (looksXsampa)
                {
                    xsampaVotes++;
                    continue;
                }

                if (s_ipaIndicators.Any(indicator => item.Contains(indicator)))
                {
                    ipaVotes++;
                    continue;
                }

                if (item.Length > 2 && item.All(IsAsciiLetter))
                {
                    int vowels = item.ToLowerInvariant().Count(ch => "aeiou".Contains(ch));
                    if (vowels >= item.Length * 0.3)
                    {
                        englishVotes++;
                    }
                }
            }

            int best = new[] { japaneseVotes, xsampaVotes, englishVotes, ipaVotes, arpaVotes }.Max();
            if (best == 0) return "ipa";
            if (japaneseVotes == best) return "japanese";
            if (arpaVotes == best) return "arpa";
            if (xsampaVotes == best) return "xsampa";
            if (englishVotes == best) return "english";
            return "ipa";
        }

        /// <summary>
        /// Converts a tick count to whole milliseconds at the given tempo.
        /// </summary>
        /// <param name="ticks">Duration in ticks; the formula treats 480 ticks as one beat.</param>
        /// <param name="tempo">Tempo in beats per minute.</param>
        /// <returns>The duration truncated to an integer number of milliseconds, never less than 1.</returns>
        private static int TicksToMs(int ticks, double tempo)
        {
            double beats = ticks / 480.0;
            double milliseconds = beats * 60_000.0 / tempo;

            // The cast truncates the fraction; the floor of 1 ms keeps every note audible.
            int whole = (int)milliseconds;
            return whole < 1 ? 1 : whole;
        }

        /// <summary>
        /// Formats a MIDI note number as a note name followed by its octave number
        /// (the octave is computed as <c>midi / 12 - 1</c>).
        /// </summary>
        /// <param name="midi">MIDI note number; values outside 0..127 are clamped into that range.</param>
        /// <returns>The <c>s_noteNames</c> entry for the pitch class, followed by the octave number.</returns>
        private static string MidiToNoteName(int midi)
        {
            midi = Math.Clamp(midi, 0, 127);
            int octave = midi / 12 - 1;

            // Notes 0..11 yield octave -1. Format with the invariant culture so the
            // sign is always ASCII '-'; some cultures use U+2212 as their negative
            // sign, which would change the emitted note name.
            return s_noteNames[midi % 12]
                + octave.ToString(System.Globalization.CultureInfo.InvariantCulture);
        }

        /// <summary>
        /// Converts a MIDI note number to a frequency in Hz, with note 69 mapped
        /// to 440 Hz and a factor of two per 12 notes.
        /// </summary>
        /// <param name="midi">MIDI note number (not range-checked here).</param>
        /// <returns>The frequency in Hz.</returns>
        private static double MidiToHz(int midi)
        {
            double octavesFromReference = (midi - 69) / 12.0;
            return 440.0 * Math.Pow(2.0, octavesFromReference);
        }

        /// <summary>
        /// Builds a signed pitch-offset string from a deviation given in cents.
        /// </summary>
        /// <param name="baseHz">Frequency the offset is relative to, in Hz.</param>
        /// <param name="cents">Deviation in cents (1200 cents double the frequency).</param>
        /// <param name="transient">When true the result is wrapped in parentheses.</param>
        /// <returns>
        /// The rounded offset in Hz with an explicit "+" or "-" sign, or an empty string
        /// when the offset rounds to zero.
        /// </returns>
        private static string PitchModifier(double baseHz, double cents, bool transient)
        {
            double ratio = Math.Pow(2.0, cents / 1200.0);
            int offsetHz = (int)Math.Round(baseHz * (ratio - 1.0));
            if (offsetHz == 0)
            {
                return "";
            }

            // The sign is written explicitly and the magnitude separately.
            string sign = offsetHz > 0 ? "+" : "-";
            string signedOffset = sign + Math.Abs(offsetHz).ToString();

            if (transient)
            {
                return "(" + signedOffset + ")";
            }
            return signedOffset;
        }

        /// <summary>
        /// Maps an internal phoneme code to its output token.
        /// </summary>
        /// <param name="phoneme">Phoneme code to look up in <c>s_phonToKlattsch</c>.</param>
        /// <param name="compatBank">Compatibility bank, or null when none is in use.</param>
        /// <returns>
        /// The mapped token, or the upper-cased phoneme when there is no mapping.
        /// A "DX" token is replaced by "R" when <paramref name="compatBank"/> is null.
        /// </returns>
        private static string MapPhoneme(string phoneme, string? compatBank)
        {
            if (!s_phonToKlattsch.TryGetValue(phoneme, out var mapped))
            {
                mapped = phoneme.ToUpperInvariant();
            }

            bool substituteForDx = compatBank is null && mapped == "DX";
            return substituteForDx ? "R" : mapped;
        }

        /// <summary>
        /// Transliterates kana in <paramref name="text"/> using the hiragana and katakana tables.
        /// </summary>
        /// <remarks>
        /// At each position the hiragana table is scanned first, then the katakana table;
        /// the first entry (in table order) that matches at that position wins.
        /// Characters matching no entry are copied through unchanged.
        /// </remarks>
        /// <param name="text">Input text, possibly mixing kana with other characters.</param>
        /// <returns>The text with every matched kana sequence replaced by its romaji.</returns>
        private static string KanaToRomaji(string text)
        {
            var output = new StringBuilder(text.Length * 2);

            // Scans one table at the given offset; on a hit appends the romaji,
            // advances the offset past the kana and reports success.
            bool TryConsume((string Kana, string Romaji)[] table, ref int offset)
            {
                foreach (var entry in table)
                {
                    int span = entry.Kana.Length;
                    bool fits = offset + span <= text.Length;
                    if (fits && string.CompareOrdinal(text, offset, entry.Kana, 0, span) == 0)
                    {
                        output.Append(entry.Romaji);
                        offset += span;
                        return true;
                    }
                }
                return false;
            }

            int cursor = 0;
            while (cursor < text.Length)
            {
                if (TryConsume(s_hiragana, ref cursor) || TryConsume(s_katakana, ref cursor))
                {
                    continue;
                }

                // Not kana: pass the character through as-is.
                output.Append(text[cursor]);
                cursor++;
            }
            return output.ToString();
        }

        /// <summary>
        /// Looks up the phoneme list for a single romaji syllable.
        /// </summary>
        /// <param name="romaji">Syllable to look up; it is lower-cased and trimmed before comparison.</param>
        /// <returns>
        /// The phoneme array of the first <c>s_romaji</c> entry whose key equals the
        /// normalised input (ordinal comparison), or null when no entry matches.
        /// </returns>
        private static string[]? RomajiToPhonemes(string romaji)
        {
            string needle = romaji.ToLowerInvariant().Trim();
            for (int i = 0; i < s_romaji.Length; i++)
            {
                // String == is an ordinal comparison.
                if (needle == s_romaji[i].Romaji)
                {
                    return s_romaji[i].Item2;
                }
            }
            return null;
        }

        /// <summary>
        /// Rewrites X-SAMPA symbols in <paramref name="text"/> to their IPA equivalents.
        /// </summary>
        /// <remarks>
        /// At each position the first <c>s_xsampa</c> entry (in table order) whose symbol
        /// matches is used; characters that match no symbol are copied through unchanged.
        /// </remarks>
        /// <param name="text">X-SAMPA input.</param>
        /// <returns>The converted string.</returns>
        private static string XsampaToIpa(string text)
        {
            var output = new StringBuilder(text.Length);

            // Index of the first table entry whose symbol occurs at 'offset', or -1.
            int FindSymbolAt(int offset)
            {
                for (int i = 0; i < s_xsampa.Length; i++)
                {
                    string symbol = s_xsampa[i].Sym;
                    if (offset + symbol.Length > text.Length)
                    {
                        continue;
                    }
                    if (string.CompareOrdinal(text, offset, symbol, 0, symbol.Length) == 0)
                    {
                        return i;
                    }
                }
                return -1;
            }

            int cursor = 0;
            while (cursor < text.Length)
            {
                int hit = FindSymbolAt(cursor);
                if (hit < 0)
                {
                    output.Append(text[cursor]);
                    cursor++;
                }
                else
                {
                    output.Append(s_xsampa[hit].Ipa);
                    cursor += s_xsampa[hit].Sym.Length;
                }
            }
            return output.ToString();
        }

        /// <summary>
        /// Converts an IPA string into phoneme codes using the <c>s_ipa</c> table.
        /// </summary>
        /// <remarks>
        /// Stress markers are removed first. At each position the first table entry
        /// (in table order) that matches contributes its codes; a character that
        /// matches no entry is skipped without producing output.
        /// </remarks>
        /// <param name="ipa">IPA input.</param>
        /// <returns>The collected codes; empty when nothing matched.</returns>
        private static string[] IpaToSharptalk(string ipa)
        {
            string stripped = s_stressMarkers.Replace(ipa, "");
            var collected = new List<string>();

            int cursor = 0;
            while (cursor < stripped.Length)
            {
                // Default step: drop one unmatched character.
                int advance = 1;
                for (int i = 0; i < s_ipa.Length; i++)
                {
                    string symbol = s_ipa[i].Ipa;
                    if (cursor + symbol.Length > stripped.Length)
                    {
                        continue;
                    }
                    if (string.CompareOrdinal(stripped, cursor, symbol, 0, symbol.Length) != 0)
                    {
                        continue;
                    }

                    collected.AddRange(s_ipa[i].Codes);
                    advance = symbol.Length;
                    break;
                }
                cursor += advance;
            }
            return collected.ToArray();
        }

        /// <summary>
        /// Converts one note lyric into phoneme codes according to the notation type.
        /// </summary>
        /// <param name="lyric">Raw lyric text of the note.</param>
        /// <param name="notationType">
        /// One of "japanese", "arpa", "arpa_jp", "xsampa", "english" or "ipa";
        /// any other value is treated as unrecognised and the lyric is recorded as unknown.
        /// </param>
        /// <param name="unknownLyrics">Receives lyrics (or individual tokens) that could not be mapped.</param>
        /// <returns>
        /// The phoneme codes; null for rests, extension lyrics, empty lyrics and lyrics that
        /// could not be mapped; an empty array for the romaji "q" in japanese mode.
        /// </returns>
        private static string[]? LyricToPhonemes(string lyric, string notationType, HashSet<string> unknownLyrics)
        {
            string text = lyric.Trim();
            if (text.Length == 0 || s_restLyrics.Contains(text) || s_extLyrics.Contains(text))
            {
                return null;
            }

            text = s_trimDecorators.Replace(StripVcvPrefix(text), "").Trim();
            if (text.Length == 0)
            {
                return null;
            }

            // Shared by the "arpa" and "arpa_jp" modes: maps each space-separated token
            // (with a trailing 0/1/2 digit removed) through the given table and
            // records tokens that are missing from it.
            string[]? MapTokens(Dictionary<string, string[]> table)
            {
                var collected = new List<string>();
                foreach (string token in text.Split(' ', StringSplitOptions.RemoveEmptyEntries))
                {
                    string key = Regex.Replace(token, "[012]$","").ToUpperInvariant();
                    if (table.TryGetValue(key, out var mapped))
                    {
                        collected.AddRange(mapped);
                    }
                    else
                    {
                        unknownLyrics.Add(token);
                    }
                }
                return collected.Count > 0 ? collected.ToArray() : null;
            }

            switch (notationType)
            {
                case "japanese":
                {
                    string romaji = KanaToRomaji(text);
                    if (string.IsNullOrEmpty(romaji))
                    {
                        romaji = text.ToLowerInvariant();
                    }

                    if (romaji is "r" or "-" or "_")
                    {
                        return null;
                    }
                    if (romaji == "q")
                    {
                        return Array.Empty<string>();
                    }

                    var syllable = RomajiToPhonemes(romaji);
                    if (syllable is not null)
                    {
                        return syllable;
                    }

                    // No romaji entry: fall through to the IPA attempt below.
                    break;
                }

                case "arpa":
                    return MapTokens(s_arpa);

                case "arpa_jp":
                    return MapTokens(s_arpa_jp);

                case "xsampa":
                    return IpaToSharptalk(XsampaToIpa(text));

                case "english":
                    unknownLyrics.Add(text);
                    return null;
            }

            if (notationType == "ipa" || notationType == "japanese")
            {
                var fromIpa = IpaToSharptalk(text);
                if (fromIpa.Length > 0)
                {
                    return fromIpa;
                }
            }

            unknownLyrics.Add(text);
            return null;
        }

        /// <summary>
        /// Builds the multi-line diagnostics report for a conversion.
        /// </summary>
        /// <param name="notationType">Notation type name, printed as the language.</param>
        /// <param name="tempo">Tempo in BPM, printed without decimals.</param>
        /// <param name="counts">Occurrence count per phoneme code.</param>
        /// <param name="unknownLyrics">Lyrics that could not be converted; printed in sorted order.</param>
        /// <returns>
        /// A header line, one line per entry of <c>s_phonCategories</c>, the unknown-lyrics line and,
        /// for the "english" notation, an explanatory note. Trailing whitespace is trimmed.
        /// </returns>
        private static string BuildDiagnostics(
            string notationType,
            double tempo,
            Dictionary<string, int> counts,
            HashSet<string> unknownLyrics)
        {
            var report = new StringBuilder();

            int phonemeTotal = counts.Values.Sum();
            report.AppendLine($"Language: {notationType} | Tempo: {tempo:F0} BPM | {phonemeTotal} phonemes");

            foreach (var (label, members) in s_phonCategories)
            {
                int categoryTotal = members.Sum(p => counts.GetValueOrDefault(p));
                if (categoryTotal == 0)
                {
                    report.AppendLine($"{label}: (none)");
                    continue;
                }

                // List only the phonemes of this category that actually occurred.
                var entries = new List<string>();
                foreach (var phoneme in members)
                {
                    int occurrences = counts.GetValueOrDefault(phoneme);
                    if (occurrences <= 0)
                    {
                        continue;
                    }

                    if (!s_phonToKlattsch.TryGetValue(phoneme, out var token))
                    {
                        token = phoneme.ToUpperInvariant();
                    }
                    entries.Add($"{token}:{occurrences}");
                }

                string detail = string.Join("  ", entries);
                report.AppendLine($"{label}: {categoryTotal}  ({detail})");
            }

            report.AppendLine(unknownLyrics.Count > 0
                ? "Unknown lyrics: " + string.Join(", ", unknownLyrics.OrderBy(x => x))
                : "Unknown lyrics: (none)");

            if (notationType == "english")
            {
                report.AppendLine("Note: espeak-ng unavailable in WebAssembly — English lyrics show as unknown");
            }

            return report.ToString().TrimEnd();
        }
    }
}