// Code/Phonemizer.cs
#nullable enable
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using static SharpTalk.AudioProcessor;
using static SharpTalk.Phonemizer.Normalizer;
namespace SharpTalk
{
public class Phonemizer
{
readonly DictReader _dict;
readonly DictReader _symbols;
public int StatDict { get; private set; }
public int StatMorph { get; private set; }
public int StatLts { get; private set; }
// Zeroes the three lookup counters (dictionary, morphology, letter-to-sound).
public void ResetStats()
{
    StatLts = 0;
    StatMorph = 0;
    StatDict = 0;
}
public DictReader Dict => _dict;
// Opcodes that are control codes, not actual phonemes (56-72)
const byte OP_STRESS1 = 56; // _Stress1_ -> kPrimaryStress
const byte OP_STRESS2 = 57; // _Stress2_ -> kSecondaryStress
const byte OP_EMPHSTRESS = 58; // _EmphStress_ -> kEmphaticStress
const byte OP_SYLL = 63; // _Syll_ -> kSyllable_Start
const byte OP_WORD = 64; // _Word_ -> kWord_Start
const byte OP_PREP = 65; // _Prep_ -> kPrep_Start
const byte OP_VERB = 66; // _Verb_ -> kVerb_Start
const byte OP_COMMA = 67; // _Comma_
const byte OP_PERIOD = 68; // _Period_
const byte OP_QUEST = 69; // _Quest_
const byte OP_EXCLAM = 70; // _Exclam_
// Function words do NOT receive kContent_Word, and their primary dictionary
// stress is demoted to secondary (see AppendWordTokens) so they don't drive
// pitch peaks in the BackEnd pitch algorithm.
// Mirrors the POS-based content/function distinction.
static readonly HashSet<string> FuncWords = new(StringComparer.OrdinalIgnoreCase)
{
// articles / determiners
"a", "an", "the",
// prepositions
"of", "in", "on", "at", "by", "for", "to", "up", "as", "into",
"from", "with", "about", "over", "under", "out", "off", "than",
// coordinating conjunctions
"and", "or", "but", "nor", "yet", "so",
// subordinating conjunctions
"if", "that", "than", "when", "while", "because", "though",
"although", "unless", "until", "since", "after", "before",
// auxiliaries & copula
"be", "am", "is", "are", "was", "were", "been", "being",
"have", "has", "had", "do", "does", "did",
"will", "would", "could", "should", "may", "might", "shall",
"can", "must", "ought",
// subject / object pronouns
"i", "he", "she", "we", "they", "you", "it",
"me", "him", "her", "us", "them",
// possessive determiners
"my", "your", "his", "its", "our", "their",
// other function words
"not", "no", "there", "here",
};
// Tokenizer applied to normalized text. Capture groups, in alternation order:
//   1 = run of digits
//   2 = word: letters, optionally with internal apostrophes ("isn't")
//   3 = clause punctuation  , ; :
//   4 = sentence punctuation  "..." . ! ? ~   ("..." is tried before a single ".")
//   5 = whitespace
// Characters that match none of the alternatives produce no match at all.
static readonly Regex TokenRe = new(
@"(\d+)|([a-zA-Z]+(?:'[a-zA-Z]+)*)|([,;:])|(\.\.\.|[.!?~])|(\s+)",
RegexOptions.Compiled);
// Zero-width split points for CamelCase/PascalCase identifiers:
//   - between a lowercase letter and the uppercase letter that follows it, and
//   - between an uppercase letter and an uppercase letter that starts a
//     capitalized word (uppercase followed by lowercase).
// Normalize replaces each such position with a single space.
static readonly Regex CamelSplit = new(
@"(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])",
RegexOptions.Compiled);
#if !SANDBOX
// Convenience constructor: forwards LibraryData.EnglishLex and LibraryData.Symbols
// to the two-argument constructor. Compiled out when SANDBOX is defined.
public Phonemizer() : this(LibraryData.EnglishLex, LibraryData.Symbols) { }
#endif
// Builds a phonemizer over two raw data images.
//   dictData    - bytes handed to the word-dictionary DictReader.
//   symbolsData - bytes handed to the symbol-table DictReader (looked up by
//                 AppendSymbol when speaking numbers).
public Phonemizer(byte[] dictData, byte[] symbolsData)
{
    var wordDict = new DictReader(dictData);
    var symbolTable = new DictReader(symbolsData);
    _dict = wordDict;
    _symbols = symbolTable;
}
public short LastEndPunct { get; private set; } = _Period_;
// Splits text into clauses and phonemizes each clause separately.
//
// Returns one (Tokens, EndPunct) pair per clause:
//   - command segments are skipped (handled by TtsEngine);
//   - each non-empty singing block becomes one clause with EndPunct 0;
//   - plain text is normalized, then cut after every clause mark (, ; :) and
//     every sentence mark (... . ! ? ~); EndPunct is the value LastEndPunct
//     holds after that clause has been phonemized;
//   - if nothing at all was produced, the whole text is run through
//     TextToPhonemes as a single clause.
//
// NOTE(review): TextSegmentToPhonemes normalizes its input again, so every
// plain-text clause passes through Normalize twice.
public (PhonemeToken[] Tokens, short EndPunct)[] TextToSentenceTokens(string text)
{
    var clauses = new List<(PhonemeToken[], short)>();

    // Phonemize one clause. LastEndPunct is read only after the call that sets it.
    void EmitClause(string clauseText)
    {
        PhonemeToken[] phonemes = TextSegmentToPhonemes(clauseText);
        clauses.Add((phonemes, LastEndPunct));
    }

    foreach (var seg in EmbeddedCmd.ParseSegments(text))
    {
        // Embedded commands are handled by TtsEngine, not here.
        if (seg.IsCommand) continue;

        if (seg.IsSinging)
        {
            // Each singing block is its own clause; never mixed with speech.
            if (seg.Singing!.Count > 0)
                clauses.Add((seg.Singing.ToArray(), 0));
            continue;
        }

        // Cut at sentence boundaries (group 4) and clause boundaries (group 3)
        // so each clause gets its own BackEnd.Process call and pitch resets cleanly.
        string plain = Normalize(seg.PlainText!);
        int cursor = 0;
        foreach (Match m in TokenRe.Matches(plain))
        {
            bool isBoundary = m.Groups[3].Success || m.Groups[4].Success;
            if (!isBoundary) continue;

            int end = m.Index + m.Length;
            EmitClause(plain.Substring(cursor, end - cursor));
            cursor = end;
        }

        // Text after the last punctuation mark, unless it is only whitespace.
        string tail = plain.Substring(cursor);
        if (!string.IsNullOrWhiteSpace(tail))
            EmitClause(tail);
    }

    if (clauses.Count == 0)
    {
        PhonemeToken[] everything = TextToPhonemes(text);
        clauses.Add((everything, LastEndPunct));
    }

    return clauses.ToArray();
}
// Process a pure-text span (no embedded commands) into phoneme tokens.
// Resets LastEndPunct to _Period_ first; afterwards LastEndPunct reflects the
// last punctuation mark seen in the span. All-caps words that the dictionary
// does not contain are spelled out letter by letter.
private PhonemeToken[] TextSegmentToPhonemes(string text)
{
    var tokens = new List<PhonemeToken>();
    LastEndPunct = _Period_;
    AppendPlainTextTokens(tokens, text, spellAcronyms: true);
    return tokens.ToArray();
}

// Converts text (which may contain embedded commands and singing blocks) into
// one flat phoneme token list. Command segments are skipped, singing tokens are
// copied through unchanged, and plain-text spans are normalized and phonemized.
// LastEndPunct starts at _Period_ and ends as the last punctuation mark seen.
public PhonemeToken[] TextToPhonemes(string text)
{
    var tokens = new List<PhonemeToken>();
    LastEndPunct = _Period_;

    // Ordered segments: plain text spans interleaved with singing blocks.
    foreach (var seg in EmbeddedCmd.ParseSegments(text))
    {
        if (seg.IsCommand) continue; // handled by TtsEngine, not FrontEnd
        if (seg.IsSinging)
        {
            tokens.AddRange(seg.Singing!);
            continue;
        }
        // This entry point does not spell out all-caps words; that behavior is kept.
        AppendPlainTextTokens(tokens, seg.PlainText!, spellAcronyms: false);
    }
    return tokens.ToArray();
}

// Shared tokenizer loop for one plain-text span: normalizes the text, walks
// the TokenRe matches and appends phoneme tokens to `tokens`.
//   spellAcronyms - when true, an all-caps word with no heteronym reading and
//                   no dictionary entry goes through SpellOutAcronym.
// Updates LastEndPunct for every clause/sentence punctuation mark; does NOT
// reset it (callers do that).
void AppendPlainTextTokens(List<PhonemeToken> tokens, string text, bool spellAcronyms)
{
    var matches = TokenRe.Matches(Normalize(text));

    // Pre-extract the upper-cased word list for heteronym context resolution.
    var ctxWords = new List<string>();
    foreach (Match wm in matches)
        if (wm.Groups[2].Success) ctxWords.Add(wm.Groups[2].Value.ToUpperInvariant());

    int wordIdx = 0; // index of the current word within ctxWords
    foreach (Match m in matches)
    {
        if (m.Groups[1].Success) // number
        {
            AppendNumberTokens(tokens, m.Groups[1].Value);
        }
        else if (m.Groups[2].Success) // word
        {
            string word = m.Groups[2].Value;
            string upper = word.ToUpperInvariant();
            byte[]? stream = HeteronymResolver.Resolve(ctxWords, wordIdx);
            if (stream == null && spellAcronyms && IsAllCaps(word) && _dict.Search(upper) == null)
                stream = SpellOutAcronym(upper);
            stream ??= WordToPhonStream(upper);
            AppendWordTokens(tokens, stream, !FuncWords.Contains(word));
            wordIdx++;
        }
        else if (m.Groups[3].Success) // , ; :
        {
            tokens.Add(new PhonemeToken
            {
                Phon = _SIL_,
                Ctrl = kTerm_Bound | ((long)kBND_Pause << kSilenceTypeShift),
            });
            LastEndPunct = _Comma_;
        }
        else if (m.Groups[4].Success) // ... . ! ? ~
        {
            LastEndPunct = m.Groups[4].Value switch
            {
                "..." => _Ellipsis_,
                "?" => _Quest_,
                "!" => _Exclam_,
                "~" => _Tilde_,
                _ => _Period_,
            };
        }
        // whitespace: skip
    }
}

// Appends the spoken form of a digit run. A run that parses as a long is read
// as one cardinal number. A run that does not parse (too many digits, or
// non-ASCII digits matched by \d) is read digit by digit instead of being
// dropped from the output.
void AppendNumberTokens(List<PhonemeToken> tokens, string digits)
{
    if (long.TryParse(digits, out long n))
    {
        AppendWordTokens(tokens, NumberToPhonStream(n), isContent: true);
        return;
    }

    foreach (char c in digits)
    {
        int d = (int)char.GetNumericValue(c); // -1 when c has no numeric value
        if (d >= 0 && d <= 9)
            AppendWordTokens(tokens, NumberToPhonStream(d), isContent: true);
    }
}
// Text normalization
// Nested static class keeps normalizer state (regexes, tables) out of the
// FrontEnd field list without a separate file.
internal static class Normalizer
{
// Repeated-syllable words: "hahaha" -> "ha ha ha", "lolol" -> "lol ol"
// Fires for 3+ repetitions of a 1–3 char unit. Rare in real English at that count.
// Non-greedy {1,3}? so "iiiiiiiii" splits on "i" not "iii".
static readonly Regex ReReduplicate = new(
@"\b([a-zA-Z]{1,3}?)\1{2,}\b", RegexOptions.Compiled);
static readonly Regex ReCurrency = new(
@"\$\s*(\d+)(?:\.(\d{1,2}))?", RegexOptions.Compiled);
static readonly Regex RePercent = new(
@"(\d+)\s*%", RegexOptions.Compiled);
static readonly Regex ReOrdinal = new(
@"\b(\d+)\s*(?:st|nd|rd|th)\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);
static readonly Regex ReDecimal = new(
@"\b(\d+)\.(\d+)\b", RegexOptions.Compiled);
// Years, 4-digit numbers in 1000–2099, not preceded by $ or another digit,
// not followed by ordinal suffixes, %, or another digit.
static readonly Regex ReYear = new(
@"(?<![.$€£\d])\b(1\d{3}|20\d{2})\b(?!\s*(?:st|nd|rd|th|%|\d))",
RegexOptions.Compiled | RegexOptions.IgnoreCase);
// Dotted abbreviations (e.g., i.e., a.m.) — matched before the regular
// abbreviation pass so their embedded periods don't split sentences.
static readonly Regex ReAbbrevDotted = new(
@"(?<!\w)(i\.e\.|e\.g\.|a\.m\.|p\.m\.|p\.s\.|w\.r\.t\.|b\.c\.|a\.d\.)(?!\w)",
RegexOptions.Compiled | RegexOptions.IgnoreCase);
static readonly Dictionary<string, string> AbbrevDottedMap =
new(StringComparer.OrdinalIgnoreCase)
{
["i.e."] = "that is",
["e.g."] = "for example",
["a.m."] = "ay em",
["p.m."] = "pee em",
["p.s."] = "postscript",
["w.r.t."] = "with regard to",
["b.c."] = "bee see",
["a.d."] = "ay dee",
};
static readonly Regex ReAbbrev = new(
@"\b(Dr|Mr|Mrs|Ms|Prof|Jr|Sr|Vs|Etc|St|Ave|Blvd|Rd|Ln"
+ @"|Jan|Feb|Mar|Apr|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec"
+ @"|Lt|Cpt|Capt|Gen|Sgt|Pvt|Col|Maj|Rev|Dept|Inc|Corp|Approx"
+ @"|Max|Min|Avg|Govt|Vol|Fig|Ref|Intl|Natl|Div|Asst|Mgr|Dir"
+ @"|Assoc|Admin|Est|Cont|Abbr|Attr|Dist|Pop|Temp|Tech|Elec)\.",
RegexOptions.Compiled | RegexOptions.IgnoreCase);
static readonly Dictionary<string, string> AbbrevMap =
new(StringComparer.OrdinalIgnoreCase)
{
// Titles
["Dr"] = "Doctor",
["Mr"] = "Mister",
["Mrs"] = "Missus",
["Ms"] = "Miss",
["Prof"] = "Professor",
["Jr"] = "Junior",
["Sr"] = "Senior",
// Common
["Vs"] = "versus",
["Etc"] = "etcetera",
["Approx"]= "approximately",
["Max"] = "maximum",
["Min"] = "minimum",
["Avg"] = "average",
["Vol"] = "volume",
["Fig"] = "figure",
["Ref"] = "reference",
["Est"] = "established",
["Cont"] = "continued",
["Abbr"] = "abbreviation",
["Attr"] = "attributed",
["Dist"] = "district",
["Pop"] = "population",
["Temp"] = "temperature",
["Tech"] = "technical",
["Elec"] = "electric",
// Addresses
["St"] = "Street",
["Ave"] = "Avenue",
["Blvd"] = "Boulevard",
["Rd"] = "Road",
["Ln"] = "Lane",
// Military / ranks
["Lt"] = "Lieutenant",
["Cpt"] = "Captain",
["Capt"] = "Captain",
["Gen"] = "General",
["Sgt"] = "Sergeant",
["Pvt"] = "Private",
["Col"] = "Colonel",
["Maj"] = "Major",
["Rev"] = "Reverend",
// Org
["Dept"] = "Department",
["Inc"] = "Incorporated",
["Corp"] = "Corporation",
["Govt"] = "government",
["Div"] = "division",
["Intl"] = "international",
["Natl"] = "national",
["Assoc"] = "association",
["Admin"] = "administration",
["Asst"] = "assistant",
["Mgr"] = "manager",
["Dir"] = "director",
// Months
["Jan"] = "January",
["Feb"] = "February",
["Mar"] = "March",
["Apr"] = "April",
["Jun"] = "June",
["Jul"] = "July",
["Aug"] = "August",
["Sep"] = "September",
["Sept"] = "September",
["Oct"] = "October",
["Nov"] = "November",
["Dec"] = "December",
};
static readonly string[] DigitWords =
new string[] { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" };
static readonly string[] TeenWords =
new string[] { "ten","eleven","twelve","thirteen","fourteen","fifteen",
"sixteen","seventeen","eighteen","nineteen" };
// Spells a cardinal using DigitWords, TeenWords and TensWords:
// single digits, teens, or "<tens> <digit>" for 20 and above.
// Values outside the tables' range (negative, or 100 and above) index past a
// table and throw.
static string SmallCardinal(int n)
{
    if (n >= 20)
    {
        string tens = TensWords[n / 10];
        int ones = n % 10;
        return ones > 0 ? tens + " " + DigitWords[ones] : tens;
    }
    if (n >= 10) return TeenWords[n - 10];
    return DigitWords[n]; // DigitWords[0] is "zero"
}
// Spells a 4-digit number the way a year is read aloud:
//   1000       -> "one thousand"   (not "ten hundred")
//   2000       -> "two thousand"
//   2001..2009 -> "two thousand <digit>"
//   xx00       -> "<xx> hundred"          e.g. 1900 -> "nineteen hundred"
//   xx0d       -> "<xx> oh <d>"           e.g. 1905 -> "nineteen oh five"
//   otherwise  -> "<xx> <yy>"             e.g. 1984 -> "nineteen eighty four"
// The two halves are spelled with SmallCardinal, so both must be below 100.
static string YearToWords(int y)
{
    // Round thousands read as plain cardinals.
    if (y == 1000) return "one thousand";
    if (y == 2000) return "two thousand";

    int hi = y / 100;
    int lo = y % 100;
    if (y > 2000 && y < 2010) return "two thousand " + SmallCardinal(lo);

    string hiPart = SmallCardinal(hi);
    if (lo == 0) return hiPart + " hundred";
    if (lo < 10) return hiPart + " oh " + SmallCardinal(lo);
    return hiPart + " " + SmallCardinal(lo);
}
static readonly string[] OnesOrd = new string[]
{
"zeroth","first","second","third","fourth","fifth","sixth","seventh",
"eighth","ninth","tenth","eleventh","twelfth","thirteenth","fourteenth",
"fifteenth","sixteenth","seventeenth","eighteenth","nineteenth",
};
static readonly string[] TensOrd = new string[]
{"","","twentieth","thirtieth","fortieth","fiftieth",
"sixtieth","seventieth","eightieth","ninetieth"};
static readonly string[] TensWords = new string[]
{"","","twenty","thirty","forty","fifty","sixty","seventy","eighty","ninety"};
// Spells an ordinal from 0 to 99 ("zeroth" .. "ninety ninth") using OnesOrd,
// TensOrd and TensWords. Negative values and values of 100 or more have no
// spelling here: the number's digits are returned as-is (cardinal fallback,
// rare as an ordinal).
static string OrdinalToWord(long n)
{
    if (n < 0 || n >= 100) return n.ToString();
    if (n < 20) return OnesOrd[n];

    long tens = n / 10;
    long ones = n % 10;
    if (ones == 0) return TensOrd[tens];
    return TensWords[tens] + " " + OnesOrd[ones];
}
// Matches, anchored at the search start position, text that must stop a
// 4-digit number from being read as a year: a decimal fraction (".5"), or the
// words the currency / percent / decimal steps write after an amount.
// Needed because those steps run before (or re-run after) the year step, so
// ReYear's own $ and % guards can no longer see the original symbols.
static readonly Regex ReYearBlocked = new(
    @"\G(?:\.\d|\s*(?:dollars?|percent|point)\b)",
    RegexOptions.Compiled | RegexOptions.IgnoreCase);

// Rewrites raw text into a form the tokenizer can read word by word.
// Steps, in order: CamelCase split, currency, percentages, ordinals, years,
// decimals, dotted abbreviations, single-dot abbreviations, dashes/hyphens,
// and expressive reduplication ("hahaha" -> "ha ha ha").
// Never throws on digit runs that do not fit a long/int or that use non-ASCII
// digits (\d matches any Unicode decimal digit): such matches are passed
// through as text instead of being parsed.
public static string Normalize(string text)
{
    // 0. Split CamelCase/PascalCase so "SharpTalk" -> "Sharp Talk"
    text = CamelSplit.Replace(text, " ");

    // 1. Currency — before decimal so $3.99 isn't split at the dot
    text = ReCurrency.Replace(text, m =>
    {
        string amount = m.Groups[1].Value;
        // Unparseable amounts keep their raw digits; the tokenizer deals with them.
        string r = long.TryParse(amount, out long dollars)
            ? dollars + " dollar" + (dollars == 1 ? "" : "s")
            : amount + " dollars";
        if (m.Groups[2].Success)
        {
            // One cents digit means tenths: ".5" -> "50".
            string cs = m.Groups[2].Value.PadRight(2, '0')[..2];
            if (!long.TryParse(cs, out long cents))
                r += " and " + cs + " cents";
            else if (cents > 0)
                r += " and " + cents + " cent" + (cents == 1 ? "" : "s");
        }
        return r;
    });

    // 2. Percentages
    text = RePercent.Replace(text, m => m.Groups[1].Value + " percent");

    // 3. Ordinals — before decimals to avoid "1.5th" oddities
    text = ReOrdinal.Replace(text, m =>
        long.TryParse(m.Groups[1].Value, out long n) ? OrdinalToWord(n) : m.Value);

    // 4. Years — 4-digit numbers read as pairs ("nineteen eighty four"),
    //    except amounts ("1999 dollars", "1999 percent") and decimals ("1999.5").
    string yearSource = text;
    text = ReYear.Replace(yearSource, m =>
    {
        if (ReYearBlocked.IsMatch(yearSource, m.Index + m.Length)) return m.Value;
        return int.TryParse(m.Value, out int year) ? YearToWords(year) : m.Value;
    });

    // 5. Decimal numbers — spell each digit after the point individually
    text = ReDecimal.Replace(text, m =>
    {
        var spoken = new System.Text.StringBuilder(m.Groups[1].Value).Append(" point");
        foreach (char c in m.Groups[2].Value)
        {
            int d = (int)char.GetNumericValue(c); // handles non-ASCII digits too
            if (d < 0 || d > 9) return m.Value;
            spoken.Append(' ').Append(DigitWords[d]);
        }
        return spoken.ToString();
    });

    // 6. Dotted abbreviations (i.e., e.g., a.m. …) — must run before step 7
    //    so their embedded periods don't trigger sentence splitting.
    text = ReAbbrevDotted.Replace(text, m =>
        AbbrevDottedMap.TryGetValue(m.Value, out var v) ? v : m.Value);

    // 7. Single-dot abbreviations. The match includes the trailing period, so
    //    the expansion replaces it. A match with no table entry is left alone.
    text = ReAbbrev.Replace(text, m =>
        AbbrevMap.TryGetValue(m.Groups[1].Value, out var full) ? full : m.Value);

    // 8. Em-dash, en-dash, double-hyphen -> sentence break; plain hyphens -> space
    text = text.Replace("—", ". ").Replace("–", ". ").Replace("--", ". ");
    text = text.Replace('-', ' ');

    // 9. Expressive reduplication: "hahaha" -> "ha ha ha"
    text = ReReduplicate.Replace(text, m =>
    {
        string unit = m.Groups[1].Value;
        int count = m.Value.Length / unit.Length;
        return string.Join(" ", System.Linq.Enumerable.Repeat(unit, count));
    });

    return text;
}
}
// Word -> raw phoneme stream
// Hardcoded letter pronunciations — A-Z indexed by (char - 'A').
// Stress marker 56=STRESS1 placed immediately before the stressed vowel.
// Never routed through dict or LTS so missing entries can't break them.
static readonly byte[][] LetterPhonemes =
{
new byte[]{ 56,10 }, // A -> EY
new byte[]{ 45,56, 0 }, // B -> B IY
new byte[]{ 40,56, 0 }, // C -> S IY
new byte[]{ 47,56, 0 }, // D -> D IY
new byte[]{ 56, 0 }, // E -> IY
new byte[]{ 56, 2,36 }, // F -> EH F
new byte[]{ 51,56, 0 }, // G -> JH IY
new byte[]{ 56,10,50 }, // H -> EY CH (aitch)
new byte[]{ 56,11 }, // I -> AY
new byte[]{ 51,56,10 }, // J -> JH EY
new byte[]{ 48,56,10 }, // K -> K EY
new byte[]{ 56, 2,31 }, // L -> EH L
new byte[]{ 56, 2,33 }, // M -> EH M
new byte[]{ 56, 2,34 }, // N -> EH N
new byte[]{ 56,14 }, // O -> OW
new byte[]{ 44,56, 0 }, // P -> P IY
new byte[]{ 48,29,56,15 }, // Q -> K Y UW (cue)
new byte[]{ 56, 4,30 }, // R -> AA R
new byte[]{ 56, 2,40 }, // S -> EH S
new byte[]{ 46,56, 0 }, // T -> T IY
new byte[]{ 29,56,15 }, // U -> Y UW
new byte[]{ 37,56, 0 }, // V -> V IY
new byte[]{ 47,56, 5,45, 8,31,29,56,15 }, // W -> D AH B AX L Y UW (double-you)
new byte[]{ 56, 2,48,40 }, // X -> EH K S
new byte[]{ 28,56,11 }, // Y -> W AY
new byte[]{ 41,56, 0 }, // Z -> Z IY
};
// Phoneme sequences for every word the normalizer can produce.
// Checked before dict + LTS so dictionary swaps never affect normalizer output.
static readonly Dictionary<string, byte[]> NormWords = new()
{
// Digits
["ZERO"] = new byte[]{ 0x29,0x38,0x01,0x1E,0x0E },
["ONE"] = new byte[]{ 0x1C,0x38,0x05,0x22 },
["TWO"] = new byte[]{ 0x2E,0x38,0x0F },
["THREE"]= new byte[]{ 0x26,0x1E,0x38,0x00 },
["FOUR"] = new byte[]{ 0x24,0x38,0x06,0x1E },
["FIVE"] = new byte[]{ 0x24,0x38,0x0B,0x25 },
["SIX"] = new byte[]{ 0x28,0x38,0x01,0x30,0x28 },
["SEVEN"]= new byte[]{ 0x28,0x38,0x02,0x25,0x08,0x22 },
["EIGHT"]= new byte[]{ 0x38,0x0A,0x2E },
["NINE"] = new byte[]{ 0x22,0x38,0x0B,0x22 },
// Teens
["TEN"] = new byte[]{ 0x2E,0x38,0x02,0x22 },
["ELEVEN"] = new byte[]{ 0x16,0x1F,0x38,0x02,0x25,0x08,0x22 },
["TWELVE"] = new byte[]{ 0x2E,0x1C,0x38,0x02,0x1F,0x25 },
["THIRTEEN"] = new byte[]{ 0x26,0x38,0x09,0x2E,0x38,0x00,0x22 },
["FOURTEEN"] = new byte[]{ 0x24,0x38,0x06,0x1E,0x2E,0x38,0x00,0x22 },
["FIFTEEN"] = new byte[]{ 0x24,0x16,0x24,0x2E,0x38,0x00,0x22 },
["SIXTEEN"] = new byte[]{ 0x28,0x16,0x30,0x28,0x2E,0x38,0x00,0x22 },
["SEVENTEEN"]= new byte[]{ 0x28,0x38,0x02,0x25,0x08,0x22,0x2E,0x38,0x00,0x22 },
["EIGHTEEN"] = new byte[]{ 0x0A,0x2E,0x38,0x00,0x22 },
["NINETEEN"] = new byte[]{ 0x22,0x38,0x0B,0x22,0x2E,0x38,0x00,0x22 },
// Tens
["TWENTY"] = new byte[]{ 0x2E,0x1C,0x38,0x02,0x22,0x2E,0x00 },
["THIRTY"] = new byte[]{ 0x26,0x38,0x09,0x2F,0x39,0x00 },
["FORTY"] = new byte[]{ 0x24,0x38,0x06,0x1E,0x2E,0x00 },
["FIFTY"] = new byte[]{ 0x24,0x38,0x01,0x24,0x2E,0x00 },
["SIXTY"] = new byte[]{ 0x28,0x38,0x01,0x30,0x28,0x2E,0x00 },
["SEVENTY"]= new byte[]{ 0x28,0x38,0x02,0x25,0x08,0x22,0x2E,0x00 },
["EIGHTY"] = new byte[]{ 0x38,0x0A,0x2E,0x00 },
["NINETY"] = new byte[]{ 0x22,0x38,0x0B,0x22,0x2E,0x00 },
// Large / misc number
["HUNDRED"] = new byte[]{ 0x20,0x38,0x05,0x22,0x2F,0x1E,0x08,0x2F },
["THOUSAND"] = new byte[]{ 0x26,0x38,0x0D,0x29,0x08,0x22,0x2F },
["MILLION"] = new byte[]{ 0x21,0x38,0x01,0x1F,0x1D,0x08,0x22 },
["BILLION"] = new byte[]{ 0x2D,0x38,0x01,0x1F,0x1D,0x08,0x22 },
["OH"] = new byte[]{ 0x38,0x0E },
["POINT"] = new byte[]{ 0x2C,0x38,0x0C,0x22,0x2E },
["AND"] = new byte[]{ 0x08,0x22,0x2F },
// Currency / percent
["DOLLAR"] = new byte[]{ 0x2F,0x38,0x04,0x1F,0x09 },
["DOLLARS"] = new byte[]{ 0x2F,0x38,0x04,0x1F,0x09,0x29 },
["CENT"] = new byte[]{ 0x28,0x38,0x02,0x22,0x2E },
["CENTS"] = new byte[]{ 0x28,0x38,0x02,0x22,0x2E,0x28 },
["PERCENT"] = new byte[]{ 0x2C,0x09,0x28,0x38,0x02,0x22,0x2E },
// Ordinals
["ZEROTH"] = new byte[]{ 0x29,0x38,0x00,0x1E,0x0E,0x26 },
["FIRST"] = new byte[]{ 0x24,0x38,0x09,0x28,0x2E },
["SECOND"] = new byte[]{ 0x28,0x38,0x02,0x30,0x08,0x22,0x2F },
["THIRD"] = new byte[]{ 0x26,0x38,0x09,0x2F },
["FOURTH"] = new byte[]{ 0x24,0x38,0x06,0x1E,0x26 },
["FIFTH"] = new byte[]{ 0x24,0x38,0x01,0x24,0x26 },
["SIXTH"] = new byte[]{ 0x28,0x38,0x01,0x30,0x28,0x26 },
["SEVENTH"] = new byte[]{ 0x28,0x38,0x02,0x25,0x08,0x22,0x26 },
["EIGHTH"] = new byte[]{ 0x38,0x0A,0x2E,0x26 },
["NINTH"] = new byte[]{ 0x22,0x38,0x0B,0x22,0x26 },
["TENTH"] = new byte[]{ 0x2E,0x38,0x02,0x22,0x26 },
["ELEVENTH"] = new byte[]{ 0x16,0x1F,0x38,0x02,0x25,0x08,0x22,0x26 },
["TWELFTH"] = new byte[]{ 0x2E,0x1C,0x38,0x02,0x1F,0x24,0x26 },
["THIRTEENTH"] = new byte[]{ 0x26,0x38,0x09,0x2E,0x38,0x00,0x22,0x26 },
["FOURTEENTH"] = new byte[]{ 0x24,0x38,0x06,0x1E,0x2E,0x38,0x00,0x22,0x26 },
["FIFTEENTH"] = new byte[]{ 0x24,0x16,0x24,0x2E,0x38,0x00,0x22,0x26 },
["SIXTEENTH"] = new byte[]{ 0x28,0x16,0x30,0x28,0x2E,0x38,0x00,0x22,0x26 },
["SEVENTEENTH"]= new byte[]{ 0x28,0x38,0x02,0x25,0x08,0x22,0x2E,0x38,0x00,0x22,0x26 },
["EIGHTEENTH"] = new byte[]{ 0x0A,0x2E,0x38,0x00,0x22,0x26 },
["NINETEENTH"] = new byte[]{ 0x22,0x38,0x0B,0x22,0x2E,0x38,0x00,0x22,0x26 },
["TWENTIETH"] = new byte[]{ 0x2E,0x1C,0x38,0x02,0x22,0x2E,0x00,0x08,0x26 },
["THIRTIETH"] = new byte[]{ 0x26,0x38,0x09,0x2E,0x00,0x08,0x26 },
["FORTIETH"] = new byte[]{ 0x24,0x38,0x06,0x1E,0x2E,0x00,0x16,0x26 },
["FIFTIETH"] = new byte[]{ 0x24,0x38,0x01,0x24,0x2E,0x00,0x16,0x26 },
["SIXTIETH"] = new byte[]{ 0x28,0x38,0x01,0x30,0x28,0x2E,0x00,0x16,0x26 },
["SEVENTIETH"] = new byte[]{ 0x28,0x38,0x02,0x25,0x08,0x22,0x2E,0x00,0x16,0x26 },
["EIGHTIETH"] = new byte[]{ 0x38,0x0A,0x2E,0x00,0x16,0x26 },
["NINETIETH"] = new byte[]{ 0x22,0x38,0x0B,0x22,0x2E,0x00,0x16,0x26 },
// Letter names (used by dotted abbreviation expansions)
["AY"] = new byte[]{ 0x38,0x0A },
["BEE"] = new byte[]{ 0x2D,0x38,0x00 },
["SEE"] = new byte[]{ 0x28,0x38,0x00 },
["DEE"] = new byte[]{ 0x2F,0x38,0x00 },
["EF"] = new byte[]{ 0x38,0x02,0x24 },
["EM"] = new byte[]{ 0x38,0x02,0x21 },
["PEE"] = new byte[]{ 0x2C,0x38,0x00 },
// Dotted abbreviation expansions
["THAT"] = new byte[]{ 0x27,0x38,0x03,0x2E },
["IS"] = new byte[]{ 0x38,0x01,0x29 },
["FOR"] = new byte[]{ 0x24,0x38,0x06,0x1E },
["EXAMPLE"] = new byte[]{ 0x16,0x31,0x29,0x38,0x03,0x21,0x2C,0x08,0x1F },
["POSTSCRIPT"]= new byte[]{ 0x2C,0x38,0x0E,0x28,0x30,0x1E,0x39,0x01,0x2C,0x2E },
["WITH"] = new byte[]{ 0x1C,0x38,0x01,0x27 },
["REGARD"] = new byte[]{ 0x1E,0x16,0x31,0x38,0x04,0x1E,0x2F },
["TO"] = new byte[]{ 0x2E,0x38,0x0F },
// Titles
["DOCTOR"] = new byte[]{ 0x2F,0x38,0x04,0x30,0x2E,0x09 },
["MISTER"] = new byte[]{ 0x21,0x38,0x01,0x28,0x2E,0x09 },
["MISSUS"] = new byte[]{ 0x21,0x38,0x01,0x28,0x16,0x29 },
["MISS"] = new byte[]{ 0x21,0x38,0x01,0x28 },
["PROFESSOR"] = new byte[]{ 0x2C,0x1E,0x08,0x24,0x38,0x02,0x28,0x09 },
["JUNIOR"] = new byte[]{ 0x33,0x38,0x0F,0x22,0x1D,0x09 },
["SENIOR"] = new byte[]{ 0x28,0x38,0x00,0x22,0x1D,0x09 },
// Common abbreviation expansions
["VERSUS"] = new byte[]{ 0x25,0x38,0x09,0x28,0x08,0x28 },
["ETCETERA"] = new byte[]{ 0x38,0x02,0x2E,0x28,0x38,0x02,0x2E,0x09,0x08 },
["APPROXIMATELY"] = new byte[]{ 0x08,0x2C,0x1E,0x38,0x04,0x30,0x28,0x08,0x21,0x08,0x2E,0x1F,0x00 },
["MAXIMUM"] = new byte[]{ 0x21,0x38,0x03,0x30,0x28,0x08,0x21,0x08,0x21 },
["MINIMUM"] = new byte[]{ 0x21,0x38,0x01,0x22,0x08,0x21,0x08,0x21 },
["AVERAGE"] = new byte[]{ 0x38,0x03,0x25,0x09,0x16,0x33 },
["VOLUME"] = new byte[]{ 0x25,0x38,0x04,0x1F,0x1D,0x0F,0x21 },
["FIGURE"] = new byte[]{ 0x24,0x38,0x01,0x31,0x1D,0x09 },
["REFERENCE"] = new byte[]{ 0x1E,0x38,0x02,0x24,0x09,0x08,0x22,0x28 },
["ESTABLISHED"] = new byte[]{ 0x16,0x28,0x2E,0x38,0x03,0x2D,0x1F,0x16,0x2A,0x2E },
["CONTINUED"] = new byte[]{ 0x30,0x08,0x22,0x2E,0x38,0x01,0x22,0x1D,0x0F,0x2F },
["ABBREVIATION"] = new byte[]{ 0x08,0x2D,0x1E,0x39,0x00,0x25,0x00,0x38,0x0A,0x2A,0x08,0x22 },
["ATTRIBUTED"] = new byte[]{ 0x08,0x2E,0x1E,0x38,0x01,0x2D,0x1D,0x08,0x2E,0x16,0x2F },
["DISTRICT"] = new byte[]{ 0x2F,0x38,0x01,0x28,0x2E,0x1E,0x16,0x30,0x2E },
["POPULATION"] = new byte[]{ 0x2C,0x39,0x04,0x2C,0x1D,0x08,0x1F,0x38,0x0A,0x2A,0x08,0x22 },
["TEMPERATURE"] = new byte[]{ 0x2E,0x38,0x02,0x21,0x2C,0x1E,0x08,0x32,0x09 },
["TECHNICAL"] = new byte[]{ 0x2E,0x38,0x02,0x30,0x22,0x16,0x30,0x08,0x1F },
["ELECTRIC"] = new byte[]{ 0x16,0x1F,0x38,0x02,0x30,0x2E,0x1E,0x16,0x30 },
// Address
["STREET"] = new byte[]{ 0x28,0x2E,0x1E,0x38,0x00,0x2E },
["AVENUE"] = new byte[]{ 0x38,0x03,0x25,0x08,0x22,0x39,0x0F },
["BOULEVARD"] = new byte[]{ 0x2D,0x38,0x07,0x1F,0x08,0x25,0x39,0x04,0x1E,0x2F },
["ROAD"] = new byte[]{ 0x1E,0x38,0x0E,0x2F },
["LANE"] = new byte[]{ 0x1F,0x38,0x0A,0x22 },
// Military
["LIEUTENANT"] = new byte[]{ 0x1F,0x0F,0x2E,0x38,0x02,0x22,0x08,0x22,0x2E },
["CAPTAIN"] = new byte[]{ 0x30,0x38,0x03,0x2C,0x2E,0x08,0x22 },
["GENERAL"] = new byte[]{ 0x33,0x38,0x02,0x22,0x09,0x08,0x1F },
["SERGEANT"] = new byte[]{ 0x28,0x38,0x04,0x1E,0x33,0x08,0x22,0x2E },
["PRIVATE"] = new byte[]{ 0x2C,0x1E,0x38,0x0B,0x25,0x08,0x2E },
["COLONEL"] = new byte[]{ 0x30,0x38,0x09,0x22,0x08,0x1F },
["MAJOR"] = new byte[]{ 0x21,0x38,0x0A,0x33,0x09 },
["REVEREND"] = new byte[]{ 0x1E,0x38,0x02,0x25,0x09,0x08,0x22,0x2F },
// Org
["DEPARTMENT"] = new byte[]{ 0x2F,0x16,0x2C,0x38,0x04,0x1E,0x2E,0x21,0x08,0x22,0x2E },
["INCORPORATED"] = new byte[]{ 0x39,0x01,0x22,0x30,0x38,0x06,0x1E,0x2C,0x09,0x39,0x0A,0x2E,0x16,0x2F },
["CORPORATION"] = new byte[]{ 0x30,0x39,0x06,0x1E,0x2C,0x09,0x38,0x0A,0x2A,0x08,0x22 },
["GOVERNMENT"] = new byte[]{ 0x31,0x38,0x05,0x25,0x09,0x21,0x08,0x22,0x2E },
["DIVISION"] = new byte[]{ 0x2F,0x16,0x25,0x38,0x01,0x2B,0x08,0x22 },
["INTERNATIONAL"] = new byte[]{ 0x39,0x01,0x22,0x2E,0x09,0x22,0x38,0x03,0x2A,0x08,0x22,0x08,0x1F },
["NATIONAL"] = new byte[]{ 0x22,0x38,0x03,0x2A,0x08,0x22,0x08,0x1F },
["ASSOCIATION"] = new byte[]{ 0x08,0x28,0x39,0x0E,0x28,0x00,0x38,0x0A,0x2A,0x08,0x22 },
["ADMINISTRATION"]= new byte[]{ 0x03,0x2F,0x21,0x39,0x01,0x22,0x16,0x28,0x2E,0x1E,0x38,0x0A,0x2A,0x08,0x22 },
["ASSISTANT"] = new byte[]{ 0x08,0x28,0x38,0x01,0x28,0x2E,0x08,0x22,0x2E },
["MANAGER"] = new byte[]{ 0x21,0x38,0x03,0x22,0x08,0x33,0x09 },
["DIRECTOR"] = new byte[]{ 0x2F,0x09,0x38,0x02,0x30,0x2E,0x09 },
// Months
["JANUARY"] = new byte[]{ 0x33,0x38,0x03,0x22,0x1D,0x0F,0x39,0x02,0x1E,0x00 },
["FEBRUARY"] = new byte[]{ 0x24,0x38,0x02,0x2D,0x1D,0x08,0x1C,0x39,0x02,0x1E,0x00 },
["MARCH"] = new byte[]{ 0x21,0x38,0x04,0x1E,0x32 },
["APRIL"] = new byte[]{ 0x38,0x0A,0x2C,0x1E,0x08,0x1F },
["JUNE"] = new byte[]{ 0x33,0x38,0x0F,0x22 },
["JULY"] = new byte[]{ 0x33,0x39,0x0F,0x1F,0x38,0x0B },
["AUGUST"] = new byte[]{ 0x38,0x04,0x31,0x08,0x28,0x2E },
["SEPTEMBER"]= new byte[]{ 0x28,0x02,0x2C,0x2E,0x38,0x02,0x21,0x2D,0x09 },
["OCTOBER"] = new byte[]{ 0x04,0x30,0x2E,0x38,0x0E,0x2D,0x09 },
["NOVEMBER"] = new byte[]{ 0x22,0x0E,0x25,0x38,0x02,0x21,0x2D,0x09 },
["DECEMBER"] = new byte[]{ 0x2F,0x16,0x28,0x38,0x02,0x21,0x2D,0x09 },
};
// Spells a word letter by letter using the hardcoded LetterPhonemes table;
// no dictionary lookup and no letter-to-sound rules are involved.
// Every A-Z letter contributes an OP_WORD marker followed by that letter's
// phonemes, so each letter starts its own word. Other characters are ignored.
byte[] SpellOutAcronym(string upper)
{
    var stream = new List<byte>(upper.Length * 4);
    for (int i = 0; i < upper.Length; i++)
    {
        char letter = upper[i];
        bool isAsciiUpper = letter >= 'A' && letter <= 'Z';
        if (!isAsciiUpper) continue;

        stream.Add(OP_WORD);
        stream.AddRange(LetterPhonemes[letter - 'A']);
    }
    return stream.ToArray();
}
// True when the word has at least two characters and every one of them is an
// ASCII capital letter A-Z.
static bool IsAllCaps(string word)
{
    int i = 0;
    while (i < word.Length && word[i] >= 'A' && word[i] <= 'Z')
        i++;
    // i stops early at the first character outside A-Z.
    return word.Length >= 2 && i == word.Length;
}
// Converts one upper-cased word into a raw phoneme stream that starts with
// OP_WORD. Sources are tried in this order:
//   0. NormWords, the normalizer word table (bypasses the dictionary entirely);
//   1. the dictionary (counted in StatDict);
//   2. morphological decomposition via Morph.TryDecompose (counted in StatMorph);
//   3. letter-to-sound rules (counted in StatLts).
// Steps 1 and 2 look the word up with apostrophes removed; steps 0 and 3 see
// the word as given.
byte[] WordToPhonStream(string upperWord)
{
    // Prefix a phoneme sequence with the OP_WORD marker.
    static byte[] WithWordMarker(byte[] phonemes)
    {
        var marked = new byte[phonemes.Length + 1];
        marked[0] = OP_WORD;
        phonemes.CopyTo(marked, 1);
        return marked;
    }

    // Contractions are stored in the dict without apostrophes ("ISN'T" -> "ISNT").
    string lookupWord = upperWord.Contains('\'')
        ? upperWord.Replace("'", "")
        : upperWord;

    if (NormWords.TryGetValue(upperWord, out var normPhons))
        return WithWordMarker(normPhons);

    byte[]? phons = _dict.Search(lookupWord);
    if (phons != null)
    {
        StatDict++;
    }
    else
    {
        phons = Morph.TryDecompose(lookupWord, _dict);
        if (phons != null)
        {
            StatMorph++;
        }
        else
        {
            phons = LetterToSound.Convert(upperWord);
            StatLts++;
        }
    }
    return WithWordMarker(phons);
}
// Number -> raw phoneme stream.
// Collects whatever BuildNumberPhons writes for n into a fresh byte array.
byte[] NumberToPhonStream(long n)
{
    List<byte> phonemes = new();
    BuildNumberPhons(phonemes, n);
    return phonemes.ToArray();
}
// Appends the spoken form of n to buf, built from symbol-table entries:
// billions, millions and thousands are handled recursively, followed by the
// hundreds digit, then tens/teens/ones. Zero is spoken only when n itself is 0.
// NOTE(review): for negative n, the value is negated and recursed on; negating
// long.MinValue cannot yield a positive value, so that input is not handled.
void BuildNumberPhons(List<byte> buf, long n)
{
    if (n < 0)
    {
        // "minus" via billion slot TODO: add MINUS to symbols
        AppendSymbol(buf, "1E3");
        BuildNumberPhons(buf, -n);
        return;
    }
    if (n == 0)
    {
        AppendSymbol(buf, "0");
        return;
    }

    // Speak "<count> <scale word>" for one power-of-1000 group and keep the remainder.
    void SpeakGroup(long scale, string symbol)
    {
        if (n < scale) return;
        BuildNumberPhons(buf, n / scale);
        AppendSymbol(buf, symbol);
        n %= scale;
    }

    SpeakGroup(1_000_000_000, "1E3"); // billion
    SpeakGroup(1_000_000, "1E2");     // million
    SpeakGroup(1_000, "1E1");         // thousand

    if (n >= 100)
    {
        AppendDigit(buf, (int)(n / 100));
        AppendSymbol(buf, "100"); // hundred
        n %= 100;
    }

    int rest = (int)n; // 0..99 from here on
    if (rest >= 20)
    {
        AppendTens(buf, rest / 10);
        int ones = rest % 10;
        if (ones > 0) AppendDigit(buf, ones);
    }
    else if (rest >= 10)
    {
        AppendTeen(buf, rest);
    }
    else if (rest > 0)
    {
        AppendDigit(buf, rest);
    }
}
static readonly string[] DigitNames = new string[] { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" };
static readonly string[] TeenNames = new string[] { "10", "11", "12", "13", "14", "15", "16", "17", "18", "19" };
static readonly string[] TensNames = new string[] { "", "", "20", "30", "40", "50", "60", "70", "80", "90" };
// Appends the symbol for a single digit 0-9 (key taken from DigitNames).
void AppendDigit(List<byte> buf, int d)
{
    AppendSymbol(buf, DigitNames[d]);
}

// Appends the symbol for a teen 10-19 (key taken from TeenNames).
void AppendTeen(List<byte> buf, int n)
{
    AppendSymbol(buf, TeenNames[n - 10]);
}

// Appends the symbol for a multiple of ten; t is the tens digit (key from TensNames).
void AppendTens(List<byte> buf, int t)
{
    AppendSymbol(buf, TensNames[t]);
}
// Looks sym up in the symbol table and appends its phonemes to buf.
// An empty buf first receives the OP_WORD marker, whether or not the lookup
// succeeds; a missing symbol appends nothing further.
void AppendSymbol(List<byte> buf, string sym)
{
    if (buf.Count == 0)
        buf.Add(OP_WORD);

    if (_symbols.Search(sym) is byte[] found)
        buf.AddRange(found);
}
// Stream -> PhonemeToken list
// Decodes a raw phoneme stream and appends the resulting tokens to `tokens`.
//   stream    - bytes where values up to 55 are phonemes and the OP_* values
//               are control codes.
//   isContent - true for content words, false for function words.
// Control codes do not produce tokens of their own (except the punctuation
// codes); they accumulate in `pending` and are attached to the next phoneme.
void AppendWordTokens(List<PhonemeToken> tokens, byte[] stream, bool isContent)
{
// Control flags waiting to be attached to the next phoneme token.
long pending = 0;
// Index of the first token this call appends; bounds the promotion pass below.
int startIdx = tokens.Count;
// Set once a primary stress has been applied to this word.
bool hadPrimary = false;
foreach (byte b in stream)
{
switch (b)
{
case OP_WORD:
pending |= kWord_Start;
if (isContent) pending |= kContent_Word;
break;
case OP_STRESS1:
// Function words: demote dict primary stress to secondary so they
// don't trigger pitch peaks in the BackEnd pitch algorithm.
if (isContent) { pending |= kPrimaryStress; hadPrimary = true; }
else pending |= kSecondaryStress;
break;
case OP_STRESS2: pending |= kSecondaryStress; break;
case OP_EMPHSTRESS: pending |= kEmphaticStress; break;
case OP_SYLL: pending |= kSyllable_Start; break;
case OP_PREP: pending |= kPrep_Start; break;
case OP_VERB: pending |= kVerb_Start; break;
// Punctuation codes become their own boundary token and discard any
// flags that were still pending.
case OP_COMMA:
case OP_PERIOD:
case OP_QUEST:
case OP_EXCLAM:
tokens.Add(new PhonemeToken { Phon = (short)b, Ctrl = kTerm_Bound });
pending = 0;
break;
default:
// Values up to 55 are phonemes; any other unhandled code is ignored
// and leaves the pending flags untouched.
if (b <= 55)
{
tokens.Add(new PhonemeToken { Phon = (short)b, Ctrl = pending });
pending = 0;
}
break;
}
}
// Content word with only secondary stress: promote to primary so the pitch
// algorithm has a peak to work with on words like "how".
if (isContent && !hadPrimary)
{
for (int i = startIdx; i < tokens.Count; i++)
{
if ((tokens[i].Ctrl & kSecondaryStress) != 0)
{
// Rebuild the token with the stress bit swapped, copying the other fields.
tokens[i] = new PhonemeToken
{
Phon = tokens[i].Phon,
Ctrl = (tokens[i].Ctrl & ~kSecondaryStress) | kPrimaryStress,
UserPitch = tokens[i].UserPitch,
UserDur = tokens[i].UserDur,
UserNote = tokens[i].UserNote,
UserRate = tokens[i].UserRate,
};
// Only the first secondary-stressed token is promoted.
break;
}
}
}
}
}
} // namespace