Code/HeteronymResolver.cs
#nullable enable
using System.Collections.Generic;
using static SharpTalk.AudioProcessor;
namespace SharpTalk
{
// Contextual heteronym disambiguation.
// For each ambiguous word we store a default pronunciation and a list of
// context rules, the first matching rule wins. Rules fire when a word in
// the before-set appears immediately before OR a word in the after-set
// appears immediately after the target.
internal static class HeteronymResolver
{
const short S1 = 56; // OP_STRESS1
const byte OP_WORD = 64; // word boundary marker (same as Phonemizer.OP_WORD)
static byte[] Ph(params short[] p)
{
var buf = new byte[p.Length + 1];
buf[0] = OP_WORD;
for (int i = 0; i < p.Length; i++) buf[i + 1] = (byte)p[i];
return buf;
}
readonly struct Rule
{
public readonly HashSet<string>? Before;
public readonly HashSet<string>? After;
public readonly byte[] Phonemes;
public Rule(string[]? before, string[]? after, byte[] ph)
{
Before = before is null ? null : new HashSet<string>(before);
After = after is null ? null : new HashSet<string>(after);
Phonemes = ph;
}
public bool Matches(string? prev, string? next) =>
(Before != null && prev != null && Before.Contains(prev)) ||
(After != null && next != null && After .Contains(next));
}
readonly struct Entry
{
public readonly Rule[] Rules;
public readonly byte[] Default;
public Entry(byte[] def, params Rule[] rules) { Default = def; Rules = rules; }
}
// LIVE
static readonly byte[] LiveVerb = Ph(_L_, S1, _IH_, _V_); // /lɪv/
static readonly byte[] LiveAdj = Ph(_L_, S1, _AY_, _V_); // /laɪv/
// READ
static readonly byte[] ReadPres = Ph(_R_, S1, _IY_, _D_); // /riːd/
static readonly byte[] ReadPast = Ph(_R_, S1, _EH_, _D_); // /rɛd/
// LEAD
static readonly byte[] LeadVerb = Ph(_L_, S1, _IY_, _D_); // /liːd/
static readonly byte[] LeadMet = Ph(_L_, S1, _EH_, _D_); // /lɛd/
// WIND
static readonly byte[] WindNoun = Ph(_W_, S1, _IH_, _N_, _D_); // /wɪnd/
static readonly byte[] WindVerb = Ph(_W_, S1, _AY_, _N_, _D_); // /waɪnd/
// WOUND
static readonly byte[] WoundInj = Ph(_W_, S1, _UW_, _N_, _D_); // /wuːnd/ injury
static readonly byte[] WoundPst = Ph(_W_, S1, _AW_, _N_, _D_); // /waʊnd/ past-of-wind
// TEAR
static readonly byte[] TearRip = Ph(_T_, S1, _EH_, _R_); // /tɛr/ rip
static readonly byte[] TearEye = Ph(_T_, S1, _IH_, _R_); // /tɪr/ cry
// BOW
static readonly byte[] BowWeap = Ph(_B_, S1, _OW_); // /boʊ/ weapon/ribbon
static readonly byte[] BowGest = Ph(_B_, S1, _AW_); // /baʊ/ gesture
// CLOSE
static readonly byte[] CloseVrb = Ph(_K_, _L_, S1, _OW_, _Z_); // /kloʊz/ verb
static readonly byte[] CloseAdj = Ph(_K_, _L_, S1, _OW_, _S_); // /kloʊs/ near
static readonly Dictionary<string, Entry> Table = new()
{
["LIVE"] = new Entry(LiveVerb,
new Rule(
before: new[] { "GO", "GOES", "WENT", "STREAM", "STREAMED", "BROADCAST", "AIRED" },
after: new[] { "MUSIC", "CONCERT", "SHOW", "PERFORMANCE", "WIRE", "AMMUNITION",
"AMMO", "BAIT", "ROUND", "FIRE", "BROADCAST", "EVENT", "GAME", "GAMES" },
LiveAdj)),
["READ"] = new Entry(ReadPres,
new Rule(
before: new[] { "HAD", "HAVE", "HAS", "ALREADY", "JUST", "NEVER",
"I'VE", "YOU'VE", "WE'VE", "THEY'VE", "WHO'VE" },
after: null,
ReadPast)),
["LEAD"] = new Entry(LeadVerb,
new Rule(
before: null,
after: new[] { "PIPE", "PIPES", "PAINT", "PENCIL", "POISONING",
"BULLET", "BULLETS", "SHOT", "WEIGHT", "WEIGHTS", "FREE" },
LeadMet)),
["WIND"] = new Entry(WindNoun,
new Rule(
before: new[] { "TO", "WILL", "CAN", "COULD", "WOULD", "LET", "LETS" },
after: new[] { "UP", "DOWN", "BACK", "THROUGH", "AROUND" },
WindVerb)),
["WOUND"] = new Entry(WoundInj,
new Rule(
before: null,
after: new[] { "UP", "DOWN", "BACK", "AROUND", "THROUGH" },
WoundPst)),
["TEAR"] = new Entry(TearRip,
new Rule(
before: new[] { "A", "THE", "ONE", "MY", "HER", "HIS", "YOUR",
"EACH", "EVERY", "SINGLE" },
after: new[] { "DUCT", "DUCTS", "DROP", "DROPS", "GAS", "JERKER", "STAINED" },
TearEye)),
["BOW"] = new Entry(BowWeap,
new Rule(
before: new[] { "TAKE", "TAKES", "TOOK", "MAKE", "MADE", "GIVE",
"GIVES", "GAVE", "DEEP" },
after: new[] { "DOWN", "TO", "BEFORE", "OUT" },
BowGest)),
["CLOSE"] = new Entry(CloseVrb,
new Rule(
before: new[] { "VERY", "TOO", "SO", "QUITE", "FAIRLY", "PRETTY",
"REALLY", "THAT", "THIS", "COME", "STAY", "REMAIN",
"REMAINS", "GET", "GETS", "CAME", "GETTING", "STAYING" },
after: new[] { "TO", "BY", "ENOUGH", "CALL", "FRIEND", "FRIENDS",
"CONTACT", "SHAVE", "QUARTERS", "TOGETHER" },
CloseAdj)),
};
// Returns the full phoneme stream (OP_WORD + phonemes) if a rule matches,
// or null to fall through to normal dictionary/LTS lookup.
public static byte[]? Resolve(IReadOnlyList<string> words, int index)
{
string word = words[index];
if (!Table.TryGetValue(word, out var entry)) return null;
string? prev = index > 0 ? words[index - 1] : null;
string? next = index < words.Count - 1 ? words[index + 1] : null;
foreach (var rule in entry.Rules)
if (rule.Matches(prev, next)) return rule.Phonemes;
return entry.Default;
}
}
}