Morphology.cs
#nullable enable
using System;
using System.Collections.Generic;
using static SharpTalk.AudioProcessor;
namespace SharpTalk
{
static class Morph
{
enum Sfx
{
None,
S, ES, IES, ED, ER, ERS, EST,
IED, IER, IERS, IEST,
ING, INGS,
LY, BLY, CALLY,
MENT, MENTS, IMENT, IMENTS,
OR, ORS,
NESS, NESSES, INESS, INESSES,
IZE, IZED, IZES, IZER, IZERS,
IZING, IZINGS,
ISM, ISMS,
ABLE,
}
// Ordered from longest to shortest to avoid early false matches.
// Each entry: (suffix_string, stripped_length, suffix_type)
static readonly (string Sfx, Sfx Type)[] SuffixTable =
new (string Sfx, Sfx Type)[]
{
("INESSES", Sfx.INESSES),
("NESSES", Sfx.NESSES),
("IZINGS", Sfx.IZINGS),
("IMENTS", Sfx.IMENTS),
("IZERS", Sfx.IZERS),
("CALLY", Sfx.CALLY),
("IZING", Sfx.IZING),
("INESS", Sfx.INESS),
("MENTS", Sfx.MENTS),
("IZED", Sfx.IZED),
("IZER", Sfx.IZER),
("IZES", Sfx.IZES),
("ISMS", Sfx.ISMS),
("IERS", Sfx.IERS),
("IEST", Sfx.IEST),
("INGS", Sfx.INGS),
("ABLE", Sfx.ABLE),
("IES", Sfx.IES),
("IED", Sfx.IED),
("IER", Sfx.IER),
("ING", Sfx.ING),
("IZE", Sfx.IZE),
("ISM", Sfx.ISM),
("ERS", Sfx.ERS),
("EST", Sfx.EST),
("BLY", Sfx.BLY),
("MENT", Sfx.MENT),
("ORS", Sfx.ORS),
("ED", Sfx.ED),
("ES", Sfx.ES),
("ER", Sfx.ER),
("LY", Sfx.LY),
("OR", Sfx.OR),
("S", Sfx.S),
};
public static byte[]? TryDecompose(string upper, DictReader dict)
{
// Try plain S first before suffix table
if (upper.Length > 1 && upper[^1] == 'S')
{
string stem = upper[..^1];
byte[]? root = dict.Search(stem);
if (root != null)
return Concat(root, SufPhons_S(root));
}
// Try each suffix in order
foreach (var (sfxStr, sfxType) in SuffixTable)
{
if (!upper.EndsWith(sfxStr, StringComparison.Ordinal)) continue;
string stem = upper[..^sfxStr.Length];
if (stem.Length < 1) continue;
byte[]? result = ApplySuffix(sfxType, stem, sfxStr, dict);
if (result != null) return result;
}
return null;
}
static byte[]? ApplySuffix(Sfx sfx, string stem, string sfxStr, DictReader dict)
{
switch (sfx)
{
case Sfx.S:
{
byte[]? r = dict.Search(stem);
return r == null ? null : Concat(r, SufPhons_S(r));
}
case Sfx.ES:
{
// Try stem+"S" first (e.g. houseS), then stem (fish, box)
byte[]? r = dict.Search(stem + "S");
if (r != null) return Concat(r, SufPhons_S(r));
// -sh/-ss/-x root: stem is already stripped of "ES"
char last = stem.Length > 0 ? stem[^1] : '\0';
char prev = stem.Length > 1 ? stem[^2] : '\0';
bool eshRoot = (last == 'H' && (prev == 'S' || prev == 'C'))
|| (last == 'S' && prev == 'S')
|| last == 'X';
if (eshRoot)
{
r = dict.Search(stem);
if (r != null) return Concat(r, SufPhons_S(r));
}
// stem + "E" (house, clothe, name)
r = dict.Search(stem + "E");
if (r != null) return Concat(r, SufPhons_S(r));
// stem ending in S/Z (bus, waltz)
if (last == 'S' || last == 'Z')
{
r = dict.Search(stem);
if (r != null) return Concat(r, SufPhons_S(r));
}
return null;
}
case Sfx.IES:
{
// Y-mutation: candies → candy
byte[]? r = dict.Search(stem + "Y");
if (r != null) return Concat(r, SufPhons_S(r));
// calorie → calories (stem + "IE")
r = dict.Search(stem + "IE");
if (r != null) return Concat(r, SufPhons_S(r));
return null;
}
case Sfx.ED:
{
byte[]? r = DecomposeE(stem, dict);
return r == null ? null : Concat(r, SufPhons_ED(r));
}
case Sfx.ER:
{
byte[]? r = DecomposeE(stem, dict);
return r == null ? null : Append(r, _ER_);
}
case Sfx.ERS:
{
// Try plain S first: stem+"ERS" → stem+"ER"+"S"
byte[]? r = dict.Search(stem + "ER");
if (r != null) return Concat(r, SufPhons_S(r));
r = DecomposeE(stem, dict);
return r == null ? null : Append(Append(r, _ER_), _z_);
}
case Sfx.EST:
{
byte[]? r = DecomposeE(stem, dict);
return r == null ? null : Concat(r, new byte[] { (byte)_IX_, (byte)_s_, (byte)_t_ });
}
case Sfx.IED:
{
byte[]? r = DecomposeI(stem, dict);
return r == null ? null : Append(r, _d_);
}
case Sfx.IER:
{
byte[]? r = DecomposeI(stem, dict);
return r == null ? null : Append(r, _ER_);
}
case Sfx.IERS:
{
// Try plain S: stem+"IERS" → stem+"IER"+"S"
byte[]? r = dict.Search(stem + "IER");
if (r != null) return Concat(r, SufPhons_S(r));
r = DecomposeI(stem, dict);
return r == null ? null : Append(Append(r, _ER_), _z_);
}
case Sfx.IEST:
{
byte[]? r = DecomposeI(stem, dict);
if (r != null) return Concat(r, new byte[] { (byte)_IX_, (byte)_s_, (byte)_t_ });
// loneliest: stem ends in L → remove L, look up, add LY+EST
if (stem.Length > 0 && stem[^1] == 'L')
{
r = dict.Search(stem[..^1]);
if (r != null) return Concat(r, new byte[] { (byte)_l_, (byte)_IY_, (byte)_IX_, (byte)_s_, (byte)_t_ });
}
return null;
}
case Sfx.ING:
{
byte[]? r = DecomposeE(stem, dict);
return r == null ? null : Append(Append(r, _IX_), _NG_);
}
case Sfx.INGS:
{
byte[]? r = DecomposeE(stem, dict);
return r == null ? null : Concat(r, new byte[] { (byte)_IX_, (byte)_NG_, (byte)_z_ });
}
case Sfx.LY:
{
byte[]? r = dict.Search(stem);
return r == null ? null : Append(Append(r, _l_), _IY_);
}
case Sfx.BLY:
{
// possibly → possible+LY: stem + "BLE" ("BLY" already stripped, stem ends in "BL")
byte[]? r = dict.Search(stem + "BLE");
return r == null ? null : Append(Append(r, _l_), _IY_);
}
case Sfx.CALLY:
{
// musically → musical + LY
byte[]? r = dict.Search(stem + "C");
return r == null ? null : Append(Append(r, _l_), _IY_);
}
case Sfx.MENT:
{
byte[]? r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_m_, (byte)_AX_, (byte)_n_, (byte)_t_ });
}
case Sfx.MENTS:
{
byte[]? r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_m_, (byte)_AX_, (byte)_n_, (byte)_t_, (byte)_s_ });
}
case Sfx.IMENT:
{
byte[]? r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_m_, (byte)_AX_, (byte)_n_, (byte)_t_ });
}
case Sfx.IMENTS:
{
byte[]? r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_m_, (byte)_AX_, (byte)_n_, (byte)_t_, (byte)_s_ });
}
case Sfx.OR:
{
byte[]? r = dict.Search(stem + "E");
if (r != null) return Append(r, _ER_);
r = dict.Search(stem);
return r == null ? null : Append(r, _ER_);
}
case Sfx.ORS:
{
byte[]? r = dict.Search(stem + "E");
if (r != null) return Concat(r, new byte[] { (byte)_ER_, (byte)_z_ });
r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_ER_, (byte)_z_ });
}
case Sfx.NESS:
{
byte[]? r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_n_, (byte)_IX_, (byte)_s_ });
}
case Sfx.NESSES:
{
byte[]? r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_n_, (byte)_IX_, (byte)_s_, (byte)_IX_, (byte)_z_ });
}
case Sfx.INESS:
{
// Y-mutation: sexiness → sexy
byte[]? r = dict.Search(stem + "Y");
if (r != null) return Concat(r, new byte[] { (byte)_n_, (byte)_IX_, (byte)_s_ });
// loneliness: stem ends in L → remove L, add LY+NESS
if (stem.Length > 0 && stem[^1] == 'L')
{
r = dict.Search(stem[..^1]);
if (r != null) return Concat(r, new byte[] { (byte)_l_, (byte)_IY_, (byte)_n_, (byte)_IX_, (byte)_s_ });
}
return null;
}
case Sfx.INESSES:
{
byte[]? r = dict.Search(stem + "Y");
if (r != null) return Concat(r, new byte[] { (byte)_n_, (byte)_IX_, (byte)_s_, (byte)_IX_, (byte)_z_ });
if (stem.Length > 0 && stem[^1] == 'L')
{
r = dict.Search(stem[..^1]);
if (r != null) return Concat(r, new byte[] { (byte)_l_, (byte)_IY_, (byte)_n_, (byte)_IX_, (byte)_s_, (byte)_IX_, (byte)_z_ });
}
return null;
}
case Sfx.IZE:
{
byte[]? r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_AY_, (byte)_z_ });
}
case Sfx.IZED:
{
byte[]? r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_AY_, (byte)_z_, (byte)_d_ });
}
case Sfx.IZES:
{
byte[]? r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_AY_, (byte)_z_, (byte)_IX_, (byte)_z_ });
}
case Sfx.IZER:
{
// Try Decompose_E first (e.g. "organizer" with ING fallback)
byte[]? r = DecomposeE(stem, dict);
if (r != null) return Append(r, _ER_);
r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_AY_, (byte)_z_, (byte)_ER_ });
}
case Sfx.IZERS:
{
byte[]? r = DecomposeE(stem, dict);
if (r != null) return Concat(r, new byte[] { (byte)_ER_, (byte)_z_ });
r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_AY_, (byte)_z_, (byte)_ER_, (byte)_z_ });
}
case Sfx.IZING:
{
// Try E-decompose first: "timing" → "time"
byte[]? r = DecomposeE(stem, dict);
if (r != null) return Concat(r, new byte[] { (byte)_IX_, (byte)_NG_ });
r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_AY_, (byte)_z_, (byte)_IX_, (byte)_NG_ });
}
case Sfx.IZINGS:
{
byte[]? r = DecomposeE(stem, dict);
if (r != null) return Concat(r, new byte[] { (byte)_IX_, (byte)_NG_, (byte)_z_ });
r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_AY_, (byte)_z_, (byte)_IX_, (byte)_NG_, (byte)_z_ });
}
case Sfx.ISM:
{
byte[]? r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_IX_, (byte)_z_, (byte)_AX_, (byte)_m_ });
}
case Sfx.ISMS:
{
byte[]? r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_IX_, (byte)_z_, (byte)_AX_, (byte)_m_, (byte)_z_ });
}
case Sfx.ABLE:
{
byte[]? r = dict.Search(stem);
return r == null ? null : Concat(r, new byte[] { (byte)_AX_, (byte)_b_, (byte)_EL_ });
}
default:
return null;
}
}
// Root recovery
// Recovers root for -ED/-ER/-ERS/-EST/-ING etc.
// Tries: stem+"E" (timed→time), then consonant-doubling removal (napped→nap).
static byte[]? DecomposeE(string stem, DictReader dict)
{
byte[]? r = dict.Search(stem + "E");
if (r != null) return r;
string undoubled = RemoveDoubling(stem);
if (undoubled.Length != stem.Length)
r = dict.Search(undoubled);
return r;
}
// Recovers root for -IED/-IER/-IERS/-IEST (Y-mutation: steadiest→steady).
static byte[]? DecomposeI(string stem, DictReader dict)
=> dict.Search(stem + "Y");
// Remove consonant doubling: "canned" stem "cann" → "can", "slurring" stem "slurr" → "slur".
// Vowels and S/L/F are not doubled in roots (they stand alone).
static string RemoveDoubling(string s)
{
if (s.Length < 2) return s;
char last = s[^1];
if ("AEIOUSLF".Contains(last)) return s;
if (s[^2] == last) return s[..^1];
return s;
}
// Suffix phoneme helpers
// /s/ or /z/ or /ɪz/ depending on last root phoneme.
static byte[] SufPhons_S(byte[] root)
{
byte last = LastPhon(root);
// After sibilants: /ɪz/
if (last == _s_ || last == _z_ || last == _SH_ || last == _ZH_ || last == _CH_ || last == _JH_)
return new byte[] { (byte)_IX_, (byte)_z_ };
// After unvoiced consonants: /s/
if (IsUnvoicedConsonant(last))
return new byte[] { (byte)_s_ };
return new byte[] { (byte)_z_ };
}
// /t/ or /d/ or /ɪd/ depending on last root phoneme.
static byte[] SufPhons_ED(byte[] root)
{
byte last = LastPhon(root);
if (last == _t_ || last == _d_)
return new byte[] { (byte)_IX_, (byte)_d_ };
if (IsUnvoicedConsonant(last))
return new byte[] { (byte)_t_ };
return new byte[] { (byte)_d_ };
}
static byte LastPhon(byte[] phons)
{
for (int i = phons.Length - 1; i >= 0; i--)
if (phons[i] <= 55) return phons[i];
return (byte)_SIL_;
}
// Unvoiced obstruents: /p t k f θ s ʃ tʃ/
static bool IsUnvoicedConsonant(byte p) =>
p == _p_ || p == _t_ || p == _k_ || p == _f_ ||
p == _TH_ || p == _s_ || p == _SH_ || p == _CH_;
// Array helpers
static byte[] Append(byte[] a, short phon)
{
var r = new byte[a.Length + 1];
a.CopyTo(r, 0);
r[^1] = (byte)phon;
return r;
}
static byte[] Concat(byte[] a, byte[] b)
{
var r = new byte[a.Length + b.Length];
a.CopyTo(r, 0);
b.CopyTo(r, a.Length);
return r;
}
static byte[] Concat(byte[] a, short[] b)
{
var r = new byte[a.Length + b.Length];
a.CopyTo(r, 0);
for (int i = 0; i < b.Length; i++) r[a.Length + i] = (byte)b[i];
return r;
}
}
} // namespace