/**
 * Wolof STT Normalizer Utility v2.0
 * Includes change tracking and WhatsApp message shortening.
 */

/**
 * Lookup table from a lowercase, punctuation-free transcribed token to its
 * replacement spelling. Identity entries (e.g. "sabu") leave the spelling
 * unchanged apart from lowercasing the token.
 */
const NORMALIZATION_RULES: Record<string, string> = {
  "damae": "damay",
  "dama": "damay",
  "dma": "damay",
  "jai": "jaay",
  "jaai": "jaay",
  "jaye": "jaay",
  "jendi": "jënd",
  "fei": "fey",
  "fay": "fey",
  "yere": "yére",
  "yare": "yére",
  "sandwiche": "sandwich",
  "pan": "mburu",
  "cafe": "café",
  "sabu": "sabu",
  "omo": "omo",
  "patat": "patas",
  "ognon": "sooble",
  "riz": "ceeb",
  "yof": "Yoff",
  "dakar": "Dakar",
  "pikine": "Pikine",
  "guediawaye": "Guédiawaye",
  "keur": "kër",
  "ker": "kër",
  "sikarche": "ci kër",
  "sikarshe": "ci kër",
  "sikarce": "ci kër",
  "sikaarché": "ci kër",
  "quartier": "quartier",
  "banlieu": "banlieue",
  "si": "ci",
  "fane": "fan",
  "fana": "fan",
  "lana": "lan",
  "lanna": "lan",
  "nakka": "naka",
  "nakha": "naka",
  "niak": "ñàkk",
  "niakk": "ñàkk",
  "dencal": "denc", // requested dencal -> denc
  "limal": "lim",
  "ganee": "gañ",
  "gane": "gañ",
  "borom": "boroom",
  "xaalisou": "xaalis",
  "xaliss": "xaalis",
};

/** Place names whose canonical capitalisation is restored when matched case-insensitively. */
const CAPITALIZED_PLACES = ["Yoff", "Dakar", "Pikine", "Guédiawaye"];

/** Characters ignored when building the lookup key for a token. */
const WOLOF_PUNCTUATION = /[.,/#!$%^&*;:{}=\-_`~()]/g;
/** Run of ignorable punctuation at the very start of a token. */
const WOLOF_LEADING_PUNCTUATION = /^[.,/#!$%^&*;:{}=\-_`~()]+/;
/** Run of ignorable punctuation at the very end of a token. */
const WOLOF_TRAILING_PUNCTUATION = /[.,/#!$%^&*;:{}=\-_`~()]+$/;

/** Outcome of {@link normalizeWolof}. */
export interface NormalizationResult {
  /** The input text after whitespace clean-up, word replacement and initial capitalisation. */
  normalizedText: string;
  changes: string[]; // Format: ["damae -> damay", ...]
}

/**
 * Returns the replacement registered for `key`, or `undefined` when there is none.
 *
 * Only own properties of the table are consulted, so tokens that happen to
 * match an inherited `Object.prototype` member (e.g. "constructor") are not
 * mistaken for rules.
 */
function lookupNormalizationRule(key: string): string | undefined {
  return Object.prototype.hasOwnProperty.call(NORMALIZATION_RULES, key)
    ? NORMALIZATION_RULES[key]
    : undefined;
}

/**
 * Normalizes a raw Wolof speech-to-text transcript.
 *
 * The text is trimmed, runs of two or more whitespace characters are collapsed
 * to a single space, and each token is looked up (lowercased, punctuation
 * removed) in `NORMALIZATION_RULES`, then in `CAPITALIZED_PLACES`. Punctuation
 * at the start or end of a replaced token is kept around the replacement.
 * Finally the first character of the result is upper-cased.
 *
 * @param rawText - Transcript to normalize; an empty value yields an empty result.
 * @returns The normalized text and the de-duplicated list of applied changes,
 *   each formatted as `"from -> to"`.
 */
export function normalizeWolof(rawText: string): NormalizationResult {
  if (!rawText) return { normalizedText: "", changes: [] };

  const text = rawText.trim().replace(/\s{2,}/g, " ");
  const changes: string[] = [];

  const normalizeToken = (word: string): string => {
    // `core` keeps the original casing; `key` is what the tables are indexed by.
    const core = word.replace(WOLOF_PUNCTUATION, "");
    const key = core.toLowerCase();
    if (key === "") return word; // empty or punctuation-only token

    const leading = WOLOF_LEADING_PUNCTUATION.exec(word)?.[0] ?? "";
    const trailing = WOLOF_TRAILING_PUNCTUATION.exec(word)?.[0] ?? "";

    const replacement = lookupNormalizationRule(key);
    if (replacement) {
      // Case-only rewrites (e.g. "dakar" -> "Dakar") are not reported as changes.
      if (key !== replacement.toLowerCase()) {
        changes.push(`${key} -> ${replacement}`);
      }
      return leading + replacement + trailing;
    }

    const matchingPlace = CAPITALIZED_PLACES.find(
      (place) => place.toLowerCase() === key,
    );
    if (matchingPlace) {
      // Compare without punctuation so "Yoff," is not logged as a change.
      if (matchingPlace !== core) {
        changes.push(`${core} -> ${matchingPlace}`);
      }
      return leading + matchingPlace + trailing;
    }

    return word;
  };

  // Splitting with a capture group keeps every separator at an odd index, so
  // words separated by a single tab or newline are normalized too and the
  // original separator is preserved.
  const pieces = text.split(/(\s)/);
  let normalizedText = pieces
    .map((piece, index) => (index % 2 === 0 ? normalizeToken(piece) : piece))
    .join("");

  if (normalizedText.length > 0) {
    normalizedText =
      normalizedText.charAt(0).toUpperCase() + normalizedText.slice(1);
  }

  return { normalizedText, changes: Array.from(new Set(changes)) };
}