CognxSafeTrack
chore: finalize Sprint P2 & P3 optimizations, baseline prisma migrations, and update technical audit docs
/**
 * Wolof STT (speech-to-text) Normalizer Utility v2.0
 * Includes change tracking and WhatsApp message shortening.
 */
/**
 * Static spelling table used by `normalizeWolof`.
 *
 * Each key is a token as it appears after lowercasing and punctuation
 * stripping; the value is the text that replaces it. Entries whose value
 * equals their key (`sabu`, `omo`, `quartier`) still match, so the token is
 * emitted in the value's lowercase form, but `normalizeWolof` does not report
 * them as a change.
 */
const NORMALIZATION_RULES: Record<string, string> = {
  damae: 'damay',
  dama: 'damay',
  dma: 'damay',
  jai: 'jaay',
  jaai: 'jaay',
  jaye: 'jaay',
  jendi: 'jënd',
  fei: 'fey',
  fay: 'fey',
  yere: 'yére',
  yare: 'yére',
  sandwiche: 'sandwich',
  pan: 'mburu',
  cafe: 'café',
  sabu: 'sabu',
  omo: 'omo',
  patat: 'patas',
  ognon: 'sooble',
  riz: 'ceeb',

  // Place names: the value carries the canonical capitalisation and accents.
  yof: 'Yoff',
  dakar: 'Dakar',
  pikine: 'Pikine',
  guediawaye: 'Guédiawaye',

  keur: 'kër',
  ker: 'kër',
  sikarche: 'ci kër',
  sikarshe: 'ci kër',
  sikarce: 'ci kër',
  sikaarché: 'ci kër',
  quartier: 'quartier',
  banlieu: 'banlieue',
  si: 'ci',
  fane: 'fan',
  fana: 'fan',
  lana: 'lan',
  lanna: 'lan',
  nakka: 'naka',
  nakha: 'naka',
  niak: 'ñàkk',
  niakk: 'ñàkk',
  dencal: 'denc', // requested dencal -> denc
  limal: 'lim',
  ganee: 'gañ',
  gane: 'gañ',
  borom: 'boroom',
  xaalisou: 'xaalis',
  xaliss: 'xaalis',
};
/**
 * Place names in their canonical written form. `normalizeWolof` compares a
 * token against these case-insensitively when no spelling rule matched it.
 */
const CAPITALIZED_PLACES = [
  'Yoff',
  'Dakar',
  'Pikine',
  'Guédiawaye',
];
/** Outcome of a `normalizeWolof` call. */
export interface NormalizationResult {
  /** The transcript after all replacements have been applied. */
  normalizedText: string;

  /** One entry per distinct replacement, e.g. `["damae -> damay", ...]`. */
  changes: Array<string>;
}
/** Matches one letter, combining mark or digit, i.e. a character that belongs to a word. */
const WOLOF_WORD_CHAR = /[\p{L}\p{M}\p{N}]/u;

/** Punctuation removed from inside a token before it is used as a rule key. */
const WOLOF_KEY_PUNCTUATION = /[.,/#!$%^&*;:{}=\-_`~()]/g;

/**
 * Splits a whitespace-free token into leading punctuation, the word itself,
 * and trailing punctuation, so the punctuation can be re-attached after the
 * word has been replaced.
 */
function splitEdgePunctuation(token: string): [string, string, string] {
  // Work on code points so a surrogate pair is never cut in half.
  const chars = Array.from(token);
  let start = 0;
  let end = chars.length;
  while (start < end && !WOLOF_WORD_CHAR.test(chars[start] ?? '')) start++;
  while (end > start && !WOLOF_WORD_CHAR.test(chars[end - 1] ?? '')) end--;
  return [
    chars.slice(0, start).join(''),
    chars.slice(start, end).join(''),
    chars.slice(end).join(''),
  ];
}

/**
 * Normalizes a raw Wolof transcript word by word.
 *
 * The text is trimmed and runs of whitespace are collapsed to one space. Each
 * word is then lowercased, stripped of punctuation and looked up in the
 * spelling rules; if no rule applies it is compared case-insensitively with
 * `CAPITALIZED_PLACES`. Punctuation surrounding a replaced word is kept, and
 * the first character of the result is upper-cased.
 *
 * @param rawText - Transcript to normalize. An empty string yields an empty result.
 * @param customRules - Extra rules merged over `NORMALIZATION_RULES`; on a key
 *   clash the custom rule wins. Keys must be lowercase and punctuation-free.
 * @returns The normalized text and the de-duplicated list of replacements,
 *   each formatted as `"<from> -> <to>"`. Rule replacements that differ from
 *   the key only by letter case are applied but not listed.
 */
export function normalizeWolof(rawText: string, customRules?: Record<string, string>): NormalizationResult {
  if (!rawText) return { normalizedText: '', changes: [] };

  // Merge static rules with dynamic rules (dynamic takes precedence).
  const activeRules: Record<string, string> = customRules
    ? { ...NORMALIZATION_RULES, ...customRules }
    : NORMALIZATION_RULES;

  const changes: string[] = [];

  const normalizeToken = (token: string): string => {
    const [prefix, core, suffix] = splitEdgePunctuation(token);
    if (core === '') return token; // empty or punctuation-only token

    const key = core.toLowerCase().replace(WOLOF_KEY_PUNCTUATION, '');

    // Own-property check: a plain index would also find inherited members
    // such as "constructor" or "toString" and crash on `.toLowerCase()`.
    const replacement = Object.prototype.hasOwnProperty.call(activeRules, key)
      ? activeRules[key]
      : undefined;
    if (replacement) {
      if (key !== replacement.toLowerCase()) {
        changes.push(`${key} -> ${replacement}`);
      }
      return prefix + replacement + suffix;
    }

    const place = CAPITALIZED_PLACES.find(p => p.toLowerCase() === key);
    if (place) {
      if (place !== core) {
        changes.push(`${core} -> ${place}`);
      }
      return prefix + place + suffix;
    }

    return token;
  };

  // After collapsing, every whitespace run is exactly one character. Splitting
  // with a capture group keeps that character at the odd indices, so a single
  // tab or newline still separates words and survives in the output.
  const pieces = rawText.trim().replace(/\s{2,}/g, ' ').split(/(\s)/);
  const joined = pieces
    .map((piece, index) => (index % 2 === 1 ? piece : normalizeToken(piece)))
    .join('');

  const normalizedText = joined.charAt(0).toUpperCase() + joined.slice(1);

  return { normalizedText, changes: Array.from(new Set(changes)) };
}