| | |
| | |
| | |
| | |
| |
|
/**
 * Lookup table from a lowercased, punctuation-free spelling variant to the
 * form that should replace it.
 *
 * Entries whose key equals their value (e.g. `sabu`, `omo`, `quartier`) are
 * not redundant: `normalizeWolof` returns the table value for any matching
 * word, so these entries force the word to its lowercase form.
 */
const NORMALIZATION_RULES: Record<string, string> = {
  // Variants mapped to "damay" / "jaay" / "jënd" / "fey" / "yére".
  damae: "damay",
  dama: "damay",
  dma: "damay",
  jai: "jaay",
  jaai: "jaay",
  jaye: "jaay",
  jendi: "jënd",
  fei: "fey",
  fay: "fey",
  yere: "yére",
  yare: "yére",

  // Goods and products.
  sandwiche: "sandwich",
  pan: "mburu",
  cafe: "café",
  sabu: "sabu",
  omo: "omo",
  patat: "patas",
  ognon: "sooble",
  riz: "ceeb",

  // Place names: the value carries the canonical capitalization/accents.
  yof: "Yoff",
  dakar: "Dakar",
  pikine: "Pikine",
  guediawaye: "Guédiawaye",

  // Variants mapped to "kër" / "ci kër", plus related location words.
  keur: "kër",
  ker: "kër",
  sikarche: "ci kër",
  sikarshe: "ci kër",
  sikarce: "ci kër",
  "sikaarché": "ci kër",
  quartier: "quartier",
  banlieu: "banlieue",
  si: "ci",

  // Remaining spelling variants.
  fane: "fan",
  fana: "fan",
  lana: "lan",
  lanna: "lan",
  nakka: "naka",
  nakha: "naka",
  niak: "ñàkk",
  niakk: "ñàkk",
  dencal: "denc",
  limal: "lim",
  ganee: "gañ",
  gane: "gañ",
  borom: "boroom",
  xaalisou: "xaalis",
  xaliss: "xaalis",
};
| |
|
/**
 * Canonical spellings of place names. `normalizeWolof` compares words against
 * these case-insensitively and substitutes the spelling listed here.
 */
const CAPITALIZED_PLACES = [
  "Yoff",
  "Dakar",
  "Pikine",
  "Guédiawaye",
];
| |
|
/** Outcome of a `normalizeWolof` call. */
export interface NormalizationResult {
  /** The input text after whitespace cleanup and word substitutions. */
  normalizedText: string;
  /** De-duplicated `"<from> -> <to>"` descriptions of the substitutions made. */
  changes: string[];
}
| |
|
/** Punctuation characters that are ignored when matching a word against the rules. */
const WOLOF_WORD_PUNCTUATION = ".,/#!$%^&*;:{}=-_`~()";

/** Global matcher for the same punctuation set, used to build lookup keys. */
const WOLOF_WORD_PUNCTUATION_PATTERN = /[.,/#!$%^&*;:{}=\-_`~()]/g;

/** Splits a word into its leading punctuation, inner core, and trailing punctuation. */
function splitEdgePunctuation(word: string): { leading: string; core: string; trailing: string } {
  let start = 0;
  let end = word.length;
  while (start < end && WOLOF_WORD_PUNCTUATION.includes(word.charAt(start))) start++;
  while (end > start && WOLOF_WORD_PUNCTUATION.includes(word.charAt(end - 1))) end--;
  return {
    leading: word.slice(0, start),
    core: word.slice(start, end),
    trailing: word.slice(end),
  };
}

/**
 * Normalizes informal spellings in a text using `NORMALIZATION_RULES` and
 * `CAPITALIZED_PLACES`.
 *
 * Processing steps:
 * 1. Trim the input and collapse runs of two or more whitespace characters
 *    into a single space.
 * 2. Split on single spaces and, for each word, build a lookup key by
 *    lowercasing it and removing punctuation.
 * 3. Replace the word when the key is a rule key or matches a place name
 *    case-insensitively. Punctuation at the start or end of the word is kept
 *    around the replacement (e.g. `"dakar,"` becomes `"Dakar,"`).
 * 4. Uppercase the first character of the resulting text.
 *
 * NOTE: a single tab or newline between words is neither collapsed nor used
 * as a separator, so the words on either side of it are treated as one word.
 *
 * @param rawText Text to normalize. An empty string yields an empty result.
 * @returns The normalized text and a de-duplicated list of `"from -> to"`
 *   entries. Rule replacements that only differ by letter case are not listed.
 */
export function normalizeWolof(rawText: string): NormalizationResult {
  if (!rawText) return { normalizedText: '', changes: [] };

  const text = rawText.trim().replace(/\s{2,}/g, " ");
  const changes: string[] = [];

  const processedWords = text.split(" ").map(word => {
    const { leading, core, trailing } = splitEdgePunctuation(word);
    const lookupKey = core.toLowerCase().replace(WOLOF_WORD_PUNCTUATION_PATTERN, "");

    // Own-property check: a plain-object lookup would also find inherited
    // members such as "constructor", which are not strings and would crash below.
    const replacement = Object.prototype.hasOwnProperty.call(NORMALIZATION_RULES, lookupKey)
      ? NORMALIZATION_RULES[lookupKey]
      : undefined;
    if (replacement) {
      if (lookupKey !== replacement.toLowerCase()) {
        changes.push(`${lookupKey} -> ${replacement}`);
      }
      return leading + replacement + trailing;
    }

    const matchingPlace = CAPITALIZED_PLACES.find(p => p.toLowerCase() === lookupKey);
    if (matchingPlace) {
      if (matchingPlace !== core) {
        changes.push(`${core} -> ${matchingPlace}`);
      }
      return leading + matchingPlace + trailing;
    }

    return word;
  });

  let normalizedText = processedWords.join(" ");

  if (normalizedText.length > 0) {
    normalizedText = normalizedText.charAt(0).toUpperCase() + normalizedText.slice(1);
  }

  return { normalizedText, changes: Array.from(new Set(changes)) };
}
| |
|