Spaces:
Sleeping
Sleeping
| import re | |
# Two-letter sequences that are collapsed into a single upper-case symbol.
# Keys are lower-case because lookups are done on already-lowercased text.
DIGRAPH_MAP: dict[str, str] = {
    # Consonant pairs.
    "th": "T",
    "sh": "S",
    "ch": "C",
    "ph": "F",
    # Vowel pairs.
    "oo": "U",
    "ee": "I",
    "ai": "A",
    "ou": "W",
}
def normalize_text(text: str) -> str:
    """Lowercase *text* and reduce it to a small, predictable character set.

    Every character other than ``a-z``, ``0-9``, whitespace and the
    punctuation ``, . ; : ! ? ' -`` is replaced by a space. Runs of
    whitespace are then collapsed to a single space and leading/trailing
    whitespace is removed.

    Args:
        text: Arbitrary input text.

    Returns:
        The normalized string; empty if nothing but whitespace remains
        after filtering.
    """
    lowered = text.lower()
    filtered = re.sub(r"[^a-z0-9\s,.;:!?'-]", " ", lowered)
    collapsed = re.sub(r"\s+", " ", filtered)
    # Strip last: a disallowed character at either end of the input has just
    # been turned into a space, which an earlier strip would not remove.
    return collapsed.strip()
def text_to_symbols(text: str) -> list[str]:
    """Convert *text* into a flat list of single-character symbols.

    The input is first passed through ``normalize_text``. The result is then
    scanned left to right:

    * a two-character sequence found in ``DIGRAPH_MAP`` is consumed as a
      unit and emitted as its mapped symbol;
    * any of ``, . ; : ! ?`` is emitted as ``"|"``;
    * every other character (including spaces, apostrophes and hyphens) is
      emitted unchanged.

    Args:
        text: Arbitrary input text.

    Returns:
        The symbols in input order.
    """
    cleaned = normalize_text(text)
    total = len(cleaned)
    out: list[str] = []
    pos = 0
    while pos < total:
        # Digraphs take priority over single characters; the slice is simply
        # shorter than two characters at the very end and will not match.
        mapped = DIGRAPH_MAP.get(cleaned[pos : pos + 2])
        if mapped is not None:
            out.append(mapped)
            pos += 2
        else:
            current = cleaned[pos]
            out.append("|" if current in ",.;:!?" else current)
            pos += 1
    return out