"""
Two-pass spell correction, preceded by a lookup-table pass that rewrites
known dyslexic misspellings before either checker sees the text:
  Pass 1: pyspellchecker (fast, context-free, catches simple typos)
  Pass 2: LanguageTool (context-aware, catches grammar + dyslexic patterns)

Dyslexic error patterns handled:
  - Letter reversals: b/d, p/q, n/u, m/w
  - Phonetic spelling: "wuz", "cud", "thay"
  - Word boundary errors: "alot", "infact", "aswell"
  - Letter omissions: "becaus", "importnt"
  - Letter transpositions: "teh", "recieve"
  - Homophone confusion: there/their/they're
"""
|
|
| import language_tool_python |
| from spellchecker import SpellChecker |
| from loguru import logger |
| from typing import Optional |
| import re |
|
|
|
|
class DyslexiaAwareSpellCorrector:
    """Spell corrector with dyslexia-specific phonetic pattern handling.

    ``correct()`` runs three stages, each feeding the next:

    1. a fixed lookup table of known dyslexic/phonetic misspellings,
    2. pyspellchecker for remaining token-level typos,
    3. LanguageTool for context-aware fixes (skipped when LanguageTool
       could not be started in ``__init__``).

    The instance may hold a LanguageTool handle; call ``close()`` when done,
    or use the instance as a context manager.
    """

    # Lower-case misspelling -> correction. Keys are matched as whole words,
    # case-insensitively (see _compile_phonetic_pattern).
    # NOTE(review): "id", "cant" and "wont" are also legitimate words, so
    # lower-case occurrences of them are rewritten too — confirm this is wanted.
    DYSLEXIC_PHONETIC_MAP = {
        "wuz": "was", "cud": "could", "wud": "would", "shud": "should",
        "thay": "they", "thier": "their", "recieve": "receive",
        "beleive": "believe", "occured": "occurred", "definately": "definitely",
        "seperate": "separate", "untill": "until", "tommorrow": "tomorrow",
        "alot": "a lot", "infact": "in fact", "aswell": "as well",
        "alright": "all right", "cant": "cannot", "wont": "will not",
        "ive": "I have", "im": "I am", "id": "I would",
    }

    # Characters peeled off both ends of a token before dictionary lookup.
    _EDGE_PUNCTUATION = ".,!?;:\"'()[]{}—–-"

    def __init__(self, language: str = "en-US"):
        """Create the checkers.

        Args:
            language: Language code handed to LanguageTool.

        A failure to start LanguageTool is logged and tolerated: ``self.tool``
        stays ``None`` and the context-aware pass becomes a no-op.
        """
        self.spell = SpellChecker()
        self.language = language

        self._phonetic_pattern = self._compile_phonetic_pattern()

        self.tool = None
        try:
            self.tool = language_tool_python.LanguageTool(language)
            logger.info("LanguageTool initialised successfully")
        except Exception as e:
            logger.warning(f"LanguageTool unavailable (JVM issue?), skipping context-aware pass: {e}")

    def __enter__(self):
        """Return self so the corrector can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release LanguageTool on leaving the ``with`` block; never suppresses exceptions."""
        self.close()

    @classmethod
    def _compile_phonetic_pattern(cls):
        """Build one case-insensitive whole-word regex matching any map key."""
        alternatives = "|".join(re.escape(key) for key in cls.DYSLEXIC_PHONETIC_MAP)
        return re.compile(r"\b(" + alternatives + r")\b", re.IGNORECASE)

    def _phonetic_pass(self, text: str) -> str:
        """Apply known dyslexic phonetic substitutions first.

        A leading capital on the matched word is carried over to the
        replacement. All-uppercase matches (e.g. "ID") are left untouched,
        mirroring the spellcheck pass, which also skips all-uppercase tokens.
        """
        def _replace(match):
            word = match.group(0)
            if word.isupper():
                # Likely an acronym/identifier rather than a misspelling.
                return word
            replacement = self.DYSLEXIC_PHONETIC_MAP.get(word.lower(), word)
            if word[0].isupper() and replacement[0].islower():
                replacement = replacement[0].upper() + replacement[1:]
            return replacement

        return self._phonetic_pattern.sub(_replace, text)

    def _correct_token(self, token: str) -> str:
        """Return one whitespace-free token with its core word spell-corrected."""
        core = token.strip(self._EDGE_PUNCTUATION)
        # Nothing but punctuation, or an all-uppercase token: leave as is.
        if not core or core.isupper():
            return token

        lowered = core.lower()
        if lowered in self.spell:
            return token

        suggestion = self.spell.correction(lowered)
        if not suggestion or suggestion == lowered:
            return token

        if core[0].isupper():
            suggestion = suggestion.capitalize()

        # Re-attach whatever punctuation surrounded the core word.
        lead = len(token) - len(token.lstrip(self._EDGE_PUNCTUATION))
        return token[:lead] + suggestion + token[lead + len(core):]

    def _spellcheck_pass(self, text: str) -> str:
        """pyspellchecker pass for simple token-level errors.

        Only runs of non-whitespace are rewritten, so newlines, tabs and
        repeated spaces in ``text`` are preserved exactly.
        """
        return re.sub(r"\S+", lambda m: self._correct_token(m.group(0)), text)

    @staticmethod
    def _match_length(match) -> int:
        """Return the length of the span a LanguageTool match covers.

        Reads ``errorLength``; falls back to ``error_length`` because newer
        language_tool_python releases appear to use snake_case attribute
        names — TODO confirm against the installed version.
        """
        length = getattr(match, "errorLength", None)
        if length is None:
            length = match.error_length
        return length

    def _languagetool_pass(self, text: str) -> str:
        """LanguageTool pass for context-aware grammar + spelling corrections.

        The first suggested replacement of each match is applied. Matches are
        processed right-to-left so earlier offsets stay valid; a match that
        overlaps one already applied is skipped, since splicing it in would
        land on rewritten text. Returns ``text`` unchanged when LanguageTool
        is unavailable or the check raises.
        """
        if self.tool is None:
            return text

        try:
            matches = self.tool.check(text)

            result = text
            # Start offset of the left-most replacement applied so far.
            applied_from = len(text)
            for match in sorted(matches, key=lambda m: m.offset, reverse=True):
                if not match.replacements:
                    continue
                start = match.offset
                end = start + self._match_length(match)
                if end > applied_from:
                    continue
                result = result[:start] + match.replacements[0] + result[end:]
                applied_from = start
            return result
        except Exception as e:
            logger.warning(f"LanguageTool check failed: {e}")
            return text

    def correct(self, text: str) -> str:
        """Run all three correction passes in sequence.

        Empty or whitespace-only input is returned unchanged.
        """
        if not text or not text.strip():
            return text

        logger.debug(f"Spell correction input: {text[:100]}...")

        text = self._phonetic_pass(text)
        text = self._spellcheck_pass(text)
        text = self._languagetool_pass(text)

        return text

    def close(self):
        """Clean up LanguageTool resources.

        Best-effort and safe to call repeatedly: a failure while closing is
        logged rather than raised, and the handle is cleared either way.
        """
        if self.tool is None:
            return
        try:
            self.tool.close()
        except Exception as e:
            logger.warning(f"LanguageTool close failed: {e}")
        finally:
            self.tool = None
|
|