""" Two-pass spell correction: Pass 1: pyspellchecker (fast, context-free, catches simple typos) Pass 2: LanguageTool (context-aware, catches grammar + dyslexic patterns) Dyslexic error patterns handled: - Letter reversals: b/d, p/q, n/u, m/w - Phonetic spelling: "wuz", "cud", "thay" - Word boundary errors: "alot", "infact", "aswell" - Letter omissions: "becaus", "importnt" - Letter transpositions: "teh", "recieve" - Homophone confusion: there/their/they're """ import language_tool_python from spellchecker import SpellChecker from loguru import logger from typing import Optional import re class DyslexiaAwareSpellCorrector: """Two-pass spell corrector with dyslexia-specific phonetic pattern handling.""" DYSLEXIC_PHONETIC_MAP = { "wuz": "was", "cud": "could", "wud": "would", "shud": "should", "thay": "they", "thier": "their", "recieve": "receive", "beleive": "believe", "occured": "occurred", "definately": "definitely", "seperate": "separate", "untill": "until", "tommorrow": "tomorrow", "alot": "a lot", "infact": "in fact", "aswell": "as well", "alright": "all right", "cant": "cannot", "wont": "will not", "ive": "I have", "im": "I am", "id": "I would", } def __init__(self, language: str = "en-US"): self.spell = SpellChecker() self.language = language # Build regex pattern for phonetic map (word-boundary matching) self._phonetic_pattern = re.compile( r'\b(' + '|'.join(re.escape(k) for k in self.DYSLEXIC_PHONETIC_MAP.keys()) + r')\b', re.IGNORECASE ) # Try to initialise LanguageTool; graceful fallback if JVM not available self.tool = None try: self.tool = language_tool_python.LanguageTool(language) logger.info("LanguageTool initialised successfully") except Exception as e: logger.warning(f"LanguageTool unavailable (JVM issue?), skipping context-aware pass: {e}") def _phonetic_pass(self, text: str) -> str: """Apply known dyslexic phonetic substitutions first.""" def _replace(match): word = match.group(0) lower = word.lower() replacement = self.DYSLEXIC_PHONETIC_MAP.get(lower, word) # Preserve capitalisation of first letter if word[0].isupper() and replacement[0].islower(): replacement = replacement[0].upper() + replacement[1:] return replacement return self._phonetic_pattern.sub(_replace, text) def _spellcheck_pass(self, text: str) -> str: """pyspellchecker pass for simple token-level errors.""" words = text.split() corrected_words = [] for word in words: # Strip punctuation for checking but preserve it stripped = word.strip(".,!?;:\"'()[]{}—–-") prefix = word[:len(word) - len(word.lstrip(".,!?;:\"'()[]{}—–-"))] suffix = word[len(stripped) + len(prefix):] if stripped and stripped.lower() not in self.spell and not stripped.isupper(): correction = self.spell.correction(stripped.lower()) if correction and correction != stripped.lower(): # Preserve original capitalisation if stripped[0].isupper(): correction = correction.capitalize() corrected_words.append(prefix + correction + suffix) else: corrected_words.append(word) else: corrected_words.append(word) return " ".join(corrected_words) def _languagetool_pass(self, text: str) -> str: """LanguageTool pass for context-aware grammar + spelling corrections.""" if self.tool is None: return text try: matches = self.tool.check(text) # Apply corrections in reverse order to preserve offsets matches = sorted(matches, key=lambda m: m.offset, reverse=True) result = text for match in matches: if match.replacements: replacement = match.replacements[0] start = match.offset end = start + match.errorLength result = result[:start] + replacement + result[end:] return result except Exception as e: logger.warning(f"LanguageTool check failed: {e}") return text def correct(self, text: str) -> str: """Run all three correction passes in sequence.""" if not text or not text.strip(): return text logger.debug(f"Spell correction input: {text[:100]}...") # Pass 0: Phonetic substitutions (dyslexia-specific) text = self._phonetic_pass(text) # Pass 1: Token-level spellcheck text = self._spellcheck_pass(text) # Pass 2: Context-aware grammar correction text = self._languagetool_pass(text) return text def close(self): """Clean up LanguageTool resources.""" if self.tool is not None: try: self.tool.close() except Exception: pass self.tool = None