"""Multilingual dictionary spell checker with a Gradio front end.

The checker itself (``MultilingualSpellChecker``, ``format_suggestions``,
``check_spelling``) is pure Python.  Gradio is only needed to build the web
interface, so it is imported optionally and checked for in
``create_gradio_interface``.
"""

from collections import defaultdict
from typing import Dict, List, Optional, Set, Tuple

import numpy as np  # noqa: F401  (not used in this module; retained from the original import list)
import pandas as pd  # noqa: F401  (not used in this module; retained from the original import list)

try:
    import gradio as gr
except ImportError:  # The UI dependency is optional for the checker logic.
    gr = None

# Language name -> word-list path used when no explicit mapping is given.
DEFAULT_CORPORA: Dict[str, str] = {
    "Tamil": "tamil.txt",
    "Malayalam": "malayalam.txt",
    "English": "english.txt",
}


class MultilingualSpellChecker:
    """Spell checker that keeps one word list per language.

    Words are indexed by their first character so that suggestion lookup
    normally only compares the query against words sharing that character.
    """

    def __init__(self):
        # Language -> set of words
        self.vocab: Dict[str, Set[str]] = defaultdict(set)
        # Language -> first character -> words starting with that character
        self.char_to_words: Dict[str, Dict[str, Set[str]]] = defaultdict(
            lambda: defaultdict(set)
        )
        # Initialised here but not read anywhere in this module.
        self.current_language = None

    def load_corpus(self, language: str, file_path: str):
        """Load a one-word-per-line corpus file for ``language``.

        Blank lines are skipped and surrounding whitespace is stripped.
        Loading is best-effort: if the file cannot be opened or decoded the
        error is printed and the method returns normally; words read before
        the failure stay loaded.

        Args:
            language: Key under which the words are stored.
            file_path: Path to a UTF-8 text file (with or without a BOM).
        """
        try:
            # "utf-8-sig" reads plain UTF-8 too, but drops a leading BOM that
            # would otherwise be glued to the first word of the file.
            with open(file_path, "r", encoding="utf-8-sig") as corpus_file:
                for line in corpus_file:
                    word = line.strip()
                    if word:
                        self.vocab[language].add(word)
                        self.char_to_words[language][word[0]].add(word)
        except (OSError, ValueError) as e:
            # OSError: missing/unreadable file.  ValueError: undecodable
            # bytes (UnicodeDecodeError) or an invalid path string.
            print(f"Error loading corpus for {language}: {e}")
        else:
            print(f"Loaded {len(self.vocab[language])} words for {language}")

    def levenshtein_distance(self, s1: str, s2: str) -> int:
        """Return the Levenshtein (edit) distance between two strings.

        Uses the two-row dynamic-programming formulation; the comparison is
        per Python character (code point).
        """
        # Keep the shorter string as the row so the rows stay small.
        if len(s1) < len(s2):
            s1, s2 = s2, s1
        if not s2:
            return len(s1)

        previous_row = list(range(len(s2) + 1))
        for i, c1 in enumerate(s1, start=1):
            current_row = [i]
            for j, c2 in enumerate(s2):
                insertion = previous_row[j + 1] + 1
                deletion = current_row[j] + 1
                substitution = previous_row[j] + (c1 != c2)
                current_row.append(min(insertion, deletion, substitution))
            previous_row = current_row
        return previous_row[-1]

    def get_suggestions(
        self, word: str, language: str, max_suggestions: int = 10
    ) -> Dict[str, List[str]]:
        """Return spelling suggestions for ``word`` in ``language``.

        Args:
            word: Word to check; surrounding whitespace is ignored.
            language: Language whose loaded vocabulary is searched.
            max_suggestions: Maximum number of candidates returned in total.

        Returns:
            ``{"exact_match": [word]}`` when the word is in the vocabulary.
            Otherwise a dict mapping a probability label to candidate words,
            closest first, containing only the non-empty labels.  The dict is
            empty when the word is blank or no vocabulary is loaded for the
            language.
        """
        # Corpus words are stripped on load, so strip the query the same way.
        word = word.strip()
        # .get() avoids inserting empty entries for unknown languages.
        vocabulary = self.vocab.get(language)
        if not word or not vocabulary:
            return {}

        if word in vocabulary:
            return {"exact_match": [word]}

        # Prefer words sharing the first character; fall back to the whole
        # vocabulary when there are none.
        candidates = self.char_to_words.get(language, {}).get(word[0])
        if not candidates:
            candidates = vocabulary

        ranked: List[Tuple[int, str]] = sorted(
            (self.levenshtein_distance(word, candidate), candidate)
            for candidate in candidates
        )

        labels_by_distance = {
            1: "High Probability",
            2: "Medium Probability",
            3: "Low Probability",
        }
        suggestions: Dict[str, List[str]] = {
            "High Probability": [],       # Distance 1
            "Medium Probability": [],     # Distance 2
            "Low Probability": [],        # Distance 3
            "Very Low Probability": [],   # Distance 4+
        }
        for distance, candidate in ranked[:max_suggestions]:
            label = labels_by_distance.get(distance, "Very Low Probability")
            suggestions[label].append(candidate)

        return {label: words for label, words in suggestions.items() if words}


def format_suggestions(suggestions: Dict[str, List[str]]) -> str:
    """Format the result of ``get_suggestions`` as text for the Gradio output.

    Returns a confirmation line for an exact match, a grouped list of
    candidates otherwise, or an explicit message when there is nothing to
    suggest.
    """
    if "exact_match" in suggestions:
        return "✓ Word is correct and exists in the corpus!"

    sections = [
        f"{category}:\n{', '.join(words)}\n\n"
        for category, words in suggestions.items()
        if words
    ]
    if not sections:
        # Happens when no corpus is loaded for the language; a bare header
        # with nothing under it would look like a broken result.
        return "No suggestions found in the corpus."
    return "Suggested corrections:\n\n" + "".join(sections)


def check_spelling(word: str, language: str, spell_checker: MultilingualSpellChecker) -> str:
    """Check ``word`` with ``spell_checker`` and return display text.

    Blank (or missing) input yields a prompt to enter a word; otherwise the
    whitespace-stripped word is looked up and the formatted result returned.
    """
    cleaned = word.strip() if word else ""
    if not cleaned:
        return "Please enter a word to check."

    suggestions = spell_checker.get_suggestions(cleaned, language)
    return format_suggestions(suggestions)


def create_gradio_interface(corpora: Optional[Dict[str, str]] = None):
    """Build the Gradio interface backed by a freshly loaded spell checker.

    Args:
        corpora: Mapping of language name to corpus file path.  Defaults to
            ``DEFAULT_CORPORA`` (Tamil, Malayalam, English).  The mapping's
            order determines the dropdown order; the first language is the
            initial selection.

    Returns:
        The configured ``gr.Interface`` (not yet launched).

    Raises:
        ImportError: If gradio is not installed.
    """
    if gr is None:
        raise ImportError(
            "gradio is required for the web interface; install it with 'pip install gradio'"
        )
    if corpora is None:
        corpora = DEFAULT_CORPORA

    # Initialize spell checker and load corpora (adjust paths as needed)
    spell_checker = MultilingualSpellChecker()
    for language, file_path in corpora.items():
        spell_checker.load_corpus(language, file_path)

    languages = list(corpora)

    def run_check(word, lang):
        # Closure binding the loaded checker to the two UI inputs.
        return check_spelling(word, lang, spell_checker)

    # Create Gradio interface
    iface = gr.Interface(
        fn=run_check,
        inputs=[
            gr.Textbox(
                label="Enter word to check",
                placeholder="Type a word here...",
                lines=1,
            ),
            gr.Dropdown(
                choices=languages,
                label="Select Language",
                value=languages[0] if languages else None,
            ),
        ],
        outputs=gr.Textbox(
            label="Results",
            lines=10,
        ),
        title="Multilingual Spell Checker",
        description="""Enter a word in the selected language to check its spelling and get suggestions.
        The system will verify if the word exists in the corpus and provide similar words if it doesn't.""",
        theme="default",
        css="""
        .gradio-container {max-width: 800px; margin: auto;}
        .output-text {font-family: monospace;}
        """,
    )

    return iface


# For Colab usage
def setup_colab():
    """Create the interface and launch it with a public share link."""
    # Install Gradio if not already installed
    # !pip install -q gradio

    # Create and launch interface
    iface = create_gradio_interface()
    iface.launch(share=True)


if __name__ == "__main__":
    # For local development
    iface = create_gradio_interface()
    iface.launch()