# SanjayKannaV's picture
# Update app.py
# 6f65911 verified
import gradio as gr
from collections import defaultdict
from typing import List, Dict, Set, Tuple
import pandas as pd
import numpy as np
class MultilingualSpellChecker:
    """Word-list spell checker that keeps a separate vocabulary per language.

    A word is "correct" when it appears verbatim in the loaded corpus for the
    requested language.  Otherwise corpus words are ranked by Levenshtein
    distance and returned grouped into probability bands.
    """

    def __init__(self):
        # Language -> set of known words.
        self.vocab = defaultdict(set)
        # Language -> first character -> words starting with that character.
        # Used to narrow the candidate pool before computing edit distances.
        self.char_to_words = defaultdict(lambda: defaultdict(set))
        self.current_language = None

    def load_corpus(self, language: str, file_path: str):
        """Load a one-word-per-line corpus file into the given language.

        Blank lines are skipped and surrounding whitespace is stripped.  On a
        read or decoding failure an error is printed and the method returns
        normally (best effort); words read before the failure stay loaded.

        Args:
            language: Key under which the words are stored.
            file_path: Path to a UTF-8 text file, one word per line.
        """
        try:
            # "utf-8-sig" reads plain UTF-8 as well, but also drops a leading
            # BOM, which str.strip() would otherwise leave glued to the first
            # word of the file.
            with open(file_path, 'r', encoding='utf-8-sig') as f:
                for line in f:
                    word = line.strip()
                    if word:
                        self.vocab[language].add(word)
                        self.char_to_words[language][word[0]].add(word)
            print(f"Loaded {len(self.vocab[language])} words for {language}")
        except (OSError, UnicodeError) as e:
            print(f"Error loading corpus for {language}: {e}")

    def levenshtein_distance(self, s1: str, s2: str) -> int:
        """Return the Levenshtein (edit) distance between two strings.

        Counts the minimum number of single-character insertions, deletions
        and substitutions needed to turn one string into the other.  Only two
        rows of the dynamic-programming table are kept at a time.
        """
        # Keep the shorter string as s2 so the rows are as short as possible.
        if len(s1) < len(s2):
            s1, s2 = s2, s1
        if not s2:
            return len(s1)

        previous_row = list(range(len(s2) + 1))
        for i, c1 in enumerate(s1):
            current_row = [i + 1]
            for j, c2 in enumerate(s2):
                insertions = previous_row[j + 1] + 1
                deletions = current_row[j] + 1
                substitutions = previous_row[j] + (c1 != c2)
                current_row.append(min(insertions, deletions, substitutions))
            previous_row = current_row
        return previous_row[-1]

    def get_suggestions(self, word: str, language: str, max_suggestions: int = 10) -> Dict[str, List[str]]:
        """Check a word against a language's corpus and suggest alternatives.

        Args:
            word: The word to check.
            language: Which loaded corpus to check against.
            max_suggestions: Upper bound on the number of suggested words
                across all bands; values below zero are treated as zero.

        Returns:
            ``{"exact_match": [word]}`` when the word is in the corpus.
            Otherwise a dict mapping band labels ("High Probability" for
            distance 1, "Medium Probability" for 2, "Low Probability" for 3,
            "Very Low Probability" for anything else) to candidate words,
            with empty bands omitted.  Empty dict when there is nothing to
            suggest, including for an empty ``word``.
        """
        # An empty word has no first character to index candidates by.
        if not word:
            return {}

        # .get() instead of [] so that querying an unknown language does not
        # create an empty entry in the defaultdicts as a side effect.
        known_words = self.vocab.get(language, set())
        if word in known_words:
            return {"exact_match": [word]}

        # Prefer words sharing the first character; fall back to the whole
        # vocabulary when there are none.
        candidates = self.char_to_words.get(language, {}).get(word[0]) or known_words

        # Tuples sort by distance first, then alphabetically by candidate.
        ranked = sorted(
            (self.levenshtein_distance(word, candidate), candidate)
            for candidate in candidates
        )

        band_for_distance = {
            1: "High Probability",
            2: "Medium Probability",
            3: "Low Probability",
        }
        suggestions = {
            "High Probability": [],
            "Medium Probability": [],
            "Low Probability": [],
            "Very Low Probability": [],
        }
        # Clamp so that a negative limit cannot turn into a "[:-k]" slice.
        limit = max(max_suggestions, 0)
        for distance, candidate in ranked[:limit]:
            band = band_for_distance.get(distance, "Very Low Probability")
            suggestions[band].append(candidate)
        return {band: words for band, words in suggestions.items() if words}
def format_suggestions(suggestions: Dict[str, List[str]]) -> str:
    """Render a suggestions dict as plain text for the Gradio output box.

    Args:
        suggestions: Either ``{"exact_match": [...]}`` or a mapping of
            category label to a list of suggested words.

    Returns:
        A confirmation line for an exact match; otherwise a
        "Suggested corrections:" header followed by one section per
        non-empty category.  When no category has any words, a short
        "No suggestions found." message is returned instead of a bare header.
    """
    if "exact_match" in suggestions:
        return "✓ Word is correct and exists in the corpus!"

    sections = [
        f"{category}:\n{', '.join(words)}\n\n"
        for category, words in suggestions.items()
        if words
    ]
    # Nothing to list (e.g. the corpus is empty): say so explicitly rather
    # than showing a header with nothing under it.
    if not sections:
        return "No suggestions found."
    return "Suggested corrections:\n\n" + "".join(sections)
def check_spelling(word: str, language: str, spell_checker: "MultilingualSpellChecker") -> str:
    """Check one word and return the text shown in the Gradio results box.

    Args:
        word: Raw text from the input box; may be empty or ``None``.
        language: Language whose corpus the word is checked against.
        spell_checker: Checker instance with the corpora already loaded.

    Returns:
        A prompt to enter a word when the input is blank, otherwise the
        formatted result of the spell check.
    """
    # Normalise once and use the cleaned value for both the blank check and
    # the lookup; otherwise " cat " passes validation but never matches
    # "cat", and candidates would be chosen by the leading space.
    cleaned = word.strip() if word else ""
    if not cleaned:
        return "Please enter a word to check."
    suggestions = spell_checker.get_suggestions(cleaned, language)
    return format_suggestions(suggestions)
def create_gradio_interface():
    """Build the Gradio interface for the spell checker and return it.

    A checker is created, the Tamil, Malayalam and English corpora are loaded
    from text files (paths are relative; adjust as needed), and a two-input,
    one-output interface is wired to ``check_spelling``.  The interface is
    returned without being launched.
    """
    checker = MultilingualSpellChecker()

    # (language, corpus file) pairs, loaded in this order.
    corpus_files = (
        ("Tamil", "tamil.txt"),
        ("Malayalam", "malayalam.txt"),
        ("English", "english.txt"),
    )
    for language_name, corpus_path in corpus_files:
        checker.load_corpus(language_name, corpus_path)

    # Input and output widgets.
    word_box = gr.Textbox(
        label="Enter word to check",
        placeholder="Type a word here...",
        lines=1,
    )
    language_picker = gr.Dropdown(
        choices=["Tamil", "Malayalam", "English"],
        label="Select Language",
        value="Tamil",
    )
    results_box = gr.Textbox(label="Results", lines=10)

    description_text = (
        "Enter a word in the selected language to check its spelling and get suggestions.\n"
        "The system will verify if the word exists in the corpus and provide similar words if it doesn't."
    )
    custom_css = (
        "\n"
        ".gradio-container {max-width: 800px; margin: auto;}\n"
        ".output-text {font-family: monospace;}\n"
    )

    # The lambda binds the loaded checker so the UI callback only takes the
    # two values supplied by the input widgets.
    return gr.Interface(
        fn=lambda word, lang: check_spelling(word, lang, checker),
        inputs=[word_box, language_picker],
        outputs=results_box,
        title="Multilingual Spell Checker",
        description=description_text,
        theme="default",
        css=custom_css,
    )
# For Colab usage
def setup_colab():
    """Build the interface and launch it with ``share=True``.

    Gradio is not installed here; in a notebook, run
    ``!pip install -q gradio`` in a cell first if it is missing.
    """
    create_gradio_interface().launch(share=True)
if __name__ == "__main__":
    # Script entry point for local development: build the interface and
    # launch it with Gradio's default launch settings.
    iface = create_gradio_interface()
    iface.launch()