import json
from collections import defaultdict

import gradio as gr
import phonetics
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
from rapidfuzz import fuzz, process

# Parsed records from the dataset, one JSON value per successfully parsed line.
data = []

# Load the dataset as JSON Lines: each line of the file is parsed independently,
# so one malformed line is reported and skipped without discarding the rest.
with open("hin_test.json", "r", encoding="utf-8") as dataset_file:
    for line in dataset_file:
        if not line.strip():
            # Blank lines (e.g. a trailing newline) are not records; skipping
            # them avoids a spurious parse-error message for each one.
            continue
        try:
            data.append(json.loads(line))
        except json.JSONDecodeError as e:
            print(f"Error parsing line: {line}. Error: {e}")


def _collect_names(records):
    """Return the unique, non-empty 'english word' strings from *records*.

    Records that are not dicts, or whose 'english word' value is missing, not
    a string, or blank, are ignored. First-seen order is preserved, and
    duplicates are dropped so that one name cannot occupy several of the
    limited fuzzy-match result slots.
    """
    unique_names = {}
    for entry in records:
        if not isinstance(entry, dict):
            continue
        candidate = entry.get("english word")
        if isinstance(candidate, str) and candidate.strip():
            # A dict is used as an insertion-ordered set.
            unique_names.setdefault(candidate, None)
    return list(unique_names)


# Names searched by both the fuzzy and the phonetic matcher.
names_database = _collect_names(data)

# Precomputed index: metaphone code -> names in the database sharing that code.
phonetic_dict = defaultdict(list)
for name in names_database:
    phonetic_dict[phonetics.metaphone(name)].append(name)


def fuzzy_match(name: str, threshold: float = 70) -> str:
    """Return up to five database names whose similarity to *name* meets *threshold*.

    Args:
        name: The query string to compare against ``names_database``.
        threshold: Minimum ``fuzz.ratio`` score a candidate must reach.

    Returns:
        A comma-separated string of ``"<name> - <score>"`` items (score shown
        with two decimals), or ``"No matches found."`` when nothing qualifies.
    """
    # score_cutoff makes rapidfuzz discard candidates below the threshold
    # itself, replacing a separate Python-side filtering pass.
    matches = process.extract(
        name,
        names_database,
        scorer=fuzz.ratio,
        limit=5,
        score_cutoff=threshold,
    )
    # Each result is unpacked as (choice, score, index); the index is unused.
    matched_names = [f"{candidate} - {score:.2f}" for candidate, score, _ in matches]
    return ", ".join(matched_names) if matched_names else "No matches found."


def phonetic_match(name: str) -> str:
    """Return the database names that share *name*'s metaphone code.

    Returns:
        A comma-separated string of matching names, or
        ``"No phonetic matches found."`` when there are none.
    """
    input_phonetic = phonetics.metaphone(name)
    if not input_phonetic:
        # An empty code carries no phonetic information, so it must not be
        # treated as a key that matches other names with an empty code.
        return "No phonetic matches found."
    # .get() is used instead of indexing so that a miss does not insert a new
    # empty list into the defaultdict.
    matches = phonetic_dict.get(input_phonetic, [])
    return ", ".join(matches) if matches else "No phonetic matches found."


def devanagari_to_roman(hindi_name: str) -> str:
    """Transliterate *hindi_name* from Devanagari to the ITRANS romanization."""
    return transliterate(hindi_name, sanscript.DEVANAGARI, sanscript.ITRANS)


def match_name(name, method):
    """Run the operation selected by *method* on *name*.

    Args:
        name: The text entered by the user; ``None`` is treated as empty and
            surrounding whitespace is removed before processing.
        method: One of ``"Fuzzy Match"``, ``"Phonetic Match"`` or
            ``"Devanagari to Roman Transliteration"``.

    Returns:
        The result string of the selected operation, or
        ``"Select a valid method."`` for any other *method* value.
    """
    handlers = {
        "Fuzzy Match": fuzzy_match,
        "Phonetic Match": phonetic_match,
        "Devanagari to Roman Transliteration": devanagari_to_roman,
    }
    handler = handlers.get(method)
    if handler is None:
        return "Select a valid method."
    # Stray leading/trailing whitespace would otherwise lower fuzzy scores and
    # change the query passed to the other handlers.
    cleaned_name = (name or "").strip()
    return handler(cleaned_name)
# Gradio front end: a name textbox plus a method selector feed match_name,
# and the returned string is shown in a single result textbox.

# Labels offered by the method selector, in display order.
_METHOD_CHOICES = [
    "Fuzzy Match",
    "Phonetic Match",
    "Devanagari to Roman Transliteration",
]

# Introductory text rendered under the title.
_DESCRIPTION = (
    "Welcome to **नाम-सूचक**, an innovative solution developed for the SIH1787 problem statement. "
    "This tool offers a robust and intelligent approach for **fuzzy matching**, **phonetic matching**, "
    "and **transliteration** of Hindi names, ensuring seamless handling of variations and phonetic similarities. "
    "Effortlessly search, match, and transliterate names with high accuracy and efficiency."
)

# Sample (name, method) pairs offered as examples in the UI.
_EXAMPLES = [
    ["Suresh", "Fuzzy Match"],
    ["Shiv", "Phonetic Match"],
    ["राहुल", "Devanagari to Roman Transliteration"],
]

# Components are created in the same order as they appear in the form:
# the two inputs first, then the output.
_name_input = gr.Textbox(label="Enter Name")
_method_input = gr.Radio(_METHOD_CHOICES, label="Matching Method")
_result_output = gr.Textbox(label="Result")

interface = gr.Interface(
    fn=match_name,
    inputs=[_name_input, _method_input],
    outputs=_result_output,
    title="नाम-सूचक (Naam-Suchak)",
    allow_flagging='never',
    description=_DESCRIPTION,
    examples=_EXAMPLES,
    cache_examples=True,
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()