Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import json | |
| from rapidfuzz import process, fuzz | |
| import phonetics | |
| from indic_transliteration import sanscript | |
| from indic_transliteration.sanscript import transliterate | |
| from collections import defaultdict | |
# Load the dataset. The file is read line by line and each line is parsed as
# its own JSON object; successfully parsed objects are collected in `data`.
data = []
with open("hin_test.json", "r", encoding="utf-8") as file:
    for line in file:
        # A blank or whitespace-only line holds no record. json.loads would
        # reject it, producing a misleading "Error parsing line" message for
        # something that is not actually malformed data, so skip it.
        if not line.strip():
            continue
        try:
            data.append(json.loads(line))
        except json.JSONDecodeError as e:
            # Best-effort load: report the malformed line and keep reading.
            print(f"Error parsing line: {line}. Error: {e}")
# Collect the 'english word' value of every record that has that key.
names_database = []
for record in data:
    if 'english word' in record:
        names_database.append(record['english word'])

# Group the database names by their metaphone code so that all names sharing
# a code can be fetched with a single dictionary lookup.
phonetic_dict = defaultdict(list)
for db_name in names_database:
    phonetic_dict[phonetics.metaphone(db_name)].append(db_name)
| # Function to perform fuzzy matching | |
def fuzzy_match(name, threshold=70):
    """Return the closest database names to ``name`` as a display string.

    Up to five candidates are requested from ``names_database`` using
    ``fuzz.ratio`` as the scorer. Candidates scoring at least ``threshold``
    are rendered as ``"<name> - <score>"`` (score with two decimals) and
    joined with ``", "``. When no candidate qualifies, the string
    ``"No matches found."`` is returned instead.
    """
    candidates = process.extract(name, names_database, scorer=fuzz.ratio, limit=5)

    formatted = []
    for candidate, score, _ in candidates:
        if score >= threshold:
            formatted.append(f"{candidate} - {score:.2f}")

    if not formatted:
        return "No matches found."
    return ", ".join(formatted)
| # Function to perform phonetic matching | |
def phonetic_match(name):
    """Return the database names that share ``name``'s metaphone code.

    Args:
        name: The name to look up. May be empty or ``None`` (for example
            when the input field is left blank).

    Returns:
        The matching names joined with ``", "``, or
        ``"No phonetic matches found."`` when there is no match or when
        ``name`` is empty, whitespace-only, or ``None``.
    """
    no_match = "No phonetic matches found."

    # Blank input has nothing to encode. Answer directly instead of handing it
    # to the encoder: the phonetics package is understood to raise on empty
    # strings and on non-string values (NOTE(review): confirm against the
    # installed version), which would surface as an error in the UI.
    if not name or not name.strip():
        return no_match

    input_phonetic = phonetics.metaphone(name)
    # .get() is used so a miss does not insert an empty list into the
    # defaultdict.
    matches = phonetic_dict.get(input_phonetic, [])
    return ", ".join(matches) if matches else no_match
| # Function to transliterate Devanagari to Roman script | |
def devanagari_to_roman(hindi_name):
    """Transliterate ``hindi_name`` from Devanagari to the ITRANS scheme.

    The text is passed to ``transliterate`` with ``sanscript.DEVANAGARI`` as
    the source scheme and ``sanscript.ITRANS`` as the target; the result is
    returned unchanged.
    """
    return transliterate(
        hindi_name,
        sanscript.DEVANAGARI,
        sanscript.ITRANS,
    )
| # Function to handle different types of matching/transliteration | |
def match_name(name, method):
    """Run the operation selected by ``method`` on ``name``.

    ``method`` is compared against the three supported labels:
    ``"Fuzzy Match"``, ``"Phonetic Match"`` and
    ``"Devanagari to Roman Transliteration"``. The result of the matching
    helper is returned; any other value yields ``"Select a valid method."``.
    """
    if method == "Fuzzy Match":
        return fuzzy_match(name)

    if method == "Phonetic Match":
        return phonetic_match(name)

    if method == "Devanagari to Roman Transliteration":
        return devanagari_to_roman(name)

    # Nothing recognised (including no selection at all).
    return "Select a valid method."
# Gradio UI: one text input for the name, one radio selector for the
# operation, and a single text output. The radio labels must stay identical
# to the strings that match_name compares against.
_method_choices = [
    "Fuzzy Match",
    "Phonetic Match",
    "Devanagari to Roman Transliteration",
]

# Description shown under the title (adjacent literals are concatenated).
_app_description = (
    "Welcome to **नाम-सूचक**, an innovative solution developed for the SIH1787 problem statement. "
    "This tool offers a robust and intelligent approach for **fuzzy matching**, **phonetic matching**, "
    "and **transliteration** of Hindi names, ensuring seamless handling of variations and phonetic similarities. "
    "Effortlessly search, match, and transliterate names with high accuracy and efficiency."
)

# One example per operation, as [name, method] pairs matching the input order.
_example_rows = [
    ["Suresh", "Fuzzy Match"],
    ["Shiv", "Phonetic Match"],
    ["राहुल", "Devanagari to Roman Transliteration"],
]

interface = gr.Interface(
    fn=match_name,
    inputs=[
        gr.Textbox(label="Enter Name"),
        gr.Radio(_method_choices, label="Matching Method"),
    ],
    outputs=gr.Textbox(label="Result"),
    title="नाम-सूचक (Naam-Suchak)",
    description=_app_description,
    examples=_example_rows,
    cache_examples=True,
    allow_flagging='never',
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()