fuzzyname1787 / app.py
amve's picture
Create app.py
ac9eb14 verified
import gradio as gr
import json
from rapidfuzz import process, fuzz
import phonetics
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
from collections import defaultdict
data = []
# Load the dataset. The file is read as JSON Lines: every non-blank line is
# parsed as one independent JSON document.
with open("hin_test.json", "r", encoding="utf-8") as file:
    for line in file:
        # Blank or whitespace-only lines carry no record; skip them instead
        # of reporting each one as a parse error.
        if not line.strip():
            continue
        try:
            # Parse each line as a JSON object
            data.append(json.loads(line))
        except json.JSONDecodeError as e:
            # Best effort: report the malformed line and keep loading.
            print(f"Error parsing line: {line}. Error: {e}")

# Extract names from the dataset. Only dict records whose 'english word'
# value is a string are kept, so the code below (and the matchers that read
# this list) can treat every entry as text.
names_database = [
    entry['english word']
    for entry in data
    if isinstance(entry, dict) and isinstance(entry.get('english word'), str)
]

# Precompute metaphone codes for the database names: names sharing a code are
# grouped under that code, so a phonetic lookup is a single dict access.
phonetic_dict = defaultdict(list)
for name in names_database:
    code = phonetics.metaphone(name)
    phonetic_dict[code].append(name)
# Function to perform fuzzy matching
def fuzzy_match(name, threshold=70, limit=5):
    """Return the database names most similar to ``name``.

    Args:
        name: Query string, scored against every entry of ``names_database``
            with ``fuzz.ratio``.
        threshold: Minimum score a candidate must reach to be reported.
        limit: Maximum number of matches to report. Defaults to 5, the value
            that used to be hard-coded.

    Returns:
        A ", "-joined string of "<name> - <score>" items (score printed with
        two decimals) in the order rapidfuzz returns them, or
        "No matches found." when no candidate reaches ``threshold``.
    """
    # score_cutoff makes rapidfuzz drop candidates scoring below the
    # threshold itself, so no separate filtering pass is needed afterwards.
    matches = process.extract(
        name,
        names_database,
        scorer=fuzz.ratio,
        limit=limit,
        score_cutoff=threshold,
    )
    matched_names = [
        f"{match_name} - {score:.2f}" for match_name, score, _ in matches
    ]
    return ", ".join(matched_names) if matched_names else "No matches found."
# Function to perform phonetic matching
def phonetic_match(name):
    """Return the database names whose metaphone code equals that of ``name``.

    The lookup goes through the precomputed ``phonetic_dict`` index. The
    result is a ", "-joined string of the names stored under the code, or
    "No phonetic matches found." when the code has no entries.
    """
    # .get() is used so that a miss does not insert a new key into the index.
    same_sounding = phonetic_dict.get(phonetics.metaphone(name), [])
    if not same_sounding:
        return "No phonetic matches found."
    return ", ".join(same_sounding)
# Function to transliterate Devanagari to Roman script
def devanagari_to_roman(hindi_name):
    """Transliterate ``hindi_name`` from Devanagari into the ITRANS scheme.

    Returns whatever ``transliterate`` produces for the given text.
    """
    return transliterate(hindi_name, sanscript.DEVANAGARI, sanscript.ITRANS)
# Function to handle different types of matching/transliteration
def match_name(name, method):
    """Dispatch ``name`` to the operation selected by ``method``.

    Args:
        name: Text entered by the user. ``None`` is treated as an empty
            string and surrounding whitespace is removed before processing,
            because stray spaces lower the similarity score of an otherwise
            exact name.
        method: One of "Fuzzy Match", "Phonetic Match" or
            "Devanagari to Roman Transliteration".

    Returns:
        The string produced by the selected operation, or
        "Select a valid method." when ``method`` is none of the labels above.
    """
    # Normalise the input once so every branch receives clean text.
    name = "" if name is None else str(name).strip()

    if method == "Fuzzy Match":
        return fuzzy_match(name)
    if method == "Phonetic Match":
        return phonetic_match(name)
    if method == "Devanagari to Roman Transliteration":
        return devanagari_to_roman(name)
    return "Select a valid method."
# Gradio Interface with updated components and parameters.
# The pieces are named first and then handed to gr.Interface in one short call.

# Input widgets: the free-text name and the operation selector. The radio
# labels are the exact strings match_name compares against.
_name_box = gr.Textbox(label="Enter Name")
_method_radio = gr.Radio(
    ["Fuzzy Match", "Phonetic Match", "Devanagari to Roman Transliteration"],
    label="Matching Method"
)

# Output widget showing the string returned by match_name.
_result_box = gr.Textbox(label="Result")

# Text passed to the interface as its description.
_description = (
    "Welcome to **नाम-सूचक**, an innovative solution developed for the SIH1787 problem statement. "
    "This tool offers a robust and intelligent approach for **fuzzy matching**, **phonetic matching**, "
    "and **transliteration** of Hindi names, ensuring seamless handling of variations and phonetic similarities. "
    "Effortlessly search, match, and transliterate names with high accuracy and efficiency."
)

# One sample (name, method) pair per supported operation.
_examples = [
    ["Suresh", "Fuzzy Match"],
    ["Shiv", "Phonetic Match"],
    ["राहुल", "Devanagari to Roman Transliteration"]
]

interface = gr.Interface(
    fn=match_name,
    inputs=[_name_box, _method_radio],
    outputs=_result_box,
    title="नाम-सूचक (Naam-Suchak)",
    allow_flagging='never',
    description=_description,
    examples=_examples,
    cache_examples=True
)

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()