# Dinesh310's picture
# Create app.py
# 35d4b4e verified
import gradio as gr
import ctranslate2
import transformers
# Locations of the inference artefacts, passed unchanged to the loaders below.
_CT2_MODEL_DIR = "ct2_model"
_TOKENIZER_DIR = "final_model"

# Load both components once at import time so every request reuses them.
translator = ctranslate2.Translator(_CT2_MODEL_DIR)
tokenizer = transformers.AutoTokenizer.from_pretrained(_TOKENIZER_DIR)
# Language names offered in the UI mapped to the tag that prefixes the model input.
_LANG_CODES = {"Hindi": "hin", "Bengali": "ben", "Tamil": "tam"}


def transliterate(text: str, target_lang: str) -> str:
    """Transliterate Latin-script text into the selected Indic language.

    Args:
        text: Word or phrase to transliterate. ``None``, empty, or
            whitespace-only input short-circuits to an empty string.
        target_lang: Display name of the target language; must be a key of
            ``_LANG_CODES`` ("Hindi", "Bengali" or "Tamil").

    Returns:
        The decoded model output with special tokens removed, or ``""`` when
        there is nothing to transliterate.

    Raises:
        gr.Error: If ``target_lang`` is not a supported language, so the UI
            shows a readable message instead of an opaque ``KeyError``.
    """
    # Tolerate None (e.g. from an API client) and ignore surrounding whitespace.
    cleaned = (text or "").strip()
    if not cleaned:
        return ""

    lang_code = _LANG_CODES.get(target_lang)
    if lang_code is None:
        supported = ", ".join(_LANG_CODES)
        raise gr.Error(
            f"Unsupported language {target_lang!r}. Choose one of: {supported}."
        )

    # Model input is the lowercased text prefixed with a "<code>" language tag.
    input_text = f"<{lang_code}> {cleaned.lower()}"

    # CTranslate2 consumes token strings rather than ids.
    source_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(input_text))
    results = translator.translate_batch([source_tokens])
    output_tokens = results[0].hypotheses[0]

    # Go back through ids so the tokenizer joins subword pieces and drops
    # special tokens while decoding.
    output_ids = tokenizer.convert_tokens_to_ids(output_tokens)
    return tokenizer.decode(output_ids, skip_special_tokens=True)
# Build the Gradio interface: input controls and the trigger button in one
# column, the result in a second column of the same row.
# NOTE(review): nesting was reconstructed from a flattened source; confirm layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(value="# 🇮🇳 Indic Multilingual Transliteration")
    gr.Markdown(
        value="Transliterate English names/words into Hindi, Bengali, or Tamil using an optimized mT5 model."
    )

    with gr.Row():
        with gr.Column():
            input_box = gr.Textbox(
                label="English Input",
                placeholder="Type a word (e.g., 'Bharat')",
            )
            lang_drop = gr.Dropdown(
                choices=["Hindi", "Bengali", "Tamil"],
                label="Select Language",
                value="Hindi",
            )
            btn = gr.Button(value="Transliterate", variant="primary")
        with gr.Column():
            output_box = gr.Textbox(label="Output")

    # Wire the button to the model call.
    btn.click(
        fn=transliterate,
        inputs=[input_box, lang_drop],
        outputs=output_box,
    )

    # Clickable sample inputs, one per supported language.
    gr.Examples(
        examples=[
            ["namaste", "Hindi"],
            ["dhanyabad", "Bengali"],
            ["vanakkam", "Tamil"],
        ],
        inputs=[input_box, lang_drop],
    )

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()