"""Gradio demo: transliterate English words into Hindi, Bengali, or Tamil.

The model is run through CTranslate2 (weights in ``ct2_model``) and text is
tokenized with the Hugging Face tokenizer stored in ``final_model``.
"""

import ctranslate2
import gradio as gr
import transformers

# Load optimized components once at import time so every request reuses them.
translator = ctranslate2.Translator("ct2_model")
tokenizer = transformers.AutoTokenizer.from_pretrained("final_model")

# UI language label -> tag that is prepended to the model input as "<tag>".
# Insertion order is also the order of the dropdown choices.
LANG_CODES = {"Hindi": "hin", "Bengali": "ben", "Tamil": "tam"}


def transliterate(text: str, target_lang: str) -> str:
    """Transliterate English ``text`` into the script of ``target_lang``.

    Args:
        text: English word(s) to transliterate. It is lower-cased before
            being sent to the model.
        target_lang: One of the keys of ``LANG_CODES``
            ("Hindi", "Bengali", "Tamil").

    Returns:
        The decoded model output, or ``""`` when ``text`` is ``None``, empty,
        or whitespace-only.

    Raises:
        gr.Error: If ``target_lang`` is not a supported language, so the UI
            shows a readable message instead of an opaque ``KeyError``.
    """
    # Guard against None as well as blank input (API callers may send null).
    if not text or not text.strip():
        return ""

    lang_code = LANG_CODES.get(target_lang)
    if lang_code is None:
        raise gr.Error(f"Unsupported language: {target_lang!r}")

    input_text = f"<{lang_code}> {text.lower()}"

    # CTranslate2 consumes token strings rather than ids, so encode to ids
    # first and then map the ids back to their token strings.
    source_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(input_text))

    results = translator.translate_batch([source_tokens])
    output_tokens = results[0].hypotheses[0]

    # Clean up output: go back through ids so the tokenizer's own decode step
    # drops special tokens and joins the subword pieces (presumably
    # SentencePiece pieces, given the mT5 model named in the UI text).
    output_ids = tokenizer.convert_tokens_to_ids(output_tokens)
    return tokenizer.decode(output_ids, skip_special_tokens=True)


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🇮🇳 Indic Multilingual Transliteration")
    gr.Markdown(
        "Transliterate English names/words into Hindi, Bengali, or Tamil "
        "using an optimized mT5 model."
    )

    with gr.Row():
        with gr.Column():
            input_box = gr.Textbox(
                label="English Input",
                placeholder="Type a word (e.g., 'Bharat')",
            )
            lang_drop = gr.Dropdown(
                list(LANG_CODES),
                label="Select Language",
                value="Hindi",
            )
            btn = gr.Button("Transliterate", variant="primary")
        with gr.Column():
            output_box = gr.Textbox(label="Output")

    btn.click(transliterate, inputs=[input_box, lang_drop], outputs=output_box)

    gr.Examples(
        [["namaste", "Hindi"], ["dhanyabad", "Bengali"], ["vanakkam", "Tamil"]],
        inputs=[input_box, lang_drop],
    )

if __name__ == "__main__":
    demo.launch()