Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import ctranslate2
|
| 3 |
+
import transformers
|
| 4 |
+
|
| 5 |
+
# Paths are relative to the working directory the app is started from.
_CT2_MODEL_DIR = "ct2_model"
_TOKENIZER_DIR = "final_model"

# Build the inference components once at import time so every request
# reuses the same CTranslate2 translator and Hugging Face tokenizer.
translator = ctranslate2.Translator(_CT2_MODEL_DIR)
tokenizer = transformers.AutoTokenizer.from_pretrained(_TOKENIZER_DIR)
|
| 8 |
+
|
| 9 |
+
def transliterate(text: str, target_lang: str) -> str:
    """Transliterate romanized ``text`` into the script of ``target_lang``.

    Args:
        text: Input typed by the user. ``None``, empty, or whitespace-only
            input yields an empty string without touching the model.
        target_lang: Display name of the target language; one of
            ``"Hindi"``, ``"Bengali"`` or ``"Tamil"``.

    Returns:
        The decoded model output with special tokens removed.

    Raises:
        KeyError: If ``target_lang`` is not a supported language.
    """
    # A cleared or API-supplied value can be None; treat it like empty
    # input instead of failing on ``None.strip()``.
    if not text or not text.strip():
        return ""

    lang_codes = {"Hindi": "hin", "Bengali": "ben", "Tamil": "tam"}
    try:
        lang_code = lang_codes[target_lang]
    except KeyError:
        supported = ", ".join(lang_codes)
        raise KeyError(
            f"Unsupported target language {target_lang!r}; "
            f"expected one of: {supported}"
        ) from None

    # The model input is the lower-cased text prefixed with a language tag.
    input_text = f"<{lang_code}> {text.lower()}"

    # CTranslate2 consumes token strings rather than ids, so encode to ids
    # and map them back to their token strings.
    source_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(input_text))
    results = translator.translate_batch([source_tokens])

    # Keep only the first hypothesis of the single batch entry.
    output_tokens = results[0].hypotheses[0]

    # Convert back to ids so the tokenizer's own decode joins the subword
    # pieces (presumably SentencePiece pieces such as the "▁" word-boundary
    # marker) and drops special tokens.
    output_ids = tokenizer.convert_tokens_to_ids(output_tokens)
    return tokenizer.decode(output_ids, skip_special_tokens=True)
|
| 22 |
+
|
| 23 |
+
# Assemble the Gradio interface: inputs in the first column, the result in
# the second, with the click handler and clickable examples wired up after
# the row.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🇮🇳 Indic Multilingual Transliteration")
    gr.Markdown(
        "Transliterate English names/words into Hindi, Bengali, or Tamil "
        "using an optimized mT5 model."
    )

    with gr.Row():
        with gr.Column():
            input_box = gr.Textbox(
                label="English Input",
                placeholder="Type a word (e.g., 'Bharat')",
            )
            lang_drop = gr.Dropdown(
                choices=["Hindi", "Bengali", "Tamil"],
                label="Select Language",
                value="Hindi",
            )
            btn = gr.Button(value="Transliterate", variant="primary")
        with gr.Column():
            output_box = gr.Textbox(label="Output")

    # Run the model when the button is pressed.
    btn.click(
        fn=transliterate,
        inputs=[input_box, lang_drop],
        outputs=output_box,
    )

    # Examples only populate the two inputs; the user still clicks the button.
    gr.Examples(
        examples=[
            ["namaste", "Hindi"],
            ["dhanyabad", "Bengali"],
            ["vanakkam", "Tamil"],
        ],
        inputs=[input_box, lang_drop],
    )

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()
|