# File size: 1,692 Bytes
# 35d4b4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import gradio as gr
import ctranslate2
import transformers

# Argument handed to the CTranslate2 translator and the name/path handed to
# the Hugging Face tokenizer loader.
_CT2_MODEL_DIR = "ct2_model"
_TOKENIZER_SOURCE = "final_model"

# Both objects are created once at import time and reused by every call to
# transliterate().
translator = ctranslate2.Translator(_CT2_MODEL_DIR)
tokenizer = transformers.AutoTokenizer.from_pretrained(_TOKENIZER_SOURCE)

# UI language name -> language tag that is placed in front of the model input.
_LANG_CODES = {"Hindi": "hin", "Bengali": "ben", "Tamil": "tam"}


def transliterate(text: "str | None", target_lang: str) -> str:
    """Transliterate ``text`` into the script of ``target_lang``.

    The input is trimmed, lower-cased and prefixed with a ``<code>`` language
    tag, tokenized with the module-level ``tokenizer``, run through the
    module-level CTranslate2 ``translator``, and the first hypothesis is
    decoded back into a string.

    Args:
        text: Word or phrase to transliterate. ``None``, empty and
            whitespace-only values return ``""`` without running the model.
        target_lang: Display name of the target language; must be a key of
            ``_LANG_CODES`` ("Hindi", "Bengali" or "Tamil").

    Returns:
        The decoded model output with special tokens removed, or ``""`` when
        there is nothing to transliterate.

    Raises:
        KeyError: If ``target_lang`` is not a supported language.
    """
    # Guard against a missing value as well as blank input, and reuse the
    # trimmed text so stray whitespace never reaches the model.
    cleaned = (text or "").strip()
    if not cleaned:
        return ""

    try:
        lang_code = _LANG_CODES[target_lang]
    except KeyError:
        # Same exception type as a plain dict lookup, but with a message that
        # says what went wrong and what is accepted.
        raise KeyError(
            f"Unsupported target language {target_lang!r}; "
            f"expected one of {sorted(_LANG_CODES)}"
        ) from None

    input_text = f"<{lang_code}> {cleaned.lower()}"

    # CTranslate2 consumes token strings rather than ids, so encode to ids
    # first and map them back to their string pieces.
    source_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(input_text))
    results = translator.translate_batch([source_tokens])

    # A single-item batch was submitted; take its first hypothesis.
    output_tokens = results[0].hypotheses[0]

    # Round-trip through ids so the tokenizer reassembles subword pieces
    # (e.g. SentencePiece "▁" word-boundary markers) into plain text.
    output_ids = tokenizer.convert_tokens_to_ids(output_tokens)
    return tokenizer.decode(output_ids, skip_special_tokens=True)

# Languages offered in the dropdown; the first entry is the initial selection.
_LANGUAGE_CHOICES = ["Hindi", "Bengali", "Tamil"]

# Sample (input, language) rows shown below the form; they fill the two
# input components.
_EXAMPLE_ROWS = [
    ["namaste", "Hindi"],
    ["dhanyabad", "Bengali"],
    ["vanakkam", "Tamil"],
]

# Page layout: heading and description on top, then a two-column row with the
# inputs and button on the left and the result on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(value="# 🇮🇳 Indic Multilingual Transliteration")
    gr.Markdown(
        value="Transliterate English names/words into Hindi, Bengali, or Tamil using an optimized mT5 model."
    )

    with gr.Row():
        with gr.Column():
            input_box = gr.Textbox(
                label="English Input",
                placeholder="Type a word (e.g., 'Bharat')",
            )
            lang_drop = gr.Dropdown(
                choices=_LANGUAGE_CHOICES,
                label="Select Language",
                value=_LANGUAGE_CHOICES[0],
            )
            btn = gr.Button(value="Transliterate", variant="primary")
        with gr.Column():
            output_box = gr.Textbox(label="Output")

    # Clicking the button sends both inputs to transliterate() and writes the
    # returned string into the output box.
    btn.click(
        fn=transliterate,
        inputs=[input_box, lang_drop],
        outputs=output_box,
    )
    gr.Examples(examples=_EXAMPLE_ROWS, inputs=[input_box, lang_drop])

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()