Spaces:
Runtime error
Runtime error
| # Indictrans2final | |
| # Step 1: Clean environment and clone repo fresh | |
| # Step 2: Install dependencies with pinned transformers and stable indic-transliteration | |
| # Step 3: Add IndicTransToolkit source to system path | |
| import sys | |
| sys.path.insert(0, '/content/IndicTrans2/src') | |
| # Step 4: Import all required packages | |
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| from IndicTransToolkit.processor import IndicProcessor | |
| from indic_transliteration import sanscript | |
| from indic_transliteration.sanscript import transliterate | |
| import torch | |
| # Step 5: Load models and tokenizer | |
# Inference runs on CPU.
device = torch.device("cpu")
model_name = "ai4bharat/indictrans2-indic-en-dist-200M"

# trust_remote_code=True is passed to both loaders for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, trust_remote_code=True)
model = model.to(device)

# Pre-/post-processor used around the model in translate_native_script.
ip = IndicProcessor(inference=True)
| # Step 6: Language codes for transliteration and translation | |
# One row per supported language:
#   (display name, sanscript target script, IndicTrans2 language tag)
# Row order is kept in LANG_CODES and therefore in the UI dropdowns.
_LANGUAGE_TABLE = (
    ("Hindi", sanscript.DEVANAGARI, "hin_Deva"),
    ("Tamil", sanscript.TAMIL, "tam_Taml"),
    ("Bengali", sanscript.BENGALI, "ben_Beng"),
    ("Telugu", sanscript.TELUGU, "tel_Telu"),
    ("Kannada", sanscript.KANNADA, "kan_Knda"),
    ("Malayalam", sanscript.MALAYALAM, "mal_Mlym"),
    ("Gujarati", sanscript.GUJARATI, "guj_Gujr"),
    ("Punjabi", sanscript.GURMUKHI, "pan_Guru"),
    ("Marathi", sanscript.DEVANAGARI, "mar_Deva"),
)

# Display name -> {"xlit": transliteration script, "indictrans": language tag}.
LANG_CODES = {
    display_name: {"xlit": script, "indictrans": tag}
    for display_name, script, tag in _LANGUAGE_TABLE
}
| # Step 7: Native translation function | |
def translate_native_script(text, lang):
    """Translate native-script Indic text into English.

    Args:
        text: Source text written in the native script of ``lang``.
        lang: Display name of the source language; a key of ``LANG_CODES``
            (for example ``"Hindi"``).

    Returns:
        The English translation as a string. A short prompt is returned
        instead when ``text`` is empty/blank or ``lang`` is not a key of
        ``LANG_CODES``.
    """
    if not text or not text.strip():
        return "Please enter text."
    if lang not in LANG_CODES:
        # Return a readable message rather than letting a KeyError escape.
        return "Please select a supported language."

    src_lang = LANG_CODES[lang]["indictrans"]
    tgt_lang = "eng_Latn"

    batch = ip.preprocess_batch([text], src_lang=src_lang, tgt_lang=tgt_lang)
    # truncation=True asks the tokenizer to cut overlong input to its limit.
    inputs = tokenizer(
        batch,
        return_tensors="pt",
        padding=True,
        truncation=True,
    ).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model.generate(**inputs, num_beams=5, max_length=256)

    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    # The generated text is English, so post-process with the target
    # language tag rather than the source one.
    return ip.postprocess_batch(decoded, lang=tgt_lang)[0]
| # Step 8: Romanized translation function (transliterate then translate) | |
def translate_roman_script(text, lang):
    """Translate romanized Indic text into English.

    The input is first transliterated into the native script of ``lang``,
    treating it as ITRANS, and the result is passed to
    ``translate_native_script``.

    Args:
        text: Romanized source text.
        lang: Display name of the source language; a key of ``LANG_CODES``.

    Returns:
        The English translation as a string. A short prompt is returned
        instead when ``text`` is empty/blank or ``lang`` is not a key of
        ``LANG_CODES``.
    """
    if not text or not text.strip():
        return "Please enter text."
    if lang not in LANG_CODES:
        # Return a readable message rather than letting a KeyError escape.
        return "Please select a supported language."

    target_script = LANG_CODES[lang]["xlit"]
    # NOTE(review): the source scheme is fixed to ITRANS; informal
    # romanization may not follow ITRANS conventions -- confirm this is
    # acceptable for the expected input.
    native_text = transliterate(text, sanscript.ITRANS, target_script)
    return translate_native_script(native_text, lang)
| # Step 9: Gradio UI with separate tabs | |
# Two-tab UI: one tab per input style, each wired to its own handler.
with gr.Blocks() as demo:
    gr.Markdown("## IndicTrans2: Multilingual Translation")
    gr.Markdown("Translate native and romanized Indian languages to English using specialized high-accuracy workflows.")

    with gr.Tab("Native Script to English"):
        native_input = gr.Textbox(
            lines=5,
            label="Native Indian Language Text",
            placeholder="यहाँ अपना पाठ दर्ज करें...",
        )
        native_lang = gr.Dropdown(
            list(LANG_CODES),
            label="Select Language",
            value="Hindi",
        )
        native_output = gr.Textbox(label="English Translation")
        native_button = gr.Button("Translate")
        native_button.click(
            translate_native_script,
            inputs=[native_input, native_lang],
            outputs=native_output,
        )

    with gr.Tab("Romanized Script to English"):
        roman_input = gr.Textbox(
            lines=5,
            label="Romanized Indian Language Text",
            placeholder="Aap kaise hain?",
        )
        roman_lang = gr.Dropdown(
            list(LANG_CODES),
            label="Select Language",
            value="Hindi",
        )
        roman_output = gr.Textbox(label="English Translation")
        roman_button = gr.Button("Translate")
        roman_button.click(
            translate_roman_script,
            inputs=[roman_input, roman_lang],
            outputs=roman_output,
        )

print("🚀 Launching the IndicTrans2 translation app...")
demo.launch(share=True)