Spaces:

bikashpatra
/

indic_translate

Sleeping

App Files Files Community

bikashpatra committed on Dec 13, 2025

Commit

613d3bf

1 Parent(s): 2d00be3

first commit

Browse files

Files changed (3) hide show

README.md +7 -7
app.py +102 -0
requirements.txt +10 -0

README.md CHANGED Viewed

@@ -1,14 +1,14 @@
 ---
-title: Indic Translate
-emoji: 📚
 colorFrom: blue
-colorTo: purple
 sdk: gradio
-sdk_version: 6.1.0
 app_file: app.py
 pinned: false
-license: mit
-short_description: 'Indic Translation using ai4bharat/indictrans2-en-indic-dist '
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: IndicTrans2 Translation Demo
+emoji: 🌏
 colorFrom: blue
+colorTo: green
 sdk: gradio
+sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 ---
+# IndicTrans2 Translation Demo
+Test English to Indic language translation using IndicTrans2.

app.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import gradio as gr
+import torch
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+from IndicTransToolkit.processor import IndicProcessor
+# Choose device: use CUDA when torch reports it as available, otherwise CPU
+# (HF Spaces have a free CPU tier; upgrade the Space for GPU).
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# Hub id of the distilled IndicTrans2 English -> Indic checkpoint, picked for
+# faster inference.
+MODEL_NAME = "ai4bharat/indictrans2-en-indic-dist-200M"
+@gr.cache
+def load_model():
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+    model = AutoModelForSeq2SeqLM.from_pretrained(
+        MODEL_NAME,
+        trust_remote_code=True,
+        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
+    ).to(DEVICE)
+    ip = IndicProcessor(inference=True)
+    return tokenizer, model, ip
+# Load everything once at import time; translate() reads these module globals.
+tokenizer, model, ip = load_model()
+# Language mapping: dropdown label -> language tag handed to IndicProcessor
+# (as tgt_lang when preprocessing and as lang when postprocessing).
+LANGUAGES = {
+    "Hindi": "hin_Deva",
+    "Tamil": "tam_Taml",
+    "Telugu": "tel_Telu",
+    "Bengali": "ben_Beng",
+    "Marathi": "mar_Deva",
+    "Gujarati": "guj_Gujr",
+    "Kannada": "kan_Knda",
+    "Malayalam": "mal_Mlym",
+    "Punjabi": "pan_Guru",
+    "Oriya": "ory_Orya"
+}
+def translate(text, target_lang):
+    if not text.strip():
+        return "Please enter some text to translate."
+    # Preprocess
+    batch = ip.preprocess_batch(
+        [text],
+        src_lang="eng_Latn",
+        tgt_lang=LANGUAGES[target_lang]
+    )
+    # Tokenize
+    inputs = tokenizer(
+        batch,
+        truncation=True,
+        padding="longest",
+        max_length=256,
+        return_tensors="pt"
+    ).to(DEVICE)
+    # Generate
+    with torch.inference_mode():
+        outputs = model.generate(
+            **inputs,
+            num_beams=5,
+            max_length=256
+        )
+    # Decode
+    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+    # Postprocess
+    translations = ip.postprocess_batch(decoded, lang=LANGUAGES[target_lang])
+    return translations[0]
+# Create Gradio interface
+demo = gr.Interface(
+    fn=translate,
+    inputs=[
+        gr.Textbox(
+            label="English Text",
+            placeholder="Enter English text to translate...",
+            lines=5
+        ),
+        gr.Dropdown(
+            choices=list(LANGUAGES.keys()),
+            label="Target Language",
+            value="Hindi"
+        )
+    ],
+    outputs=gr.Textbox(label="Translation", lines=5),
+    title="IndicTrans2 Translation Demo",
+    description="Translate English text to Indian languages using IndicTrans2",
+    examples=[
+        ["Hello, how are you?", "Hindi"],
+        ["The weather is beautiful today.", "Tamil"],
+        ["I love learning new languages.", "Bengali"]
+    ],
+    cache_examples=False
+)
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+torch
+transformers==4.53.2
+gradio
+sentencepiece
+nltk
+sacremoses
+pandas
+regex
+IndicTransToolkit
+accelerate