Spaces:

kasimali
/

indictrans2

Runtime error

App Files Files Community

kasimali committed on Oct 8, 2025

Commit

423591d

verified ·

1 Parent(s): ef2efca

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

README.md +5 -8
UPLOAD_INSTRUCTIONS.txt +20 -0
app.py +110 -0
requirements.txt +5 -0

README.md CHANGED Viewed

@@ -1,12 +1,9 @@
 ---
-title: Indictrans2
-emoji: 👁
-colorFrom: indigo
-colorTo: blue
 sdk: gradio
-sdk_version: 5.49.0
-app_file: app.py
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: INDICTRANS2
+emoji: 🚀
 sdk: gradio
 ---
+# INDICTRANS2
+Gradio application

UPLOAD_INSTRUCTIONS.txt ADDED Viewed

	@@ -0,0 +1,20 @@

+# Upload this Space to Hugging Face.
+# Run this in a new Colab cell; login() authenticates before anything is uploaded.
+from huggingface_hub import HfApi, create_repo, login
+USERNAME = "kasimali"
+SPACE_NAME = "indictrans2"
+# Single source of truth for the "<user>/<space>" identifier used by both calls below.
+REPO_ID = f"{USERNAME}/{SPACE_NAME}"
+login()
+api = HfApi()
+# exist_ok=True makes the script re-runnable when the Space already exists.
+create_repo(repo_id=REPO_ID, repo_type="space", space_sdk="gradio", exist_ok=True)
+# Push the whole local folder as the contents of the Space repository.
+api.upload_folder(folder_path="./indictrans2", repo_id=REPO_ID, repo_type="space")
+print(f"Uploaded: https://huggingface.co/spaces/{USERNAME}/{SPACE_NAME}")

app.py ADDED Viewed

	@@ -0,0 +1,110 @@

+# INDICTRANS2
+# Gradio app: translates Indic-language text (native or romanized script) to English.
+# --- 1. CLEAN UP AND PREPARE THE ENVIRONMENT (CORRECTLY) ---
+print("Cleaning up and preparing the environment...")
+# NOTE(review): no cleanup command is present in this file; the two prints in this
+# section are status messages only. Presumably a notebook shell command that removed
+# an old directory was dropped when this was exported - confirm.
+print("✅ Environment ready.")
+# --- 2. INSTALL ALL REQUIRED LIBRARIES FROM PyPI (USING A STABLE TRANSLITERATOR) ---
+print("Installing all required libraries from PyPI...")
+# NOTE(review): nothing is installed here, and no version pin for transformers is
+# visible in this file; dependencies must be provided by the environment.
+# The transliteration step relies on the 'indic-transliteration' package.
+print("✅ All libraries installed successfully.")
+# --- 3. SET UP THE SYSTEM PATH FOR THE TRANSLATION TOOLKIT (THE ONLY CORRECT METHOD) ---
+import sys
+# Prepend a source checkout location so IndicTransToolkit can be imported from it.
+# NOTE(review): '/content/...' looks like a Colab-style path - confirm it exists in
+# the deployment environment; otherwise the import below depends on an installed package.
+sys.path.insert(0, '/content/IndicTrans2/src')
+print("✅ IndicTransToolkit added to system path.")
+# --- 4. IMPORT ALL PACKAGES ---
+# These imports must stay after the sys.path change above.
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from IndicTransToolkit.processor import IndicProcessor
+from indic_transliteration import sanscript
+from indic_transliteration.sanscript import SchemeMap, SCHEMES, transliterate
+import torch
+print("✅ All packages imported.")
+# --- 5. LOAD THE TRANSLATION MODEL AND PREPROCESSOR ---
+# Only the translation model is loaded here; transliteration is done by the
+# rule-based indic_transliteration library and needs no model.
+print("Loading models and components...")
+# Inference is pinned to CPU; the model and tokenized inputs are moved to this device.
+device = torch.device("cpu")
+# A. Translation Model (Indic -> English checkpoint, per the model name)
+translator_model_name = "ai4bharat/indictrans2-indic-en-dist-200M"
+# trust_remote_code=True lets transformers run custom code shipped in the model repository.
+translator_tokenizer = AutoTokenizer.from_pretrained(translator_model_name, trust_remote_code=True)
+translator_model = AutoModelForSeq2SeqLM.from_pretrained(translator_model_name, trust_remote_code=True).to(device)
+# IndicProcessor handles pre/post-processing around the model (see preprocess_batch /
+# postprocess_batch calls in the translation functions).
+ip = IndicProcessor(inference=True)
+print("✅ Translation model and IndicProcessor are ready!")
+# --- 6. DEFINE THE CORRECT, HIGH-ACCURACY TRANSLATION FUNCTIONS ---
+# One row per supported language: (display name, transliteration target script,
+# IndicTrans2 language tag). Row order is the order shown in the UI dropdowns.
+# Marathi is written in Devanagari, so it shares Hindi's transliteration script.
+_LANGUAGE_ROWS = (
+    ("Hindi", sanscript.DEVANAGARI, "hin_Deva"),
+    ("Tamil", sanscript.TAMIL, "tam_Taml"),
+    ("Bengali", sanscript.BENGALI, "ben_Beng"),
+    ("Telugu", sanscript.TELUGU, "tel_Telu"),
+    ("Kannada", sanscript.KANNADA, "kan_Knda"),
+    ("Malayalam", sanscript.MALAYALAM, "mal_Mlym"),
+    ("Gujarati", sanscript.GUJARATI, "guj_Gujr"),
+    ("Punjabi", sanscript.GURMUKHI, "pan_Guru"),
+    ("Urdu", sanscript.URDU, "urd_Arab"),
+    ("Marathi", sanscript.DEVANAGARI, "mar_Deva"),
+)
+# Maps display name -> {"xlit": script constant, "indictrans": language tag}.
+LANG_CODES = {
+    name: {"xlit": script, "indictrans": tag}
+    for name, script, tag in _LANGUAGE_ROWS
+}
+def translate_native_script(native_text, source_language_name):
+    """Translate native-script Indic text to English.
+
+    Args:
+        native_text: Text written in the script of the source language.
+        source_language_name: Display name of the source language; must be a
+            key of ``LANG_CODES`` (e.g. ``"Hindi"``).
+
+    Returns:
+        The English translation as a string. Returns ``"Please enter text."``
+        for empty or whitespace-only input, and an ``"An error occurred: ..."``
+        message if any step raises, so the UI shows the failure instead of
+        crashing the request.
+    """
+    # The loaded checkpoint translates into English only.
+    tgt_lang = "eng_Latn"
+    try:
+        if not native_text or not native_text.strip():
+            return "Please enter text."
+        src_lang = LANG_CODES[source_language_name]["indictrans"]
+        # Normalize the input and attach the source/target language tags.
+        processed_text = ip.preprocess_batch([native_text], src_lang=src_lang, tgt_lang=tgt_lang)
+        inputs = translator_tokenizer(processed_text, return_tensors="pt", padding=True).to(device)
+        # Inference only: skip gradient tracking.
+        with torch.no_grad():
+            translated_tokens = translator_model.generate(**inputs, num_beams=5, max_length=256)
+        decoded_translation = translator_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
+        # Post-processing must be told the language of the *output* text.
+        # Passing the source language here would apply Indic-script
+        # post-processing to an English sentence.
+        return ip.postprocess_batch(decoded_translation, lang=tgt_lang)[0]
+    except Exception as e:
+        # Top-level UI boundary: report the error text to the user.
+        return f"An error occurred: {str(e)}"
+def translate_roman_script(roman_text, source_language_name):
+    """Translate romanized Indic text to English in two steps.
+
+    The input is first transliterated from the ITRANS romanization scheme into
+    the native script of the selected language, then handed to
+    ``translate_native_script``.
+
+    Args:
+        roman_text: Romanized (Latin-script) text in the source language.
+        source_language_name: Display name of the source language; must be a
+            key of ``LANG_CODES``.
+
+    Returns:
+        The result of ``translate_native_script`` for the transliterated text,
+        ``"Please enter text."`` for empty or whitespace-only input, or an
+        ``"An error occurred: ..."`` message if transliteration raises.
+    """
+    try:
+        has_content = roman_text and roman_text.strip()
+        if not has_content:
+            return "Please enter text."
+        # Step 1: Roman -> native script for the chosen language.
+        script = LANG_CODES[source_language_name]["xlit"]
+        in_native_script = transliterate(roman_text, sanscript.ITRANS, script)
+        # Step 2: native script -> English.
+        return translate_native_script(in_native_script, source_language_name)
+    except Exception as e:
+        return f"An error occurred: {str(e)}"
+print("✅ High-accuracy translation functions are ready.")
+# --- 7. CREATE AND LAUNCH THE SEPARATE UI WITH TABS ---
+# Two tabs with the same layout (text box + language dropdown -> English output);
+# they differ only in which translation function the button calls.
+with gr.Blocks() as demo:
+    gr.Markdown("## IndicTrans2: Universal Language Translator (Final Accurate Workflow)")
+    gr.Markdown("Translate from both native and romanized Indian languages to English using specialized, high-accuracy workflows.")
+    # Dropdown entries follow the insertion order of LANG_CODES.
+    language_choices = list(LANG_CODES.keys())
+    with gr.Tab("🇮🇳 Native Script to English"):
+        with gr.Row():
+            native_text_box = gr.Textbox(lines=5, label="Native Indian Language Text", placeholder="यहाँ अपना पाठ दर्ज करें...")
+            native_language = gr.Dropdown(choices=language_choices, label="Select Source Language", value="Hindi")
+            native_output = gr.Textbox(label="English Translation")
+        native_button = gr.Button("Translate Native Text")
+        native_button.click(fn=translate_native_script, inputs=[native_text_box, native_language], outputs=native_output)
+    with gr.Tab("🔡 Romanized Script to English"):
+        with gr.Row():
+            roman_text_box = gr.Textbox(lines=5, label="Romanized Indian Language Text", placeholder="Aap kaise hain?")
+            roman_language = gr.Dropdown(choices=language_choices, label="Select Source Language", value="Hindi")
+            roman_output = gr.Textbox(label="English Translation")
+        roman_button = gr.Button("Translate Romanized Text")
+        roman_button.click(fn=translate_roman_script, inputs=[roman_text_box, roman_language], outputs=roman_output)
+print("🚀 Launching the final, robust, and correct Gradio app...")
+# share=True is intentionally not passed: share links are not supported when the
+# app runs as a Hugging Face Space (the Space itself is the public URL), and
+# Gradio turns sharing on by default when it detects Google Colab.
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+IndicTransToolkit
+gradio
+indic-transliteration
+torch
+transformers