Spaces:
Running
Running
File size: 2,662 Bytes
d57fadf a01f7e8 d57fadf a01f7e8 d57fadf a01f7e8 d57fadf a01f7e8 d57fadf a01f7e8 d57fadf a01f7e8 d57fadf a01f7e8 d57fadf a01f7e8 d57fadf a01f7e8 d57fadf a01f7e8 d57fadf a01f7e8 d57fadf a01f7e8 d57fadf a01f7e8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# ==========================================================
# IndicTrans3 Text Translation Inference
# (Stable wrapper for HF Space usage)
# ==========================================================
from gradio_client import Client
import threading
# ----------------------------------------------------------
# Lazy singleton client (HF Spaces cold-start safe)
# ----------------------------------------------------------
# Module-wide cache for the Gradio client; stays None until first use so
# that importing this module never opens a connection.
_client = None
# Serialises first-time construction when several threads ask at once.
_client_lock = threading.Lock()


def get_indictrans_client():
    """
    Return the shared client for the "ai4bharat/IndicTrans3-beta" Space.

    The client is constructed on the first call and reused afterwards.
    The check and the construction both happen while holding
    ``_client_lock``. If construction raises, nothing is cached and the
    next call tries again.
    """
    global _client
    with _client_lock:
        if _client is not None:
            return _client
        _client = Client("ai4bharat/IndicTrans3-beta")
        return _client
# ----------------------------------------------------------
# BCP-47 → IndicTrans language labels
# ----------------------------------------------------------
# Lookup table: lowercase primary language subtag -> the language label
# passed to the translation endpoint as ``target_lang``.
# "en" is listed so that English counts as a known code.
LANGUAGE_MAP = {
    "hi": "Hindi",
    "bn": "Bengali",
    "te": "Telugu",
    "mr": "Marathi",
    "ta": "Tamil",
    "gu": "Gujarati",
    "kn": "Kannada",
    "ml": "Malayalam",
    "pa": "Punjabi",
    "or": "Odia",
    "ur": "Urdu",
    "en": "English",
}
def normalize_lang(code: str) -> str:
    """
    Reduce a locale-style language code to its lowercase primary subtag.

    Surrounding whitespace is ignored, and both "-" (BCP-47 style) and
    "_" (POSIX-locale style) are accepted as subtag separators:
        hi-IN → hi
        ta_IN → ta
        EN    → en

    Args:
        code: Language code such as "hi-IN", "ta_IN" or "en".

    Returns:
        The text before the first separator, lowercased. An empty or
        whitespace-only code yields "".
    """
    # Treat "_" like "-" so "hi_IN" resolves to "hi" instead of "hi_in".
    primary, _, _ = code.strip().replace("_", "-").partition("-")
    return primary.lower()
# ----------------------------------------------------------
# PUBLIC TRANSLATION FUNCTION (USED BY app.py)
# ----------------------------------------------------------
def translate_text(text: str, target_lang_code: str) -> str:
    """
    Translate text into the requested language using IndicTrans3.

    The input is returned unchanged whenever translation is not needed or
    not possible: blank or non-string text, a missing or non-string
    language code, English or an unmapped target language, an empty
    response, or any exception raised while calling the remote endpoint.

    Args:
        text: Text to translate.
        target_lang_code: Language code such as "hi" or "hi-IN". It is
            reduced with normalize_lang() and looked up in LANGUAGE_MAP.

    Returns:
        The translation with surrounding whitespace stripped, or the
        original ``text`` as a fallback.
    """
    # Guard: nothing to translate (None, non-string, empty or whitespace).
    if not isinstance(text, str) or not text.strip():
        return text

    # Guard: no usable language code. Checked before normalize_lang() so a
    # None code falls back to the input instead of raising AttributeError.
    if not isinstance(target_lang_code, str) or not target_lang_code.strip():
        return text

    iso = normalize_lang(target_lang_code)

    # Guard: English or unsupported language
    if iso == "en" or iso not in LANGUAGE_MAP:
        return text

    target_lang = LANGUAGE_MAP[iso]

    try:
        client = get_indictrans_client()

        # The endpoint takes chat history as a list of [user, assistant]
        # pairs; a single empty pair is sent on every call.
        safe_history = [["", ""]]

        result = client.predict(
            user_message=text,
            history=safe_history,
            target_lang=target_lang,
            api_name="/user",
        )

        # Expected shape: (translated_text, updated_history). A bare string
        # is accepted too, so a str result is never indexed by character.
        translated_text = result if isinstance(result, str) else result[0]

        if not translated_text or not translated_text.strip():
            return text

        return translated_text.strip()

    except Exception as e:
        # Deliberate best-effort boundary: log and return the input as-is.
        print(f"❌ IndicTrans3 translation error: {e}")
        return text
|