File size: 2,662 Bytes
d57fadf
 
 
 
a01f7e8
 
 
 
d57fadf
 
 
a01f7e8
 
 
 
 
 
 
 
 
 
 
d57fadf
a01f7e8
d57fadf
a01f7e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d57fadf
 
 
a01f7e8
 
 
 
d57fadf
 
 
a01f7e8
 
 
d57fadf
 
 
 
a01f7e8
 
d57fadf
a01f7e8
 
 
 
 
d57fadf
a01f7e8
 
 
 
 
 
 
 
d57fadf
 
 
 
 
 
a01f7e8
 
d57fadf
a01f7e8
 
 
 
d57fadf
a01f7e8
 
 
 
 
 
 
 
d57fadf
a01f7e8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# ==========================================================
# IndicTrans3 Text Translation Inference
# (Stable wrapper for HF Space usage)
# ==========================================================

from gradio_client import Client
import threading

# ----------------------------------------------------------
# Lazy singleton client (HF Spaces cold-start safe)
# ----------------------------------------------------------
# Module-level cache for the shared client; stays None until the first
# call to get_indictrans_client() constructs it.
_client = None
# Guards both the "is it built yet?" check and the construction itself.
_client_lock = threading.Lock()


def get_indictrans_client():
    """
    Return the shared Client for "ai4bharat/IndicTrans3-beta".

    The client is constructed on the first call and cached in the
    module-level ``_client``; later calls return that same object.
    The check and the construction both run while ``_client_lock`` is held.
    If construction raises, nothing is cached and the next call tries again.
    """
    global _client
    with _client_lock:
        # Fast exit once the client has been built.
        if _client is not None:
            return _client
        _client = Client("ai4bharat/IndicTrans3-beta")
        return _client


# ----------------------------------------------------------
# BCP-47 → IndicTrans language labels
# ----------------------------------------------------------
# Keys are lowercase two-letter language codes (the form translate_text()
# obtains from normalize_lang()); values are the language labels sent to the
# translation endpoint as `target_lang`. translate_text() returns its input
# unchanged for any code missing from this table, and also for "en".
LANGUAGE_MAP = {
    "hi": "Hindi",
    "bn": "Bengali",
    "te": "Telugu",
    "mr": "Marathi",
    "ta": "Tamil",
    "gu": "Gujarati",
    "kn": "Kannada",
    "ml": "Malayalam",
    "pa": "Punjabi",
    "or": "Odia",
    "ur": "Urdu",
    "en": "English",
}

def normalize_lang(code: str) -> str:
    """
    Reduce a language/locale tag to its lowercase primary language subtag.

    Both the BCP-47 hyphen form and the underscore locale form are accepted,
    and surrounding whitespace is ignored:

        hi-IN  → hi
        ta_IN  → ta
        " EN " → en

    Args:
        code: Language tag such as "hi", "hi-IN" or "hi_IN".

    Returns:
        The lowercase primary subtag, or "" when ``code`` is empty or None.
    """
    # Empty / None input has no language part; return "" instead of raising.
    if not code:
        return ""

    # Treat "_" like "-" so locale-style tags (hi_IN) resolve the same way
    # as BCP-47 tags (hi-IN); only the first segment is kept.
    primary = code.replace("_", "-").split("-", 1)[0]
    return primary.strip().lower()


# ----------------------------------------------------------
# PUBLIC TRANSLATION FUNCTION (USED BY app.py)
# ----------------------------------------------------------
def translate_text(text: str, target_lang_code: str) -> str:
    """
    Translate ``text`` into the requested language using IndicTrans3.

    Args:
        text: Source text. Empty, whitespace-only, or non-string values are
            returned unchanged.
        target_lang_code: Language code such as "hi" or "hi-IN". It is
            reduced with ``normalize_lang`` and looked up in ``LANGUAGE_MAP``.

    Returns:
        The translated text with surrounding whitespace removed, or the
        original ``text`` unchanged when:
          - the target code is missing, not a string, English, or not in
            ``LANGUAGE_MAP``;
          - the service returns an empty or non-string translation;
          - obtaining the client or calling the service raises.
    """

    # Guard: nothing to translate. The isinstance check also covers None and
    # other non-string input, which would otherwise raise on .strip().
    if not isinstance(text, str) or not text.strip():
        return text

    # Guard: a missing / non-string language code cannot be normalized.
    # Fall back here, because this runs before the try block below.
    if not isinstance(target_lang_code, str):
        return text

    iso = normalize_lang(target_lang_code)

    # Guard: English or unsupported language
    target_lang = LANGUAGE_MAP.get(iso)
    if iso == "en" or target_lang is None:
        return text

    try:
        client = get_indictrans_client()

        # The endpoint requires chat history as a list of
        # [user, assistant] pairs; one empty pair is sent so no earlier
        # turns are carried into this request.
        result = client.predict(
            user_message=text,
            history=[["", ""]],
            target_lang=target_lang,
            api_name="/user",
        )

        # Expected shape: (translated_text, updated_history).
        # A bare string is accepted as well, so that indexing never
        # silently reduces the translation to its first character.
        translated = result if isinstance(result, str) else result[0]

        if not isinstance(translated, str) or not translated.strip():
            return text

        return translated.strip()

    except Exception as e:
        # Deliberate best-effort boundary: report and return the input.
        print(f"❌ IndicTrans3 translation error: {e}")
        return text