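"""Global Translator: a Gradio app that translates between English, French,
Spanish, German, Italian, Portuguese, Swahili, and Arabic using Helsinki-NLP
OPUS-MT models, with langdetect-based auto-detection of the source language
and English pivoting for non-English pairs."""
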
import tempfile
from typing import Tuple

import gradio as gr
from langdetect import detect, DetectorFactory
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Make langdetect deterministic
DetectorFactory.seed = 42

# ------- Supported languages -------
LANGS = [
    "English", "French", "Spanish", "German",
    "Italian", "Portuguese", "Swahili", "Arabic"
]

LANG2CODE = {
    "English": "en",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
    "Italian": "it",
    "Portuguese": "pt",
    "Swahili": "sw",
    "Arabic": "ar",
}
CODE2LANG = {v: k for k, v in LANG2CODE.items()}

# ------- Pipeline cache -------
_model_cache = {}

def _ensure_translator(src_code: str, tgt_code: str):
    """
    Lazily load a translation pipeline for a language pair.
    We use OPUS-MT (Helsinki-NLP). We only instantiate models
    when needed to keep memory low on free tiers.
    """
    key = f"{src_code}-{tgt_code}"
    if key in _model_cache:
        return _model_cache[key]

    model_id = f"Helsinki-NLP/opus-mt-{src_code}-{tgt_code}"
    tok = AutoTokenizer.from_pretrained(model_id)
    mdl = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    _model_cache[key] = pipeline("translation", model=mdl, tokenizer=tok)
    return _model_cache[key]

def _translate_once(text: str, src_code: str, tgt_code: str) -> str:
    translator = _ensure_translator(src_code, tgt_code)
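    # max_length caps the generated output; very long inputs may be truncated
    # (or trigger a warning) by the tokenizer.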
    out = translator(text, max_length=512)
    return out[0]["translation_text"]

def _maybe_autodetect(text: str, src_lang: str) -> Tuple[str, str]:
    """
    Returns (resolved_src_lang_name, detected_message).
    If src_lang == 'Auto-detect', detect the language from the text;
    unsupported or failed detections fall back to English.
    Otherwise, return src_lang unchanged.
    """
    if src_lang != "Auto-detect":
        return src_lang, f"Source: {src_lang}"

    try:
        code = detect(text)
        # Map to our supported set, otherwise raise
        if code in CODE2LANG:
            guessed = CODE2LANG[code]
            return guessed, f"Detected: {guessed} ({code})"
        else:
            # If not in our supported set, still show the code for transparency
            return "English", f"Detected unsupported lang '{code}'. Falling back to English."
    except Exception:
        return "English", "Could not detect language. Defaulted to English."

def translate(text: str, src_lang: str, tgt_lang: str):
    text = (text or "").strip()
    if not text:
        return "", "Enter text above to translate.", None

    # Resolve auto-detect
    resolved_src, detect_msg = _maybe_autodetect(text, src_lang)

    if resolved_src == tgt_lang:
        translation = text
    else:
        s = LANG2CODE[resolved_src]
        t = LANG2CODE[tgt_lang]

        # Strategy:
        # - If either side is English, translate directly
        # - Else pivot through English: src -> en -> tgt
        if s == "en" or t == "en":
            translation = _translate_once(text, s, t)
        else:
            pivot = _translate_once(text, s, "en")
            translation = _translate_once(pivot, "en", t)

    # Create a temporary .txt file for download
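    # delete=False keeps the file on disk so Gradio can serve it for download.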
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8")
    tmp.write(translation)
    tmp.close()

    return translation, detect_msg, tmp.name

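# ------- Gradio UI -------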
with gr.Blocks(title="Global Translator") as demo:
    gr.Markdown("# 🌍 Global Translator")
    gr.Markdown(
        "Translate between **English, French, Spanish, German, Italian, Portuguese, Swahili, Arabic**.\n\n"
        "- ✅ All language pairs supported (non-English pairs pivot via English)\n"
        "- 🔎 Auto-detect source language\n"
        "- ⬇️ Download result as `.txt`"
    )

    with gr.Row():
        src_lang = gr.Dropdown(["Auto-detect"] + LANGS, value="Auto-detect", label="Source")
        tgt_lang = gr.Dropdown(LANGS, value="English", label="Target")

    input_box = gr.Textbox(lines=6, label="Your text")
    translate_btn = gr.Button("Translate")

    with gr.Row():
        detected_lang = gr.Markdown("Source: Auto-detect")
    output_box = gr.Textbox(lines=6, label="Translation", interactive=False)
    download_file = gr.File(label="Download translation (.txt)", interactive=False)

    translate_btn.click(
        translate,
        inputs=[input_box, src_lang, tgt_lang],
        outputs=[output_box, detected_lang, download_file],
    )

    # Also translate on Enter
    input_box.submit(
        translate,
        inputs=[input_box, src_lang, tgt_lang],
        outputs=[output_box, detected_lang, download_file],
    )

    gr.Examples(
        examples=[
            ["Good morning! How are you today?", "Auto-detect", "French"],
            ["La tecnología está transformando la educación.", "Auto-detect", "English"],
            ["Ich mag datengetriebene Entscheidungen.", "Auto-detect", "Italian"],
            ["Ninapenda kusoma vitabu kila siku.", "Auto-detect", "English"],
            ["الذكاء الاصطناعي يغير العالم.", "Auto-detect", "Portuguese"],
        ],
        inputs=[input_box, src_lang, tgt_lang],
    )

if __name__ == "__main__":
    demo.launch()