# translator / app.py
# basantyahya's picture
# Update app.py
# 7f166df verified
import os
import tempfile
from typing import Tuple
import gradio as gr
from langdetect import detect, DetectorFactory
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Make langdetect deterministic: pin its seed once at import time so that
# repeated detections of the same text give the same answer.
DetectorFactory.seed = 42
# ------- Supported languages -------
# Single source of truth: display name -> two-letter code. The codes are the
# ones interpolated into the OPUS-MT model ids and compared against the
# output of langdetect. Insertion order is the order shown in the dropdowns.
LANG2CODE = dict(
    English="en",
    French="fr",
    Spanish="es",
    German="de",
    Italian="it",
    Portuguese="pt",
    Swahili="sw",
    Arabic="ar",
)
# Display names, in table order.
LANGS = list(LANG2CODE)
# Reverse lookup: two-letter code -> display name.
CODE2LANG = dict(zip(LANG2CODE.values(), LANG2CODE))
# ------- Pipeline cache -------
# Maps "<src>-<tgt>" to an already-built translation pipeline.
_model_cache = {}
def _ensure_translator(src_code: str, tgt_code: str):
    """
    Return the translation pipeline for ``src_code`` -> ``tgt_code``.

    Pipelines are built on first use from the Helsinki-NLP OPUS-MT
    checkpoint named after the pair, then kept in ``_model_cache`` so a
    pair is only loaded once per process. Loading lazily keeps memory low
    on free tiers.
    """
    pair = f"{src_code}-{tgt_code}"
    try:
        return _model_cache[pair]
    except KeyError:
        pass  # First request for this pair: build it below.
    checkpoint = f"Helsinki-NLP/opus-mt-{pair}"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    translator = pipeline("translation", model=model, tokenizer=tokenizer)
    _model_cache[pair] = translator
    return translator
def _translate_once(text: str, src_code: str, tgt_code: str) -> str:
    """
    Translate ``text`` from ``src_code`` to ``tgt_code`` in a single hop.

    The text is translated line by line (in one batched pipeline call) so
    that line breaks and blank lines of the input survive in the output
    and each model input stays short. ``truncation=True`` asks the
    tokenizer to clip any single line that is still longer than the
    model's maximum input length instead of letting the call fail.

    Args:
        text: Text to translate; may span several lines.
        src_code: Source language code (e.g. ``"fr"``).
        tgt_code: Target language code (e.g. ``"en"``).

    Returns:
        The translated text, with lines joined by ``"\\n"``. Text that
        contains no non-blank line is returned unchanged.
    """
    translator = _ensure_translator(src_code, tgt_code)
    lines = text.splitlines()
    segments = [line for line in lines if line.strip()]
    if not segments:
        # Nothing translatable; skip the model call entirely.
        return text
    results = translator(segments, max_length=512, truncation=True)
    translated = (result["translation_text"] for result in results)
    # Re-insert blank lines at their original positions.
    return "\n".join(
        next(translated) if line.strip() else line for line in lines
    )
def _maybe_autodetect(text: str, src_lang: str) -> Tuple[str, str]:
    """
    Resolve the source language and build the status line shown to the user.

    Returns a ``(language_name, message)`` pair:

    - An explicit ``src_lang`` is returned untouched.
    - With ``'Auto-detect'``, the language of ``text`` is detected and
      mapped to one of the supported display names.
    - If the detected code is not supported, or detection fails for any
      reason, the result falls back to ``"English"`` and the message says
      so. This function never raises for a detection failure.
    """
    explicit_choice = src_lang != "Auto-detect"
    if explicit_choice:
        return src_lang, f"Source: {src_lang}"
    try:
        code = detect(text)
        language = CODE2LANG.get(code)
        if language is None:
            # Keep the raw code in the message for transparency.
            return "English", f"Detected unsupported lang '{code}'. Falling back to English."
        return language, f"Detected: {language} ({code})"
    except Exception:
        # Best effort: detection problems must not block the translation.
        return "English", "Could not detect language. Defaulted to English."
def translate(text: str, src_lang: str, tgt_lang: str):
    """
    Translate ``text`` and package the result for the Gradio outputs.

    Args:
        text: Raw text from the input box; ``None`` is treated as empty.
        src_lang: Source language display name, or ``'Auto-detect'``.
        tgt_lang: Target language display name.

    Returns:
        A ``(translation, status_message, download_path)`` tuple. When
        nothing can be translated (empty input, unknown language, model
        cannot be loaded) the translation is ``""``, the message explains
        why, and the path is ``None``. Otherwise the path points to a
        UTF-8 ``.txt`` file holding the translation.
    """
    import logging  # Local import so this block does not depend on file-level changes.

    text = (text or "").strip()
    if not text:
        return "", "Enter text above to translate.", None

    # Reject a cleared or unknown target up front instead of failing with a
    # KeyError on the code lookup further down.
    if tgt_lang not in LANG2CODE:
        return "", f"Unsupported target language: {tgt_lang!r}.", None

    # Resolve auto-detect
    resolved_src, detect_msg = _maybe_autodetect(text, src_lang)
    if resolved_src not in LANG2CODE:
        return "", f"Unsupported source language: {resolved_src!r}.", None

    if resolved_src == tgt_lang:
        # Same language on both sides: nothing to do.
        translation = text
    else:
        s = LANG2CODE[resolved_src]
        t = LANG2CODE[tgt_lang]
        try:
            # Strategy:
            # - If either side is English, translate directly
            # - Else pivot through English: src -> en -> tgt
            if "en" in (s, t):
                translation = _translate_once(text, s, t)
            else:
                pivot = _translate_once(text, s, "en")
                translation = _translate_once(pivot, "en", t)
        except OSError:
            # from_pretrained raises OSError when a checkpoint cannot be
            # found or downloaded. Report that to the user and keep the
            # full traceback in the log.
            logging.getLogger(__name__).exception(
                "Could not load translation model for %s -> %s", s, t
            )
            return (
                "",
                f"{detect_msg}. Could not load a translation model for "
                f"{resolved_src} → {tgt_lang}.",
                None,
            )

    # Create a temporary .txt file for download. delete=False keeps the file
    # on disk after it is closed so the path can be handed to the File
    # component; the context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".txt", mode="w", encoding="utf-8"
    ) as tmp:
        tmp.write(translation)
    return translation, detect_msg, tmp.name
# ------- Gradio UI -------
# Builds the page top to bottom: header, language pickers, input box,
# translate button, status line, translation output and download link.
# NOTE(review): the nesting under the two gr.Row() contexts is an assumption
# (dropdowns in the first row, status line alone in the second) — confirm
# against the intended layout.
with gr.Blocks(title="Global Translator") as demo:
    gr.Markdown("# 🌍 Global Translator")
    gr.Markdown(
        "Translate between **English, French, Spanish, German, Italian, Portuguese, Swahili, Arabic**.\n\n"
        "- ✅ All language pairs supported (non-English pairs pivot via English)\n"
        "- 🔎 Auto-detect source language\n"
        "- ⬇️ Download result as `.txt`"
    )
    with gr.Row():
        # The source picker offers every supported language plus "Auto-detect".
        src_lang = gr.Dropdown(["Auto-detect"] + LANGS, value="Auto-detect", label="Source")
        tgt_lang = gr.Dropdown(LANGS, value="English", label="Target")
    input_box = gr.Textbox(lines=6, label="Your text")
    translate_btn = gr.Button("Translate")
    with gr.Row():
        # Status line; translate() overwrites it with its second return value.
        detected_lang = gr.Markdown("Source: Auto-detect")
    output_box = gr.Textbox(lines=6, label="Translation", interactive=False)
    download_file = gr.File(label="Download translation (.txt)", interactive=False)
    # The three outputs line up with translate()'s return tuple:
    # (translation, status message, path of the .txt file).
    translate_btn.click(
        translate,
        inputs=[input_box, src_lang, tgt_lang],
        outputs=[output_box, detected_lang, download_file],
    )
    # Also translate when the textbox fires its submit event.
    # NOTE(review): for a multi-line textbox the submit shortcut may be
    # Shift+Enter rather than plain Enter — confirm for the Gradio version used.
    input_box.submit(
        translate,
        inputs=[input_box, src_lang, tgt_lang],
        outputs=[output_box, detected_lang, download_file],
    )
    # Example rows fill the three inputs: (text, source, target).
    gr.Examples(
        examples=[
            ["Good morning! How are you today?", "Auto-detect", "French"],
            ["La tecnología está transformando la educación.", "Auto-detect", "English"],
            ["Ich mag datengetriebene Entscheidungen.", "Auto-detect", "Italian"],
            ["Ninapenda kusoma vitabu kila siku.", "Auto-detect", "English"],
            ["الذكاء الاصطناعي يغير العالم.", "Auto-detect", "Portuguese"],
        ],
        inputs=[input_box, src_lang, tgt_lang],
    )
# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()