"""Gradio app that translates text between a fixed set of languages.

Translation uses Helsinki-NLP OPUS-MT checkpoints loaded lazily through
``transformers``; the source language can be auto-detected with ``langdetect``.
"""

import logging
import os
import tempfile
from typing import Tuple

import gradio as gr
from langdetect import detect, DetectorFactory
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

logger = logging.getLogger(__name__)

# Make langdetect deterministic
DetectorFactory.seed = 42

# ------- Supported languages -------
LANGS = [
    "English", "French", "Spanish", "German",
    "Italian", "Portuguese", "Swahili", "Arabic"
]

# Display name -> language code used both by langdetect and in OPUS-MT model ids.
LANG2CODE = {
    "English": "en",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
    "Italian": "it",
    "Portuguese": "pt",
    "Swahili": "sw",
    "Arabic": "ar",
}
CODE2LANG = {v: k for k, v in LANG2CODE.items()}

# ------- Pipeline cache -------
# Maps "<src>-<tgt>" to an already constructed translation pipeline.
_model_cache = {}


def _ensure_translator(src_code: str, tgt_code: str):
    """
    Lazily load a translation pipeline for a language pair.

    We use OPUS-MT (Helsinki-NLP). We only instantiate models when needed to
    keep memory low on free tiers. Loaded pipelines are kept in
    ``_model_cache`` so each pair is built at most once per process.

    Raises:
        OSError: propagated from ``from_pretrained`` when the checkpoint
            ``Helsinki-NLP/opus-mt-<src>-<tgt>`` cannot be loaded.
    """
    key = f"{src_code}-{tgt_code}"
    if key in _model_cache:
        return _model_cache[key]

    model_id = f"Helsinki-NLP/opus-mt-{src_code}-{tgt_code}"
    tok = AutoTokenizer.from_pretrained(model_id)
    mdl = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    _model_cache[key] = pipeline("translation", model=mdl, tokenizer=tok)
    return _model_cache[key]


def _translate_once(text: str, src_code: str, tgt_code: str) -> str:
    """Translate ``text`` with the direct ``src_code`` -> ``tgt_code`` model."""
    translator = _ensure_translator(src_code, tgt_code)
    out = translator(text, max_length=512)
    return out[0]["translation_text"]


def _maybe_autodetect(text: str, src_lang: str) -> Tuple[str, str]:
    """
    Returns (resolved_src_lang_name, detected_message).

    If src_lang == 'Auto-detect', we detect and return that language.
    Otherwise, we just return src_lang. Detection failures and languages
    outside ``CODE2LANG`` both fall back to English, with a message saying so.
    """
    if src_lang != "Auto-detect":
        return src_lang, f"Source: {src_lang}"

    try:
        code = detect(text)
    except Exception:
        # Best effort: any detector failure degrades to the English default.
        return "English", "Could not detect language. Defaulted to English."

    guessed = CODE2LANG.get(code)
    if guessed is None:
        # If not in our supported set, still show the code for transparency
        return "English", f"Detected unsupported lang '{code}'. Falling back to English."
    return guessed, f"Detected: {guessed} ({code})"


def translate(text: str, src_lang: str, tgt_lang: str):
    """
    Translate ``text`` and prepare a downloadable ``.txt`` copy of the result.

    Args:
        text: Text to translate; ``None`` or whitespace-only input is treated
            as empty.
        src_lang: A name from ``LANGS`` or ``"Auto-detect"``.
        tgt_lang: A name from ``LANGS``.

    Returns:
        A ``(translation, status_message, file_path)`` tuple. ``file_path`` is
        the path of a temporary UTF-8 text file holding the translation, or
        ``None`` when nothing was translated (empty input, unsupported
        language selection, or a model that could not be loaded).
    """
    text = (text or "").strip()
    if not text:
        return "", "Enter text above to translate.", None

    # Resolve auto-detect
    resolved_src, detect_msg = _maybe_autodetect(text, src_lang)

    if resolved_src == tgt_lang:
        translation = text
    else:
        s = LANG2CODE.get(resolved_src)
        t = LANG2CODE.get(tgt_lang)
        if s is None or t is None:
            # A cleared dropdown (None) or an unknown name used to raise a
            # bare KeyError; report it in the status area instead.
            return "", f"{detect_msg}. Please choose a supported source and target language.", None

        # Strategy:
        # - If either side is English, translate directly
        # - Else pivot through English: src -> en -> tgt
        try:
            if s == "en" or t == "en":
                translation = _translate_once(text, s, t)
            else:
                pivot = _translate_once(text, s, "en")
                translation = _translate_once(pivot, "en", t)
        except OSError:
            # from_pretrained raises OSError when a checkpoint is missing or
            # cannot be downloaded; keep the traceback in the logs and tell
            # the user instead of crashing the handler.
            logger.exception("Could not load a model for %s -> %s", resolved_src, tgt_lang)
            return (
                "",
                f"{detect_msg}. No translation model could be loaded for {resolved_src} to {tgt_lang}.",
                None,
            )

    # Create a temporary .txt file for download. delete=False keeps the file
    # on disk after the handle is closed so it can still be served by path.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".txt", mode="w", encoding="utf-8"
    ) as tmp:
        tmp.write(translation)

    return translation, detect_msg, tmp.name


# NOTE(review): the original indentation was lost; which components sit inside
# each gr.Row() below is reconstructed -- confirm against the intended layout.
with gr.Blocks(title="Global Translator") as demo:
    gr.Markdown("# 🌍 Global Translator")
    gr.Markdown(
        "Translate between **English, French, Spanish, German, Italian, Portuguese, Swahili, Arabic**.\n\n"
        "- ✅ All language pairs supported (non-English pairs pivot via English)\n"
        "- 🔎 Auto-detect source language\n"
        "- ⬇️ Download result as `.txt`"
    )

    with gr.Row():
        src_lang = gr.Dropdown(["Auto-detect"] + LANGS, value="Auto-detect", label="Source")
        tgt_lang = gr.Dropdown(LANGS, value="English", label="Target")

    input_box = gr.Textbox(lines=6, label="Your text")
    translate_btn = gr.Button("Translate")

    with gr.Row():
        detected_lang = gr.Markdown("Source: Auto-detect")

    output_box = gr.Textbox(lines=6, label="Translation", interactive=False)
    download_file = gr.File(label="Download translation (.txt)", interactive=False)

    translate_btn.click(
        translate,
        inputs=[input_box, src_lang, tgt_lang],
        outputs=[output_box, detected_lang, download_file],
    )

    # Also translate when the textbox itself is submitted
    input_box.submit(
        translate,
        inputs=[input_box, src_lang, tgt_lang],
        outputs=[output_box, detected_lang, download_file],
    )

    gr.Examples(
        examples=[
            ["Good morning! How are you today?", "Auto-detect", "French"],
            ["La tecnología está transformando la educación.", "Auto-detect", "English"],
            ["Ich mag datengetriebene Entscheidungen.", "Auto-detect", "Italian"],
            ["Ninapenda kusoma vitabu kila siku.", "Auto-detect", "English"],
            ["الذكاء الاصطناعي يغير العالم.", "Auto-detect", "Portuguese"],
        ],
        inputs=[input_box, src_lang, tgt_lang],
    )

if __name__ == "__main__":
    demo.launch()