Spaces:
Sleeping
Sleeping
| import os | |
| import tempfile | |
| from typing import Tuple | |
| import gradio as gr | |
| from langdetect import detect, DetectorFactory | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
# Pin langdetect's seed so detection results are deterministic across calls.
DetectorFactory.seed = 42
# ------- Supported languages -------
# Display name -> language code; the codes are used to build model ids.
LANG2CODE = {
    "English": "en",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
    "Italian": "it",
    "Portuguese": "pt",
    "Swahili": "sw",
    "Arabic": "ar",
}

# Display names, in the mapping's insertion order.
LANGS = list(LANG2CODE)

# Reverse lookup: language code -> display name.
CODE2LANG = dict(zip(LANG2CODE.values(), LANG2CODE.keys()))
# ------- Pipeline cache -------
# Maps "<src>-<tgt>" to the translation pipeline already built for that pair.
_model_cache = {}


def _ensure_translator(src_code: str, tgt_code: str):
    """
    Return the translation pipeline for a language pair, building it on
    first use.

    The pipeline wraps the Helsinki-NLP OPUS-MT checkpoint named after the
    pair and is stored in ``_model_cache``. Nothing is instantiated until a
    pair is actually requested, to keep memory low on free tiers.
    """
    key = f"{src_code}-{tgt_code}"
    if key not in _model_cache:
        model_id = f"Helsinki-NLP/opus-mt-{src_code}-{tgt_code}"
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
        _model_cache[key] = pipeline("translation", model=model, tokenizer=tokenizer)
    return _model_cache[key]
def _translate_once(text: str, src_code: str, tgt_code: str) -> str:
    """
    Translate ``text`` with the single model for ``src_code`` -> ``tgt_code``.

    Args:
        text: Text to translate.
        src_code: Source language code (e.g. ``"fr"``).
        tgt_code: Target language code (e.g. ``"en"``).

    Returns:
        The translated text of the first (only) pipeline result.

    Note:
        Input that tokenizes to more than the model's maximum length is
        truncated, so very long text is translated only up to that limit.
    """
    translator = _ensure_translator(src_code, tgt_code)
    # truncation=True clips over-long input at the tokenizer. Without it the
    # full sequence is handed to the model, which can fail for inputs longer
    # than the model supports.
    outputs = translator(text, max_length=512, truncation=True)
    return outputs[0]["translation_text"]
def _maybe_autodetect(text: str, src_lang: str) -> Tuple[str, str]:
    """
    Resolve the source language, detecting it from ``text`` when asked to.

    Returns ``(language_name, status_message)``. Any ``src_lang`` other than
    ``'Auto-detect'`` is returned unchanged. Otherwise the language is
    detected from ``text``; a code outside the supported set, or a failed
    detection, resolves to English with a message saying so.
    """
    if src_lang != "Auto-detect":
        return src_lang, f"Source: {src_lang}"

    try:
        code = detect(text)
        guessed = CODE2LANG.get(code)
        if guessed is not None:
            return guessed, f"Detected: {guessed} ({code})"
        # Not one of ours: keep the raw code in the message for transparency.
        return "English", f"Detected unsupported lang '{code}'. Falling back to English."
    except Exception:
        # Best effort: a detection failure must not block the translation.
        return "English", "Could not detect language. Defaulted to English."
def translate(text: str, src_lang: str, tgt_lang: str):
    """
    Translate ``text`` and write the result to a downloadable ``.txt`` file.

    Args:
        text: Text to translate. ``None`` or whitespace-only input counts as
            empty.
        src_lang: Source language name, or ``"Auto-detect"``.
        tgt_lang: Target language name.

    Returns:
        ``(translation, status_message, file_path)``. ``file_path`` is the
        path of a UTF-8 ``.txt`` file containing the translation. When there
        is nothing to translate, or a language is missing or unsupported,
        the translation is ``""`` and ``file_path`` is ``None``.
    """
    text = (text or "").strip()
    if not text:
        return "", "Enter text above to translate.", None

    # A missing or unknown language would otherwise surface as a KeyError at
    # the LANG2CODE lookups below; report it to the user instead.
    if tgt_lang not in LANG2CODE:
        return "", "Choose a supported target language.", None

    # Resolve auto-detect
    resolved_src, detect_msg = _maybe_autodetect(text, src_lang)
    if resolved_src not in LANG2CODE:
        return "", "Choose a supported source language.", None

    if resolved_src == tgt_lang:
        # Nothing to do: hand the input back unchanged.
        translation = text
    else:
        s = LANG2CODE[resolved_src]
        t = LANG2CODE[tgt_lang]
        # Strategy:
        # - If either side is English, translate directly
        # - Else pivot through English: src -> en -> tgt
        if s == "en" or t == "en":
            translation = _translate_once(text, s, t)
        else:
            pivot = _translate_once(text, s, "en")
            translation = _translate_once(pivot, "en", t)

    # delete=False keeps the file on disk after it is closed so the returned
    # path stays valid; the context manager closes the handle even if the
    # write fails.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".txt", mode="w", encoding="utf-8"
    ) as tmp:
        tmp.write(translation)
    return translation, detect_msg, tmp.name
with gr.Blocks(title="Global Translator") as demo:
    gr.Markdown("# 🌍 Global Translator")
    gr.Markdown(
        "Translate between **English, French, Spanish, German, Italian, Portuguese, Swahili, Arabic**.\n\n"
        "- ✅ All language pairs supported (non-English pairs pivot via English)\n"
        "- 🔎 Auto-detect source language\n"
        "- ⬇️ Download result as `.txt`"
    )

    # NOTE(review): which widgets belong inside each Row is assumed here
    # (dropdowns in the first, status line in the second) — confirm layout.
    with gr.Row():
        src_lang = gr.Dropdown(["Auto-detect"] + LANGS, value="Auto-detect", label="Source")
        tgt_lang = gr.Dropdown(LANGS, value="English", label="Target")

    input_box = gr.Textbox(lines=6, label="Your text")
    translate_btn = gr.Button("Translate")

    with gr.Row():
        detected_lang = gr.Markdown("Source: Auto-detect")

    output_box = gr.Textbox(lines=6, label="Translation", interactive=False)
    download_file = gr.File(label="Download translation (.txt)", interactive=False)

    # The button click and the textbox submit event run the same handler
    # with the same inputs and outputs.
    translate_inputs = [input_box, src_lang, tgt_lang]
    translate_outputs = [output_box, detected_lang, download_file]
    for register in (translate_btn.click, input_box.submit):
        register(
            translate,
            inputs=translate_inputs,
            outputs=translate_outputs,
        )

    # Each example row is (text, source language, target language).
    gr.Examples(
        examples=[
            ["Good morning! How are you today?", "Auto-detect", "French"],
            ["La tecnología está transformando la educación.", "Auto-detect", "English"],
            ["Ich mag datengetriebene Entscheidungen.", "Auto-detect", "Italian"],
            ["Ninapenda kusoma vitabu kila siku.", "Auto-detect", "English"],
            ["الذكاء الاصطناعي يغير العالم.", "Auto-detect", "Portuguese"],
        ],
        inputs=translate_inputs,
    )


# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()