import os os.environ["GRADIO_SSR_MODE"] = "false" import torch import spaces import gradio as gr from transformers import AutoModelForImageTextToText, AutoProcessor from huggingface_hub import login from fastapi import Request from fastapi.responses import JSONResponse # Login with HF token if available hf_token = os.environ.get("HF_TOKEN") if hf_token: login(token=hf_token) # Language codes LANGUAGES = { "en": "English", "de": "German", "fr": "French", "es": "Spanish", "it": "Italian", "pt": "Portuguese", "nl": "Dutch", "pl": "Polish", "cs": "Czech", "ru": "Russian", "uk": "Ukrainian", "zh": "Chinese", "ja": "Japanese", "ko": "Korean", "ar": "Arabic", "hi": "Hindi", "bn": "Bengali", "tr": "Turkish", "vi": "Vietnamese", "th": "Thai", "id": "Indonesian", "ms": "Malay", "sv": "Swedish", "no": "Norwegian", "da": "Danish", "fi": "Finnish", "el": "Greek", "he": "Hebrew", "ro": "Romanian", "hu": "Hungarian", "bg": "Bulgarian", "hr": "Croatian", "sk": "Slovak", "sl": "Slovenian", "sr": "Serbian", "lt": "Lithuanian", "lv": "Latvian", "et": "Estonian", "sw": "Swahili", "ta": "Tamil", "te": "Telugu", "mr": "Marathi", "gu": "Gujarati", "kn": "Kannada", "ml": "Malayalam", "pa": "Punjabi", "ur": "Urdu", "fa": "Persian", "fil": "Filipino", "ca": "Catalan", "gl": "Galician", "eu": "Basque", "cy": "Welsh", "ga": "Irish", } model_id = "google/translategemma-4b-it" print("Loading processor...") processor = AutoProcessor.from_pretrained(model_id) print("Processor loaded!") model = None def load_model(): global model if model is None: print("Loading model...") device = "cuda" if torch.cuda.is_available() else "cpu" model = AutoModelForImageTextToText.from_pretrained( model_id, torch_dtype=torch.bfloat16, ).to(device).eval() print(f"Model loaded on {device.upper()}!") return model @spaces.GPU(duration=120) def translate(text: str, source_lang: str, target_lang: str) -> str: if not text or not text.strip(): return "" m = load_model() messages = [ { "role": "user", "content": [ { "type": 
"text", "source_lang_code": source_lang, "target_lang_code": target_lang, "text": text, } ], } ] device = "cuda" if torch.cuda.is_available() else "cpu" inputs = processor.apply_chat_template( messages, tokenize=True, add_generation_prompt=True, return_dict=True, return_tensors="pt" ).to(device) with torch.inference_mode(): generation = m.generate(**inputs, max_new_tokens=1024, do_sample=False) input_len = inputs["input_ids"].shape[1] output = processor.decode(generation[0][input_len:], skip_special_tokens=True) return output.strip() LANG_CODES = list(LANGUAGES.keys()) LANG_CHOICES = [f"{code} ({name})" for code, name in LANGUAGES.items()] def gradio_translate(text, source, target): src_code = source.split(" ")[0] tgt_code = target.split(" ")[0] return translate(text, src_code, tgt_code) # β Gradio UI μ μ with gr.Blocks(title="TranslateGemma") as demo: gr.HTML(""" """) with gr.Column(elem_id="col-container"): gr.HTML("""
⚡ AI-Powered Translation for 55 Languages ⚡
🤖 MODEL: translategemma-4b-it🌐 MCP Endpoint: POST /mcp
⏱️ Note: First request loads model (~60s), then fast (~5s)