Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from fastapi import FastAPI, HTTPException | |
| from pydantic import BaseModel | |
| from transformers import pipeline | |
| import langdetect | |
| import logging | |
| import os | |
| from typing import Optional, Dict | |
| import re | |
| from functools import lru_cache, partial | |
| import asyncio | |
| from contextlib import asynccontextmanager | |
# --- 1. Initial configuration ---
# Working directories used by the service: model cache and log output.
os.makedirs("./cache", exist_ok=True)
os.makedirs("./logs", exist_ok=True)

# Point the Hugging Face cache locations at the local cache directory.
# NOTE(review): these are assigned after `transformers` has already been
# imported at the top of the file -- confirm the library still honours them.
os.environ.update({"HF_HOME": "./cache", "TRANSFORMERS_CACHE": "./cache"})

# Runtime settings
DEVICE = -1  # always CPU, for compatibility
MAX_TEXT_LENGTH = int(os.environ.get("MAX_TEXT_LENGTH", "5000"))

# Logging setup
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
# Supported source languages, each mapped to its Helsinki-NLP "to English" model
MODEL_MAP = dict(
    th="Helsinki-NLP/opus-mt-th-en",
    ja="Helsinki-NLP/opus-mt-ja-en",
    zh="Helsinki-NLP/opus-mt-zh-en",
    vi="Helsinki-NLP/opus-mt-vi-en",
)

# Terms that must come through translation unchanged
PROTECTED_TERMS = ["2030 Aspirations", "Griffith"]

# Loaded translation pipelines, keyed by source-language code
translators: Dict[str, pipeline] = dict()
| # --- Pydantic Models --- | |
# Request body for a translation call.
class TranslationRequest(BaseModel):
    # Text to translate.
    text: str
    # Optional source-language code; when absent the language is auto-detected.
    source_lang_override: Optional[str] = None
# Result of a translation call.
class TranslationResponse(BaseModel):
    # Translated text (or the input text itself when no translation was needed).
    translated_text: str
    # Language code the input was treated as, when known.
    source_language: Optional[str] = None
| # --- Lifespan Event Handler --- | |
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler: preload every translation model at startup.

    Each model listed in ``MODEL_MAP`` is loaded into the module-level
    ``translators`` cache before the application starts serving.  A model that
    fails to load is logged and skipped, so the service still starts with the
    remaining languages.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).

    Yields:
        None. Control returns to the framework while the application runs.
    """
    logger.info("Memulai prapemuatan model translasi...")
    for lang, model_name in MODEL_MAP.items():
        try:
            logger.info(f"Memuat model untuk bahasa: {lang} ({model_name})")
            translators[lang] = pipeline("translation", model=model_name, device=DEVICE)
            logger.info(f"Model untuk {lang} berhasil dimuat.")
        except Exception as e:
            # Best effort: one broken model must not prevent the others
            # from being served.
            logger.error(f"Gagal memuat model untuk {lang}: {str(e)}")

    # Report the real outcome instead of always claiming success.
    failed = [lang for lang in MODEL_MAP if lang not in translators]
    if failed:
        logger.warning(
            f"Prapemuatan selesai, tetapi model berikut gagal dimuat: {', '.join(failed)}"
        )
    else:
        logger.info("Semua model telah dimuat.")
    yield  # the application runs here
# --- FastAPI application initialisation, wired to the lifespan handler ---
app = FastAPI(title="Translation Service API", lifespan=lifespan)
| # --- Fungsi Utility --- | |
def get_translator(lang: str) -> pipeline:
    """Return the preloaded translation pipeline for ``lang``.

    Args:
        lang: Source-language code used as the key in ``translators``.

    Returns:
        The cached pipeline for that language.

    Raises:
        HTTPException: Status 500 when no pipeline is cached for ``lang``.
    """
    cached = translators.get(lang)
    if cached:
        return cached
    logger.error(f"Translator untuk bahasa '{lang}' tidak ditemukan. Mungkin gagal dimuat saat startup.")
    raise HTTPException(status_code=500, detail=f"Model terjemahan untuk '{lang}' tidak tersedia.")
@lru_cache(maxsize=1024)
def _detect_language_cached(preview_text: str) -> str:
    """Detect the language of ``preview_text`` and map it to a supported code.

    Results are memoised so repeated requests for the same text get the same
    answer without re-running detection.  Exceptions propagate to the caller
    and are therefore never cached.
    """
    detected_lang = langdetect.detect(preview_text)
    # Detector codes beginning with "zh" all collapse to the single "zh" model.
    if detected_lang.startswith("zh"):
        return "zh"
    return detected_lang if detected_lang in MODEL_MAP else "en"


def detect_language(text: str) -> str:
    """Detect the source language of ``text``, with caching.

    Only the first 500 characters are inspected; that preview is also the
    cache key, which keeps cache entries small.

    Args:
        text: Input text whose language should be detected.

    Returns:
        A key of ``MODEL_MAP`` when a supported language is detected, otherwise
        ``"en"`` (also returned when detection fails).
    """
    try:
        preview_text = text[:500]
        return _detect_language_cached(preview_text)
    except Exception as e:
        logger.warning(f"Deteksi bahasa gagal: {str(e)}. Mengasumsikan 'en'.")
        return "en"
def protect_terms(text: str, protected_terms: list) -> tuple[str, dict]:
    """Replace protected terms in ``text`` with opaque placeholders.

    Matching is case-insensitive.  A term matches only when it is not directly
    adjacent to an ASCII letter, digit or underscore.  Unlike ``\\b``, which is
    Unicode-aware and sees no boundary between a CJK/Thai character and an
    ASCII letter, this still matches terms embedded in unspaced scripts
    (e.g. ``Griffith大学``) while rejecting ``Griffithson``.

    Args:
        text: The text to scan.
        protected_terms: Terms to mask; the index of each term determines its
            placeholder (``__PROTECTED_<index>__``).  Empty terms are skipped.

    Returns:
        A ``(masked_text, replacements)`` tuple, where ``replacements`` maps
        each placeholder that was actually inserted to its canonical term.
    """
    replacements = {}
    for i, term in enumerate(protected_terms):
        if not term:
            # An empty pattern would match at every position.
            continue
        placeholder = f"__PROTECTED_{i}__"
        pattern = r"(?<![A-Za-z0-9_])" + re.escape(term) + r"(?![A-Za-z0-9_])"
        text, hits = re.subn(pattern, placeholder, text, flags=re.IGNORECASE)
        if hits:
            replacements[placeholder] = term
    return text, replacements
def restore_terms(text: str, replacements: dict) -> str:
    """Substitute every placeholder in ``text`` with its original term.

    Args:
        text: Text that may contain placeholders.
        replacements: Mapping of placeholder -> term to put back.

    Returns:
        The text with each placeholder occurrence replaced, applied in the
        mapping's iteration order.
    """
    restored = text
    for marker in replacements:
        restored = restored.replace(marker, replacements[marker])
    return restored
| # --- Fungsi Inti dan Endpoint API --- | |
async def perform_translation(text: str, source_lang_override: Optional[str] = None) -> TranslationResponse:
    """Translate ``text`` into English.

    The source language is ``source_lang_override`` when it is a key of
    ``MODEL_MAP``; otherwise it is auto-detected.  Text treated as English is
    returned unchanged.  Protected terms are masked before translation and
    restored afterwards.  The model call runs in a worker thread.

    NOTE(review): ``max_length=512`` is passed to the pipeline while inputs of
    up to ``MAX_TEXT_LENGTH`` characters are accepted -- confirm whether long
    inputs need to be split before translation.

    Args:
        text: Text to translate.
        source_lang_override: Optional source-language code.

    Returns:
        The translated text together with the source language that was used.

    Raises:
        HTTPException: 400 for empty input, 413 for input longer than
            ``MAX_TEXT_LENGTH``, 500 when the model is unavailable or the
            translation fails.
    """
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Teks input tidak boleh kosong.")
    if len(text) > MAX_TEXT_LENGTH:
        raise HTTPException(
            status_code=413,
            detail=f"Teks terlalu panjang. Panjang maksimal yang diizinkan: {MAX_TEXT_LENGTH}."
        )
    try:
        if source_lang_override and source_lang_override in MODEL_MAP:
            source_lang = source_lang_override
        else:
            source_lang = detect_language(text)
        if source_lang == "en":
            return TranslationResponse(translated_text=text, source_language=source_lang)
        translator = get_translator(source_lang)
        modified_text, replacements = protect_terms(text, PROTECTED_TERMS)
        # Run the blocking model call off the event loop.
        result = await asyncio.to_thread(translator, modified_text, max_length=512, num_beams=4)
        translated_text = result[0]["translation_text"]
        final_text = restore_terms(translated_text, replacements)
        return TranslationResponse(translated_text=final_text, source_language=source_lang)
    except HTTPException:
        # Already a well-formed HTTP error (e.g. missing model): pass through.
        raise
    except Exception as e:
        # Log with traceback and keep the original cause chained.
        logger.exception(f"Terjadi kesalahan saat translasi: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Proses translasi gagal: {str(e)}") from e
# NOTE(review): the route path is set here; confirm the public path that API
# clients are expected to call.
@app.post("/translate", response_model=TranslationResponse)
async def translate_api(request: TranslationRequest):
    """HTTP endpoint: translate the text carried by ``request``.

    Args:
        request: Body holding the text and an optional source-language override.

    Returns:
        The ``TranslationResponse`` produced by ``perform_translation``.

    Raises:
        HTTPException: Propagated from ``perform_translation``.
    """
    return await perform_translation(request.text, request.source_lang_override)
# NOTE(review): the route path is set here; confirm the path that monitoring
# is expected to poll.
@app.get("/health")
async def health_check():
    """HTTP endpoint: report service status and the currently loaded models.

    Returns:
        A dict with a fixed ``"status"`` of ``"healthy"`` and, under
        ``"loaded_models"``, the language codes present in ``translators``.
    """
    return {"status": "healthy", "loaded_models": list(translators.keys())}
| # --- Handler Gradio Async --- | |
async def translate_gradio(text: str, source_lang: str = "auto"):
    """Gradio handler: translate ``text`` and report the source language.

    Args:
        text: Text entered in the UI.
        source_lang: Language code chosen in the UI, or ``"auto"`` to detect.

    Returns:
        A ``(translated_text, language)`` pair for the two output boxes.
        Errors are returned as display strings instead of being raised.
    """
    if not (text and text.strip()):
        return "Masukkan teks untuk diterjemahkan.", "N/A"
    try:
        # "auto" means no override: let the translation step detect it.
        override = None if source_lang == "auto" else source_lang
        outcome = await perform_translation(text, override)
        return outcome.translated_text, outcome.source_language or "Unknown"
    except HTTPException as http_err:
        return f"Error: {http_err.detail}", "Error"
    except Exception as err:
        return f"Error: {str(err)}", "Error"
| # --- UI Gradio --- | |
def create_gradio_interface():
    """Build the Gradio Blocks UI for the translation service.

    The layout has an input column (text box, source-language dropdown,
    translate button) and an output column (translation, detected language),
    followed by clickable examples.  The button click and the text box submit
    both invoke ``translate_gradio``.

    Returns:
        The constructed ``gr.Blocks`` interface.
    """
    with gr.Blocks(
        title="Multi-Language Translation Service",
        theme=gr.themes.Soft(),
        css=".gradio-container { max-width: 1200px !important; }"
    ) as interface:
        gr.Markdown("""
# 🌐 Multi-Language Translation Service
Terjemahkan teks dari **Thai**, **Jepang**, **Mandarin**, atau **Vietnam** ke **Inggris**.
✨ Fitur: Deteksi bahasa otomatis • Perlindungan istilah • Model Helsinki-NLP yang cepat.
""")
        with gr.Row():
            with gr.Column(scale=1):
                text_input = gr.Textbox(label="📝 Input Text", placeholder="Enter text to translate...", lines=6, max_lines=10)
                with gr.Row():
                    lang_dropdown = gr.Dropdown(
                        choices=[
                            ("🔍 Auto-detect", "auto"), ("🇹🇭 Thai", "th"), ("🇯🇵 Japanese", "ja"),
                            ("🇨🇳 Chinese", "zh"), ("🇻🇳 Vietnamese", "vi")
                        ],
                        value="auto", label="Source Language"
                    )
                translate_btn = gr.Button("🚀 Translate", variant="primary", size="lg")
            with gr.Column(scale=1):
                output_text = gr.Textbox(label="🎯 Translation Result", lines=6, max_lines=10, interactive=False)
                detected_lang = gr.Textbox(label="🔍 Detected Language", interactive=False, max_lines=1)
        gr.Examples(
            examples=[
                ["สวัสดีครับ ยินดีที่ได้รู้จัก การพัฒนา 2030 Aspirations เป็นเป้าหมายสำคัญ", "th"],
                ["こんにちは、はじめまして。Griffith大学での研究が進んでいます。", "ja"],
                ["你好,很高兴认识你。我们正在为2030 Aspirations制定计划。", "zh"],
                ["Xin chào, rất vui được gặp bạn. Griffith là trường đại học tuyệt vời.", "vi"],
            ],
            inputs=[text_input, lang_dropdown],
            outputs=[output_text, detected_lang],
            # Pass the async handler directly, exactly as the click/submit
            # handlers do.  Wrapping it in partial(asyncio.run, ...) would call
            # asyncio.run(handler, text, lang), which raises TypeError.
            fn=translate_gradio,
            cache_examples=False
        )
        translate_btn.click(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
        text_input.submit(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
    return interface
# Build the Gradio UI and mount it on the FastAPI application at the root path.
gradio_app = create_gradio_interface()
app = gr.mount_gradio_app(app, gradio_app, path="/")