Spaces:

danicor
/

TR

Sleeping

App Files Files Community

danicor committed on Sep 10, 2025

Commit

bf29f37

verified ·

1 Parent(s): ee406c7

Update app.py

Browse files

Files changed (1) hide show

app.py +373 -160

app.py CHANGED Viewed

@@ -1,207 +1,420 @@
 import torch
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-import time, hashlib, threading, logging
 from datetime import datetime, timedelta
-from typing import Tuple
-from fastapi import FastAPI, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
-from contextlib import asynccontextmanager
-# ---- Logging ----
 logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("translator")
-# ---- Cache ----
 class TranslationCache:
     def __init__(self, cache_duration_minutes: int = 60):
         self.cache = {}
         self.cache_duration = timedelta(minutes=cache_duration_minutes)
         self.lock = threading.Lock()
-    def _key(self, text, source, target):
-        return hashlib.md5(f"{text}_{source}_{target}".encode()).hexdigest()
-    def get(self, text, source, target):
         with self.lock:
-            key = self._key(text, source, target)
             if key in self.cache:
-                translation, ts = self.cache[key]
-                if datetime.now() - ts < self.cache_duration:
                     return translation
                 else:
                     del self.cache[key]
             return None
-    def set(self, text, source, target, translation):
         with self.lock:
-            key = self._key(text, source, target)
             self.cache[key] = (translation, datetime.now())
-    def clear(self):
-        with self.lock:
-            self.cache = {}
-# ---- Translator ----
 class MultilingualTranslator:
-    def __init__(self, cache_minutes=60):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         logger.info(f"Using device: {self.device}")
-        self.cache = TranslationCache(cache_minutes)
-        self.model_name = "facebook/m2m100_418M"  # سبک‌تر از نسخه 1.2B
         logger.info(f"Loading model: {self.model_name}")
-        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
-        self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(self.device)
-        logger.info("Model loaded successfully!")
-        # stats
-        self.stats = {
-            "requests": 0,
-            "chars_translated": 0,
-            "avg_time": 0.0
-        }
-    def translate_text(self, text, src, tgt) -> Tuple[str, float]:
-        start = time.time()
-        cached = self.cache.get(text, src, tgt)
-        if cached:
-            return cached, time.time() - start
         try:
-            self.tokenizer.src_lang = src
             encoded = self.tokenizer(text, return_tensors="pt").to(self.device)
-            tokens = self.model.generate(
                 **encoded,
-                forced_bos_token_id=self.tokenizer.get_lang_id(tgt),
                 max_length=512,
                 num_beams=4,
                 early_stopping=True
             )
-            result = self.tokenizer.batch_decode(tokens, skip_special_tokens=True)[0]
-            self.cache.set(text, src, tgt, result)
-            processing = time.time() - start
-            # update stats
-            self.stats["requests"] += 1
-            self.stats["chars_translated"] += len(text)
-            self.stats["avg_time"] = (
-                (self.stats["avg_time"] * (self.stats["requests"] - 1) + processing)
-                / self.stats["requests"]
-            )
-            return result, processing
         except Exception as e:
             logger.error(f"Translation error: {e}")
-            return f"Translation error: {str(e)}", time.time() - start
-# ---- Languages ----
 LANGUAGE_MAP = {
-    "English": "en", "Persian (Farsi)": "fa", "Arabic": "ar",
-    "French": "fr", "German": "de", "Spanish": "es", "Italian": "it",
-    "Portuguese": "pt", "Russian": "ru", "Chinese (Simplified)": "zh",
-    "Japanese": "ja", "Korean": "ko", "Hindi": "hi", "Turkish": "tr",
-    "Dutch": "nl", "Polish": "pl", "Swedish": "sv", "Norwegian": "no",
-    "Danish": "da", "Finnish": "fi", "Greek": "el", "Hebrew": "he",
-    "Thai": "th", "Vietnamese": "vi", "Indonesian": "id", "Malay": "ms",
-    "Czech": "cs", "Slovak": "sk", "Hungarian": "hu", "Romanian": "ro",
-    "Bulgarian": "bg", "Croatian": "hr", "Serbian": "sr", "Slovenian": "sl",
-    "Lithuanian": "lt", "Latvian": "lv", "Estonian": "et", "Ukrainian": "uk",
-    "Belarusian": "be", "Kazakh": "kk", "Uzbek": "uz", "Georgian": "ka",
-    "Armenian": "hy", "Azerbaijani": "az", "Bengali": "bn", "Urdu": "ur",
-    "Tamil": "ta", "Telugu": "te", "Malayalam": "ml", "Kannada": "kn",
-    "Gujarati": "gu", "Punjabi": "pa", "Marathi": "mr", "Nepali": "ne",
-    "Sinhala": "si", "Burmese": "my", "Khmer": "km", "Lao": "lo",
-    "Mongolian": "mn", "Afrikaans": "af", "Amharic": "am",
-    "Yoruba": "yo", "Igbo": "ig", "Hausa": "ha", "Swahili": "sw",
-    "Xhosa": "xh", "Zulu": "zu"
 }
-# ---- FastAPI ----
-translator: MultilingualTranslator | None = None  # lazy load
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    logger.info("🚀 Translator API started. Model will be loaded on first request.")
-    yield
-    logger.info("🛑 Translator API shutting down.")
-app = FastAPI(title="Translator API", lifespan=lifespan)
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"], allow_credentials=True,
-    allow_methods=["*"], allow_headers=["*"]
-)
-# ---- Models ----
-class TranslateRequest(BaseModel):
-    text: str
-    source_lang: str
-    target_lang: str
-    api_key: str | None = None
-class ConfigRequest(BaseModel):
-    cache_minutes: int
-# ---- Endpoints ----
-@app.post("/api/translate")
-async def translate(req: TranslateRequest):
     global translator
-    if not req.text.strip():
-        raise HTTPException(status_code=400, detail="No text provided")
-    src = LANGUAGE_MAP.get(req.source_lang)
-    tgt = LANGUAGE_MAP.get(req.target_lang)
-    if not src or not tgt:
-        raise HTTPException(status_code=400, detail="Invalid language codes")
     if translator is None:
-        logger.info("⏳ Loading translation model...")
-        start = time.time()
-        translator = MultilingualTranslator()
-        logger.info(f"✅ Model loaded in {time.time()-start:.1f}s")
-    translation, secs = translator.translate_text(req.text, src, tgt)
-    return {
-        "translation": translation,
-        "source_language": req.source_lang,
-        "target_language": req.target_lang,
-        "processing_time": round(secs, 2),
-        "character_count": len(req.text),
-        "status": "success"
-    }
-@app.get("/api/languages")
-async def languages():
-    return {"languages": list(LANGUAGE_MAP.keys()), "status": "success"}
-@app.get("/api/health")
-async def health():
-    return {
-        "status": "ok",
-        "model_loaded": translator is not None,
-        "device": str(translator.device) if translator else None,
-        "model": translator.model_name if translator else None
-    }
-@app.get("/api/stats")
-async def stats():
-    if translator is None:
-        return {"stats": {}, "status": "model_not_loaded"}
-    return {"stats": translator.stats, "status": "success"}
-@app.post("/api/reset-cache")
-async def reset_cache():
-    if translator is None:
-        return {"message": "Model not loaded yet", "status": "warning"}
-    translator.cache.clear()
-    return {"message": "Cache cleared", "status": "success"}
-@app.post("/api/config")
-async def update_config(cfg: ConfigRequest):
-    global translator
-    if translator is None:
-        translator = MultilingualTranslator(cfg.cache_minutes)
-        return {"message": f"Model initialized with cache {cfg.cache_minutes} minutes", "status": "success"}
-    else:
-        translator.cache = TranslationCache(cfg.cache_minutes)
-        return {"message": f"Cache duration updated to {cfg.cache_minutes} minutes", "status": "success"}
 if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=False)

+# Standard library
+import hashlib
+import json
+import logging
+import threading
+import time
 from datetime import datetime, timedelta
+from queue import Queue
+from typing import Dict, List, Optional, Tuple
+# Third-party
+import gradio as gr
+import requests
 import torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+# Configure root logging at INFO level and create this module's logger,
+# which the classes and functions below use for status and error messages.
 logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 class TranslationCache:
+    """Thread-safe in-memory translation cache with time-based expiry.
+    ``self.cache`` maps an MD5 hex digest of text/source/target to a
+    ``(translation, stored_at)`` tuple; every access is guarded by ``self.lock``.
+    """
     def __init__(self, cache_duration_minutes: int = 60):
+        """Create an empty cache whose entries stay valid for ``cache_duration_minutes``."""
         self.cache = {}
         self.cache_duration = timedelta(minutes=cache_duration_minutes)
         self.lock = threading.Lock()
+    def _generate_key(self, text: str, source_lang: str, target_lang: str) -> str:
+        """Generate cache key from text and languages"""
+        content = f"{text}_{source_lang}_{target_lang}"
+        return hashlib.md5(content.encode()).hexdigest()
+    def _purge_expired(self, now: datetime) -> None:
+        """Delete every entry at least ``cache_duration`` old; caller must hold ``self.lock``."""
+        stale = [
+            key for key, (_, stored_at) in self.cache.items()
+            if now - stored_at >= self.cache_duration
+        ]
+        for key in stale:
+            del self.cache[key]
+    def get(self, text: str, source_lang: str, target_lang: str) -> Optional[str]:
+        """Return the cached translation, or ``None`` when it is missing or expired.
+        An expired entry is removed as a side effect of the lookup.
+        """
+        key = self._generate_key(text, source_lang, target_lang)
         with self.lock:
+            entry = self.cache.get(key)
+            if entry is None:
+                return None
+            translation, timestamp = entry
+            if datetime.now() - timestamp < self.cache_duration:
+                logger.info(f"Cache hit for key: {key[:8]}...")
+                return translation
+            # Remove expired entry
+            del self.cache[key]
             return None
+    def set(self, text: str, source_lang: str, target_lang: str, translation: str):
+        """Store ``translation`` under the request's key, stamped with the current time.
+        Expired entries are swept first: without this, an entry that is never
+        requested again would stay in memory for the life of the process.
+        """
+        key = self._generate_key(text, source_lang, target_lang)
+        now = datetime.now()
         with self.lock:
+            self._purge_expired(now)
+            self.cache[key] = (translation, now)
+            logger.info(f"Cached translation for key: {key[:8]}...")
+class TranslationQueue:
+    """FIFO of callables executed on background threads, at most ``max_workers`` at a time.
+    ``current_workers`` counts the threads currently running a task and is only
+    read or written while holding ``self.lock``.
+    """
+    def __init__(self, max_workers: int = 3):
+        """Create an empty queue that runs up to ``max_workers`` tasks concurrently."""
+        self.queue = Queue()
+        self.max_workers = max_workers
+        self.current_workers = 0
+        self.lock = threading.Lock()
+    def add_task(self, task_func, *args, **kwargs):
+        """Add translation task to queue"""
+        self.queue.put((task_func, args, kwargs))
+    def _run_task(self, task_func, args, kwargs):
+        """Run one task on the current thread and release its worker slot afterwards."""
+        try:
+            task_func(*args, **kwargs)
+        finally:
+            # Free the slot even when the task raises.
+            with self.lock:
+                self.current_workers -= 1
+    def process_queue(self):
+        """Start a thread for every queued task, never exceeding ``max_workers`` at once.
+        Returns once the queue has been drained; started threads may still be running.
+        Task return values are discarded.
+        """
+        while not self.queue.empty():
+            task = None
+            with self.lock:
+                # Reserve a slot and dequeue atomically so the limit cannot be overshot.
+                if self.current_workers < self.max_workers and not self.queue.empty():
+                    task = self.queue.get()
+                    self.current_workers += 1
+            if task is None:
+                # Wait outside the lock so finishing workers can release their slots.
+                time.sleep(0.1)
+                continue
+            # Pass the task as thread arguments: a closure over loop variables
+            # could see them rebound by the next iteration before the thread runs.
+            thread = threading.Thread(target=self._run_task, args=task)
+            thread.start()
 class MultilingualTranslator:
+    """Translate text with the ``facebook/m2m100_1.2B`` model, caching results.
+    The constructor selects CUDA when ``torch.cuda.is_available()`` and CPU otherwise,
+    loads the tokenizer and model, and creates a ``TranslationCache`` and a
+    ``TranslationQueue``.
+    """
+    def __init__(self, cache_duration_minutes: int = 60):
+        """Load the model and set up the cache.
+        Args:
+            cache_duration_minutes: Lifetime of cached translations, in minutes.
+        Raises:
+            Exception: Any error raised while loading the tokenizer, model or
+                pipeline is logged and re-raised.
+        """
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         logger.info(f"Using device: {self.device}")
+        # Initialize cache and queue
+        self.cache = TranslationCache(cache_duration_minutes)
+        self.queue = TranslationQueue()
+        # tokenizer.src_lang is shared mutable state; this lock stops concurrent
+        # translate_text calls from overwriting each other's source language.
+        self._tokenizer_lock = threading.Lock()
+        # Load model - using a powerful multilingual model
+        self.model_name = "facebook/m2m100_1.2B"
         logger.info(f"Loading model: {self.model_name}")
         try:
+            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
+            self.model.to(self.device)
+            # Create pipeline
+            self.translator = pipeline(
+                "translation",
+                model=self.model,
+                tokenizer=self.tokenizer,
+                device=0 if self.device.type == "cuda" else -1
+            )
+            logger.info("Model loaded successfully!")
+        except Exception as e:
+            logger.error(f"Error loading model: {e}")
+            raise
+    def translate_text(self, text: str, source_lang: str, target_lang: str) -> Tuple[str, float]:
+        """Translate ``text`` and report how long the call took.
+        Args:
+            text: Text to translate.
+            source_lang: Language code assigned to ``tokenizer.src_lang``.
+            target_lang: Language code passed to ``tokenizer.get_lang_id``.
+        Returns:
+            ``(translation, seconds)``. On failure the first element is the string
+            ``"Translation error: ..."``; the error is logged, not raised, and the
+            failed result is not cached.
+        """
+        start_time = time.time()
+        # Check cache first; compare with None so a cached empty string is still a hit
+        cached_result = self.cache.get(text, source_lang, target_lang)
+        if cached_result is not None:
+            return cached_result, time.time() - start_time
+        try:
+            # Set the source language and encode under one lock so another
+            # request cannot change src_lang between the two steps.
+            with self._tokenizer_lock:
+                self.tokenizer.src_lang = source_lang
+                encoded = self.tokenizer(text, return_tensors="pt").to(self.device)
+            # Generate translation, forcing the target language as the first token
+            generated_tokens = self.model.generate(
                 **encoded,
+                forced_bos_token_id=self.tokenizer.get_lang_id(target_lang),
                 max_length=512,
                 num_beams=4,
                 early_stopping=True
             )
+            # Decode result
+            translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
+            # Cache the result
+            self.cache.set(text, source_lang, target_lang, translation)
+            processing_time = time.time() - start_time
+            logger.info(f"Translation completed in {processing_time:.2f} seconds")
+            return translation, processing_time
         except Exception as e:
             logger.error(f"Translation error: {e}")
+            return f"Translation error: {str(e)}", time.time() - start_time
+# Language mappings for M2M100 model.
+# Keys are the display names offered in the UI dropdowns; values are the
+# language codes handed to MultilingualTranslator.translate_text.
+# NOTE(review): "te" (Telugu) does not appear in the published M2M100 language
+# list - confirm the tokenizer accepts every code below before relying on it.
 LANGUAGE_MAP = {
+    "English": "en",
+    "Persian (Farsi)": "fa",
+    "Arabic": "ar",
+    "French": "fr",
+    "German": "de",
+    "Spanish": "es",
+    "Italian": "it",
+    "Portuguese": "pt",
+    "Russian": "ru",
+    "Chinese (Simplified)": "zh",
+    "Japanese": "ja",
+    "Korean": "ko",
+    "Hindi": "hi",
+    "Turkish": "tr",
+    "Dutch": "nl",
+    "Polish": "pl",
+    "Swedish": "sv",
+    "Norwegian": "no",
+    "Danish": "da",
+    "Finnish": "fi",
+    "Greek": "el",
+    "Hebrew": "he",
+    "Thai": "th",
+    "Vietnamese": "vi",
+    "Indonesian": "id",
+    "Malay": "ms",
+    "Czech": "cs",
+    "Slovak": "sk",
+    "Hungarian": "hu",
+    "Romanian": "ro",
+    "Bulgarian": "bg",
+    "Croatian": "hr",
+    "Serbian": "sr",
+    "Slovenian": "sl",
+    "Lithuanian": "lt",
+    "Latvian": "lv",
+    "Estonian": "et",
+    "Ukrainian": "uk",
+    "Belarusian": "be",
+    "Kazakh": "kk",
+    "Uzbek": "uz",
+    "Georgian": "ka",
+    "Armenian": "hy",
+    "Azerbaijani": "az",
+    "Bengali": "bn",
+    "Urdu": "ur",
+    "Tamil": "ta",
+    "Telugu": "te",
+    "Malayalam": "ml",
+    "Kannada": "kn",
+    "Gujarati": "gu",
+    "Punjabi": "pa",
+    "Marathi": "mr",
+    "Nepali": "ne",
+    "Sinhala": "si",
+    "Burmese": "my",
+    "Khmer": "km",
+    "Lao": "lo",
+    "Mongolian": "mn",
+    "Afrikaans": "af",
+    "Amharic": "am",
+    "Yoruba": "yo",
+    "Igbo": "ig",
+    "Hausa": "ha",
+    "Swahili": "sw",
+    "Xhosa": "xh",
+    "Zulu": "zu"
 }
+# Module-level state shared by the functions below.
+# `translator` holds the single MultilingualTranslator instance (None until one is created).
+translator = None
+# Cache lifetime in minutes that translate_interface compares the slider value against.
+cache_duration = 60  # Default 60 minutes
+def initialize_translator(cache_minutes):
+    """Create a MultilingualTranslator and store it in the module-level ``translator``.
+    Args:
+        cache_minutes: Cache lifetime in minutes, passed to the constructor.
+    """
     global translator
+    translator = MultilingualTranslator(cache_minutes)
+def translate_interface(text, source_lang, target_lang, cache_minutes_input):
+    """Validate a UI request, translate it, and format the result for display.
+    Args:
+        text: Text entered by the user.
+        source_lang: Display name of the source language (a LANGUAGE_MAP key).
+        target_lang: Display name of the target language (a LANGUAGE_MAP key).
+        cache_minutes_input: Cache lifetime in minutes chosen in the UI.
+    Returns:
+        ``(message_or_translation, seconds)`` where ``seconds`` is a string with
+        two decimals; validation failures and errors report ``"0.00"``.
+    """
+    global translator, cache_duration
+    # Validate before touching the model so bad input never triggers a model load.
+    if not text or not text.strip():
+        return "Please enter text to translate", "0.00"
+    if source_lang == target_lang:
+        return "Source and target languages cannot be the same", "0.00"
+    # Get language codes
+    source_code = LANGUAGE_MAP.get(source_lang)
+    target_code = LANGUAGE_MAP.get(target_lang)
+    if not source_code or not target_code:
+        return "Invalid language selection", "0.00"
+    # Initialize translator if needed
     if translator is None:
+        translator = MultilingualTranslator(cache_minutes_input)
+        cache_duration = cache_minutes_input
+    elif cache_minutes_input != cache_duration:
+        # A new cache lifetime only needs a fresh cache; rebuilding the whole
+        # translator would reload the model on every slider change.
+        translator.cache = TranslationCache(cache_minutes_input)
+        cache_duration = cache_minutes_input
+    try:
+        translation, processing_time = translator.translate_text(text, source_code, target_code)
+        return translation, f"{processing_time:.2f}"
+    except Exception as e:
+        return f"Error: {str(e)}", "0.00"
+# Programmatic entry point intended for the WordPress plugin
+def api_translate(text: str, source_lang: str, target_lang: str, api_key: str = None):
+    """Translate ``text`` and return a JSON-style dict describing the outcome.
+    ``source_lang`` and ``target_lang`` are display names looked up in
+    LANGUAGE_MAP. ``api_key`` is accepted but not validated here. Failures are
+    reported as ``{"error": ..., "status": "error"}`` instead of being raised.
+    """
+    if text.strip() == "":
+        return {"error": "No text provided", "status": "error"}
+    src_code = LANGUAGE_MAP.get(source_lang)
+    tgt_code = LANGUAGE_MAP.get(target_lang)
+    if not (src_code and tgt_code):
+        return {"error": "Invalid language codes", "status": "error"}
+    try:
+        # Create the shared translator on first use, with a 60-minute cache
+        if translator is None:
+            initialize_translator(60)
+        translated, elapsed = translator.translate_text(text, src_code, tgt_code)
+        payload = {
+            "translation": translated,
+            "source_language": source_lang,
+            "target_language": target_lang,
+            "processing_time": elapsed,
+            "character_count": len(text),
+            "status": "success"
+        }
+        return payload
+    except Exception as exc:
+        return {"error": str(exc), "status": "error"}
+# Build the shared translator at import time with a 60-minute cache, so the
+# model is loaded before the UI below is constructed.
+initialize_translator(60)
+# Create Gradio interface: a settings column (cache slider, device info) next to
+# a translation column (language dropdowns, input, button, output, timing).
+with gr.Blocks(title="Advanced Multilingual Translator", theme=gr.themes.Soft()) as demo:
+    # Page header and feature list
+    gr.Markdown(
+        """
+        # 🌍 Advanced Multilingual Translator
+        **Powered by Facebook's M2M100 Model**
+        Features:
+        - Support for 100+ languages
+        - Intelligent caching system
+        - GPU acceleration when available
+        - Concurrent user support
+        - API endpoints for WordPress integration
+        """
+    )
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### ⚙️ Settings")
+            # Cache lifetime in minutes; passed to translate_interface on every request
+            cache_minutes = gr.Slider(
+                minimum=5,
+                maximum=1440,
+                value=60,
+                step=5,
+                label="Cache Duration (minutes)",
+                info="How long to keep translations in cache"
+            )
+            # Read-only display of the device torch would select (cuda or cpu)
+            device_info = gr.Textbox(
+                value=f"Device: {torch.device('cuda' if torch.cuda.is_available() else 'cpu')}",
+                label="System Info",
+                interactive=False
+            )
+        with gr.Column(scale=2):
+            gr.Markdown("### 📝 Translation")
+            with gr.Row():
+                # Both dropdowns list the LANGUAGE_MAP display names
+                source_lang = gr.Dropdown(
+                    choices=list(LANGUAGE_MAP.keys()),
+                    value="English",
+                    label="Source Language"
+                )
+                target_lang = gr.Dropdown(
+                    choices=list(LANGUAGE_MAP.keys()),
+                    value="Persian (Farsi)",
+                    label="Target Language"
+                )
+            input_text = gr.Textbox(
+                lines=5,
+                placeholder="Enter text to translate...",
+                label="Input Text"
+            )
+            translate_btn = gr.Button("🔄 Translate", variant="primary")
+            output_text = gr.Textbox(
+                lines=5,
+                label="Translation",
+                interactive=False
+            )
+            processing_time = gr.Textbox(
+                label="Processing Time (seconds)",
+                interactive=False
+            )
+    # Event handlers: the button click and the textbox submit both call
+    # translate_interface with the same inputs and outputs
+    translate_btn.click(
+        fn=translate_interface,
+        inputs=[input_text, source_lang, target_lang, cache_minutes],
+        outputs=[output_text, processing_time]
+    )
+    # Auto-translate on Enter
+    input_text.submit(
+        fn=translate_interface,
+        inputs=[input_text, source_lang, target_lang, cache_minutes],
+        outputs=[output_text, processing_time]
+    )
+    # NOTE(review): nothing visible in this file registers a `/api/translate`
+    # route or wires api_translate to an event - confirm the endpoint
+    # documented below actually exists.
+    gr.Markdown(
+        """
+        ---
+        ### 🔌 API Endpoint
+        **For WordPress Plugin Integration:**
+        `POST /api/translate`
+        **Parameters:**
+        - `text`: Text to translate
+        - `source_lang`: Source language name
+        - `target_lang`: Target language name
+        - `api_key`: API key (optional)
+        **Response:**
+        ```json
+        {
+            "translation": "Translated text",
+            "source_language": "English",
+            "target_language": "Persian (Farsi)",
+            "processing_time": 1.23,
+            "character_count": 100,
+            "status": "success"
+        }
+        ```
+        """
+    )
+# Expose the Blocks app under the name `app` for API access
+app = demo
 if __name__ == "__main__":
+    # Enable request queuing through Blocks.queue(); the `enable_queue`
+    # keyword of launch() is not accepted by current Gradio releases.
+    demo.queue()
+    # Listen on all interfaces on port 7860, request a public share link,
+    # and allow up to 10 worker threads.
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=True,
+        max_threads=10
+    )