Spaces:

danicor
/

TR

Sleeping

App Files Files Community

danicor committed on Sep 16, 2025

Commit

d036146

verified ·

1 Parent(s): 0e92f6e

Update app.py

Browse files

Files changed (1) hide show

app.py +178 -27

app.py CHANGED Viewed

@@ -14,8 +14,15 @@ from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 import uvicorn
-# Set up logging
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 # Pydantic models for request/response
@@ -33,6 +40,9 @@ class TranslationResponse(BaseModel):
     character_count: int
     status: str
     chunks_processed: Optional[int] = None
 class TranslationCache:
     def __init__(self, cache_duration_minutes: int = 60):
@@ -52,11 +62,13 @@ class TranslationCache:
             if key in self.cache:
                 translation, timestamp = self.cache[key]
                 if datetime.now() - timestamp < self.cache_duration:
-                    logger.info(f"Cache hit for key: {key[:8]}...")
                     return translation
                 else:
                     # Remove expired entry
                     del self.cache[key]
             return None
     def set(self, text: str, source_lang: str, target_lang: str, translation: str):
@@ -64,7 +76,7 @@ class TranslationCache:
         with self.lock:
             key = self._generate_key(text, source_lang, target_lang)
             self.cache[key] = (translation, datetime.now())
-            logger.info(f"Cached translation for key: {key[:8]}...")
 class TranslationQueue:
     def __init__(self, max_workers: int = 3):
@@ -76,6 +88,7 @@ class TranslationQueue:
     def add_task(self, task_func, *args, **kwargs):
         """Add translation task to queue"""
         self.queue.put((task_func, args, kwargs))
     def process_queue(self):
         """Process tasks from queue"""
@@ -88,6 +101,7 @@ class TranslationQueue:
                 if not self.queue.empty():
                     task_func, args, kwargs = self.queue.get()
                     self.current_workers += 1
                     def worker():
                         try:
@@ -96,6 +110,7 @@ class TranslationQueue:
                         finally:
                             with self.lock:
                                 self.current_workers -= 1
                     thread = threading.Thread(target=worker)
                     thread.start()
@@ -106,7 +121,10 @@ class TextChunker:
     @staticmethod
     def split_text_smart(text: str, max_chunk_size: int = 400) -> List[str]:
         """تقسیم هوشمند متن بر اساس جملات و پاراگراف‌ها"""
         if len(text) <= max_chunk_size:
             return [text]
         chunks = []
@@ -115,22 +133,27 @@ class TextChunker:
         paragraphs = text.split('\n\n')
         current_chunk = ""
-        for paragraph in paragraphs:
             # اگر پاراگراف خودش بزرگ است، آن را تقسیم کن
             if len(paragraph) > max_chunk_size:
                 # ذخیره قسمت فعلی اگر وجود دارد
                 if current_chunk.strip():
                     chunks.append(current_chunk.strip())
                     current_chunk = ""
                 # تقسیم پاراگراف بزرگ
                 sub_chunks = TextChunker._split_paragraph(paragraph, max_chunk_size)
                 chunks.extend(sub_chunks)
             else:
                 # بررسی اینکه آیا اضافه کردن این پاراگراف از حد تجاوز می‌کند
                 if len(current_chunk) + len(paragraph) + 2 > max_chunk_size:
                     if current_chunk.strip():
                         chunks.append(current_chunk.strip())
                     current_chunk = paragraph
                 else:
                     if current_chunk:
@@ -141,12 +164,16 @@ class TextChunker:
         # اضافه کردن آخرین قسمت
         if current_chunk.strip():
             chunks.append(current_chunk.strip())
         return chunks
     @staticmethod
     def _split_paragraph(paragraph: str, max_chunk_size: int) -> List[str]:
         """تقسیم پاراگراف بزرگ به جملات"""
         # تقسیم بر اساس جملات
         sentences = re.split(r'[.!?]+\s+', paragraph)
         chunks = []
@@ -182,11 +209,14 @@ class TextChunker:
         if current_chunk.strip():
             chunks.append(current_chunk.strip())
         return chunks
     @staticmethod
     def _split_by_comma(sentence: str, max_chunk_size: int) -> List[str]:
         """تقسیم جمله طولانی بر اساس کاما"""
         parts = sentence.split(', ')
         chunks = []
         current_chunk = ""
@@ -224,7 +254,7 @@ class TextChunker:
 class MultilingualTranslator:
     def __init__(self, cache_duration_minutes: int = 60):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        logger.info(f"Using device: {self.device}")
         # Initialize cache and queue
         self.cache = TranslationCache(cache_duration_minutes)
@@ -232,31 +262,41 @@ class MultilingualTranslator:
         # Load model - using a powerful multilingual model
         self.model_name = "facebook/m2m100_1.2B"
-        logger.info(f"Loading model: {self.model_name}")
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
             self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
             self.model.to(self.device)
-            logger.info("Model loaded successfully!")
         except Exception as e:
-            logger.error(f"Error loading model: {e}")
             raise
         # تنظیمات بهینه برای ترجمه متن‌های بلند
         self.max_chunk_size = 350  # حداکثر طول هر قسمت
         self.min_chunk_overlap = 20  # همپوشانی بین قسمت‌ها
-    def translate_chunk(self, text: str, source_lang: str, target_lang: str) -> str:
         """ترجمه یک قسمت کوچک از متن"""
         try:
             # Set source language for tokenizer
             self.tokenizer.src_lang = source_lang
             # Encode input
             encoded = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(self.device)
             # Generate translation with optimized parameters
             generated_tokens = self.model.generate(
                 **encoded,
                 forced_bos_token_id=self.tokenizer.get_lang_id(target_lang),
@@ -272,6 +312,7 @@ class MultilingualTranslator:
                 pad_token_id=self.tokenizer.pad_token_id,
                 eos_token_id=self.tokenizer.eos_token_id
             )
             # Decode result
             translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
@@ -279,71 +320,141 @@ class MultilingualTranslator:
             # پاک‌سازی ترجمه از کاراکترهای اضافی
             translation = translation.strip()
             return translation
         except Exception as e:
-            logger.error(f"Chunk translation error: {e}")
             return f"[Translation Error: {str(e)}]"
-    def translate_text(self, text: str, source_lang: str, target_lang: str) -> Tuple[str, float, int]:
-        """ترجمه متن با پشتیبانی از متن‌های طولانی"""
         start_time = time.time()
         # بررسی کش برای کل متن
         cached_result = self.cache.get(text, source_lang, target_lang)
         if cached_result:
             return cached_result, time.time() - start_time, 1
         try:
             # اگر متن کوتاه است، مستقیماً ترجمه کن
             if len(text) <= self.max_chunk_size:
-                translation = self.translate_chunk(text, source_lang, target_lang)
                 # ذخیره در کش
                 self.cache.set(text, source_lang, target_lang, translation)
                 processing_time = time.time() - start_time
-                logger.info(f"Short text translation completed in {processing_time:.2f} seconds")
                 return translation, processing_time, 1
             # تقسیم متن طولانی به قسمت‌های کوچکتر
             chunks = TextChunker.split_text_smart(text, self.max_chunk_size)
-            logger.info(f"Split long text into {len(chunks)} chunks")
             # ترجمه هر قسمت
             translated_chunks = []
             for i, chunk in enumerate(chunks):
-                logger.info(f"Translating chunk {i+1}/{len(chunks)} (length: {len(chunk)})")
                 # بررسی کش برای هر قسمت
                 chunk_translation = self.cache.get(chunk, source_lang, target_lang)
                 if not chunk_translation:
-                    chunk_translation = self.translate_chunk(chunk, source_lang, target_lang)
                     # ذخیره قسمت در کش
                     self.cache.set(chunk, source_lang, target_lang, chunk_translation)
                 translated_chunks.append(chunk_translation)
                 # کمی استراحت بین ترجمه‌ها برای جلوگیری از بارگذاری زیاد
                 if i < len(chunks) - 1:
                     time.sleep(0.1)
             # ترکیب قسمت‌های ترجمه شده
             final_translation = self._combine_translations(translated_chunks, text)
             # ذخیره نتیجه نهایی در کش
             self.cache.set(text, source_lang, target_lang, final_translation)
             processing_time = time.time() - start_time
-            logger.info(f"Long text translation completed in {processing_time:.2f} seconds ({len(chunks)} chunks)")
             return final_translation, processing_time, len(chunks)
         except Exception as e:
-            logger.error(f"Translation error: {e}")
             return f"Translation error: {str(e)}", time.time() - start_time, 0
     def _combine_translations(self, translated_chunks: List[str], original_text: str) -> str:
         """ترکیب قسمت‌های ترجمه شده به یک متن یکپارچه"""
         if not translated_chunks:
@@ -352,6 +463,8 @@ class MultilingualTranslator:
         if len(translated_chunks) == 1:
             return translated_chunks[0]
         # ترکیب قسمت‌ها با در نظر گیری ساختار اصلی متن
         combined = []
@@ -383,6 +496,7 @@ class MultilingualTranslator:
         result = re.sub(r'\.+', '.', result)  # حذف نقطه‌های تکراری
         result = result.strip()
         return result
 # Language mappings for M2M100 model
@@ -460,7 +574,7 @@ LANGUAGE_MAP = {
 translator = MultilingualTranslator(60)
 # Create FastAPI app
-app = FastAPI(title="Multilingual Translation API", version="2.0.0")
 # Add CORS middleware
 app.add_middleware(
@@ -473,11 +587,21 @@ app.add_middleware(
 @app.get("/")
 async def root():
-    return {"message": "Multilingual Translation API v2.0", "status": "active", "features": ["long_text_support", "smart_chunking", "cache_optimization"]}
 @app.post("/api/translate")
 async def api_translate(request: TranslationRequest):
-    """API endpoint for translation with long text support"""
     if not request.text.strip():
         raise HTTPException(status_code=400, detail="No text provided")
@@ -488,7 +612,12 @@ async def api_translate(request: TranslationRequest):
         raise HTTPException(status_code=400, detail="Invalid language codes")
     try:
-        translation, processing_time, chunks_count = translator.translate_text(request.text, source_code, target_code)
         return TranslationResponse(
             translation=translation,
@@ -500,12 +629,13 @@ async def api_translate(request: TranslationRequest):
             chunks_processed=chunks_count
         )
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}")
 # Alternative endpoint for form data (compatibility with WordPress)
 @app.post("/api/translate/form")
 async def api_translate_form(request: Request):
-    """Alternative endpoint that accepts form data with long text support"""
     try:
         form_data = await request.form()
         text = form_data.get("text", "")
@@ -523,6 +653,8 @@ async def api_translate_form(request: Request):
         except:
             raise HTTPException(status_code=400, detail="Invalid request format")
     if not text.strip():
         raise HTTPException(status_code=400, detail="No text provided")
@@ -533,7 +665,12 @@ async def api_translate_form(request: Request):
         raise HTTPException(status_code=400, detail="Invalid language codes")
     try:
-        translation, processing_time, chunks_count = translator.translate_text(text, source_code, target_code)
         return {
             "translation": translation,
@@ -545,8 +682,21 @@ async def api_translate_form(request: Request):
             "chunks_processed": chunks_count
         }
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}")
 @app.get("/api/languages")
 async def get_languages():
     """Get supported languages"""
@@ -565,7 +715,8 @@ async def health_check():
         "model": translator.model_name,
         "cache_size": len(translator.cache.cache),
         "max_chunk_size": translator.max_chunk_size,
-        "version": "2.0.0"
     }
 if __name__ == "__main__":

 from pydantic import BaseModel
 import uvicorn
+# Enhanced logging configuration
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.StreamHandler(),
+        logging.FileHandler('translation.log')
+    ]
+)
 logger = logging.getLogger(__name__)
 # Pydantic models for request/response
     character_count: int
     status: str
     chunks_processed: Optional[int] = None
+    estimated_time_remaining: Optional[float] = None
+    current_chunk: Optional[int] = None
+    total_chunks: Optional[int] = None
 class TranslationCache:
     def __init__(self, cache_duration_minutes: int = 60):
             if key in self.cache:
                 translation, timestamp = self.cache[key]
                 if datetime.now() - timestamp < self.cache_duration:
+                    logger.info(f"[CACHE HIT] Retrieved cached translation for key: {key[:8]}... | Length: {len(translation)} chars")
                     return translation
                 else:
                     # Remove expired entry
                     del self.cache[key]
+                    logger.info(f"[CACHE EXPIRED] Removed expired cache entry for key: {key[:8]}...")
+            logger.info(f"[CACHE MISS] No cached translation found for key: {key[:8]}...")
             return None
     def set(self, text: str, source_lang: str, target_lang: str, translation: str):
         with self.lock:
             key = self._generate_key(text, source_lang, target_lang)
             self.cache[key] = (translation, datetime.now())
+            logger.info(f"[CACHE STORE] Cached translation for key: {key[:8]}... | Translation length: {len(translation)} chars")
 class TranslationQueue:
     def __init__(self, max_workers: int = 3):
     def add_task(self, task_func, *args, **kwargs):
         """Add translation task to queue"""
         self.queue.put((task_func, args, kwargs))
+        logger.info(f"[QUEUE] Added task to queue | Queue size: {self.queue.qsize()}")
     def process_queue(self):
         """Process tasks from queue"""
                 if not self.queue.empty():
                     task_func, args, kwargs = self.queue.get()
                     self.current_workers += 1
+                    logger.info(f"[QUEUE] Starting worker | Current workers: {self.current_workers}")
                     def worker():
                         try:
                         finally:
                             with self.lock:
                                 self.current_workers -= 1
+                                logger.info(f"[QUEUE] Worker finished | Current workers: {self.current_workers}")
                     thread = threading.Thread(target=worker)
                     thread.start()
     @staticmethod
     def split_text_smart(text: str, max_chunk_size: int = 400) -> List[str]:
         """تقسیم هوشمند متن بر اساس جملات و پاراگراف‌ها"""
+        logger.info(f"[CHUNKER] Starting smart text splitting | Text length: {len(text)} chars | Max chunk size: {max_chunk_size}")
         if len(text) <= max_chunk_size:
+            logger.info(f"[CHUNKER] Text is small, no chunking needed | Length: {len(text)}")
             return [text]
         chunks = []
         paragraphs = text.split('\n\n')
         current_chunk = ""
+        for i, paragraph in enumerate(paragraphs):
+            logger.debug(f"[CHUNKER] Processing paragraph {i+1}/{len(paragraphs)} | Length: {len(paragraph)}")
             # اگر پاراگراف خودش بزرگ است، آن را تقسیم کن
             if len(paragraph) > max_chunk_size:
                 # ذخیره قسمت فعلی اگر وجود دارد
                 if current_chunk.strip():
                     chunks.append(current_chunk.strip())
+                    logger.debug(f"[CHUNKER] Added chunk from accumulated paragraphs | Length: {len(current_chunk.strip())}")
                     current_chunk = ""
                 # تقسیم پاراگراف بزرگ
                 sub_chunks = TextChunker._split_paragraph(paragraph, max_chunk_size)
                 chunks.extend(sub_chunks)
+                logger.debug(f"[CHUNKER] Split large paragraph into {len(sub_chunks)} sub-chunks")
             else:
                 # بررسی اینکه آیا اضافه کردن این پاراگراف از حد تجاوز می‌کند
                 if len(current_chunk) + len(paragraph) + 2 > max_chunk_size:
                     if current_chunk.strip():
                         chunks.append(current_chunk.strip())
+                        logger.debug(f"[CHUNKER] Added chunk | Length: {len(current_chunk.strip())}")
                     current_chunk = paragraph
                 else:
                     if current_chunk:
         # اضافه کردن آخرین قسمت
         if current_chunk.strip():
             chunks.append(current_chunk.strip())
+            logger.debug(f"[CHUNKER] Added final chunk | Length: {len(current_chunk.strip())}")
+        logger.info(f"[CHUNKER] Text splitting completed | Total chunks: {len(chunks)} | Average chunk size: {sum(len(c) for c in chunks) / len(chunks):.1f} chars")
         return chunks
     @staticmethod
     def _split_paragraph(paragraph: str, max_chunk_size: int) -> List[str]:
         """تقسیم پاراگراف بزرگ به جملات"""
+        logger.debug(f"[CHUNKER] Splitting large paragraph | Length: {len(paragraph)}")
         # تقسیم بر اساس جملات
         sentences = re.split(r'[.!?]+\s+', paragraph)
         chunks = []
         if current_chunk.strip():
             chunks.append(current_chunk.strip())
+        logger.debug(f"[CHUNKER] Paragraph split into {len(chunks)} sentence chunks")
         return chunks
     @staticmethod
     def _split_by_comma(sentence: str, max_chunk_size: int) -> List[str]:
         """تقسیم جمله طولانی بر اساس کاما"""
+        logger.debug(f"[CHUNKER] Splitting long sentence by comma | Length: {len(sentence)}")
         parts = sentence.split(', ')
         chunks = []
         current_chunk = ""
 class MultilingualTranslator:
     def __init__(self, cache_duration_minutes: int = 60):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        logger.info(f"[INIT] Using device: {self.device}")
         # Initialize cache and queue
         self.cache = TranslationCache(cache_duration_minutes)
         # Load model - using a powerful multilingual model
         self.model_name = "facebook/m2m100_1.2B"
+        logger.info(f"[INIT] Loading model: {self.model_name}")
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
             self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
             self.model.to(self.device)
+            logger.info(f"[INIT] Model loaded successfully on {self.device}!")
         except Exception as e:
+            logger.error(f"[INIT] Error loading model: {e}")
             raise
         # تنظیمات بهینه برای ترجمه متن‌های بلند
         self.max_chunk_size = 350  # حداکثر طول هر قسمت
         self.min_chunk_overlap = 20  # همپوشانی بین قسمت‌ها
+        # Track translation progress
+        self.current_translation = {}
+        self.translation_lock = threading.Lock()
+        logger.info(f"[INIT] Translator initialized | Max chunk size: {self.max_chunk_size} chars")
+    def translate_chunk(self, text: str, source_lang: str, target_lang: str, chunk_index: int = 0, total_chunks: int = 1) -> str:
         """ترجمه یک قسمت کوچک از متن"""
         try:
+            logger.info(f"[TRANSLATE] Starting chunk translation [{chunk_index+1}/{total_chunks}] | {source_lang} → {target_lang} | Length: {len(text)} chars")
             # Set source language for tokenizer
             self.tokenizer.src_lang = source_lang
             # Encode input
             encoded = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(self.device)
+            logger.debug(f"[TRANSLATE] Text encoded | Input tokens: {encoded.input_ids.shape[1]}")
             # Generate translation with optimized parameters
+            start_time = time.time()
             generated_tokens = self.model.generate(
                 **encoded,
                 forced_bos_token_id=self.tokenizer.get_lang_id(target_lang),
                 pad_token_id=self.tokenizer.pad_token_id,
                 eos_token_id=self.tokenizer.eos_token_id
             )
+            generation_time = time.time() - start_time
             # Decode result
             translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
             # پاک‌سازی ترجمه از کاراکترهای اضافی
             translation = translation.strip()
+            logger.info(f"[TRANSLATE] Chunk translation completed [{chunk_index+1}/{total_chunks}] | Generation time: {generation_time:.2f}s | Output length: {len(translation)} chars")
             return translation
         except Exception as e:
+            logger.error(f"[TRANSLATE] Chunk translation error [{chunk_index+1}/{total_chunks}]: {e}")
             return f"[Translation Error: {str(e)}]"
+    def translate_text(self, text: str, source_lang: str, target_lang: str, session_id: str = None) -> Tuple[str, float, int]:
+        """ترجمه متن با پشتیبانی از متن‌های طولانی و لاگ‌های مفصل"""
         start_time = time.time()
+        if not session_id:
+            session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]
+        logger.info(f"[SESSION:{session_id}] Starting translation | {source_lang} → {target_lang} | Text length: {len(text)} chars")
         # بررسی کش برای کل متن
         cached_result = self.cache.get(text, source_lang, target_lang)
         if cached_result:
+            logger.info(f"[SESSION:{session_id}] Translation completed from cache | Time: {time.time() - start_time:.2f}s")
             return cached_result, time.time() - start_time, 1
         try:
             # اگر متن کوتاه است، مستقیماً ترجمه کن
             if len(text) <= self.max_chunk_size:
+                logger.info(f"[SESSION:{session_id}] Processing as short text")
+                translation = self.translate_chunk(text, source_lang, target_lang, 0, 1)
                 # ذخیره در کش
                 self.cache.set(text, source_lang, target_lang, translation)
                 processing_time = time.time() - start_time
+                logger.info(f"[SESSION:{session_id}] Short text translation completed | Total time: {processing_time:.2f}s")
                 return translation, processing_time, 1
             # تقسیم متن طولانی به قسمت‌های کوچکتر
+            logger.info(f"[SESSION:{session_id}] Processing as long text - starting chunking")
             chunks = TextChunker.split_text_smart(text, self.max_chunk_size)
+            logger.info(f"[SESSION:{session_id}] Text split into {len(chunks)} chunks")
+            # Initialize progress tracking
+            with self.translation_lock:
+                self.current_translation[session_id] = {
+                    'total_chunks': len(chunks),
+                    'completed_chunks': 0,
+                    'start_time': start_time,
+                    'source_lang': source_lang,
+                    'target_lang': target_lang
+                }
             # ترجمه هر قسمت
             translated_chunks = []
             for i, chunk in enumerate(chunks):
+                chunk_start_time = time.time()
+                logger.info(f"[SESSION:{session_id}] Starting chunk {i+1}/{len(chunks)} | Chunk length: {len(chunk)} chars")
                 # بررسی کش برای هر قسمت
                 chunk_translation = self.cache.get(chunk, source_lang, target_lang)
                 if not chunk_translation:
+                    # Estimate remaining time
+                    if i > 0:
+                        elapsed_time = time.time() - start_time
+                        avg_time_per_chunk = elapsed_time / i
+                        estimated_remaining = avg_time_per_chunk * (len(chunks) - i)
+                        logger.info(f"[SESSION:{session_id}] Progress: {i}/{len(chunks)} | Avg time per chunk: {avg_time_per_chunk:.1f}s | Estimated remaining: {estimated_remaining:.1f}s")
+                    chunk_translation = self.translate_chunk(chunk, source_lang, target_lang, i, len(chunks))
                     # ذخیره قسمت در کش
                     self.cache.set(chunk, source_lang, target_lang, chunk_translation)
+                    chunk_time = time.time() - chunk_start_time
+                    logger.info(f"[SESSION:{session_id}] Chunk {i+1}/{len(chunks)} translated in {chunk_time:.2f}s")
+                else:
+                    logger.info(f"[SESSION:{session_id}] Chunk {i+1}/{len(chunks)} retrieved from cache")
                 translated_chunks.append(chunk_translation)
+                # Update progress
+                with self.translation_lock:
+                    if session_id in self.current_translation:
+                        self.current_translation[session_id]['completed_chunks'] = i + 1
                 # کمی استراحت بین ترجمه‌ها برای جلوگیری از بارگذاری زیاد
                 if i < len(chunks) - 1:
                     time.sleep(0.1)
             # ترکیب قسمت‌های ترجمه شده
+            logger.info(f"[SESSION:{session_id}] Combining translated chunks")
             final_translation = self._combine_translations(translated_chunks, text)
             # ذخیره نتیجه نهایی در کش
             self.cache.set(text, source_lang, target_lang, final_translation)
             processing_time = time.time() - start_time
+            logger.info(f"[SESSION:{session_id}] Long text translation completed | Total time: {processing_time:.2f}s | Chunks: {len(chunks)} | Final length: {len(final_translation)} chars")
+            # Clean up progress tracking
+            with self.translation_lock:
+                self.current_translation.pop(session_id, None)
             return final_translation, processing_time, len(chunks)
         except Exception as e:
+            logger.error(f"[SESSION:{session_id}] Translation error: {e}")
+            # Clean up progress tracking
+            with self.translation_lock:
+                self.current_translation.pop(session_id, None)
             return f"Translation error: {str(e)}", time.time() - start_time, 0
+    def get_translation_progress(self, session_id: str) -> Dict:
+        """Get current translation progress"""
+        with self.translation_lock:
+            if session_id not in self.current_translation:
+                return None
+            progress = self.current_translation[session_id].copy()
+            elapsed_time = time.time() - progress['start_time']
+            if progress['completed_chunks'] > 0:
+                avg_time_per_chunk = elapsed_time / progress['completed_chunks']
+                remaining_chunks = progress['total_chunks'] - progress['completed_chunks']
+                estimated_remaining = avg_time_per_chunk * remaining_chunks
+            else:
+                estimated_remaining = None
+            return {
+                'total_chunks': progress['total_chunks'],
+                'completed_chunks': progress['completed_chunks'],
+                'elapsed_time': elapsed_time,
+                'estimated_remaining': estimated_remaining,
+                'progress_percentage': (progress['completed_chunks'] / progress['total_chunks']) * 100
+            }
     def _combine_translations(self, translated_chunks: List[str], original_text: str) -> str:
         """ترکیب قسمت‌های ترجمه شده به یک متن یکپارچه"""
         if not translated_chunks:
         if len(translated_chunks) == 1:
             return translated_chunks[0]
+        logger.debug(f"[COMBINER] Combining {len(translated_chunks)} translated chunks")
         # ترکیب قسمت‌ها با در نظر گیری ساختار اصلی متن
         combined = []
         result = re.sub(r'\.+', '.', result)  # حذف نقطه‌های تکراری
         result = result.strip()
+        logger.debug(f"[COMBINER] Combined translation length: {len(result)} chars")
         return result
 # Language mappings for M2M100 model
 translator = MultilingualTranslator(60)
 # Create FastAPI app
+app = FastAPI(title="Enhanced Multilingual Translation API", version="2.1.0")
 # Add CORS middleware
 app.add_middleware(
 @app.get("/")
 async def root():
+    return {
+        "message": "Enhanced Multilingual Translation API v2.1",
+        "status": "active",
+        "features": [
+            "enhanced_logging",
+            "progress_tracking",
+            "long_text_support",
+            "smart_chunking",
+            "cache_optimization"
+        ]
+    }
 @app.post("/api/translate")
 async def api_translate(request: TranslationRequest):
+    """API endpoint for translation with enhanced logging and progress tracking"""
     if not request.text.strip():
         raise HTTPException(status_code=400, detail="No text provided")
         raise HTTPException(status_code=400, detail="Invalid language codes")
     try:
+        # Generate session ID for tracking
+        session_id = hashlib.md5(f"{request.text[:100]}{time.time()}".encode()).hexdigest()[:8]
+        translation, processing_time, chunks_count = translator.translate_text(
+            request.text, source_code, target_code, session_id
+        )
         return TranslationResponse(
             translation=translation,
             chunks_processed=chunks_count
         )
     except Exception as e:
+        logger.error(f"[API] Translation error: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}")
 # Alternative endpoint for form data (compatibility with WordPress)
 @app.post("/api/translate/form")
 async def api_translate_form(request: Request):
+    """Alternative endpoint that accepts form data with enhanced logging"""
     try:
         form_data = await request.form()
         text = form_data.get("text", "")
         except:
             raise HTTPException(status_code=400, detail="Invalid request format")
+    logger.info(f"[FORM API] Translation request | {source_lang} → {target_lang} | Length: {len(text)} chars")
     if not text.strip():
         raise HTTPException(status_code=400, detail="No text provided")
         raise HTTPException(status_code=400, detail="Invalid language codes")
     try:
+        # Generate session ID for tracking
+        session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]
+        translation, processing_time, chunks_count = translator.translate_text(
+            text, source_code, target_code, session_id
+        )
         return {
             "translation": translation,
             "chunks_processed": chunks_count
         }
     except Exception as e:
+        logger.error(f"[FORM API] Translation error: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}")
+@app.get("/api/progress/{session_id}")
+async def get_translation_progress(session_id: str):
+    """Get translation progress for a session"""
+    progress = translator.get_translation_progress(session_id)
+    if progress is None:
+        raise HTTPException(status_code=404, detail="Session not found or completed")
+    return {
+        "status": "success",
+        "progress": progress
+    }
 @app.get("/api/languages")
 async def get_languages():
     """Get supported languages"""
         "model": translator.model_name,
         "cache_size": len(translator.cache.cache),
         "max_chunk_size": translator.max_chunk_size,
+        "active_translations": len(translator.current_translation),
+        "version": "2.1.0"
     }
 if __name__ == "__main__":