danicor committed on
Commit
4f0df26
·
verified ·
1 Parent(s): 0fc2c25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -586
app.py CHANGED
@@ -42,10 +42,6 @@ class TranslationResponse(BaseModel):
42
  processing_time: float
43
  character_count: int
44
  status: str
45
- chunks_processed: Optional[int] = None
46
- estimated_time_remaining: Optional[float] = None
47
- current_chunk: Optional[int] = None
48
- total_chunks: Optional[int] = None
49
 
50
  class TranslationCache:
51
  def __init__(self, cache_duration_minutes: int = 60):
@@ -81,191 +77,16 @@ class TranslationCache:
81
  self.cache[key] = (translation, datetime.now())
82
  logger.info(f"[CACHE STORE] Cached translation for key: {key[:8]}... | Translation length: {len(translation)} chars")
83
 
84
- class TranslationQueue:
85
- def __init__(self, max_workers: int = 3):
86
- self.queue = Queue()
87
- self.max_workers = max_workers
88
- self.current_workers = 0
89
- self.lock = threading.Lock()
90
-
91
- def add_task(self, task_func, *args, **kwargs):
92
- """Add translation task to queue"""
93
- self.queue.put((task_func, args, kwargs))
94
- logger.info(f"[QUEUE] Added task to queue | Queue size: {self.queue.qsize()}")
95
-
96
- def process_queue(self):
97
- """Process tasks from queue"""
98
- while not self.queue.empty():
99
- with self.lock:
100
- if self.current_workers >= self.max_workers:
101
- time.sleep(0.1)
102
- continue
103
-
104
- if not self.queue.empty():
105
- task_func, args, kwargs = self.queue.get()
106
- self.current_workers += 1
107
- logger.info(f"[QUEUE] Starting worker | Current workers: {self.current_workers}")
108
-
109
- def worker():
110
- try:
111
- result = task_func(*args, **kwargs)
112
- return result
113
- finally:
114
- with self.lock:
115
- self.current_workers -= 1
116
- logger.info(f"[QUEUE] Worker finished | Current workers: {self.current_workers}")
117
-
118
- thread = threading.Thread(target=worker)
119
- thread.start()
120
-
121
- class TextChunker:
122
- """کلاس برای تقسیم متن طولانی به بخش‌های کوچکتر"""
123
-
124
- @staticmethod
125
- def split_text_smart(text: str, max_chunk_size: int = 400) -> List[str]:
126
- """تقسیم هوشمند متن بر اساس جملات و پاراگراف‌ها"""
127
- logger.info(f"[CHUNKER] Starting smart text splitting | Text length: {len(text)} chars | Max chunk size: {max_chunk_size}")
128
-
129
- if len(text) <= max_chunk_size:
130
- logger.info(f"[CHUNKER] Text is small, no chunking needed | Length: {len(text)}")
131
- return [text]
132
-
133
- chunks = []
134
-
135
- # تقسیم بر اساس پاراگراف‌ها
136
- paragraphs = text.split('\n\n')
137
- current_chunk = ""
138
-
139
- for i, paragraph in enumerate(paragraphs):
140
- logger.debug(f"[CHUNKER] Processing paragraph {i+1}/{len(paragraphs)} | Length: {len(paragraph)}")
141
-
142
- # اگر پاراگراف خودش بزرگ است، آن را تقسیم کن
143
- if len(paragraph) > max_chunk_size:
144
- # ذخیره قسمت فعلی اگر وجود دارد
145
- if current_chunk.strip():
146
- chunks.append(current_chunk.strip())
147
- logger.debug(f"[CHUNKER] Added chunk from accumulated paragraphs | Length: {len(current_chunk.strip())}")
148
- current_chunk = ""
149
-
150
- # تقسیم پاراگراف بزرگ
151
- sub_chunks = TextChunker._split_paragraph(paragraph, max_chunk_size)
152
- chunks.extend(sub_chunks)
153
- logger.debug(f"[CHUNKER] Split large paragraph into {len(sub_chunks)} sub-chunks")
154
- else:
155
- # بررسی اینکه آیا اضافه کردن این پاراگراف از حد تجاوز می‌کند
156
- if len(current_chunk) + len(paragraph) + 2 > max_chunk_size:
157
- if current_chunk.strip():
158
- chunks.append(current_chunk.strip())
159
- logger.debug(f"[CHUNKER] Added chunk | Length: {len(current_chunk.strip())}")
160
- current_chunk = paragraph
161
- else:
162
- if current_chunk:
163
- current_chunk += "\n\n" + paragraph
164
- else:
165
- current_chunk = paragraph
166
-
167
- # اضافه کردن آخرین قسمت
168
- if current_chunk.strip():
169
- chunks.append(current_chunk.strip())
170
- logger.debug(f"[CHUNKER] Added final chunk | Length: {len(current_chunk.strip())}")
171
-
172
- logger.info(f"[CHUNKER] Text splitting completed | Total chunks: {len(chunks)} | Average chunk size: {sum(len(c) for c in chunks) / len(chunks):.1f} chars")
173
- return chunks
174
-
175
- @staticmethod
176
- def _split_paragraph(paragraph: str, max_chunk_size: int) -> List[str]:
177
- """تقسیم پاراگراف بزرگ به جملات"""
178
- logger.debug(f"[CHUNKER] Splitting large paragraph | Length: {len(paragraph)}")
179
-
180
- # تقسیم بر اساس جملات
181
- sentences = re.split(r'[.!?]+\s+', paragraph)
182
- chunks = []
183
- current_chunk = ""
184
-
185
- for sentence in sentences:
186
- if not sentence.strip():
187
- continue
188
-
189
- # اضافه کردن علامت نقطه اگر حذف شده
190
- if not sentence.endswith(('.', '!', '?')):
191
- sentence += '.'
192
-
193
- if len(sentence) > max_chunk_size:
194
- # جمله خودش خیلی بلند است - تقسیم بر اساس کاما
195
- if current_chunk.strip():
196
- chunks.append(current_chunk.strip())
197
- current_chunk = ""
198
-
199
- sub_chunks = TextChunker._split_by_comma(sentence, max_chunk_size)
200
- chunks.extend(sub_chunks)
201
- else:
202
- if len(current_chunk) + len(sentence) + 1 > max_chunk_size:
203
- if current_chunk.strip():
204
- chunks.append(current_chunk.strip())
205
- current_chunk = sentence
206
- else:
207
- if current_chunk:
208
- current_chunk += " " + sentence
209
- else:
210
- current_chunk = sentence
211
-
212
- if current_chunk.strip():
213
- chunks.append(current_chunk.strip())
214
-
215
- logger.debug(f"[CHUNKER] Paragraph split into {len(chunks)} sentence chunks")
216
- return chunks
217
-
218
- @staticmethod
219
- def _split_by_comma(sentence: str, max_chunk_size: int) -> List[str]:
220
- """تقسیم جمله طولانی بر اساس کاما"""
221
- logger.debug(f"[CHUNKER] Splitting long sentence by comma | Length: {len(sentence)}")
222
-
223
- parts = sentence.split(', ')
224
- chunks = []
225
- current_chunk = ""
226
-
227
- for part in parts:
228
- if len(part) > max_chunk_size:
229
- # قسمت خودش خیلی بلند است - تقسیم اجباری
230
- if current_chunk.strip():
231
- chunks.append(current_chunk.strip())
232
- current_chunk = ""
233
-
234
- # تقسیم اجباری بر اساس طول
235
- while len(part) > max_chunk_size:
236
- chunks.append(part[:max_chunk_size].strip())
237
- part = part[max_chunk_size:].strip()
238
-
239
- if part:
240
- current_chunk = part
241
- else:
242
- if len(current_chunk) + len(part) + 2 > max_chunk_size:
243
- if current_chunk.strip():
244
- chunks.append(current_chunk.strip())
245
- current_chunk = part
246
- else:
247
- if current_chunk:
248
- current_chunk += ", " + part
249
- else:
250
- current_chunk = part
251
-
252
- if current_chunk.strip():
253
- chunks.append(current_chunk.strip())
254
-
255
- return chunks
256
-
257
  class MultilingualTranslator:
258
  def __init__(self, cache_duration_minutes: int = 60):
259
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
260
  logger.info(f"[INIT] Using device: {self.device}")
261
 
262
- # Initialize cache and queue
263
  self.cache = TranslationCache(cache_duration_minutes)
264
- self.queue = TranslationQueue()
265
 
266
  # Add thread pool for parallel processing
267
  self.executor = ThreadPoolExecutor(max_workers=3)
268
- self.background_tasks = {}
269
 
270
  logger.info(f"[INIT] Thread pool initialized with 3 workers")
271
 
@@ -282,231 +103,65 @@ class MultilingualTranslator:
282
  logger.error(f"[INIT] Error loading model: {e}")
283
  raise
284
 
285
- # تنظیمات بهینه برای ترجمه متن‌های بلند
286
- self.max_chunk_size = 350 # حداکثر طول هر قسمت
287
- self.min_chunk_overlap = 20 # همپوشانی بین قسمت‌ها
 
 
288
 
289
- # Track translation progress
290
- self.current_translation = {}
291
- self.translation_lock = threading.Lock()
 
 
 
 
 
 
 
292
 
293
- logger.info(f"[INIT] Translator initialized | Max chunk size: {self.max_chunk_size} chars")
294
-
295
- def translate_chunk(self, text: str, source_lang: str, target_lang: str, chunk_index: int = 0, total_chunks: int = 1) -> str:
296
- """ترجمه یک قسمت کوچک از متن"""
297
  try:
298
- logger.info(f"[TRANSLATE] Starting chunk translation [{chunk_index+1}/{total_chunks}] | {source_lang} → {target_lang} | Length: {len(text)} chars")
299
-
300
  # Set source language for tokenizer
301
  self.tokenizer.src_lang = source_lang
302
 
303
  # Encode input
304
  encoded = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(self.device)
305
- logger.debug(f"[TRANSLATE] Text encoded | Input tokens: {encoded.input_ids.shape[1]}")
306
 
307
  # Generate translation with optimized parameters
308
- start_time = time.time()
309
  generated_tokens = self.model.generate(
310
  **encoded,
311
  forced_bos_token_id=self.tokenizer.get_lang_id(target_lang),
312
- max_length=1024, # افزایش طول خروجی
313
- min_length=10, # حداقل طول خروجی
314
- num_beams=5, # افزایش تعداد beam ها برای کیفیت بهتر
315
  early_stopping=True,
316
- no_repeat_ngram_size=3, # جلوگیری از تکرار
317
- length_penalty=1.0, # تنظیم جریمه طول
318
- repetition_penalty=1.2, # جلوگیری از تکرار کلمات
319
- do_sample=False, # استفاده از روش قطعی
320
- temperature=0.7, # کنترل تنوع
321
  pad_token_id=self.tokenizer.pad_token_id,
322
  eos_token_id=self.tokenizer.eos_token_id
323
  )
324
- generation_time = time.time() - start_time
325
 
326
  # Decode result
327
  translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
328
 
329
- # پاک‌سازی ترجمه از کاراکترهای اضافی
330
  translation = translation.strip()
331
 
332
- logger.info(f"[TRANSLATE] Chunk translation completed [{chunk_index+1}/{total_chunks}] | Generation time: {generation_time:.2f}s | Output length: {len(translation)} chars")
333
-
334
- return translation
335
-
336
- except Exception as e:
337
- logger.error(f"[TRANSLATE] Chunk translation error [{chunk_index+1}/{total_chunks}]: {e}")
338
- return f"[Translation Error: {str(e)}]"
339
-
340
- def translate_text(self, text: str, source_lang: str, target_lang: str, session_id: str = None) -> Tuple[str, float, int]:
341
- """ترجمه متن با پشتیبانی از متن‌های طولانی و لاگ‌های مفصل"""
342
- start_time = time.time()
343
-
344
- if not session_id:
345
- session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]
346
-
347
- logger.info(f"[SESSION:{session_id}] Starting translation | {source_lang} → {target_lang} | Text length: {len(text)} chars")
348
-
349
- # بررسی کش برای کل متن
350
- cached_result = self.cache.get(text, source_lang, target_lang)
351
- if cached_result:
352
- logger.info(f"[SESSION:{session_id}] Translation completed from cache | Time: {time.time() - start_time:.2f}s")
353
- return cached_result, time.time() - start_time, 1
354
-
355
- try:
356
- # اگر متن کوتاه است، مستقیماً ترجمه کن
357
- if len(text) <= self.max_chunk_size:
358
- logger.info(f"[SESSION:{session_id}] Processing as short text")
359
- translation = self.translate_chunk(text, source_lang, target_lang, 0, 1)
360
-
361
- # ذخیره در کش
362
- self.cache.set(text, source_lang, target_lang, translation)
363
- processing_time = time.time() - start_time
364
- logger.info(f"[SESSION:{session_id}] Short text translation completed | Total time: {processing_time:.2f}s")
365
-
366
- return translation, processing_time, 1
367
-
368
- # تقسیم متن طولانی به قسمت‌های کوچکتر
369
- logger.info(f"[SESSION:{session_id}] Processing as long text - starting chunking")
370
- chunks = TextChunker.split_text_smart(text, self.max_chunk_size)
371
- logger.info(f"[SESSION:{session_id}] Text split into {len(chunks)} chunks")
372
-
373
- # Initialize progress tracking
374
- with self.translation_lock:
375
- self.current_translation[session_id] = {
376
- 'total_chunks': len(chunks),
377
- 'completed_chunks': 0,
378
- 'start_time': start_time,
379
- 'source_lang': source_lang,
380
- 'target_lang': target_lang
381
- }
382
-
383
- # ترجمه هر قسمت
384
- translated_chunks = []
385
- for i, chunk in enumerate(chunks):
386
- chunk_start_time = time.time()
387
- logger.info(f"[SESSION:{session_id}] Starting chunk {i+1}/{len(chunks)} | Chunk length: {len(chunk)} chars")
388
-
389
- # بررسی کش برای هر قسمت
390
- chunk_translation = self.cache.get(chunk, source_lang, target_lang)
391
-
392
- if not chunk_translation:
393
- # Estimate remaining time
394
- if i > 0:
395
- elapsed_time = time.time() - start_time
396
- avg_time_per_chunk = elapsed_time / i
397
- estimated_remaining = avg_time_per_chunk * (len(chunks) - i)
398
- logger.info(f"[SESSION:{session_id}] Progress: {i}/{len(chunks)} | Avg time per chunk: {avg_time_per_chunk:.1f}s | Estimated remaining: {estimated_remaining:.1f}s")
399
-
400
- chunk_translation = self.translate_chunk(chunk, source_lang, target_lang, i, len(chunks))
401
- # ذخیره قسمت در کش
402
- self.cache.set(chunk, source_lang, target_lang, chunk_translation)
403
-
404
- chunk_time = time.time() - chunk_start_time
405
- logger.info(f"[SESSION:{session_id}] Chunk {i+1}/{len(chunks)} translated in {chunk_time:.2f}s")
406
- else:
407
- logger.info(f"[SESSION:{session_id}] Chunk {i+1}/{len(chunks)} retrieved from cache")
408
-
409
- translated_chunks.append(chunk_translation)
410
-
411
- # Update progress
412
- with self.translation_lock:
413
- if session_id in self.current_translation:
414
- self.current_translation[session_id]['completed_chunks'] = i + 1
415
-
416
- # کمی استراحت بین ترجمه‌ها برای جلوگیری از بارگذاری زیاد
417
- if i < len(chunks) - 1:
418
- time.sleep(0.1)
419
-
420
- # ترکیب قسمت‌های ترجمه شده
421
- logger.info(f"[SESSION:{session_id}] Combining translated chunks")
422
- final_translation = self._combine_translations(translated_chunks, text)
423
-
424
- # ذخیره نتیجه نهایی در کش
425
- self.cache.set(text, source_lang, target_lang, final_translation)
426
 
427
  processing_time = time.time() - start_time
428
- logger.info(f"[SESSION:{session_id}] Long text translation completed | Total time: {processing_time:.2f}s | Chunks: {len(chunks)} | Final length: {len(final_translation)} chars")
429
-
430
- # Clean up progress tracking
431
- with self.translation_lock:
432
- self.current_translation.pop(session_id, None)
433
 
434
- return final_translation, processing_time, len(chunks)
435
 
436
  except Exception as e:
437
  logger.error(f"[SESSION:{session_id}] Translation error: {e}")
438
- # Clean up progress tracking
439
- with self.translation_lock:
440
- self.current_translation.pop(session_id, None)
441
- return f"Translation error: {str(e)}", time.time() - start_time, 0
442
-
443
- def get_translation_progress(self, session_id: str) -> Dict:
444
- """Get current translation progress"""
445
- with self.translation_lock:
446
- if session_id not in self.current_translation:
447
- return None
448
-
449
- progress = self.current_translation[session_id].copy()
450
- elapsed_time = time.time() - progress['start_time']
451
-
452
- if progress['completed_chunks'] > 0:
453
- avg_time_per_chunk = elapsed_time / progress['completed_chunks']
454
- remaining_chunks = progress['total_chunks'] - progress['completed_chunks']
455
- estimated_remaining = avg_time_per_chunk * remaining_chunks
456
- else:
457
- estimated_remaining = None
458
-
459
- return {
460
- 'total_chunks': progress['total_chunks'],
461
- 'completed_chunks': progress['completed_chunks'],
462
- 'elapsed_time': elapsed_time,
463
- 'estimated_remaining': estimated_remaining,
464
- 'progress_percentage': (progress['completed_chunks'] / progress['total_chunks']) * 100
465
- }
466
-
467
- def _combine_translations(self, translated_chunks: List[str], original_text: str) -> str:
468
- """ترکیب قسمت‌های ترجمه شده به یک متن یکپارچه"""
469
- if not translated_chunks:
470
- return ""
471
-
472
- if len(translated_chunks) == 1:
473
- return translated_chunks[0]
474
-
475
- logger.debug(f"[COMBINER] Combining {len(translated_chunks)} translated chunks")
476
-
477
- # ترکیب قسمت‌ها با در نظر گیری ساختار اصلی متن
478
- combined = []
479
-
480
- for i, chunk in enumerate(translated_chunks):
481
- # پاک‌سازی قسمت
482
- chunk = chunk.strip()
483
-
484
- if not chunk:
485
- continue
486
-
487
- # اضافه کردن فاصله مناسب بین قسمت‌ها
488
- if i > 0 and combined:
489
- # اگر قسمت قبلی با نقطه تمام نمی‌شود، نقطه اضافه کن
490
- if not combined[-1].rstrip().endswith(('.', '!', '?', ':', '؛', '.')):
491
- combined[-1] += '.'
492
-
493
- # بررسی اینکه آیا نیاز به پاراگراف جدید داریم
494
- if '\n\n' in original_text:
495
- combined.append('\n\n' + chunk)
496
- else:
497
- combined.append(' ' + chunk)
498
- else:
499
- combined.append(chunk)
500
-
501
- result = ''.join(combined)
502
-
503
- # پاک‌سازی نهایی
504
- result = re.sub(r'\s+', ' ', result) # حذف فاصله‌های اضافی
505
- result = re.sub(r'\.+', '.', result) # حذف نقطه‌های تکراری
506
- result = result.strip()
507
-
508
- logger.debug(f"[COMBINER] Combined translation length: {len(result)} chars")
509
- return result
510
 
511
  async def translate_text_async(self, text: str, source_lang: str, target_lang: str, session_id: str = None):
512
  """Async wrapper for translate_text"""
@@ -592,7 +247,7 @@ LANGUAGE_MAP = {
592
  translator = MultilingualTranslator(60)
593
 
594
  # Create FastAPI app
595
- app = FastAPI(title="Enhanced Multilingual Translation API", version="2.1.0")
596
 
597
  # Add CORS middleware
598
  app.add_middleware(
@@ -606,20 +261,18 @@ app.add_middleware(
606
  @app.get("/")
607
  async def root():
608
  return {
609
- "message": "Enhanced Multilingual Translation API v2.1",
610
  "status": "active",
611
  "features": [
612
- "enhanced_logging",
613
- "progress_tracking",
614
- "long_text_support",
615
- "smart_chunking",
616
- "cache_optimization"
617
  ]
618
  }
619
 
620
  @app.post("/api/translate")
621
  async def api_translate(request: TranslationRequest):
622
- """API endpoint for translation with enhanced logging and progress tracking"""
623
  if not request.text.strip():
624
  raise HTTPException(status_code=400, detail="No text provided")
625
 
@@ -633,7 +286,7 @@ async def api_translate(request: TranslationRequest):
633
  # Generate session ID for tracking
634
  session_id = hashlib.md5(f"{request.text[:100]}{time.time()}".encode()).hexdigest()[:8]
635
 
636
- translation, processing_time, chunks_count = translator.translate_text(
637
  request.text, source_code, target_code, session_id
638
  )
639
 
@@ -643,8 +296,7 @@ async def api_translate(request: TranslationRequest):
643
  target_language=request.target_lang,
644
  processing_time=processing_time,
645
  character_count=len(request.text),
646
- status="success",
647
- chunks_processed=chunks_count
648
  )
649
  except Exception as e:
650
  logger.error(f"[API] Translation error: {str(e)}")
@@ -653,7 +305,7 @@ async def api_translate(request: TranslationRequest):
653
  # Alternative endpoint for form data (compatibility with WordPress)
654
  @app.post("/api/translate/form")
655
  async def api_translate_form(request: Request):
656
- """Non-blocking translation endpoint with enhanced error handling"""
657
  try:
658
  form_data = await request.form()
659
  text = form_data.get("text", "")
@@ -686,83 +338,33 @@ async def api_translate_form(request: Request):
686
  # Generate session ID for tracking
687
  session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]
688
 
689
- # Check if it's a long text that should be processed in background
690
- if len(text) > translator.max_chunk_size:
691
- # 🔹 اول بررسی کن آیا نتیجه در کش وجود دارد یا نه
692
- cached_result = translator.cache.get(text, source_code, target_code)
693
- if cached_result:
694
- logger.info(f"[FORM API] Returning cached translation immediately for session: {session_id}")
695
- return {
696
- "translation": cached_result,
697
- "source_language": source_lang,
698
- "target_language": target_lang,
699
- "processing_time": 0.0,
700
- "character_count": len(text),
701
- "status": "success",
702
- "chunks_processed": None,
703
- "session_id": session_id,
704
- "is_heavy_text": False,
705
- "cached": True
706
- }
707
- # 🔹 اگر در کش نبود → پس بفرست به background
708
- task = asyncio.create_task(
709
- translator.translate_text_async(text, source_code, target_code, session_id)
710
  )
711
- translator.background_tasks[session_id] = task
712
 
713
- logger.info(f"[FORM API] Started background translation for session: {session_id}")
 
 
 
 
 
 
 
714
 
 
715
  return {
716
- "session_id": session_id,
717
- "request_id": session_id,
718
- "status": "processing",
719
- "message": "Translation started in background. Use CHECK RESULT to get your translation.",
720
  "character_count": len(text),
721
- "is_background": True,
722
- "is_heavy_text": True
723
  }
724
- else:
725
- # Process short text immediately
726
- try:
727
- translation, processing_time, chunks_count = await translator.translate_text_async(
728
- text, source_code, target_code, session_id
729
- )
730
-
731
- # بررسی محتوای ترجمه
732
- if not translation or not translation.strip() or translation.startswith("Translation error"):
733
- logger.error(f"[FORM API] Invalid translation result: {translation[:100] if translation else 'None'}")
734
- return {
735
- "status": "error",
736
- "message": "Translation failed - empty or invalid result",
737
- "session_id": session_id
738
- }
739
-
740
- logger.info(f"[FORM API] Translation successful | Length: {len(translation)} chars")
741
- return {
742
- "translation": translation,
743
- "source_language": source_lang,
744
- "target_language": target_lang,
745
- "processing_time": processing_time,
746
- "character_count": len(text),
747
- "status": "success",
748
- "chunks_processed": chunks_count,
749
- "session_id": session_id
750
- }
751
- except Exception as e:
752
- logger.error(f"[FORM API] Translation error: {str(e)}")
753
- return {"status": "error", "message": f"Translation error: {str(e)}"}
754
-
755
- @app.get("/api/progress/{session_id}")
756
- async def get_translation_progress(session_id: str):
757
- """Get translation progress for a session"""
758
- progress = translator.get_translation_progress(session_id)
759
- if progress is None:
760
- raise HTTPException(status_code=404, detail="Session not found or completed")
761
-
762
- return {
763
- "status": "success",
764
- "progress": progress
765
- }
766
 
767
  @app.get("/api/languages")
768
  async def get_languages():
@@ -781,137 +383,19 @@ async def health_check():
781
  "device": str(translator.device),
782
  "model": translator.model_name,
783
  "cache_size": len(translator.cache.cache),
784
- "max_chunk_size": translator.max_chunk_size,
785
- "active_translations": len(translator.current_translation),
786
- "version": "2.1.0"
787
- }
788
-
789
- @app.get("/api/status/{session_id}")
790
- async def get_session_status(session_id: str):
791
- """Get translation status - non-blocking"""
792
-
793
- # Check if task is in background tasks
794
- if session_id in translator.background_tasks:
795
- task = translator.background_tasks[session_id]
796
-
797
- if task.done():
798
- try:
799
- translation, processing_time, chunks_count = await task
800
- # Clean up completed task
801
- del translator.background_tasks[session_id]
802
-
803
- return {
804
- "status": "completed",
805
- "translation": translation,
806
- "processing_time": processing_time,
807
- "chunks_processed": chunks_count,
808
- "message": "Translation completed successfully"
809
- }
810
- except Exception as e:
811
- del translator.background_tasks[session_id]
812
- return {
813
- "status": "failed",
814
- "message": f"Translation failed: {str(e)}"
815
- }
816
- else:
817
- # Task still running - get progress
818
- progress = translator.get_translation_progress(session_id)
819
-
820
- if progress:
821
- return {
822
- "status": "processing",
823
- "progress": progress,
824
- "message": f"Processing chunk {progress['completed_chunks']}/{progress['total_chunks']}",
825
- "estimated_remaining": progress.get('estimated_remaining', 0)
826
- }
827
- else:
828
- return {
829
- "status": "processing",
830
- "message": "Translation in progress...",
831
- "progress": None
832
- }
833
-
834
- # Check current active translations
835
- progress = translator.get_translation_progress(session_id)
836
- if progress:
837
- return {
838
- "status": "processing",
839
- "progress": progress,
840
- "message": f"Processing chunk {progress['completed_chunks']}/{progress['total_chunks']}",
841
- "estimated_remaining": progress.get('estimated_remaining', 0)
842
- }
843
-
844
- return {
845
- "status": "not_found",
846
- "message": "Session not found or completed"
847
  }
848
 
849
  @app.get("/api/server-status")
850
  async def get_server_status():
851
- """Get current server status - non-blocking"""
852
- active_sessions = []
853
- background_tasks_count = len(translator.background_tasks)
854
-
855
- with translator.translation_lock:
856
- for session_id, progress in translator.current_translation.items():
857
- elapsed_time = time.time() - progress['start_time']
858
- if progress['completed_chunks'] > 0:
859
- avg_time_per_chunk = elapsed_time / progress['completed_chunks']
860
- remaining_chunks = progress['total_chunks'] - progress['completed_chunks']
861
- estimated_remaining = avg_time_per_chunk * remaining_chunks
862
- else:
863
- estimated_remaining = None
864
-
865
- active_sessions.append({
866
- 'session_id': session_id,
867
- 'source_lang': progress['source_lang'],
868
- 'target_lang': progress['target_lang'],
869
- 'total_chunks': progress['total_chunks'],
870
- 'completed_chunks': progress['completed_chunks'],
871
- 'progress_percentage': (progress['completed_chunks'] / progress['total_chunks']) * 100,
872
- 'elapsed_time': elapsed_time,
873
- 'estimated_remaining': estimated_remaining
874
- })
875
-
876
- if active_sessions or background_tasks_count > 0:
877
- if active_sessions:
878
- latest_session = active_sessions[-1]
879
- message = f"Processing chunk {latest_session['completed_chunks']}/{latest_session['total_chunks']} | {latest_session['source_lang']} → {latest_session['target_lang']}"
880
- else:
881
- message = f"{background_tasks_count} translation(s) in background queue"
882
-
883
- return {
884
- "has_active_translation": True,
885
- "status": "processing",
886
- "message": message,
887
- "active_sessions": len(active_sessions),
888
- "background_tasks": background_tasks_count,
889
- "total_active": len(active_sessions) + background_tasks_count
890
- }
891
- else:
892
- return {
893
- "has_active_translation": False,
894
- "status": "idle",
895
- "message": "Server is ready for new translations",
896
- "active_sessions": 0,
897
- "background_tasks": 0
898
- }
899
-
900
- if active_sessions:
901
- # Return the most recent active session
902
- latest_session = active_sessions[-1]
903
- return {
904
- "has_active_translation": True,
905
- "status": "processing",
906
- "message": f"Processing chunk {latest_session['completed_chunks']}/{latest_session['total_chunks']} | {latest_session['source_lang']} → {latest_session['target_lang']}",
907
- "session_data": latest_session
908
- }
909
- else:
910
- return {
911
- "has_active_translation": False,
912
- "status": "no_active_translation",
913
- "message": "No active translation on server"
914
- }
915
 
916
  if __name__ == "__main__":
917
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
42
  processing_time: float
43
  character_count: int
44
  status: str
 
 
 
 
45
 
46
  class TranslationCache:
47
  def __init__(self, cache_duration_minutes: int = 60):
 
77
  self.cache[key] = (translation, datetime.now())
78
  logger.info(f"[CACHE STORE] Cached translation for key: {key[:8]}... | Translation length: {len(translation)} chars")
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  class MultilingualTranslator:
81
  def __init__(self, cache_duration_minutes: int = 60):
82
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
83
  logger.info(f"[INIT] Using device: {self.device}")
84
 
85
+ # Initialize cache
86
  self.cache = TranslationCache(cache_duration_minutes)
 
87
 
88
  # Add thread pool for parallel processing
89
  self.executor = ThreadPoolExecutor(max_workers=3)
 
90
 
91
  logger.info(f"[INIT] Thread pool initialized with 3 workers")
92
 
 
103
  logger.error(f"[INIT] Error loading model: {e}")
104
  raise
105
 
106
+ logger.info(f"[INIT] Translator initialized successfully")
107
+
108
+ def translate_text(self, text: str, source_lang: str, target_lang: str, session_id: str = None) -> Tuple[str, float]:
109
+ """ترجمه متن با پشتیبانی از کش"""
110
+ start_time = time.time()
111
 
112
+ if not session_id:
113
+ session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]
114
+
115
+ logger.info(f"[SESSION:{session_id}] Starting translation | {source_lang} → {target_lang} | Text length: {len(text)} chars")
116
+
117
+ # بررسی کش برای کل متن
118
+ cached_result = self.cache.get(text, source_lang, target_lang)
119
+ if cached_result:
120
+ logger.info(f"[SESSION:{session_id}] Translation completed from cache | Time: {time.time() - start_time:.2f}s")
121
+ return cached_result, time.time() - start_time
122
 
 
 
 
 
123
  try:
 
 
124
  # Set source language for tokenizer
125
  self.tokenizer.src_lang = source_lang
126
 
127
  # Encode input
128
  encoded = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(self.device)
129
+ logger.debug(f"[SESSION:{session_id}] Text encoded | Input tokens: {encoded.input_ids.shape[1]}")
130
 
131
  # Generate translation with optimized parameters
 
132
  generated_tokens = self.model.generate(
133
  **encoded,
134
  forced_bos_token_id=self.tokenizer.get_lang_id(target_lang),
135
+ max_length=1024,
136
+ min_length=10,
137
+ num_beams=5,
138
  early_stopping=True,
139
+ no_repeat_ngram_size=3,
140
+ length_penalty=1.0,
141
+ repetition_penalty=1.2,
142
+ do_sample=False,
143
+ temperature=0.7,
144
  pad_token_id=self.tokenizer.pad_token_id,
145
  eos_token_id=self.tokenizer.eos_token_id
146
  )
 
147
 
148
  # Decode result
149
  translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
150
 
151
+ # پاک‌سازی ترجمه از کاراکترهای اضافی
152
  translation = translation.strip()
153
 
154
+ # ذخیره در کش
155
+ self.cache.set(text, source_lang, target_lang, translation)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
  processing_time = time.time() - start_time
158
+ logger.info(f"[SESSION:{session_id}] Translation completed | Total time: {processing_time:.2f}s | Output length: {len(translation)} chars")
 
 
 
 
159
 
160
+ return translation, processing_time
161
 
162
  except Exception as e:
163
  logger.error(f"[SESSION:{session_id}] Translation error: {e}")
164
+ return f"Translation error: {str(e)}", time.time() - start_time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
  async def translate_text_async(self, text: str, source_lang: str, target_lang: str, session_id: str = None):
167
  """Async wrapper for translate_text"""
 
247
  translator = MultilingualTranslator(60)
248
 
249
  # Create FastAPI app
250
+ app = FastAPI(title="Simplified Multilingual Translation API", version="2.0.0")
251
 
252
  # Add CORS middleware
253
  app.add_middleware(
 
261
  @app.get("/")
262
  async def root():
263
  return {
264
+ "message": "Simplified Multilingual Translation API v2.0",
265
  "status": "active",
266
  "features": [
267
+ "simplified_processing",
268
+ "cache_optimization",
269
+ "direct_translation"
 
 
270
  ]
271
  }
272
 
273
  @app.post("/api/translate")
274
  async def api_translate(request: TranslationRequest):
275
+ """API endpoint for translation"""
276
  if not request.text.strip():
277
  raise HTTPException(status_code=400, detail="No text provided")
278
 
 
286
  # Generate session ID for tracking
287
  session_id = hashlib.md5(f"{request.text[:100]}{time.time()}".encode()).hexdigest()[:8]
288
 
289
+ translation, processing_time = translator.translate_text(
290
  request.text, source_code, target_code, session_id
291
  )
292
 
 
296
  target_language=request.target_lang,
297
  processing_time=processing_time,
298
  character_count=len(request.text),
299
+ status="success"
 
300
  )
301
  except Exception as e:
302
  logger.error(f"[API] Translation error: {str(e)}")
 
305
  # Alternative endpoint for form data (compatibility with WordPress)
306
  @app.post("/api/translate/form")
307
  async def api_translate_form(request: Request):
308
+ """Simplified translation endpoint"""
309
  try:
310
  form_data = await request.form()
311
  text = form_data.get("text", "")
 
338
  # Generate session ID for tracking
339
  session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]
340
 
341
+ try:
342
+ translation, processing_time = await translator.translate_text_async(
343
+ text, source_code, target_code, session_id
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  )
 
345
 
346
+ # بررسی محتوای ترجمه
347
+ if not translation or not translation.strip() or translation.startswith("Translation error"):
348
+ logger.error(f"[FORM API] Invalid translation result: {translation[:100] if translation else 'None'}")
349
+ return {
350
+ "status": "error",
351
+ "message": "Translation failed - empty or invalid result",
352
+ "session_id": session_id
353
+ }
354
 
355
+ logger.info(f"[FORM API] Translation successful | Length: {len(translation)} chars")
356
  return {
357
+ "translation": translation,
358
+ "source_language": source_lang,
359
+ "target_language": target_lang,
360
+ "processing_time": processing_time,
361
  "character_count": len(text),
362
+ "status": "success",
363
+ "session_id": session_id
364
  }
365
+ except Exception as e:
366
+ logger.error(f"[FORM API] Translation error: {str(e)}")
367
+ return {"status": "error", "message": f"Translation error: {str(e)}"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
 
369
  @app.get("/api/languages")
370
  async def get_languages():
 
383
  "device": str(translator.device),
384
  "model": translator.model_name,
385
  "cache_size": len(translator.cache.cache),
386
+ "version": "2.0.0"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
  }
388
 
389
  @app.get("/api/server-status")
390
  async def get_server_status():
391
+ """Get current server status"""
392
+ return {
393
+ "has_active_translation": False,
394
+ "status": "idle",
395
+ "message": "Server is ready for new translations",
396
+ "active_sessions": 0,
397
+ "background_tasks": 0
398
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
  if __name__ == "__main__":
401
  uvicorn.run(app, host="0.0.0.0", port=7860)