Update app.py
Browse files
app.py
CHANGED
|
@@ -12,10 +12,11 @@ import threading
|
|
| 12 |
from queue import Queue
|
| 13 |
import logging
|
| 14 |
from typing import Dict, List, Tuple, Optional
|
| 15 |
-
from fastapi import FastAPI, HTTPException, Request
|
| 16 |
from fastapi.middleware.cors import CORSMiddleware
|
| 17 |
from pydantic import BaseModel
|
| 18 |
import uvicorn
|
|
|
|
| 19 |
|
| 20 |
# Enhanced logging configuration
|
| 21 |
logging.basicConfig(
|
|
@@ -28,6 +29,11 @@ logging.basicConfig(
|
|
| 28 |
)
|
| 29 |
logger = logging.getLogger(__name__)
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
# Pydantic models for request/response
|
| 32 |
class TranslationRequest(BaseModel):
|
| 33 |
text: str
|
|
@@ -119,11 +125,11 @@ class TranslationQueue:
|
|
| 119 |
thread.start()
|
| 120 |
|
| 121 |
class TextChunker:
|
| 122 |
-
"""
|
| 123 |
|
| 124 |
@staticmethod
|
| 125 |
def split_text_smart(text: str, max_chunk_size: int = 400) -> List[str]:
|
| 126 |
-
"""
|
| 127 |
logger.info(f"[CHUNKER] Starting smart text splitting | Text length: {len(text)} chars | Max chunk size: {max_chunk_size}")
|
| 128 |
|
| 129 |
if len(text) <= max_chunk_size:
|
|
@@ -132,27 +138,27 @@ class TextChunker:
|
|
| 132 |
|
| 133 |
chunks = []
|
| 134 |
|
| 135 |
-
#
|
| 136 |
paragraphs = text.split('\n\n')
|
| 137 |
current_chunk = ""
|
| 138 |
|
| 139 |
for i, paragraph in enumerate(paragraphs):
|
| 140 |
logger.debug(f"[CHUNKER] Processing paragraph {i+1}/{len(paragraphs)} | Length: {len(paragraph)}")
|
| 141 |
|
| 142 |
-
#
|
| 143 |
if len(paragraph) > max_chunk_size:
|
| 144 |
-
#
|
| 145 |
if current_chunk.strip():
|
| 146 |
chunks.append(current_chunk.strip())
|
| 147 |
logger.debug(f"[CHUNKER] Added chunk from accumulated paragraphs | Length: {len(current_chunk.strip())}")
|
| 148 |
current_chunk = ""
|
| 149 |
|
| 150 |
-
#
|
| 151 |
sub_chunks = TextChunker._split_paragraph(paragraph, max_chunk_size)
|
| 152 |
chunks.extend(sub_chunks)
|
| 153 |
logger.debug(f"[CHUNKER] Split large paragraph into {len(sub_chunks)} sub-chunks")
|
| 154 |
else:
|
| 155 |
-
#
|
| 156 |
if len(current_chunk) + len(paragraph) + 2 > max_chunk_size:
|
| 157 |
if current_chunk.strip():
|
| 158 |
chunks.append(current_chunk.strip())
|
|
@@ -164,7 +170,7 @@ class TextChunker:
|
|
| 164 |
else:
|
| 165 |
current_chunk = paragraph
|
| 166 |
|
| 167 |
-
#
|
| 168 |
if current_chunk.strip():
|
| 169 |
chunks.append(current_chunk.strip())
|
| 170 |
logger.debug(f"[CHUNKER] Added final chunk | Length: {len(current_chunk.strip())}")
|
|
@@ -174,10 +180,10 @@ class TextChunker:
|
|
| 174 |
|
| 175 |
@staticmethod
|
| 176 |
def _split_paragraph(paragraph: str, max_chunk_size: int) -> List[str]:
|
| 177 |
-
"""
|
| 178 |
logger.debug(f"[CHUNKER] Splitting large paragraph | Length: {len(paragraph)}")
|
| 179 |
|
| 180 |
-
#
|
| 181 |
sentences = re.split(r'[.!?]+\s+', paragraph)
|
| 182 |
chunks = []
|
| 183 |
current_chunk = ""
|
|
@@ -186,12 +192,12 @@ class TextChunker:
|
|
| 186 |
if not sentence.strip():
|
| 187 |
continue
|
| 188 |
|
| 189 |
-
#
|
| 190 |
if not sentence.endswith(('.', '!', '?')):
|
| 191 |
sentence += '.'
|
| 192 |
|
| 193 |
if len(sentence) > max_chunk_size:
|
| 194 |
-
#
|
| 195 |
if current_chunk.strip():
|
| 196 |
chunks.append(current_chunk.strip())
|
| 197 |
current_chunk = ""
|
|
@@ -217,7 +223,7 @@ class TextChunker:
|
|
| 217 |
|
| 218 |
@staticmethod
|
| 219 |
def _split_by_comma(sentence: str, max_chunk_size: int) -> List[str]:
|
| 220 |
-
"""
|
| 221 |
logger.debug(f"[CHUNKER] Splitting long sentence by comma | Length: {len(sentence)}")
|
| 222 |
|
| 223 |
parts = sentence.split(', ')
|
|
@@ -226,12 +232,12 @@ class TextChunker:
|
|
| 226 |
|
| 227 |
for part in parts:
|
| 228 |
if len(part) > max_chunk_size:
|
| 229 |
-
#
|
| 230 |
if current_chunk.strip():
|
| 231 |
chunks.append(current_chunk.strip())
|
| 232 |
current_chunk = ""
|
| 233 |
|
| 234 |
-
#
|
| 235 |
while len(part) > max_chunk_size:
|
| 236 |
chunks.append(part[:max_chunk_size].strip())
|
| 237 |
part = part[max_chunk_size:].strip()
|
|
@@ -259,38 +265,6 @@ class MultilingualTranslator:
|
|
| 259 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 260 |
logger.info(f"[INIT] Using device: {self.device}")
|
| 261 |
|
| 262 |
-
# در متد __init__ کلاس MultilingualTranslator
|
| 263 |
-
self.translation_store = {} # ذخیرهسازی موقت ترجمهها بر اساس request_id
|
| 264 |
-
self.request_mapping = {} # mapping بین request_id و hash متن
|
| 265 |
-
|
| 266 |
-
# بعد از اتمام ترجمه موفق، در متد translate_text
|
| 267 |
-
# translation_result = {
|
| 268 |
-
# 'translation': final_translation,
|
| 269 |
-
# 'processing_time': processing_time,
|
| 270 |
-
# 'chunks_processed': len(chunks),
|
| 271 |
-
# 'source_lang': source_code,
|
| 272 |
-
# 'target_lang': target_code,
|
| 273 |
-
# 'timestamp': time.time()
|
| 274 |
-
# }
|
| 275 |
-
# self.translation_store[session_id] = translation_result
|
| 276 |
-
|
| 277 |
-
# متد برای پاکسازی خودکار دادههای قدیمی
|
| 278 |
-
def cleanup_old_translations(self, max_age_hours: int = 24):
|
| 279 |
-
"""پاکسازی ترجمههای قدیمی از ذخیرهسازی"""
|
| 280 |
-
current_time = time.time()
|
| 281 |
-
keys_to_delete = []
|
| 282 |
-
|
| 283 |
-
for request_id, data in self.translation_store.items():
|
| 284 |
-
if current_time - data['timestamp'] > max_age_hours * 3600:
|
| 285 |
-
keys_to_delete.append(request_id)
|
| 286 |
-
|
| 287 |
-
for key in keys_to_delete:
|
| 288 |
-
del self.translation_store[key]
|
| 289 |
-
if key in self.request_mapping:
|
| 290 |
-
del self.request_mapping[key]
|
| 291 |
-
|
| 292 |
-
logger.info(f"[CLEANUP] Removed {len(keys_to_delete)} old translations")
|
| 293 |
-
|
| 294 |
# Initialize cache and queue
|
| 295 |
self.cache = TranslationCache(cache_duration_minutes)
|
| 296 |
self.queue = TranslationQueue()
|
|
@@ -314,9 +288,9 @@ class MultilingualTranslator:
|
|
| 314 |
logger.error(f"[INIT] Error loading model: {e}")
|
| 315 |
raise
|
| 316 |
|
| 317 |
-
#
|
| 318 |
-
self.max_chunk_size = 350 #
|
| 319 |
-
self.min_chunk_overlap = 20 #
|
| 320 |
|
| 321 |
# Track translation progress
|
| 322 |
self.current_translation = {}
|
|
@@ -325,9 +299,9 @@ class MultilingualTranslator:
|
|
| 325 |
logger.info(f"[INIT] Translator initialized | Max chunk size: {self.max_chunk_size} chars")
|
| 326 |
|
| 327 |
def translate_chunk(self, text: str, source_lang: str, target_lang: str, chunk_index: int = 0, total_chunks: int = 1) -> str:
|
| 328 |
-
"""
|
| 329 |
try:
|
| 330 |
-
logger.info(f"[TRANSLATE] Starting chunk translation [{chunk_index+1}/{total_chunks}] | {source_lang}
|
| 331 |
|
| 332 |
# Set source language for tokenizer
|
| 333 |
self.tokenizer.src_lang = source_lang
|
|
@@ -341,15 +315,15 @@ class MultilingualTranslator:
|
|
| 341 |
generated_tokens = self.model.generate(
|
| 342 |
**encoded,
|
| 343 |
forced_bos_token_id=self.tokenizer.get_lang_id(target_lang),
|
| 344 |
-
max_length=1024, #
|
| 345 |
-
min_length=10, #
|
| 346 |
-
num_beams=5, #
|
| 347 |
early_stopping=True,
|
| 348 |
-
no_repeat_ngram_size=3, #
|
| 349 |
-
length_penalty=1.0, #
|
| 350 |
-
repetition_penalty=1.2, #
|
| 351 |
-
do_sample=False, #
|
| 352 |
-
temperature=0.7, #
|
| 353 |
pad_token_id=self.tokenizer.pad_token_id,
|
| 354 |
eos_token_id=self.tokenizer.eos_token_id
|
| 355 |
)
|
|
@@ -358,7 +332,7 @@ class MultilingualTranslator:
|
|
| 358 |
# Decode result
|
| 359 |
translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
| 360 |
|
| 361 |
-
#
|
| 362 |
translation = translation.strip()
|
| 363 |
|
| 364 |
logger.info(f"[TRANSLATE] Chunk translation completed [{chunk_index+1}/{total_chunks}] | Generation time: {generation_time:.2f}s | Output length: {len(translation)} chars")
|
|
@@ -370,7 +344,7 @@ class MultilingualTranslator:
|
|
| 370 |
return f"[Translation Error: {str(e)}]"
|
| 371 |
|
| 372 |
def translate_text(self, text: str, source_lang: str, target_lang: str, session_id: str = None) -> Tuple[str, float, int]:
|
| 373 |
-
|
| 374 |
start_time = time.time()
|
| 375 |
|
| 376 |
if not session_id:
|
|
@@ -382,16 +356,10 @@ class MultilingualTranslator:
|
|
| 382 |
cached_result = self.cache.get(text, source_lang, target_lang)
|
| 383 |
if cached_result:
|
| 384 |
logger.info(f"[SESSION:{session_id}] Translation completed from cache | Time: {time.time() - start_time:.2f}s")
|
| 385 |
-
|
| 386 |
-
# ذخیره نتیجه در translation_store برای رهگیری
|
| 387 |
-
if session_id and cached_result and not cached_result.startswith("Translation error"):
|
| 388 |
-
self.store_translation_result(session_id, cached_result, time.time() - start_time, 1)
|
| 389 |
-
self.request_mapping[session_id] = hashlib.md5(text.encode()).hexdigest()
|
| 390 |
-
|
| 391 |
return cached_result, time.time() - start_time, 1
|
| 392 |
|
| 393 |
try:
|
| 394 |
-
# اگر متن کوتاه است
|
| 395 |
if len(text) <= self.max_chunk_size:
|
| 396 |
logger.info(f"[SESSION:{session_id}] Processing as short text")
|
| 397 |
translation = self.translate_chunk(text, source_lang, target_lang, 0, 1)
|
|
@@ -401,14 +369,9 @@ class MultilingualTranslator:
|
|
| 401 |
processing_time = time.time() - start_time
|
| 402 |
logger.info(f"[SESSION:{session_id}] Short text translation completed | Total time: {processing_time:.2f}s")
|
| 403 |
|
| 404 |
-
# ذخیره نتیجه در translation_store برای رهگیری
|
| 405 |
-
if session_id and translation and not translation.startswith("Translation error"):
|
| 406 |
-
self.store_translation_result(session_id, translation, processing_time, 1)
|
| 407 |
-
self.request_mapping[session_id] = hashlib.md5(text.encode()).hexdigest()
|
| 408 |
-
|
| 409 |
return translation, processing_time, 1
|
| 410 |
|
| 411 |
-
# تقسیم متن طولانی به
|
| 412 |
logger.info(f"[SESSION:{session_id}] Processing as long text - starting chunking")
|
| 413 |
chunks = TextChunker.split_text_smart(text, self.max_chunk_size)
|
| 414 |
logger.info(f"[SESSION:{session_id}] Text split into {len(chunks)} chunks")
|
|
@@ -423,17 +386,17 @@ class MultilingualTranslator:
|
|
| 423 |
'target_lang': target_lang
|
| 424 |
}
|
| 425 |
|
| 426 |
-
# ترجمه هر
|
| 427 |
translated_chunks = []
|
| 428 |
for i, chunk in enumerate(chunks):
|
| 429 |
chunk_start_time = time.time()
|
| 430 |
logger.info(f"[SESSION:{session_id}] Starting chunk {i+1}/{len(chunks)} | Chunk length: {len(chunk)} chars")
|
| 431 |
|
| 432 |
-
# بررسی کش برای هر
|
| 433 |
chunk_translation = self.cache.get(chunk, source_lang, target_lang)
|
| 434 |
|
| 435 |
if not chunk_translation:
|
| 436 |
-
#
|
| 437 |
if i > 0:
|
| 438 |
elapsed_time = time.time() - start_time
|
| 439 |
avg_time_per_chunk = elapsed_time / i
|
|
@@ -441,7 +404,7 @@ class MultilingualTranslator:
|
|
| 441 |
logger.info(f"[SESSION:{session_id}] Progress: {i}/{len(chunks)} | Avg time per chunk: {avg_time_per_chunk:.1f}s | Estimated remaining: {estimated_remaining:.1f}s")
|
| 442 |
|
| 443 |
chunk_translation = self.translate_chunk(chunk, source_lang, target_lang, i, len(chunks))
|
| 444 |
-
# ذخیره
|
| 445 |
self.cache.set(chunk, source_lang, target_lang, chunk_translation)
|
| 446 |
|
| 447 |
chunk_time = time.time() - chunk_start_time
|
|
@@ -456,11 +419,11 @@ class MultilingualTranslator:
|
|
| 456 |
if session_id in self.current_translation:
|
| 457 |
self.current_translation[session_id]['completed_chunks'] = i + 1
|
| 458 |
|
| 459 |
-
# کمی استراحت بین ترجمهها برای جلوگیری از بار
|
| 460 |
if i < len(chunks) - 1:
|
| 461 |
time.sleep(0.1)
|
| 462 |
|
| 463 |
-
# ترکیب
|
| 464 |
logger.info(f"[SESSION:{session_id}] Combining translated chunks")
|
| 465 |
final_translation = self._combine_translations(translated_chunks, text)
|
| 466 |
|
|
@@ -468,12 +431,26 @@ class MultilingualTranslator:
|
|
| 468 |
self.cache.set(text, source_lang, target_lang, final_translation)
|
| 469 |
|
| 470 |
processing_time = time.time() - start_time
|
|
|
|
|
|
|
| 471 |
logger.info(f"[SESSION:{session_id}] Long text translation completed | Total time: {processing_time:.2f}s | Chunks: {len(chunks)} | Final length: {len(final_translation)} chars")
|
| 472 |
|
| 473 |
-
#
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 477 |
|
| 478 |
# Clean up progress tracking
|
| 479 |
with self.translation_lock:
|
|
@@ -487,7 +464,7 @@ class MultilingualTranslator:
|
|
| 487 |
with self.translation_lock:
|
| 488 |
self.current_translation.pop(session_id, None)
|
| 489 |
return f"Translation error: {str(e)}", time.time() - start_time, 0
|
| 490 |
-
|
| 491 |
def get_translation_progress(self, session_id: str) -> Dict:
|
| 492 |
"""Get current translation progress"""
|
| 493 |
with self.translation_lock:
|
|
@@ -511,82 +488,9 @@ class MultilingualTranslator:
|
|
| 511 |
'estimated_remaining': estimated_remaining,
|
| 512 |
'progress_percentage': (progress['completed_chunks'] / progress['total_chunks']) * 100
|
| 513 |
}
|
| 514 |
-
|
| 515 |
-
"""یافتن ترجمه کامل بر اساس request_id با پشتیبانی از کش"""
|
| 516 |
-
logger.info(f"[TRACKING] Looking up translation for request_id: {request_id}")
|
| 517 |
-
|
| 518 |
-
# 1. اول در ترجمههای جاری بررسی کنیم
|
| 519 |
-
with self.translation_lock:
|
| 520 |
-
if request_id in self.current_translation:
|
| 521 |
-
progress = self.current_translation[request_id].copy()
|
| 522 |
-
logger.info(f"[TRACKING] Found active translation: {progress['completed_chunks']}/{progress['total_chunks']} chunks completed")
|
| 523 |
-
return {
|
| 524 |
-
"status": "processing",
|
| 525 |
-
"progress": progress,
|
| 526 |
-
"translation": None,
|
| 527 |
-
"found_in": "active_translations"
|
| 528 |
-
}
|
| 529 |
-
|
| 530 |
-
# 2. بررسی در تسکهای پسزمینه
|
| 531 |
-
if request_id in self.background_tasks:
|
| 532 |
-
task = self.background_tasks[request_id]
|
| 533 |
-
|
| 534 |
-
if task.done():
|
| 535 |
-
try:
|
| 536 |
-
translation, processing_time, chunks_count = task.result()
|
| 537 |
-
logger.info(f"[TRACKING] Background task completed successfully: {len(translation)} chars")
|
| 538 |
-
|
| 539 |
-
# حذف از تسکهای پسزمینه
|
| 540 |
-
del self.background_tasks[request_id]
|
| 541 |
-
|
| 542 |
-
return {
|
| 543 |
-
"status": "completed",
|
| 544 |
-
"translation": translation,
|
| 545 |
-
"processing_time": processing_time,
|
| 546 |
-
"chunks_processed": chunks_count,
|
| 547 |
-
"found_in": "background_tasks"
|
| 548 |
-
}
|
| 549 |
-
except Exception as e:
|
| 550 |
-
logger.error(f"[TRACKING] Background task failed: {str(e)}")
|
| 551 |
-
del self.background_tasks[request_id]
|
| 552 |
-
return {
|
| 553 |
-
"status": "failed",
|
| 554 |
-
"error": str(e),
|
| 555 |
-
"found_in": "background_tasks"
|
| 556 |
-
}
|
| 557 |
-
else:
|
| 558 |
-
logger.info(f"[TRACKING] Background task still running: {request_id}")
|
| 559 |
-
return {
|
| 560 |
-
"status": "processing",
|
| 561 |
-
"translation": None,
|
| 562 |
-
"found_in": "background_tasks"
|
| 563 |
-
}
|
| 564 |
-
|
| 565 |
-
# 3. بررسی در کش - این بخش نیاز به پیادهسازی سیستم رهگیری کش دارد
|
| 566 |
-
# برای این کار نیاز داریم که request_id را با متن اصلی مرتبط کنیم
|
| 567 |
-
# یک راه حل: ذخیره mapping بین request_id و hash متن
|
| 568 |
-
|
| 569 |
-
# 4. بررسی در سیستم ذخیرهسازی موقت (اگر پیادهسازی شده باشد)
|
| 570 |
-
if hasattr(self, 'translation_store') and request_id in self.translation_store:
|
| 571 |
-
result = self.translation_store[request_id]
|
| 572 |
-
logger.info(f"[TRACKING] Found in translation store: {len(result['translation'])} chars")
|
| 573 |
-
return {
|
| 574 |
-
"status": "completed",
|
| 575 |
-
"translation": result['translation'],
|
| 576 |
-
"processing_time": result['processing_time'],
|
| 577 |
-
"chunks_processed": result['chunks_processed'],
|
| 578 |
-
"found_in": "translation_store"
|
| 579 |
-
}
|
| 580 |
-
|
| 581 |
-
logger.warning(f"[TRACKING] Request ID not found: {request_id}")
|
| 582 |
-
return {
|
| 583 |
-
"status": "not_found",
|
| 584 |
-
"message": "Translation request not found",
|
| 585 |
-
"found_in": None
|
| 586 |
-
}
|
| 587 |
-
|
| 588 |
def _combine_translations(self, translated_chunks: List[str], original_text: str) -> str:
|
| 589 |
-
"""
|
| 590 |
if not translated_chunks:
|
| 591 |
return ""
|
| 592 |
|
|
@@ -595,23 +499,23 @@ class MultilingualTranslator:
|
|
| 595 |
|
| 596 |
logger.debug(f"[COMBINER] Combining {len(translated_chunks)} translated chunks")
|
| 597 |
|
| 598 |
-
#
|
| 599 |
combined = []
|
| 600 |
|
| 601 |
for i, chunk in enumerate(translated_chunks):
|
| 602 |
-
#
|
| 603 |
chunk = chunk.strip()
|
| 604 |
|
| 605 |
if not chunk:
|
| 606 |
continue
|
| 607 |
|
| 608 |
-
#
|
| 609 |
if i > 0 and combined:
|
| 610 |
-
#
|
| 611 |
-
if not combined[-1].rstrip().endswith(('.', '!', '?', ':', '
|
| 612 |
combined[-1] += '.'
|
| 613 |
|
| 614 |
-
#
|
| 615 |
if '\n\n' in original_text:
|
| 616 |
combined.append('\n\n' + chunk)
|
| 617 |
else:
|
|
@@ -621,40 +525,14 @@ class MultilingualTranslator:
|
|
| 621 |
|
| 622 |
result = ''.join(combined)
|
| 623 |
|
| 624 |
-
#
|
| 625 |
-
result = re.sub(r'\s+', ' ', result) #
|
| 626 |
-
result = re.sub(r'\.+', '.', result) #
|
| 627 |
result = result.strip()
|
| 628 |
|
| 629 |
logger.debug(f"[COMBINER] Combined translation length: {len(result)} chars")
|
| 630 |
return result
|
| 631 |
|
| 632 |
-
def cleanup_old_translations(self, max_age_hours: int = 24):
|
| 633 |
-
"""پاکسازی ترجمههای قدیمی از ذخیرهسازی"""
|
| 634 |
-
current_time = time.time()
|
| 635 |
-
keys_to_delete = []
|
| 636 |
-
|
| 637 |
-
for request_id, data in self.translation_store.items():
|
| 638 |
-
if current_time - data['timestamp'] > max_age_hours * 3600:
|
| 639 |
-
keys_to_delete.append(request_id)
|
| 640 |
-
|
| 641 |
-
for key in keys_to_delete:
|
| 642 |
-
del self.translation_store[key]
|
| 643 |
-
if key in self.request_mapping:
|
| 644 |
-
del self.request_mapping[key]
|
| 645 |
-
|
| 646 |
-
logger.info(f"[CLEANUP] Removed {len(keys_to_delete)} old translations")
|
| 647 |
-
|
| 648 |
-
def store_translation_result(self, request_id: str, translation: str, processing_time: float, chunks_processed: int):
|
| 649 |
-
"""ذخیره نتیجه ترجمه برای دسترسی بعدی"""
|
| 650 |
-
self.translation_store[request_id] = {
|
| 651 |
-
'translation': translation,
|
| 652 |
-
'processing_time': processing_time,
|
| 653 |
-
'chunks_processed': chunks_processed,
|
| 654 |
-
'timestamp': time.time()
|
| 655 |
-
}
|
| 656 |
-
logger.info(f"[STORAGE] Stored translation for request_id: {request_id}")
|
| 657 |
-
|
| 658 |
async def translate_text_async(self, text: str, source_lang: str, target_lang: str, session_id: str = None):
|
| 659 |
"""Async wrapper for translate_text"""
|
| 660 |
loop = asyncio.get_event_loop()
|
|
@@ -664,6 +542,76 @@ class MultilingualTranslator:
|
|
| 664 |
text, source_lang, target_lang, session_id
|
| 665 |
)
|
| 666 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 667 |
# Language mappings for M2M100 model
|
| 668 |
LANGUAGE_MAP = {
|
| 669 |
"English": "en",
|
|
@@ -750,57 +698,140 @@ app.add_middleware(
|
|
| 750 |
allow_headers=["*"],
|
| 751 |
)
|
| 752 |
|
| 753 |
-
|
| 754 |
-
async def root():
|
| 755 |
-
return {
|
| 756 |
-
"message": "Enhanced Multilingual Translation API v2.1",
|
| 757 |
-
"status": "active",
|
| 758 |
-
"features": [
|
| 759 |
-
"enhanced_logging",
|
| 760 |
-
"progress_tracking",
|
| 761 |
-
"long_text_support",
|
| 762 |
-
"smart_chunking",
|
| 763 |
-
"cache_optimization"
|
| 764 |
-
]
|
| 765 |
-
}
|
| 766 |
|
| 767 |
-
@app.post("/api/
|
| 768 |
-
async def
|
| 769 |
-
"""
|
| 770 |
-
if
|
| 771 |
-
|
| 772 |
-
|
| 773 |
-
source_code = LANGUAGE_MAP.get(request.source_lang)
|
| 774 |
-
target_code = LANGUAGE_MAP.get(request.target_lang)
|
| 775 |
-
|
| 776 |
-
if not source_code or not target_code:
|
| 777 |
-
raise HTTPException(status_code=400, detail="Invalid language codes")
|
| 778 |
-
|
| 779 |
try:
|
| 780 |
-
|
| 781 |
-
|
| 782 |
|
| 783 |
-
|
| 784 |
-
|
| 785 |
-
|
|
|
|
|
|
|
| 786 |
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 796 |
except Exception as e:
|
| 797 |
-
logger.error(f"[
|
| 798 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 799 |
|
| 800 |
-
# Alternative endpoint for form data (compatibility with WordPress)
|
| 801 |
@app.post("/api/translate/form")
|
| 802 |
async def api_translate_form(request: Request):
|
| 803 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 804 |
try:
|
| 805 |
form_data = await request.form()
|
| 806 |
text = form_data.get("text", "")
|
|
@@ -815,13 +846,11 @@ async def api_translate_form(request: Request):
|
|
| 815 |
target_lang = json_data.get("target_lang", "")
|
| 816 |
api_key = json_data.get("api_key", None)
|
| 817 |
except:
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
logger.info(f"[FORM API] Translation request | {source_lang} → {target_lang} | Length: {len(text)} chars")
|
| 821 |
|
| 822 |
if not text.strip():
|
| 823 |
logger.error("[FORM API] No text provided")
|
| 824 |
-
return {"status": "error", "message": "
|
| 825 |
|
| 826 |
source_code = LANGUAGE_MAP.get(source_lang)
|
| 827 |
target_code = LANGUAGE_MAP.get(target_lang)
|
|
@@ -830,74 +859,143 @@ async def api_translate_form(request: Request):
|
|
| 830 |
logger.error(f"[FORM API] Invalid language codes: {source_lang} -> {target_lang}")
|
| 831 |
return {"status": "error", "message": "Invalid language codes"}
|
| 832 |
|
| 833 |
-
|
| 834 |
-
|
|
|
|
|
|
|
| 835 |
|
| 836 |
-
|
| 837 |
-
|
| 838 |
-
|
|
|
|
|
|
|
| 839 |
cached_result = translator.cache.get(text, source_code, target_code)
|
| 840 |
if cached_result:
|
| 841 |
-
logger.info(f"[FORM API] Returning cached translation immediately for
|
| 842 |
return {
|
| 843 |
"translation": cached_result,
|
| 844 |
"source_language": source_lang,
|
| 845 |
"target_language": target_lang,
|
| 846 |
"processing_time": 0.0,
|
| 847 |
-
"character_count":
|
| 848 |
"status": "success",
|
| 849 |
"chunks_processed": None,
|
| 850 |
-
"
|
| 851 |
-
"is_heavy_text": False,
|
| 852 |
"cached": True
|
| 853 |
}
|
| 854 |
-
|
| 855 |
-
|
| 856 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 857 |
)
|
| 858 |
-
|
|
|
|
| 859 |
|
| 860 |
-
logger.info(f"[FORM API] Started background
|
| 861 |
|
| 862 |
return {
|
| 863 |
-
|
| 864 |
-
|
| 865 |
-
|
| 866 |
-
|
| 867 |
-
|
| 868 |
-
|
| 869 |
-
"is_heavy_text": True
|
| 870 |
}
|
|
|
|
| 871 |
else:
|
| 872 |
# Process short text immediately
|
| 873 |
try:
|
| 874 |
-
|
| 875 |
-
|
|
|
|
|
|
|
| 876 |
)
|
| 877 |
|
| 878 |
-
#
|
| 879 |
if not translation or not translation.strip() or translation.startswith("Translation error"):
|
| 880 |
logger.error(f"[FORM API] Invalid translation result: {translation[:100] if translation else 'None'}")
|
| 881 |
return {
|
| 882 |
"status": "error",
|
| 883 |
-
"message": "Translation failed - empty or invalid result"
|
| 884 |
-
"session_id": session_id
|
| 885 |
}
|
| 886 |
|
| 887 |
-
logger.info(f"[FORM API]
|
|
|
|
| 888 |
return {
|
| 889 |
-
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
"chunks_processed": chunks_count,
|
| 896 |
-
"session_id": session_id
|
| 897 |
}
|
|
|
|
| 898 |
except Exception as e:
|
| 899 |
logger.error(f"[FORM API] Translation error: {str(e)}")
|
| 900 |
-
return {"status": "error", "message": f"Translation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 901 |
|
| 902 |
@app.get("/api/progress/{session_id}")
|
| 903 |
async def get_translation_progress(session_id: str):
|
|
@@ -923,6 +1021,10 @@ async def get_languages():
|
|
| 923 |
@app.get("/api/health")
|
| 924 |
async def health_check():
|
| 925 |
"""Health check endpoint"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 926 |
return {
|
| 927 |
"status": "healthy",
|
| 928 |
"device": str(translator.device),
|
|
@@ -930,6 +1032,8 @@ async def health_check():
|
|
| 930 |
"cache_size": len(translator.cache.cache),
|
| 931 |
"max_chunk_size": translator.max_chunk_size,
|
| 932 |
"active_translations": len(translator.current_translation),
|
|
|
|
|
|
|
| 933 |
"version": "2.1.0"
|
| 934 |
}
|
| 935 |
|
|
@@ -995,9 +1099,12 @@ async def get_session_status(session_id: str):
|
|
| 995 |
|
| 996 |
@app.get("/api/server-status")
|
| 997 |
async def get_server_status():
|
| 998 |
-
"""Get current server status -
|
| 999 |
active_sessions = []
|
| 1000 |
-
|
|
|
|
|
|
|
|
|
|
| 1001 |
|
| 1002 |
with translator.translation_lock:
|
| 1003 |
for session_id, progress in translator.current_translation.items():
|
|
@@ -1020,10 +1127,12 @@ async def get_server_status():
|
|
| 1020 |
'estimated_remaining': estimated_remaining
|
| 1021 |
})
|
| 1022 |
|
| 1023 |
-
|
|
|
|
|
|
|
| 1024 |
if active_sessions:
|
| 1025 |
latest_session = active_sessions[-1]
|
| 1026 |
-
message = f"Processing chunk {latest_session['completed_chunks']}/{latest_session['total_chunks']} | {latest_session['source_lang']}
|
| 1027 |
else:
|
| 1028 |
message = f"{background_tasks_count} translation(s) in background queue"
|
| 1029 |
|
|
@@ -1033,7 +1142,8 @@ async def get_server_status():
|
|
| 1033 |
"message": message,
|
| 1034 |
"active_sessions": len(active_sessions),
|
| 1035 |
"background_tasks": background_tasks_count,
|
| 1036 |
-
"total_active":
|
|
|
|
| 1037 |
}
|
| 1038 |
else:
|
| 1039 |
return {
|
|
@@ -1041,261 +1151,65 @@ async def get_server_status():
|
|
| 1041 |
"status": "idle",
|
| 1042 |
"message": "Server is ready for new translations",
|
| 1043 |
"active_sessions": 0,
|
| 1044 |
-
"background_tasks": 0
|
| 1045 |
-
|
| 1046 |
-
|
| 1047 |
-
if active_sessions:
|
| 1048 |
-
# Return the most recent active session
|
| 1049 |
-
latest_session = active_sessions[-1]
|
| 1050 |
-
return {
|
| 1051 |
-
"has_active_translation": True,
|
| 1052 |
-
"status": "processing",
|
| 1053 |
-
"message": f"Processing chunk {latest_session['completed_chunks']}/{latest_session['total_chunks']} | {latest_session['source_lang']} → {latest_session['target_lang']}",
|
| 1054 |
-
"session_data": latest_session
|
| 1055 |
-
}
|
| 1056 |
-
else:
|
| 1057 |
-
return {
|
| 1058 |
-
"has_active_translation": False,
|
| 1059 |
-
"status": "no_active_translation",
|
| 1060 |
-
"message": "No active translation on server"
|
| 1061 |
}
|
| 1062 |
|
| 1063 |
-
|
| 1064 |
-
async def confirm_completion_and_charge(request: Request):
|
| 1065 |
-
"""Endpoint برای تأیید تکمیل ترجمه و کسر اعتبار"""
|
| 1066 |
-
try:
|
| 1067 |
-
data = await request.json()
|
| 1068 |
-
session_id = data.get("session_id")
|
| 1069 |
-
request_id = data.get("request_id")
|
| 1070 |
-
|
| 1071 |
-
if not session_id and not request_id:
|
| 1072 |
-
raise HTTPException(status_code=400, detail="Session ID or Request ID required")
|
| 1073 |
-
|
| 1074 |
-
# استفاده از session_id یا request_id برای یافتن ترجمه
|
| 1075 |
-
identifier = session_id or request_id
|
| 1076 |
-
|
| 1077 |
-
# بررسی وضعیت ترجمه
|
| 1078 |
-
if identifier in translator.background_tasks:
|
| 1079 |
-
task = translator.background_tasks[identifier]
|
| 1080 |
-
|
| 1081 |
-
if task.done():
|
| 1082 |
-
try:
|
| 1083 |
-
translation, processing_time, chunks_count = await task
|
| 1084 |
-
|
| 1085 |
-
# حذف تسک از لیست تسکهای پسزمینه
|
| 1086 |
-
del translator.background_tasks[identifier]
|
| 1087 |
-
|
| 1088 |
-
return {
|
| 1089 |
-
"status": "completed",
|
| 1090 |
-
"translation": translation,
|
| 1091 |
-
"processing_time": processing_time,
|
| 1092 |
-
"chunks_processed": chunks_count,
|
| 1093 |
-
"confirmed": True,
|
| 1094 |
-
"message": "Translation completed and ready for charging"
|
| 1095 |
-
}
|
| 1096 |
-
except Exception as e:
|
| 1097 |
-
del translator.background_tasks[identifier]
|
| 1098 |
-
return {
|
| 1099 |
-
"status": "failed",
|
| 1100 |
-
"confirmed": False,
|
| 1101 |
-
"message": f"Translation failed: {str(e)}"
|
| 1102 |
-
}
|
| 1103 |
-
else:
|
| 1104 |
-
return {
|
| 1105 |
-
"status": "processing",
|
| 1106 |
-
"confirmed": False,
|
| 1107 |
-
"message": "Translation still in progress"
|
| 1108 |
-
}
|
| 1109 |
-
|
| 1110 |
-
# بررسی در ترجمههای جاری
|
| 1111 |
-
progress = translator.get_translation_progress(identifier)
|
| 1112 |
-
if progress:
|
| 1113 |
-
return {
|
| 1114 |
-
"status": "processing",
|
| 1115 |
-
"confirmed": False,
|
| 1116 |
-
"message": f"Processing chunk {progress['completed_chunks']}/{progress['total_chunks']}",
|
| 1117 |
-
"progress": progress
|
| 1118 |
-
}
|
| 1119 |
-
|
| 1120 |
-
# بررسی در کش (اگر ترجمه کامل شده باشد)
|
| 1121 |
-
# این بخش نیاز به پیاده سازی دارد تا ترجمههای کامل شده را بر اساس شناسه برگرداند
|
| 1122 |
-
|
| 1123 |
-
return {
|
| 1124 |
-
"status": "not_found",
|
| 1125 |
-
"confirmed": False,
|
| 1126 |
-
"message": "Translation session not found"
|
| 1127 |
-
}
|
| 1128 |
-
|
| 1129 |
-
except Exception as e:
|
| 1130 |
-
logger.error(f"[CONFIRM API] Error: {str(e)}")
|
| 1131 |
-
raise HTTPException(status_code=500, detail=f"Confirmation error: {str(e)}")
|
| 1132 |
|
| 1133 |
-
|
| 1134 |
-
|
| 1135 |
-
|
| 1136 |
-
|
| 1137 |
-
|
| 1138 |
-
|
| 1139 |
-
|
| 1140 |
-
|
| 1141 |
-
|
| 1142 |
-
|
| 1143 |
-
|
| 1144 |
-
|
| 1145 |
-
|
| 1146 |
-
|
| 1147 |
-
|
| 1148 |
-
|
| 1149 |
-
|
| 1150 |
-
|
| 1151 |
-
|
| 1152 |
-
|
| 1153 |
-
|
| 1154 |
-
|
| 1155 |
-
|
| 1156 |
-
|
| 1157 |
-
|
| 1158 |
-
|
| 1159 |
-
|
| 1160 |
-
|
| 1161 |
-
|
| 1162 |
-
|
| 1163 |
-
|
| 1164 |
-
|
| 1165 |
-
|
| 1166 |
-
|
| 1167 |
-
|
| 1168 |
-
|
| 1169 |
-
}
|
| 1170 |
-
|
| 1171 |
-
elif status_info['status'] == 'failed':
|
| 1172 |
-
# خطا در ترجمه
|
| 1173 |
-
return {
|
| 1174 |
-
"status": "failed",
|
| 1175 |
-
"completed": False,
|
| 1176 |
-
"ready_for_charging": False,
|
| 1177 |
-
"message": status_info.get('error', 'Translation failed')
|
| 1178 |
-
}
|
| 1179 |
-
|
| 1180 |
-
else:
|
| 1181 |
-
# یافت نشد
|
| 1182 |
-
return {
|
| 1183 |
-
"status": "not_found",
|
| 1184 |
-
"completed": False,
|
| 1185 |
-
"ready_for_charging": False,
|
| 1186 |
-
"message": "Translation request not found"
|
| 1187 |
-
}
|
| 1188 |
-
|
| 1189 |
-
except Exception as e:
|
| 1190 |
-
logger.error(f"[STATUS CHECK] Error: {str(e)}")
|
| 1191 |
-
raise HTTPException(status_code=500, detail=f"Status check error: {str(e)}")
|
| 1192 |
-
|
| 1193 |
-
@app.post("/api/check-completion")
|
| 1194 |
-
async def check_completion_status(request: Request):
|
| 1195 |
-
"""بررسی وضعیت تکمیل ترجمه برای کسر اعتبار"""
|
| 1196 |
-
try:
|
| 1197 |
-
data = await request.json()
|
| 1198 |
-
request_id = data.get("request_id")
|
| 1199 |
-
|
| 1200 |
-
if not request_id:
|
| 1201 |
-
raise HTTPException(status_code=400, detail="Request ID required")
|
| 1202 |
-
|
| 1203 |
-
# اینجا باید منطق بررسی وضعیت ترجمه بر اساس request_id پیادهسازی شود
|
| 1204 |
-
# برای سادگی، فرض میکنیم که اگر request_id در background_tasks وجود دارد،
|
| 1205 |
-
# ولی تسک کامل شده، میتوانیم وضعیت completed را برگردانیم
|
| 1206 |
-
|
| 1207 |
-
if request_id in translator.background_tasks:
|
| 1208 |
-
task = translator.background_tasks[request_id]
|
| 1209 |
-
|
| 1210 |
-
if task.done():
|
| 1211 |
-
try:
|
| 1212 |
-
translation, processing_time, chunks_count = await task
|
| 1213 |
-
return {
|
| 1214 |
-
"status": "completed",
|
| 1215 |
-
"completed": True,
|
| 1216 |
-
"message": "Translation completed successfully"
|
| 1217 |
-
}
|
| 1218 |
-
except Exception as e:
|
| 1219 |
-
return {
|
| 1220 |
-
"status": "failed",
|
| 1221 |
-
"completed": False,
|
| 1222 |
-
"message": f"Translation failed: {str(e)}"
|
| 1223 |
-
}
|
| 1224 |
-
else:
|
| 1225 |
-
return {
|
| 1226 |
-
"status": "processing",
|
| 1227 |
-
"completed": False,
|
| 1228 |
-
"message": "Translation in progress"
|
| 1229 |
-
}
|
| 1230 |
-
|
| 1231 |
-
# اگر در background_tasks نیست، ممکن است در حال پردازش باشد یا کامل شده
|
| 1232 |
-
progress = translator.get_translation_progress(request_id)
|
| 1233 |
-
if progress:
|
| 1234 |
-
return {
|
| 1235 |
-
"status": "processing",
|
| 1236 |
-
"completed": False,
|
| 1237 |
-
"message": f"Processing in progress: {progress['completed_chunks']}/{progress['total_chunks']} chunks"
|
| 1238 |
-
}
|
| 1239 |
-
|
| 1240 |
-
# بررسی آیا قبلاً کامل شده و در کش ذخیره شده
|
| 1241 |
-
# این بخش نیاز به پیادهسازی دارد
|
| 1242 |
-
|
| 1243 |
-
return {
|
| 1244 |
-
"status": "unknown",
|
| 1245 |
-
"completed": False,
|
| 1246 |
-
"message": "Request ID not found in active translations"
|
| 1247 |
-
}
|
| 1248 |
-
|
| 1249 |
-
except Exception as e:
|
| 1250 |
-
logger.error(f"[COMPLETION CHECK] Error: {str(e)}")
|
| 1251 |
-
return {
|
| 1252 |
-
"status": "error",
|
| 1253 |
-
"completed": False,
|
| 1254 |
-
"message": f"Error checking completion: {str(e)}"
|
| 1255 |
-
}
|
| 1256 |
|
| 1257 |
-
|
| 1258 |
-
|
| 1259 |
-
"""
|
| 1260 |
-
|
| 1261 |
-
|
| 1262 |
-
request_id = data.get("request_id")
|
| 1263 |
-
translation = data.get("translation")
|
| 1264 |
-
processing_time = data.get("processing_time", 0)
|
| 1265 |
-
|
| 1266 |
-
if not request_id or not translation:
|
| 1267 |
-
raise HTTPException(status_code=400, detail="Request ID and translation required")
|
| 1268 |
-
|
| 1269 |
-
logger.info(f"[NOTIFY] Translation completed for request_id: {request_id}")
|
| 1270 |
-
|
| 1271 |
-
# اینجا باید به وردپرس اطلاع دهید که ترجمه کامل شده
|
| 1272 |
-
# میتوانید از webhook یا درخواست HTTP به وردپرس استفاده کنید
|
| 1273 |
-
|
| 1274 |
-
# مثال: ارسال درخواست به وردپرس
|
| 1275 |
-
wp_url = "https://your-wordpress-site.com/wp-admin/admin-ajax.php"
|
| 1276 |
-
payload = {
|
| 1277 |
-
'action': 'amt_completion_notification',
|
| 1278 |
-
'request_id': request_id,
|
| 1279 |
-
'translation': translation,
|
| 1280 |
-
'processing_time': processing_time,
|
| 1281 |
-
'secret_key': 'your_secret_key_here' # برای امنیت
|
| 1282 |
-
}
|
| 1283 |
-
|
| 1284 |
try:
|
| 1285 |
-
|
| 1286 |
-
async with session.post(wp_url, data=payload) as response:
|
| 1287 |
-
if response.status == 200:
|
| 1288 |
-
logger.info(f"[NOTIFY] Successfully notified WordPress for request_id: {request_id}")
|
| 1289 |
-
else:
|
| 1290 |
-
logger.warning(f"[NOTIFY] WordPress notification failed: {response.status}")
|
| 1291 |
except Exception as e:
|
| 1292 |
-
logger.error(f"[
|
| 1293 |
-
|
| 1294 |
-
|
| 1295 |
-
|
| 1296 |
-
|
| 1297 |
-
|
| 1298 |
-
|
| 1299 |
-
|
| 1300 |
if __name__ == "__main__":
|
|
|
|
| 1301 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 12 |
from queue import Queue
|
| 13 |
import logging
|
| 14 |
from typing import Dict, List, Tuple, Optional
|
| 15 |
+
from fastapi import FastAPI, HTTPException, Request, Form
|
| 16 |
from fastapi.middleware.cors import CORSMiddleware
|
| 17 |
from pydantic import BaseModel
|
| 18 |
import uvicorn
|
| 19 |
+
import uuid
|
| 20 |
|
| 21 |
# Enhanced logging configuration
|
| 22 |
logging.basicConfig(
|
|
|
|
| 29 |
)
|
| 30 |
logger = logging.getLogger(__name__)
|
| 31 |
|
| 32 |
+
# Global storage for translation requests (WordPress integration)
|
| 33 |
+
translation_requests = {}
|
| 34 |
+
completed_translations = {}
|
| 35 |
+
translation_requests_lock = threading.Lock()
|
| 36 |
+
|
| 37 |
# Pydantic models for request/response
|
| 38 |
class TranslationRequest(BaseModel):
|
| 39 |
text: str
|
|
|
|
| 125 |
thread.start()
|
| 126 |
|
| 127 |
class TextChunker:
|
| 128 |
+
"""کلاس برای تقسیم متن طولانی به بخشهای کوچکتر"""
|
| 129 |
|
| 130 |
@staticmethod
|
| 131 |
def split_text_smart(text: str, max_chunk_size: int = 400) -> List[str]:
|
| 132 |
+
"""تقسیم هوشمند متن بر اساس جملات و پاراگرافها"""
|
| 133 |
logger.info(f"[CHUNKER] Starting smart text splitting | Text length: {len(text)} chars | Max chunk size: {max_chunk_size}")
|
| 134 |
|
| 135 |
if len(text) <= max_chunk_size:
|
|
|
|
| 138 |
|
| 139 |
chunks = []
|
| 140 |
|
| 141 |
+
# تقسیم بر اساس پاراگرافها
|
| 142 |
paragraphs = text.split('\n\n')
|
| 143 |
current_chunk = ""
|
| 144 |
|
| 145 |
for i, paragraph in enumerate(paragraphs):
|
| 146 |
logger.debug(f"[CHUNKER] Processing paragraph {i+1}/{len(paragraphs)} | Length: {len(paragraph)}")
|
| 147 |
|
| 148 |
+
# اگر پاراگراف خودش بزرگ است آن را تقسیم کن
|
| 149 |
if len(paragraph) > max_chunk_size:
|
| 150 |
+
# ذخیره قسمت فعلی اگر وجود دارد
|
| 151 |
if current_chunk.strip():
|
| 152 |
chunks.append(current_chunk.strip())
|
| 153 |
logger.debug(f"[CHUNKER] Added chunk from accumulated paragraphs | Length: {len(current_chunk.strip())}")
|
| 154 |
current_chunk = ""
|
| 155 |
|
| 156 |
+
# تقسیم پاراگراف بزرگ
|
| 157 |
sub_chunks = TextChunker._split_paragraph(paragraph, max_chunk_size)
|
| 158 |
chunks.extend(sub_chunks)
|
| 159 |
logger.debug(f"[CHUNKER] Split large paragraph into {len(sub_chunks)} sub-chunks")
|
| 160 |
else:
|
| 161 |
+
# بررسی اینکه آیا اضافه کردن این پاراگراف از حد تجاوز میکند
|
| 162 |
if len(current_chunk) + len(paragraph) + 2 > max_chunk_size:
|
| 163 |
if current_chunk.strip():
|
| 164 |
chunks.append(current_chunk.strip())
|
|
|
|
| 170 |
else:
|
| 171 |
current_chunk = paragraph
|
| 172 |
|
| 173 |
+
# اضافه کردن آخرین قسمت
|
| 174 |
if current_chunk.strip():
|
| 175 |
chunks.append(current_chunk.strip())
|
| 176 |
logger.debug(f"[CHUNKER] Added final chunk | Length: {len(current_chunk.strip())}")
|
|
|
|
| 180 |
|
| 181 |
@staticmethod
|
| 182 |
def _split_paragraph(paragraph: str, max_chunk_size: int) -> List[str]:
|
| 183 |
+
"""تقسیم پاراگراف بزرگ به جملات"""
|
| 184 |
logger.debug(f"[CHUNKER] Splitting large paragraph | Length: {len(paragraph)}")
|
| 185 |
|
| 186 |
+
# تقسیم بر اساس جملات
|
| 187 |
sentences = re.split(r'[.!?]+\s+', paragraph)
|
| 188 |
chunks = []
|
| 189 |
current_chunk = ""
|
|
|
|
| 192 |
if not sentence.strip():
|
| 193 |
continue
|
| 194 |
|
| 195 |
+
# اضافه کردن علامت نقطه اگر حذف شده
|
| 196 |
if not sentence.endswith(('.', '!', '?')):
|
| 197 |
sentence += '.'
|
| 198 |
|
| 199 |
if len(sentence) > max_chunk_size:
|
| 200 |
+
# جمله خودش خیلی بلند است - تقسیم بر اساس کاما
|
| 201 |
if current_chunk.strip():
|
| 202 |
chunks.append(current_chunk.strip())
|
| 203 |
current_chunk = ""
|
|
|
|
| 223 |
|
| 224 |
@staticmethod
|
| 225 |
def _split_by_comma(sentence: str, max_chunk_size: int) -> List[str]:
|
| 226 |
+
"""تقسیم جمله طولانی بر اساس کاما"""
|
| 227 |
logger.debug(f"[CHUNKER] Splitting long sentence by comma | Length: {len(sentence)}")
|
| 228 |
|
| 229 |
parts = sentence.split(', ')
|
|
|
|
| 232 |
|
| 233 |
for part in parts:
|
| 234 |
if len(part) > max_chunk_size:
|
| 235 |
+
# قسمت خودش خیلی بلند است - تقسیم اجباری
|
| 236 |
if current_chunk.strip():
|
| 237 |
chunks.append(current_chunk.strip())
|
| 238 |
current_chunk = ""
|
| 239 |
|
| 240 |
+
# تقسیم اجباری بر اساس طول
|
| 241 |
while len(part) > max_chunk_size:
|
| 242 |
chunks.append(part[:max_chunk_size].strip())
|
| 243 |
part = part[max_chunk_size:].strip()
|
|
|
|
| 265 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 266 |
logger.info(f"[INIT] Using device: {self.device}")
|
| 267 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
# Initialize cache and queue
|
| 269 |
self.cache = TranslationCache(cache_duration_minutes)
|
| 270 |
self.queue = TranslationQueue()
|
|
|
|
| 288 |
logger.error(f"[INIT] Error loading model: {e}")
|
| 289 |
raise
|
| 290 |
|
| 291 |
+
# تنظیمات بهینه برای ترجمه متنهای بلند
|
| 292 |
+
self.max_chunk_size = 350 # حداکثر طول هر قسمت
|
| 293 |
+
self.min_chunk_overlap = 20 # همپوشانی بین قسمتها
|
| 294 |
|
| 295 |
# Track translation progress
|
| 296 |
self.current_translation = {}
|
|
|
|
| 299 |
logger.info(f"[INIT] Translator initialized | Max chunk size: {self.max_chunk_size} chars")
|
| 300 |
|
| 301 |
def translate_chunk(self, text: str, source_lang: str, target_lang: str, chunk_index: int = 0, total_chunks: int = 1) -> str:
|
| 302 |
+
"""ترجمه یک قسمت کوچک از متن"""
|
| 303 |
try:
|
| 304 |
+
logger.info(f"[TRANSLATE] Starting chunk translation [{chunk_index+1}/{total_chunks}] | {source_lang} → {target_lang} | Length: {len(text)} chars")
|
| 305 |
|
| 306 |
# Set source language for tokenizer
|
| 307 |
self.tokenizer.src_lang = source_lang
|
|
|
|
| 315 |
generated_tokens = self.model.generate(
|
| 316 |
**encoded,
|
| 317 |
forced_bos_token_id=self.tokenizer.get_lang_id(target_lang),
|
| 318 |
+
max_length=1024, # افزایش طول خروجی
|
| 319 |
+
min_length=10, # حداقل طول خروجی
|
| 320 |
+
num_beams=5, # افزایش تعداد beam ها برای کیفیت بهتر
|
| 321 |
early_stopping=True,
|
| 322 |
+
no_repeat_ngram_size=3, # جلوگیری از تکرار
|
| 323 |
+
length_penalty=1.0, # تنظیم جریمه طول
|
| 324 |
+
repetition_penalty=1.2, # جلوگیری از تکرار کلمات
|
| 325 |
+
do_sample=False, # استفاده از روش قطعی
|
| 326 |
+
temperature=0.7, # کنترل تنوع
|
| 327 |
pad_token_id=self.tokenizer.pad_token_id,
|
| 328 |
eos_token_id=self.tokenizer.eos_token_id
|
| 329 |
)
|
|
|
|
| 332 |
# Decode result
|
| 333 |
translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
| 334 |
|
| 335 |
+
# پاکسازی ترجمه از کاراکترهای اضافی
|
| 336 |
translation = translation.strip()
|
| 337 |
|
| 338 |
logger.info(f"[TRANSLATE] Chunk translation completed [{chunk_index+1}/{total_chunks}] | Generation time: {generation_time:.2f}s | Output length: {len(translation)} chars")
|
|
|
|
| 344 |
return f"[Translation Error: {str(e)}]"
|
| 345 |
|
| 346 |
def translate_text(self, text: str, source_lang: str, target_lang: str, session_id: str = None) -> Tuple[str, float, int]:
|
| 347 |
+
"""ترجمه متن با پشتیبانی از متنهای طولانی و لاگهای مفصل"""
|
| 348 |
start_time = time.time()
|
| 349 |
|
| 350 |
if not session_id:
|
|
|
|
| 356 |
cached_result = self.cache.get(text, source_lang, target_lang)
|
| 357 |
if cached_result:
|
| 358 |
logger.info(f"[SESSION:{session_id}] Translation completed from cache | Time: {time.time() - start_time:.2f}s")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
return cached_result, time.time() - start_time, 1
|
| 360 |
|
| 361 |
try:
|
| 362 |
+
# اگر متن کوتاه است مستقیماً ترجمه کن
|
| 363 |
if len(text) <= self.max_chunk_size:
|
| 364 |
logger.info(f"[SESSION:{session_id}] Processing as short text")
|
| 365 |
translation = self.translate_chunk(text, source_lang, target_lang, 0, 1)
|
|
|
|
| 369 |
processing_time = time.time() - start_time
|
| 370 |
logger.info(f"[SESSION:{session_id}] Short text translation completed | Total time: {processing_time:.2f}s")
|
| 371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
return translation, processing_time, 1
|
| 373 |
|
| 374 |
+
# تقسیم متن طولانی به قسمتهای کوچکتر
|
| 375 |
logger.info(f"[SESSION:{session_id}] Processing as long text - starting chunking")
|
| 376 |
chunks = TextChunker.split_text_smart(text, self.max_chunk_size)
|
| 377 |
logger.info(f"[SESSION:{session_id}] Text split into {len(chunks)} chunks")
|
|
|
|
| 386 |
'target_lang': target_lang
|
| 387 |
}
|
| 388 |
|
| 389 |
+
# ترجمه هر قسمت
|
| 390 |
translated_chunks = []
|
| 391 |
for i, chunk in enumerate(chunks):
|
| 392 |
chunk_start_time = time.time()
|
| 393 |
logger.info(f"[SESSION:{session_id}] Starting chunk {i+1}/{len(chunks)} | Chunk length: {len(chunk)} chars")
|
| 394 |
|
| 395 |
+
# بررسی کش برای هر قسمت
|
| 396 |
chunk_translation = self.cache.get(chunk, source_lang, target_lang)
|
| 397 |
|
| 398 |
if not chunk_translation:
|
| 399 |
+
# Estimate remaining time
|
| 400 |
if i > 0:
|
| 401 |
elapsed_time = time.time() - start_time
|
| 402 |
avg_time_per_chunk = elapsed_time / i
|
|
|
|
| 404 |
logger.info(f"[SESSION:{session_id}] Progress: {i}/{len(chunks)} | Avg time per chunk: {avg_time_per_chunk:.1f}s | Estimated remaining: {estimated_remaining:.1f}s")
|
| 405 |
|
| 406 |
chunk_translation = self.translate_chunk(chunk, source_lang, target_lang, i, len(chunks))
|
| 407 |
+
# ذخیره قسمت در کش
|
| 408 |
self.cache.set(chunk, source_lang, target_lang, chunk_translation)
|
| 409 |
|
| 410 |
chunk_time = time.time() - chunk_start_time
|
|
|
|
| 419 |
if session_id in self.current_translation:
|
| 420 |
self.current_translation[session_id]['completed_chunks'] = i + 1
|
| 421 |
|
| 422 |
+
# کمی استراحت بین ترجمهها برای جلوگیری از بارذاری زیاد
|
| 423 |
if i < len(chunks) - 1:
|
| 424 |
time.sleep(0.1)
|
| 425 |
|
| 426 |
+
# ترکیب قسمتهای ترجمه شده
|
| 427 |
logger.info(f"[SESSION:{session_id}] Combining translated chunks")
|
| 428 |
final_translation = self._combine_translations(translated_chunks, text)
|
| 429 |
|
|
|
|
| 431 |
self.cache.set(text, source_lang, target_lang, final_translation)
|
| 432 |
|
| 433 |
processing_time = time.time() - start_time
|
| 434 |
+
|
| 435 |
+
# Mark as completed for WordPress integration
|
| 436 |
logger.info(f"[SESSION:{session_id}] Long text translation completed | Total time: {processing_time:.2f}s | Chunks: {len(chunks)} | Final length: {len(final_translation)} chars")
|
| 437 |
|
| 438 |
+
# Store in completed_translations for WordPress to check
|
| 439 |
+
with translation_requests_lock:
|
| 440 |
+
completed_translations[session_id] = {
|
| 441 |
+
'translation': final_translation,
|
| 442 |
+
'processing_time': processing_time,
|
| 443 |
+
'character_count': len(text),
|
| 444 |
+
'source_lang': source_lang,
|
| 445 |
+
'target_lang': target_lang,
|
| 446 |
+
'completed_at': datetime.now().isoformat(),
|
| 447 |
+
'request_id': session_id,
|
| 448 |
+
'status': 'completed'
|
| 449 |
+
}
|
| 450 |
+
|
| 451 |
+
# Remove from processing requests if exists
|
| 452 |
+
if session_id in translation_requests:
|
| 453 |
+
del translation_requests[session_id]
|
| 454 |
|
| 455 |
# Clean up progress tracking
|
| 456 |
with self.translation_lock:
|
|
|
|
| 464 |
with self.translation_lock:
|
| 465 |
self.current_translation.pop(session_id, None)
|
| 466 |
return f"Translation error: {str(e)}", time.time() - start_time, 0
|
| 467 |
+
|
| 468 |
def get_translation_progress(self, session_id: str) -> Dict:
|
| 469 |
"""Get current translation progress"""
|
| 470 |
with self.translation_lock:
|
|
|
|
| 488 |
'estimated_remaining': estimated_remaining,
|
| 489 |
'progress_percentage': (progress['completed_chunks'] / progress['total_chunks']) * 100
|
| 490 |
}
|
| 491 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
def _combine_translations(self, translated_chunks: List[str], original_text: str) -> str:
|
| 493 |
+
"""ترکیب قسمتهای ترجمه شده به یک متن یکپارچه"""
|
| 494 |
if not translated_chunks:
|
| 495 |
return ""
|
| 496 |
|
|
|
|
| 499 |
|
| 500 |
logger.debug(f"[COMBINER] Combining {len(translated_chunks)} translated chunks")
|
| 501 |
|
| 502 |
+
# ترکیب قسمتها با در نظر گیری ساختار اصلی متن
|
| 503 |
combined = []
|
| 504 |
|
| 505 |
for i, chunk in enumerate(translated_chunks):
|
| 506 |
+
# پاکسازی قسمت
|
| 507 |
chunk = chunk.strip()
|
| 508 |
|
| 509 |
if not chunk:
|
| 510 |
continue
|
| 511 |
|
| 512 |
+
# اضافه کردن فاصله مناسب بین قسمتها
|
| 513 |
if i > 0 and combined:
|
| 514 |
+
# اگر قسمت قبلی با نقطه تمام نمیشود فاصله اضافه کن
|
| 515 |
+
if not combined[-1].rstrip().endswith(('.', '!', '?', ':', '۔', '.')):
|
| 516 |
combined[-1] += '.'
|
| 517 |
|
| 518 |
+
# Check whether a new paragraph break is needed
|
| 519 |
if '\n\n' in original_text:
|
| 520 |
combined.append('\n\n' + chunk)
|
| 521 |
else:
|
|
|
|
| 525 |
|
| 526 |
result = ''.join(combined)
|
| 527 |
|
| 528 |
+
# پاکسازی نهایی
|
| 529 |
+
result = re.sub(r'\s+', ' ', result) # حذف فاصلههای اضافی
|
| 530 |
+
result = re.sub(r'\.+', '.', result) # حذف نقطههای تکراری
|
| 531 |
result = result.strip()
|
| 532 |
|
| 533 |
logger.debug(f"[COMBINER] Combined translation length: {len(result)} chars")
|
| 534 |
return result
|
| 535 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 536 |
async def translate_text_async(self, text: str, source_lang: str, target_lang: str, session_id: str = None):
|
| 537 |
"""Async wrapper for translate_text"""
|
| 538 |
loop = asyncio.get_event_loop()
|
|
|
|
| 542 |
text, source_lang, target_lang, session_id
|
| 543 |
)
|
| 544 |
|
| 545 |
+
def process_heavy_translation_background(request_id: str, text: str, source_lang: str, target_lang: str):
    """Translate a long text in a worker thread and publish the outcome.

    The result is written to the module-level ``completed_translations`` dict
    under ``request_id`` and the matching entry is removed from
    ``translation_requests``; both are guarded by ``translation_requests_lock``.

    Args:
        request_id: Identifier the caller polls with; also passed to
            ``translator.translate_text`` as its session id.
        text: Source text to translate.
        source_lang: Source language code handed to the translator.
        target_lang: Target language code handed to the translator.

    Returns:
        None. Success is recorded with ``status == 'completed'``; any failure
        is recorded with ``status == 'failed'`` and an ``error`` message.
    """
    # Taken before the try so the failure path can always compute elapsed time.
    start_time = time.time()

    try:
        logger.info(f"[HF Server] Background processing started for request: {request_id}")

        # Signal to pollers that work has actually begun.
        with translation_requests_lock:
            if request_id in translation_requests:
                translation_requests[request_id]['progress'] = 10

        translation, _, _ = translator.translate_text(
            text, source_lang, target_lang, request_id
        )

        # translate_text reports failures by RETURNING a "Translation error: ..."
        # string instead of raising. Apply the same validity test the form
        # endpoint uses so a failed job is never published as 'completed'.
        if not translation or not translation.strip() or translation.startswith("Translation error"):
            raise RuntimeError(translation or "Translation returned an empty result")

        processing_time = time.time() - start_time

        with translation_requests_lock:
            completed_translations[request_id] = {
                'translation': translation,
                'processing_time': processing_time,
                'character_count': len(text),
                'source_lang': source_lang,
                'target_lang': target_lang,
                'completed_at': datetime.now().isoformat(),
                'request_id': request_id,
                'status': 'completed'
            }
            # The request is no longer in flight.
            translation_requests.pop(request_id, None)

        logger.info(f"[HF Server] Long text translation completed for request: {request_id} in {processing_time:.2f}s")

    except Exception as e:
        # Thread boundary: nothing above us can handle the error, so record it
        # where the polling endpoints can see it.
        logger.error(f"[HF Server] Background processing error for {request_id}: {str(e)}")

        with translation_requests_lock:
            completed_translations[request_id] = {
                'translation': '',
                'error': str(e),
                'status': 'failed',
                'processing_time': time.time() - start_time,
                'completed_at': datetime.now().isoformat(),
                'request_id': request_id
            }
            translation_requests.pop(request_id, None)
|
| 603 |
+
|
| 604 |
+
def perform_translation_internal(text: str, source_lang: str, target_lang: str) -> str:
    """Return only the translated string from ``translator.translate_text``.

    The timing and chunk-count values of the translator's result are
    discarded. If the call (or unpacking its result) raises, the error is
    logged and a ``"Translation error: ..."`` string is returned instead of
    propagating the exception.
    """
    try:
        translated, _elapsed, _chunks = translator.translate_text(text, source_lang, target_lang)
    except Exception as err:
        reason = str(err)
        logger.error(f"[INTERNAL] Translation error: {reason}")
        return f"Translation error: {reason}"
    else:
        return translated
|
| 614 |
+
|
| 615 |
# Language mappings for M2M100 model
|
| 616 |
LANGUAGE_MAP = {
|
| 617 |
"English": "en",
|
|
|
|
| 698 |
allow_headers=["*"],
|
| 699 |
)
|
| 700 |
|
| 701 |
+
# ========== NEW WORDPRESS INTEGRATION ENDPOINTS ==========
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 702 |
|
| 703 |
+
@app.post("/api/check-completion")
async def check_completion(request: Request):
    """Report whether a translation request has finished successfully.

    Reads ``request_id`` from the posted form data and looks it up in the
    module-level ``completed_translations`` / ``translation_requests`` dicts.
    Per the original docstring, WordPress calls this to confirm completion
    before charging credits, so ``verified`` is True only for a job whose
    stored status is not ``'failed'``.

    Returns:
        A dict whose ``status`` is one of ``'completed'``, ``'failed'``,
        ``'processing'``, ``'not_found'`` or ``'error'``.
    """
    try:
        form_data = await request.form()
        request_id = form_data.get('request_id', '').strip()

        if not request_id:
            return {
                'status': 'error',
                'message': 'Request ID is required'
            }

        logger.info(f"[HF Server] Completion verification requested for: {request_id}")

        # Take a consistent snapshot of both registries, then release the lock
        # before logging / building the response.
        with translation_requests_lock:
            completion_data = completed_translations.get(request_id)
            still_processing = request_id in translation_requests

        if completion_data is not None:
            # The background worker stores failed jobs in the same dict with
            # status 'failed'; those must never be confirmed as completed.
            if completion_data.get('status') == 'failed':
                logger.info(f"[HF Server] Completion verification for {request_id}: FAILED")

                return {
                    'status': 'failed',
                    'request_id': request_id,
                    'completed_at': completion_data.get('completed_at'),
                    'processing_time': completion_data.get('processing_time', 0),
                    'message': completion_data.get('error') or 'Translation failed',
                    'verified': False
                }

            logger.info(f"[HF Server] Completion verification for {request_id}: COMPLETED")

            return {
                'status': 'completed',
                'request_id': request_id,
                'completed_at': completion_data.get('completed_at'),
                'processing_time': completion_data.get('processing_time', 0),
                'verified': True
            }

        if still_processing:
            logger.info(f"[HF Server] Completion verification for {request_id}: STILL PROCESSING")

            return {
                'status': 'processing',
                'request_id': request_id,
                'verified': False
            }

        logger.info(f"[HF Server] Completion verification for {request_id}: NOT FOUND")

        return {
            'status': 'not_found',
            'request_id': request_id,
            'message': 'Request ID not found'
        }

    except Exception as e:
        # Endpoint boundary: log the detail, return a generic error payload.
        logger.error(f"[HF Server] Error in check_completion: {str(e)}")
        return {
            'status': 'error',
            'message': 'Server error occurred'
        }
|
| 761 |
+
|
| 762 |
+
@app.post("/api/check-translation-status")
async def check_translation_status(request: Request):
    """Return the current status of a translation request, with its result.

    Reads ``request_id`` from the posted form data and looks it up in the
    module-level ``completed_translations`` / ``translation_requests`` dicts.

    Returns:
        A dict whose ``status`` is one of:
        ``'completed'`` (includes the translation and its metadata),
        ``'failed'`` (includes the stored error message),
        ``'processing'`` (includes ``started_at`` and ``progress``),
        ``'not_found'`` or ``'error'``.
    """
    try:
        form_data = await request.form()
        request_id = form_data.get('request_id', '').strip()

        if not request_id:
            return {
                'status': 'error',
                'message': 'Request ID is required'
            }

        logger.info(f"[HF Server] Translation status check for: {request_id}")

        # Snapshot both registries under the lock; respond after releasing it.
        with translation_requests_lock:
            result = completed_translations.get(request_id)
            req_data = translation_requests.get(request_id)

        if result is not None:
            # Failed jobs live in the same dict with status 'failed' and an
            # empty translation; report them as failures, not as completed.
            if result.get('status') == 'failed':
                logger.info(f"[HF Server] Translation status check for {request_id}: FAILED")

                return {
                    'status': 'failed',
                    'request_id': request_id,
                    'message': result.get('error') or 'Translation failed',
                    'processing_time': result.get('processing_time', 0),
                    'completed_at': result.get('completed_at')
                }

            logger.info(f"[HF Server] Translation status check for {request_id}: COMPLETED - returning translation")

            return {
                'status': 'completed',
                'request_id': request_id,
                'translation': result.get('translation', ''),
                'processing_time': result.get('processing_time', 0),
                'character_count': result.get('character_count', 0),
                'completed_at': result.get('completed_at'),
                'source_lang': result.get('source_lang', ''),
                'target_lang': result.get('target_lang', '')
            }

        if req_data is not None:
            logger.info(f"[HF Server] Translation status check for {request_id}: STILL PROCESSING")

            return {
                'status': 'processing',
                'request_id': request_id,
                'started_at': req_data.get('started_at'),
                'progress': req_data.get('progress', 0)
            }

        logger.info(f"[HF Server] Translation status check for {request_id}: NOT FOUND")

        return {
            'status': 'not_found',
            'request_id': request_id,
            'message': 'Translation request not found'
        }

    except Exception as e:
        # Endpoint boundary: log the detail, return a generic error payload.
        logger.error(f"[HF Server] Error in check_translation_status: {str(e)}")
        return {
            'status': 'error',
            'message': 'Server error occurred'
        }
|
| 826 |
+
|
| 827 |
+
# ========== UPDATED MAIN TRANSLATION ENDPOINT ==========
|
| 828 |
|
|
|
|
| 829 |
@app.post("/api/translate/form")
|
| 830 |
async def api_translate_form(request: Request):
|
| 831 |
+
"""
|
| 832 |
+
Enhanced translation endpoint that handles both short and long texts.
|
| 833 |
+
For long texts, returns immediately with request_id for background processing.
|
| 834 |
+
"""
|
| 835 |
try:
|
| 836 |
form_data = await request.form()
|
| 837 |
text = form_data.get("text", "")
|
|
|
|
| 846 |
target_lang = json_data.get("target_lang", "")
|
| 847 |
api_key = json_data.get("api_key", None)
|
| 848 |
except:
|
| 849 |
+
return {"status": "error", "message": "Invalid request format"}
|
|
|
|
|
|
|
| 850 |
|
| 851 |
if not text.strip():
|
| 852 |
logger.error("[FORM API] No text provided")
|
| 853 |
+
return {"status": "error", "message": "Text, source language, and target language are required"}
|
| 854 |
|
| 855 |
source_code = LANGUAGE_MAP.get(source_lang)
|
| 856 |
target_code = LANGUAGE_MAP.get(target_lang)
|
|
|
|
| 859 |
logger.error(f"[FORM API] Invalid language codes: {source_lang} -> {target_lang}")
|
| 860 |
return {"status": "error", "message": "Invalid language codes"}
|
| 861 |
|
| 862 |
+
char_count = len(text)
|
| 863 |
+
is_heavy_text = char_count > 1000 # Same threshold as WordPress
|
| 864 |
+
|
| 865 |
+
logger.info(f"[FORM API] Translation request: {char_count} chars, {source_lang} → {target_lang}, Heavy: {is_heavy_text}")
|
| 866 |
|
| 867 |
+
if is_heavy_text:
|
| 868 |
+
# Generate request ID for background processing
|
| 869 |
+
request_id = str(uuid.uuid4())
|
| 870 |
+
|
| 871 |
+
# First check cache for immediate return
|
| 872 |
cached_result = translator.cache.get(text, source_code, target_code)
|
| 873 |
if cached_result:
|
| 874 |
+
logger.info(f"[FORM API] Returning cached translation immediately for request: {request_id}")
|
| 875 |
return {
|
| 876 |
"translation": cached_result,
|
| 877 |
"source_language": source_lang,
|
| 878 |
"target_language": target_lang,
|
| 879 |
"processing_time": 0.0,
|
| 880 |
+
"character_count": char_count,
|
| 881 |
"status": "success",
|
| 882 |
"chunks_processed": None,
|
| 883 |
+
"request_id": request_id,
|
|
|
|
| 884 |
"cached": True
|
| 885 |
}
|
| 886 |
+
|
| 887 |
+
# Store request for processing
|
| 888 |
+
with translation_requests_lock:
|
| 889 |
+
translation_requests[request_id] = {
|
| 890 |
+
'text': text,
|
| 891 |
+
'source_lang': source_code,
|
| 892 |
+
'target_lang': target_code,
|
| 893 |
+
'started_at': datetime.now().isoformat(),
|
| 894 |
+
'character_count': char_count,
|
| 895 |
+
'progress': 0
|
| 896 |
+
}
|
| 897 |
+
|
| 898 |
+
# Start background processing
|
| 899 |
+
thread = threading.Thread(
|
| 900 |
+
target=process_heavy_translation_background,
|
| 901 |
+
args=(request_id, text, source_code, target_code)
|
| 902 |
)
|
| 903 |
+
thread.daemon = True
|
| 904 |
+
thread.start()
|
| 905 |
|
| 906 |
+
logger.info(f"[FORM API] Started background processing for request: {request_id}")
|
| 907 |
|
| 908 |
return {
|
| 909 |
+
'is_background': True,
|
| 910 |
+
'session_id': request_id,
|
| 911 |
+
'request_id': request_id,
|
| 912 |
+
'status': 'processing',
|
| 913 |
+
'message': f'Long text ({char_count} characters) is being processed in background. Use the request ID to check status.',
|
| 914 |
+
'character_count': char_count
|
|
|
|
| 915 |
}
|
| 916 |
+
|
| 917 |
else:
|
| 918 |
# Process short text immediately
|
| 919 |
try:
|
| 920 |
+
start_time = time.time()
|
| 921 |
+
|
| 922 |
+
translation, processing_time, chunks_count = translator.translate_text(
|
| 923 |
+
text, source_code, target_code
|
| 924 |
)
|
| 925 |
|
| 926 |
+
# Check translation content
|
| 927 |
if not translation or not translation.strip() or translation.startswith("Translation error"):
|
| 928 |
logger.error(f"[FORM API] Invalid translation result: {translation[:100] if translation else 'None'}")
|
| 929 |
return {
|
| 930 |
"status": "error",
|
| 931 |
+
"message": "Translation failed - empty or invalid result"
|
|
|
|
| 932 |
}
|
| 933 |
|
| 934 |
+
logger.info(f"[FORM API] Short text translation completed in {processing_time:.2f}s")
|
| 935 |
+
|
| 936 |
return {
|
| 937 |
+
'status': 'success',
|
| 938 |
+
'translation': translation,
|
| 939 |
+
'processing_time': processing_time,
|
| 940 |
+
'character_count': char_count,
|
| 941 |
+
'source_lang': source_lang,
|
| 942 |
+
'target_lang': target_lang
|
|
|
|
|
|
|
| 943 |
}
|
| 944 |
+
|
| 945 |
except Exception as e:
|
| 946 |
logger.error(f"[FORM API] Translation error: {str(e)}")
|
| 947 |
+
return {"status": "error", "message": f"Translation failed: {str(e)}"}
|
| 948 |
+
|
| 949 |
+
# ========== EXISTING ENDPOINTS (UPDATED) ==========
|
| 950 |
+
|
| 951 |
+
@app.get("/")
async def root():
    """Return a static service banner: name/version, status and feature list."""
    supported_features = [
        "enhanced_logging",
        "progress_tracking",
        "long_text_support",
        "smart_chunking",
        "cache_optimization",
        "wordpress_integration",
        "delayed_charging_support",
    ]
    banner = {
        "message": "Enhanced Multilingual Translation API v2.1 with WordPress Integration",
        "status": "active",
        "features": supported_features,
    }
    return banner
|
| 966 |
+
|
| 967 |
+
@app.post("/api/translate")
async def api_translate(request: TranslationRequest):
    """Translate the posted text synchronously and return the result.

    Args:
        request: Body with ``text``, ``source_lang`` and ``target_lang``; the
            language names are mapped to model codes through ``LANGUAGE_MAP``.

    Returns:
        A ``TranslationResponse`` with ``status="success"``.

    Raises:
        HTTPException: 400 when the text is blank or a language is not in
            ``LANGUAGE_MAP``; 500 when translation fails.
    """
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="No text provided")

    source_code = LANGUAGE_MAP.get(request.source_lang)
    target_code = LANGUAGE_MAP.get(request.target_lang)

    if not source_code or not target_code:
        raise HTTPException(status_code=400, detail="Invalid language codes")

    try:
        # Short per-request id used for progress tracking and log correlation.
        session_id = hashlib.md5(f"{request.text[:100]}{time.time()}".encode()).hexdigest()[:8]

        translation, processing_time, chunks_count = translator.translate_text(
            request.text, source_code, target_code, session_id
        )

        # translate_text reports failures by RETURNING a "Translation error: ..."
        # string rather than raising; without this check the error text would
        # be delivered to the client as a successful translation. Same test as
        # the form endpoint.
        if not translation or not translation.strip() or translation.startswith("Translation error"):
            logger.error(f"[API] Invalid translation result: {translation[:100] if translation else 'None'}")
            raise HTTPException(
                status_code=500,
                detail=translation if translation and translation.strip() else "Translation error: empty result"
            )

        return TranslationResponse(
            translation=translation,
            source_language=request.source_lang,
            target_language=request.target_lang,
            processing_time=processing_time,
            character_count=len(request.text),
            status="success",
            chunks_processed=chunks_count
        )
    except HTTPException:
        # Already a well-formed HTTP error; do not re-wrap it below.
        raise
    except Exception as e:
        logger.error(f"[API] Translation error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}")
|
| 999 |
|
| 1000 |
@app.get("/api/progress/{session_id}")
|
| 1001 |
async def get_translation_progress(session_id: str):
|
|
|
|
| 1021 |
@app.get("/api/health")
|
| 1022 |
async def health_check():
|
| 1023 |
"""Health check endpoint"""
|
| 1024 |
+
with translation_requests_lock:
|
| 1025 |
+
active_requests = len(translation_requests)
|
| 1026 |
+
completed_cache = len(completed_translations)
|
| 1027 |
+
|
| 1028 |
return {
|
| 1029 |
"status": "healthy",
|
| 1030 |
"device": str(translator.device),
|
|
|
|
| 1032 |
"cache_size": len(translator.cache.cache),
|
| 1033 |
"max_chunk_size": translator.max_chunk_size,
|
| 1034 |
"active_translations": len(translator.current_translation),
|
| 1035 |
+
"active_requests": active_requests,
|
| 1036 |
+
"completed_cache": completed_cache,
|
| 1037 |
"version": "2.1.0"
|
| 1038 |
}
|
| 1039 |
|
|
|
|
| 1099 |
|
| 1100 |
@app.get("/api/server-status")
|
| 1101 |
async def get_server_status():
|
| 1102 |
+
"""Get current server status - enhanced for WordPress integration"""
|
| 1103 |
active_sessions = []
|
| 1104 |
+
|
| 1105 |
+
with translation_requests_lock:
|
| 1106 |
+
background_tasks_count = len(translation_requests)
|
| 1107 |
+
completed_count = len(completed_translations)
|
| 1108 |
|
| 1109 |
with translator.translation_lock:
|
| 1110 |
for session_id, progress in translator.current_translation.items():
|
|
|
|
| 1127 |
'estimated_remaining': estimated_remaining
|
| 1128 |
})
|
| 1129 |
|
| 1130 |
+
total_active = len(active_sessions) + background_tasks_count
|
| 1131 |
+
|
| 1132 |
+
if total_active > 0:
|
| 1133 |
if active_sessions:
|
| 1134 |
latest_session = active_sessions[-1]
|
| 1135 |
+
message = f"Processing chunk {latest_session['completed_chunks']}/{latest_session['total_chunks']} | {latest_session['source_lang']} → {latest_session['target_lang']}"
|
| 1136 |
else:
|
| 1137 |
message = f"{background_tasks_count} translation(s) in background queue"
|
| 1138 |
|
|
|
|
| 1142 |
"message": message,
|
| 1143 |
"active_sessions": len(active_sessions),
|
| 1144 |
"background_tasks": background_tasks_count,
|
| 1145 |
+
"total_active": total_active,
|
| 1146 |
+
"completed_cache": completed_count
|
| 1147 |
}
|
| 1148 |
else:
|
| 1149 |
return {
|
|
|
|
| 1151 |
"status": "idle",
|
| 1152 |
"message": "Server is ready for new translations",
|
| 1153 |
"active_sessions": 0,
|
| 1154 |
+
"background_tasks": 0,
|
| 1155 |
+
"completed_cache": completed_count
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1156 |
}
|
| 1157 |
|
| 1158 |
+
# ========== CLEANUP AND MAINTENANCE FUNCTIONS ==========
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1159 |
|
| 1160 |
+
def _expired_request_ids(entries, timestamp_key, max_age_seconds, now) -> List[str]:
    """Return the ids in *entries* whose timestamp is invalid or too old.

    Args:
        entries: Mapping of request id to a record expected to be a dict.
        timestamp_key: Key under which the record stores an ISO-8601 string.
        max_age_seconds: Age above which a record counts as expired.
        now: Naive "current time" used for naive timestamps.

    A record is reported as expired when it is not a dict, when the timestamp
    is missing or unparseable, or when its age exceeds *max_age_seconds*.
    """
    expired = []
    for req_id, data in entries.items():
        raw_timestamp = data.get(timestamp_key, '') if isinstance(data, dict) else None
        try:
            moment = datetime.fromisoformat(raw_timestamp)
        except (TypeError, ValueError):
            # Non-string or malformed timestamp: treat the entry as invalid.
            expired.append(req_id)
            continue

        # Naive and aware datetimes cannot be subtracted; compare an aware
        # timestamp against "now" expressed in that same timezone.
        reference = now if moment.tzinfo is None else datetime.now(moment.tzinfo)
        if (reference - moment).total_seconds() > max_age_seconds:
            expired.append(req_id)
    return expired


def cleanup_old_requests(completed_max_age: float = 7200,
                         processing_max_age: float = 3600) -> Tuple[int, int]:
    """
    Clean up old completed translations and stuck processing requests.
    Should be called periodically.

    Args:
        completed_max_age: Seconds after ``completed_at`` before an entry of
            ``completed_translations`` is dropped (default 2 hours).
        processing_max_age: Seconds after ``started_at`` before an entry of
            ``translation_requests`` is dropped (default 1 hour).

    Returns:
        ``(removed_completed, removed_processing)`` counts.

    Entries with a missing or unparseable timestamp are removed as well.
    """
    current_time = datetime.now()

    # Both dicts are scanned and pruned under the same lock, so the snapshot
    # of expired ids cannot go stale before the deletions happen.
    with translation_requests_lock:
        to_remove_completed = _expired_request_ids(
            completed_translations, 'completed_at', completed_max_age, current_time
        )
        for req_id in to_remove_completed:
            del completed_translations[req_id]

        to_remove_processing = _expired_request_ids(
            translation_requests, 'started_at', processing_max_age, current_time
        )
        for req_id in to_remove_processing:
            del translation_requests[req_id]

    logger.info(f"[HF Server] Cleanup: Removed {len(to_remove_completed)} completed, {len(to_remove_processing)} stuck requests")
    return len(to_remove_completed), len(to_remove_processing)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1196 |
|
| 1197 |
+
# Schedule periodic cleanup (runs every hour)
|
| 1198 |
+
def periodic_cleanup(interval_seconds: float = 3600):
    """Run cleanup forever, once per ``interval_seconds`` (default: every hour).

    The loop sleeps first, so the first cleanup pass happens one full interval
    after the function starts. It never returns; it is meant to be the target
    of a background thread.

    Args:
        interval_seconds: Pause between cleanup passes, in seconds.
    """
    while True:
        time.sleep(interval_seconds)
        try:
            cleanup_old_requests()
        except Exception as e:
            # A failed pass must not end the loop; log it with the traceback
            # and try again after the next interval.
            logger.exception(f"[CLEANUP] Error during periodic cleanup: {e}")
|
| 1206 |
+
|
| 1207 |
+
# Start cleanup thread
|
| 1208 |
+
# Run the maintenance loop in the background at import time; daemon=True means
# the thread does not keep the interpreter alive at shutdown.
cleanup_thread = threading.Thread(daemon=True, target=periodic_cleanup)
cleanup_thread.start()
|
| 1210 |
+
|
| 1211 |
+
# ========== SERVER STARTUP ==========
|
| 1212 |
+
|
| 1213 |
if __name__ == "__main__":
    # Script entry point: log the startup banner, then serve the app on all
    # interfaces at port 7860.
    logger.info("[HF Server] Starting Enhanced Multilingual Translation API with WordPress Integration")
    uvicorn.run(app, port=7860, host="0.0.0.0")
|