danicor commited on
Commit
43f025b
·
verified ·
1 Parent(s): eaf4dfc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +449 -535
app.py CHANGED
@@ -12,10 +12,11 @@ import threading
12
  from queue import Queue
13
  import logging
14
  from typing import Dict, List, Tuple, Optional
15
- from fastapi import FastAPI, HTTPException, Request
16
  from fastapi.middleware.cors import CORSMiddleware
17
  from pydantic import BaseModel
18
  import uvicorn
 
19
 
20
  # Enhanced logging configuration
21
  logging.basicConfig(
@@ -28,6 +29,11 @@ logging.basicConfig(
28
  )
29
  logger = logging.getLogger(__name__)
30
 
 
 
 
 
 
31
  # Pydantic models for request/response
32
  class TranslationRequest(BaseModel):
33
  text: str
@@ -119,11 +125,11 @@ class TranslationQueue:
119
  thread.start()
120
 
121
  class TextChunker:
122
- """کلاس برای تقسیم متن طولانی به بخش‌های کوچکتر"""
123
 
124
  @staticmethod
125
  def split_text_smart(text: str, max_chunk_size: int = 400) -> List[str]:
126
- """تقسیم هوشمند متن بر اساس جملات و پاراگراف‌ها"""
127
  logger.info(f"[CHUNKER] Starting smart text splitting | Text length: {len(text)} chars | Max chunk size: {max_chunk_size}")
128
 
129
  if len(text) <= max_chunk_size:
@@ -132,27 +138,27 @@ class TextChunker:
132
 
133
  chunks = []
134
 
135
- # تقسیم بر اساس پاراگراف‌ها
136
  paragraphs = text.split('\n\n')
137
  current_chunk = ""
138
 
139
  for i, paragraph in enumerate(paragraphs):
140
  logger.debug(f"[CHUNKER] Processing paragraph {i+1}/{len(paragraphs)} | Length: {len(paragraph)}")
141
 
142
- # اگر پاراگراف خودش بزرگ است، آن را تقسیم کن
143
  if len(paragraph) > max_chunk_size:
144
- # ذخیره قسمت فعلی اگر وجود دارد
145
  if current_chunk.strip():
146
  chunks.append(current_chunk.strip())
147
  logger.debug(f"[CHUNKER] Added chunk from accumulated paragraphs | Length: {len(current_chunk.strip())}")
148
  current_chunk = ""
149
 
150
- # تقسیم پاراگراف بزرگ
151
  sub_chunks = TextChunker._split_paragraph(paragraph, max_chunk_size)
152
  chunks.extend(sub_chunks)
153
  logger.debug(f"[CHUNKER] Split large paragraph into {len(sub_chunks)} sub-chunks")
154
  else:
155
- # بررسی اینکه آیا اضافه کردن این پاراگراف از حد تجاوز می‌کند
156
  if len(current_chunk) + len(paragraph) + 2 > max_chunk_size:
157
  if current_chunk.strip():
158
  chunks.append(current_chunk.strip())
@@ -164,7 +170,7 @@ class TextChunker:
164
  else:
165
  current_chunk = paragraph
166
 
167
- # اضافه کردن آخرین قسمت
168
  if current_chunk.strip():
169
  chunks.append(current_chunk.strip())
170
  logger.debug(f"[CHUNKER] Added final chunk | Length: {len(current_chunk.strip())}")
@@ -174,10 +180,10 @@ class TextChunker:
174
 
175
  @staticmethod
176
  def _split_paragraph(paragraph: str, max_chunk_size: int) -> List[str]:
177
- """تقسیم پاراگراف بزرگ به جملات"""
178
  logger.debug(f"[CHUNKER] Splitting large paragraph | Length: {len(paragraph)}")
179
 
180
- # تقسیم بر اساس جملات
181
  sentences = re.split(r'[.!?]+\s+', paragraph)
182
  chunks = []
183
  current_chunk = ""
@@ -186,12 +192,12 @@ class TextChunker:
186
  if not sentence.strip():
187
  continue
188
 
189
- # اضافه کردن علامت نقطه اگر حذف شده
190
  if not sentence.endswith(('.', '!', '?')):
191
  sentence += '.'
192
 
193
  if len(sentence) > max_chunk_size:
194
- # جمله خودش خیلی بلند است - تقسیم بر اساس کاما
195
  if current_chunk.strip():
196
  chunks.append(current_chunk.strip())
197
  current_chunk = ""
@@ -217,7 +223,7 @@ class TextChunker:
217
 
218
  @staticmethod
219
  def _split_by_comma(sentence: str, max_chunk_size: int) -> List[str]:
220
- """تقسیم جمله طولانی بر اساس کاما"""
221
  logger.debug(f"[CHUNKER] Splitting long sentence by comma | Length: {len(sentence)}")
222
 
223
  parts = sentence.split(', ')
@@ -226,12 +232,12 @@ class TextChunker:
226
 
227
  for part in parts:
228
  if len(part) > max_chunk_size:
229
- # قسمت خودش خیلی بلند است - تقسیم اجباری
230
  if current_chunk.strip():
231
  chunks.append(current_chunk.strip())
232
  current_chunk = ""
233
 
234
- # تقسیم اجباری بر اساس طول
235
  while len(part) > max_chunk_size:
236
  chunks.append(part[:max_chunk_size].strip())
237
  part = part[max_chunk_size:].strip()
@@ -259,38 +265,6 @@ class MultilingualTranslator:
259
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
260
  logger.info(f"[INIT] Using device: {self.device}")
261
 
262
- # در متد __init__ کلاس MultilingualTranslator
263
- self.translation_store = {} # ذخیره‌سازی موقت ترجمه‌ها بر اساس request_id
264
- self.request_mapping = {} # mapping بین request_id و hash متن
265
-
266
- # بعد از اتمام ترجمه موفق، در متد translate_text
267
- # translation_result = {
268
- # 'translation': final_translation,
269
- # 'processing_time': processing_time,
270
- # 'chunks_processed': len(chunks),
271
- # 'source_lang': source_code,
272
- # 'target_lang': target_code,
273
- # 'timestamp': time.time()
274
- # }
275
- # self.translation_store[session_id] = translation_result
276
-
277
- # متد برای پاک‌سازی خودکار داده‌های قدیمی
278
- def cleanup_old_translations(self, max_age_hours: int = 24):
279
- """پاک‌سازی ترجمه‌های قدیمی از ذخیره‌سازی"""
280
- current_time = time.time()
281
- keys_to_delete = []
282
-
283
- for request_id, data in self.translation_store.items():
284
- if current_time - data['timestamp'] > max_age_hours * 3600:
285
- keys_to_delete.append(request_id)
286
-
287
- for key in keys_to_delete:
288
- del self.translation_store[key]
289
- if key in self.request_mapping:
290
- del self.request_mapping[key]
291
-
292
- logger.info(f"[CLEANUP] Removed {len(keys_to_delete)} old translations")
293
-
294
  # Initialize cache and queue
295
  self.cache = TranslationCache(cache_duration_minutes)
296
  self.queue = TranslationQueue()
@@ -314,9 +288,9 @@ class MultilingualTranslator:
314
  logger.error(f"[INIT] Error loading model: {e}")
315
  raise
316
 
317
- # تنظیمات بهینه برای ترجمه متن‌های بلند
318
- self.max_chunk_size = 350 # حداکثر طول هر قسمت
319
- self.min_chunk_overlap = 20 # همپوشانی بین قسمت‌ها
320
 
321
  # Track translation progress
322
  self.current_translation = {}
@@ -325,9 +299,9 @@ class MultilingualTranslator:
325
  logger.info(f"[INIT] Translator initialized | Max chunk size: {self.max_chunk_size} chars")
326
 
327
  def translate_chunk(self, text: str, source_lang: str, target_lang: str, chunk_index: int = 0, total_chunks: int = 1) -> str:
328
- """ترجمه یک قسمت کوچک از متن"""
329
  try:
330
- logger.info(f"[TRANSLATE] Starting chunk translation [{chunk_index+1}/{total_chunks}] | {source_lang} → {target_lang} | Length: {len(text)} chars")
331
 
332
  # Set source language for tokenizer
333
  self.tokenizer.src_lang = source_lang
@@ -341,15 +315,15 @@ class MultilingualTranslator:
341
  generated_tokens = self.model.generate(
342
  **encoded,
343
  forced_bos_token_id=self.tokenizer.get_lang_id(target_lang),
344
- max_length=1024, # افزایش طول خروجی
345
- min_length=10, # حداقل طول خروجی
346
- num_beams=5, # افزایش تعداد beam ها برای کیفیت بهتر
347
  early_stopping=True,
348
- no_repeat_ngram_size=3, # جلوگیری از تکرار
349
- length_penalty=1.0, # تنظیم جریمه طول
350
- repetition_penalty=1.2, # جلوگیری از تکرار کلمات
351
- do_sample=False, # استفاده از روش قطعی
352
- temperature=0.7, # کنترل تنوع
353
  pad_token_id=self.tokenizer.pad_token_id,
354
  eos_token_id=self.tokenizer.eos_token_id
355
  )
@@ -358,7 +332,7 @@ class MultilingualTranslator:
358
  # Decode result
359
  translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
360
 
361
- # پاک‌سازی ترجمه از کاراکترهای اضافی
362
  translation = translation.strip()
363
 
364
  logger.info(f"[TRANSLATE] Chunk translation completed [{chunk_index+1}/{total_chunks}] | Generation time: {generation_time:.2f}s | Output length: {len(translation)} chars")
@@ -370,7 +344,7 @@ class MultilingualTranslator:
370
  return f"[Translation Error: {str(e)}]"
371
 
372
  def translate_text(self, text: str, source_lang: str, target_lang: str, session_id: str = None) -> Tuple[str, float, int]:
373
-
374
  start_time = time.time()
375
 
376
  if not session_id:
@@ -382,16 +356,10 @@ class MultilingualTranslator:
382
  cached_result = self.cache.get(text, source_lang, target_lang)
383
  if cached_result:
384
  logger.info(f"[SESSION:{session_id}] Translation completed from cache | Time: {time.time() - start_time:.2f}s")
385
-
386
- # ذخیره نتیجه در translation_store برای رهگیری
387
- if session_id and cached_result and not cached_result.startswith("Translation error"):
388
- self.store_translation_result(session_id, cached_result, time.time() - start_time, 1)
389
- self.request_mapping[session_id] = hashlib.md5(text.encode()).hexdigest()
390
-
391
  return cached_result, time.time() - start_time, 1
392
 
393
  try:
394
- # اگر متن کوتاه است، مستقیماً ترجمه کن
395
  if len(text) <= self.max_chunk_size:
396
  logger.info(f"[SESSION:{session_id}] Processing as short text")
397
  translation = self.translate_chunk(text, source_lang, target_lang, 0, 1)
@@ -401,14 +369,9 @@ class MultilingualTranslator:
401
  processing_time = time.time() - start_time
402
  logger.info(f"[SESSION:{session_id}] Short text translation completed | Total time: {processing_time:.2f}s")
403
 
404
- # ذخیره نتیجه در translation_store برای رهگیری
405
- if session_id and translation and not translation.startswith("Translation error"):
406
- self.store_translation_result(session_id, translation, processing_time, 1)
407
- self.request_mapping[session_id] = hashlib.md5(text.encode()).hexdigest()
408
-
409
  return translation, processing_time, 1
410
 
411
- # تقسیم متن طولانی به بخش‌های کوچکتر
412
  logger.info(f"[SESSION:{session_id}] Processing as long text - starting chunking")
413
  chunks = TextChunker.split_text_smart(text, self.max_chunk_size)
414
  logger.info(f"[SESSION:{session_id}] Text split into {len(chunks)} chunks")
@@ -423,17 +386,17 @@ class MultilingualTranslator:
423
  'target_lang': target_lang
424
  }
425
 
426
- # ترجمه هر بخش
427
  translated_chunks = []
428
  for i, chunk in enumerate(chunks):
429
  chunk_start_time = time.time()
430
  logger.info(f"[SESSION:{session_id}] Starting chunk {i+1}/{len(chunks)} | Chunk length: {len(chunk)} chars")
431
 
432
- # بررسی کش برای هر بخش
433
  chunk_translation = self.cache.get(chunk, source_lang, target_lang)
434
 
435
  if not chunk_translation:
436
- # تخمین زمان باقی‌مانده
437
  if i > 0:
438
  elapsed_time = time.time() - start_time
439
  avg_time_per_chunk = elapsed_time / i
@@ -441,7 +404,7 @@ class MultilingualTranslator:
441
  logger.info(f"[SESSION:{session_id}] Progress: {i}/{len(chunks)} | Avg time per chunk: {avg_time_per_chunk:.1f}s | Estimated remaining: {estimated_remaining:.1f}s")
442
 
443
  chunk_translation = self.translate_chunk(chunk, source_lang, target_lang, i, len(chunks))
444
- # ذخیره بخش در کش
445
  self.cache.set(chunk, source_lang, target_lang, chunk_translation)
446
 
447
  chunk_time = time.time() - chunk_start_time
@@ -456,11 +419,11 @@ class MultilingualTranslator:
456
  if session_id in self.current_translation:
457
  self.current_translation[session_id]['completed_chunks'] = i + 1
458
 
459
- # کمی استراحت بین ترجمه‌ها برای جلوگیری از بارگذاری زیاد
460
  if i < len(chunks) - 1:
461
  time.sleep(0.1)
462
 
463
- # ترکیب بخش‌های ترجمه شده
464
  logger.info(f"[SESSION:{session_id}] Combining translated chunks")
465
  final_translation = self._combine_translations(translated_chunks, text)
466
 
@@ -468,12 +431,26 @@ class MultilingualTranslator:
468
  self.cache.set(text, source_lang, target_lang, final_translation)
469
 
470
  processing_time = time.time() - start_time
 
 
471
  logger.info(f"[SESSION:{session_id}] Long text translation completed | Total time: {processing_time:.2f}s | Chunks: {len(chunks)} | Final length: {len(final_translation)} chars")
472
 
473
- # ذخیره نتیجه در translation_store برای رهگیری
474
- if session_id and final_translation and not final_translation.startswith("Translation error"):
475
- self.store_translation_result(session_id, final_translation, processing_time, len(chunks))
476
- self.request_mapping[session_id] = hashlib.md5(text.encode()).hexdigest()
 
 
 
 
 
 
 
 
 
 
 
 
477
 
478
  # Clean up progress tracking
479
  with self.translation_lock:
@@ -487,7 +464,7 @@ class MultilingualTranslator:
487
  with self.translation_lock:
488
  self.current_translation.pop(session_id, None)
489
  return f"Translation error: {str(e)}", time.time() - start_time, 0
490
-
491
  def get_translation_progress(self, session_id: str) -> Dict:
492
  """Get current translation progress"""
493
  with self.translation_lock:
@@ -511,82 +488,9 @@ class MultilingualTranslator:
511
  'estimated_remaining': estimated_remaining,
512
  'progress_percentage': (progress['completed_chunks'] / progress['total_chunks']) * 100
513
  }
514
- def get_translation_by_request_id(self, request_id: str) -> Dict:
515
- """یافتن ترجمه کامل بر اساس request_id با پشتیبانی از کش"""
516
- logger.info(f"[TRACKING] Looking up translation for request_id: {request_id}")
517
-
518
- # 1. اول در ترجمه‌های جاری بررسی کنیم
519
- with self.translation_lock:
520
- if request_id in self.current_translation:
521
- progress = self.current_translation[request_id].copy()
522
- logger.info(f"[TRACKING] Found active translation: {progress['completed_chunks']}/{progress['total_chunks']} chunks completed")
523
- return {
524
- "status": "processing",
525
- "progress": progress,
526
- "translation": None,
527
- "found_in": "active_translations"
528
- }
529
-
530
- # 2. بررسی در تسک‌های پس‌زمینه
531
- if request_id in self.background_tasks:
532
- task = self.background_tasks[request_id]
533
-
534
- if task.done():
535
- try:
536
- translation, processing_time, chunks_count = task.result()
537
- logger.info(f"[TRACKING] Background task completed successfully: {len(translation)} chars")
538
-
539
- # حذف از تسک‌های پس‌زمینه
540
- del self.background_tasks[request_id]
541
-
542
- return {
543
- "status": "completed",
544
- "translation": translation,
545
- "processing_time": processing_time,
546
- "chunks_processed": chunks_count,
547
- "found_in": "background_tasks"
548
- }
549
- except Exception as e:
550
- logger.error(f"[TRACKING] Background task failed: {str(e)}")
551
- del self.background_tasks[request_id]
552
- return {
553
- "status": "failed",
554
- "error": str(e),
555
- "found_in": "background_tasks"
556
- }
557
- else:
558
- logger.info(f"[TRACKING] Background task still running: {request_id}")
559
- return {
560
- "status": "processing",
561
- "translation": None,
562
- "found_in": "background_tasks"
563
- }
564
-
565
- # 3. بررسی در کش - این بخش نیاز به پیاده‌سازی سیستم رهگیری کش دارد
566
- # برای این کار نیاز داریم که request_id را با متن اصلی مرتبط کنیم
567
- # یک راه حل: ذخیره mapping بین request_id و hash متن
568
-
569
- # 4. بررسی در سیستم ذخیره‌سازی موقت (اگر پیاده‌سازی شده باشد)
570
- if hasattr(self, 'translation_store') and request_id in self.translation_store:
571
- result = self.translation_store[request_id]
572
- logger.info(f"[TRACKING] Found in translation store: {len(result['translation'])} chars")
573
- return {
574
- "status": "completed",
575
- "translation": result['translation'],
576
- "processing_time": result['processing_time'],
577
- "chunks_processed": result['chunks_processed'],
578
- "found_in": "translation_store"
579
- }
580
-
581
- logger.warning(f"[TRACKING] Request ID not found: {request_id}")
582
- return {
583
- "status": "not_found",
584
- "message": "Translation request not found",
585
- "found_in": None
586
- }
587
-
588
  def _combine_translations(self, translated_chunks: List[str], original_text: str) -> str:
589
- """ترکیب قسمت‌های ترجمه شده به یک متن یکپارچه"""
590
  if not translated_chunks:
591
  return ""
592
 
@@ -595,23 +499,23 @@ class MultilingualTranslator:
595
 
596
  logger.debug(f"[COMBINER] Combining {len(translated_chunks)} translated chunks")
597
 
598
- # ترکیب قسمت‌ها با در نظر گیری ساختار اصلی متن
599
  combined = []
600
 
601
  for i, chunk in enumerate(translated_chunks):
602
- # پاک‌سازی قسمت
603
  chunk = chunk.strip()
604
 
605
  if not chunk:
606
  continue
607
 
608
- # اضافه کردن فاصله مناسب بین قسمت‌ها
609
  if i > 0 and combined:
610
- # اگر قسمت قبلی با نقطه تمام نمی‌شود، نقطه اضافه کن
611
- if not combined[-1].rstrip().endswith(('.', '!', '?', ':', 'Ø›', '.')):
612
  combined[-1] += '.'
613
 
614
- # بررسی اینکه آیا نیاز به پاراگراف جدید داریم
615
  if '\n\n' in original_text:
616
  combined.append('\n\n' + chunk)
617
  else:
@@ -621,40 +525,14 @@ class MultilingualTranslator:
621
 
622
  result = ''.join(combined)
623
 
624
- # پاک‌سازی نهایی
625
- result = re.sub(r'\s+', ' ', result) # حذف فاصله‌های اضافی
626
- result = re.sub(r'\.+', '.', result) # حذف نقطه‌های تکراری
627
  result = result.strip()
628
 
629
  logger.debug(f"[COMBINER] Combined translation length: {len(result)} chars")
630
  return result
631
 
632
- def cleanup_old_translations(self, max_age_hours: int = 24):
633
- """پاک‌سازی ترجمه‌های قدیمی از ذخیره‌سازی"""
634
- current_time = time.time()
635
- keys_to_delete = []
636
-
637
- for request_id, data in self.translation_store.items():
638
- if current_time - data['timestamp'] > max_age_hours * 3600:
639
- keys_to_delete.append(request_id)
640
-
641
- for key in keys_to_delete:
642
- del self.translation_store[key]
643
- if key in self.request_mapping:
644
- del self.request_mapping[key]
645
-
646
- logger.info(f"[CLEANUP] Removed {len(keys_to_delete)} old translations")
647
-
648
- def store_translation_result(self, request_id: str, translation: str, processing_time: float, chunks_processed: int):
649
- """ذخیره نتیجه ترجمه برای دسترسی بعدی"""
650
- self.translation_store[request_id] = {
651
- 'translation': translation,
652
- 'processing_time': processing_time,
653
- 'chunks_processed': chunks_processed,
654
- 'timestamp': time.time()
655
- }
656
- logger.info(f"[STORAGE] Stored translation for request_id: {request_id}")
657
-
658
  async def translate_text_async(self, text: str, source_lang: str, target_lang: str, session_id: str = None):
659
  """Async wrapper for translate_text"""
660
  loop = asyncio.get_event_loop()
@@ -664,6 +542,76 @@ class MultilingualTranslator:
664
  text, source_lang, target_lang, session_id
665
  )
666
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
667
  # Language mappings for M2M100 model
668
  LANGUAGE_MAP = {
669
  "English": "en",
@@ -750,57 +698,140 @@ app.add_middleware(
750
  allow_headers=["*"],
751
  )
752
 
753
- @app.get("/")
754
- async def root():
755
- return {
756
- "message": "Enhanced Multilingual Translation API v2.1",
757
- "status": "active",
758
- "features": [
759
- "enhanced_logging",
760
- "progress_tracking",
761
- "long_text_support",
762
- "smart_chunking",
763
- "cache_optimization"
764
- ]
765
- }
766
 
767
- @app.post("/api/translate")
768
- async def api_translate(request: TranslationRequest):
769
- """API endpoint for translation with enhanced logging and progress tracking"""
770
- if not request.text.strip():
771
- raise HTTPException(status_code=400, detail="No text provided")
772
-
773
- source_code = LANGUAGE_MAP.get(request.source_lang)
774
- target_code = LANGUAGE_MAP.get(request.target_lang)
775
-
776
- if not source_code or not target_code:
777
- raise HTTPException(status_code=400, detail="Invalid language codes")
778
-
779
  try:
780
- # Generate session ID for tracking
781
- session_id = hashlib.md5(f"{request.text[:100]}{time.time()}".encode()).hexdigest()[:8]
782
 
783
- translation, processing_time, chunks_count = translator.translate_text(
784
- request.text, source_code, target_code, session_id
785
- )
 
 
786
 
787
- return TranslationResponse(
788
- translation=translation,
789
- source_language=request.source_lang,
790
- target_language=request.target_lang,
791
- processing_time=processing_time,
792
- character_count=len(request.text),
793
- status="success",
794
- chunks_processed=chunks_count
795
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
796
  except Exception as e:
797
- logger.error(f"[API] Translation error: {str(e)}")
798
- raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
799
 
800
- # Alternative endpoint for form data (compatibility with WordPress)
801
  @app.post("/api/translate/form")
802
  async def api_translate_form(request: Request):
803
- """Non-blocking translation endpoint with enhanced error handling"""
 
 
 
804
  try:
805
  form_data = await request.form()
806
  text = form_data.get("text", "")
@@ -815,13 +846,11 @@ async def api_translate_form(request: Request):
815
  target_lang = json_data.get("target_lang", "")
816
  api_key = json_data.get("api_key", None)
817
  except:
818
- raise HTTPException(status_code=400, detail="Invalid request format")
819
-
820
- logger.info(f"[FORM API] Translation request | {source_lang} → {target_lang} | Length: {len(text)} chars")
821
 
822
  if not text.strip():
823
  logger.error("[FORM API] No text provided")
824
- return {"status": "error", "message": "No text provided"}
825
 
826
  source_code = LANGUAGE_MAP.get(source_lang)
827
  target_code = LANGUAGE_MAP.get(target_lang)
@@ -830,74 +859,143 @@ async def api_translate_form(request: Request):
830
  logger.error(f"[FORM API] Invalid language codes: {source_lang} -> {target_lang}")
831
  return {"status": "error", "message": "Invalid language codes"}
832
 
833
- # Generate session ID for tracking
834
- session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]
 
 
835
 
836
- # Check if it's a long text that should be processed in background
837
- if len(text) > translator.max_chunk_size:
838
- # 🔹 اول بررسی کن آیا نتیجه در کش وجود دارد یا نه
 
 
839
  cached_result = translator.cache.get(text, source_code, target_code)
840
  if cached_result:
841
- logger.info(f"[FORM API] Returning cached translation immediately for session: {session_id}")
842
  return {
843
  "translation": cached_result,
844
  "source_language": source_lang,
845
  "target_language": target_lang,
846
  "processing_time": 0.0,
847
- "character_count": len(text),
848
  "status": "success",
849
  "chunks_processed": None,
850
- "session_id": session_id,
851
- "is_heavy_text": False,
852
  "cached": True
853
  }
854
- # 🔹 اگر در کش نبود → پس بفرست به background
855
- task = asyncio.create_task(
856
- translator.translate_text_async(text, source_code, target_code, session_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
857
  )
858
- translator.background_tasks[session_id] = task
 
859
 
860
- logger.info(f"[FORM API] Started background translation for session: {session_id}")
861
 
862
  return {
863
- "session_id": session_id,
864
- "request_id": session_id,
865
- "status": "processing",
866
- "message": "Translation started in background. Use CHECK RESULT to get your translation.",
867
- "character_count": len(text),
868
- "is_background": True,
869
- "is_heavy_text": True
870
  }
 
871
  else:
872
  # Process short text immediately
873
  try:
874
- translation, processing_time, chunks_count = await translator.translate_text_async(
875
- text, source_code, target_code, session_id
 
 
876
  )
877
 
878
- # بررسی محتوای ترجمه
879
  if not translation or not translation.strip() or translation.startswith("Translation error"):
880
  logger.error(f"[FORM API] Invalid translation result: {translation[:100] if translation else 'None'}")
881
  return {
882
  "status": "error",
883
- "message": "Translation failed - empty or invalid result",
884
- "session_id": session_id
885
  }
886
 
887
- logger.info(f"[FORM API] Translation successful | Length: {len(translation)} chars")
 
888
  return {
889
- "translation": translation,
890
- "source_language": source_lang,
891
- "target_language": target_lang,
892
- "processing_time": processing_time,
893
- "character_count": len(text),
894
- "status": "success",
895
- "chunks_processed": chunks_count,
896
- "session_id": session_id
897
  }
 
898
  except Exception as e:
899
  logger.error(f"[FORM API] Translation error: {str(e)}")
900
- return {"status": "error", "message": f"Translation error: {str(e)}"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
901
 
902
  @app.get("/api/progress/{session_id}")
903
  async def get_translation_progress(session_id: str):
@@ -923,6 +1021,10 @@ async def get_languages():
923
  @app.get("/api/health")
924
  async def health_check():
925
  """Health check endpoint"""
 
 
 
 
926
  return {
927
  "status": "healthy",
928
  "device": str(translator.device),
@@ -930,6 +1032,8 @@ async def health_check():
930
  "cache_size": len(translator.cache.cache),
931
  "max_chunk_size": translator.max_chunk_size,
932
  "active_translations": len(translator.current_translation),
 
 
933
  "version": "2.1.0"
934
  }
935
 
@@ -995,9 +1099,12 @@ async def get_session_status(session_id: str):
995
 
996
  @app.get("/api/server-status")
997
  async def get_server_status():
998
- """Get current server status - non-blocking"""
999
  active_sessions = []
1000
- background_tasks_count = len(translator.background_tasks)
 
 
 
1001
 
1002
  with translator.translation_lock:
1003
  for session_id, progress in translator.current_translation.items():
@@ -1020,10 +1127,12 @@ async def get_server_status():
1020
  'estimated_remaining': estimated_remaining
1021
  })
1022
 
1023
- if active_sessions or background_tasks_count > 0:
 
 
1024
  if active_sessions:
1025
  latest_session = active_sessions[-1]
1026
- message = f"Processing chunk {latest_session['completed_chunks']}/{latest_session['total_chunks']} | {latest_session['source_lang']} → {latest_session['target_lang']}"
1027
  else:
1028
  message = f"{background_tasks_count} translation(s) in background queue"
1029
 
@@ -1033,7 +1142,8 @@ async def get_server_status():
1033
  "message": message,
1034
  "active_sessions": len(active_sessions),
1035
  "background_tasks": background_tasks_count,
1036
- "total_active": len(active_sessions) + background_tasks_count
 
1037
  }
1038
  else:
1039
  return {
@@ -1041,261 +1151,65 @@ async def get_server_status():
1041
  "status": "idle",
1042
  "message": "Server is ready for new translations",
1043
  "active_sessions": 0,
1044
- "background_tasks": 0
1045
- }
1046
-
1047
- if active_sessions:
1048
- # Return the most recent active session
1049
- latest_session = active_sessions[-1]
1050
- return {
1051
- "has_active_translation": True,
1052
- "status": "processing",
1053
- "message": f"Processing chunk {latest_session['completed_chunks']}/{latest_session['total_chunks']} | {latest_session['source_lang']} → {latest_session['target_lang']}",
1054
- "session_data": latest_session
1055
- }
1056
- else:
1057
- return {
1058
- "has_active_translation": False,
1059
- "status": "no_active_translation",
1060
- "message": "No active translation on server"
1061
  }
1062
 
1063
- @app.post("/api/confirm-completion")
1064
- async def confirm_completion_and_charge(request: Request):
1065
- """Endpoint برای تأیید تکمیل ترجمه و کسر اعتبار"""
1066
- try:
1067
- data = await request.json()
1068
- session_id = data.get("session_id")
1069
- request_id = data.get("request_id")
1070
-
1071
- if not session_id and not request_id:
1072
- raise HTTPException(status_code=400, detail="Session ID or Request ID required")
1073
-
1074
- # استفاده از session_id یا request_id برای یافتن ترجمه
1075
- identifier = session_id or request_id
1076
-
1077
- # بررسی وضعیت ترجمه
1078
- if identifier in translator.background_tasks:
1079
- task = translator.background_tasks[identifier]
1080
-
1081
- if task.done():
1082
- try:
1083
- translation, processing_time, chunks_count = await task
1084
-
1085
- # حذف تسک از لیست تسک‌های پس‌زمینه
1086
- del translator.background_tasks[identifier]
1087
-
1088
- return {
1089
- "status": "completed",
1090
- "translation": translation,
1091
- "processing_time": processing_time,
1092
- "chunks_processed": chunks_count,
1093
- "confirmed": True,
1094
- "message": "Translation completed and ready for charging"
1095
- }
1096
- except Exception as e:
1097
- del translator.background_tasks[identifier]
1098
- return {
1099
- "status": "failed",
1100
- "confirmed": False,
1101
- "message": f"Translation failed: {str(e)}"
1102
- }
1103
- else:
1104
- return {
1105
- "status": "processing",
1106
- "confirmed": False,
1107
- "message": "Translation still in progress"
1108
- }
1109
-
1110
- # بررسی در ترجمه‌های جاری
1111
- progress = translator.get_translation_progress(identifier)
1112
- if progress:
1113
- return {
1114
- "status": "processing",
1115
- "confirmed": False,
1116
- "message": f"Processing chunk {progress['completed_chunks']}/{progress['total_chunks']}",
1117
- "progress": progress
1118
- }
1119
-
1120
- # بررسی در کش (اگر ترجمه کامل شده باشد)
1121
- # این بخش نیاز به پیاده سازی دارد تا ترجمه‌های کامل شده را بر اساس شناسه برگرداند
1122
-
1123
- return {
1124
- "status": "not_found",
1125
- "confirmed": False,
1126
- "message": "Translation session not found"
1127
- }
1128
-
1129
- except Exception as e:
1130
- logger.error(f"[CONFIRM API] Error: {str(e)}")
1131
- raise HTTPException(status_code=500, detail=f"Confirmation error: {str(e)}")
1132
 
1133
- @app.post("/api/check-translation-status")
1134
- async def check_translation_status(request: Request):
1135
- """بررسی کامل وضعیت ترجمه برای کسر اعتبار"""
1136
- try:
1137
- data = await request.json()
1138
- request_id = data.get("request_id")
1139
-
1140
- if not request_id:
1141
- raise HTTPException(status_code=400, detail="Request ID required")
1142
-
1143
- logger.info(f"[STATUS CHECK] Checking status for request_id: {request_id}")
1144
-
1145
- # استفاده از متد کامل رهگیری
1146
- status_info = translator.get_translation_by_request_id(request_id)
1147
-
1148
- if status_info['status'] == 'completed':
1149
- # ترجمه کامل شده - آماده برای کسر اعتبار
1150
- return {
1151
- "status": "completed",
1152
- "completed": True,
1153
- "translation": status_info['translation'],
1154
- "processing_time": status_info.get('processing_time', 0),
1155
- "chunks_processed": status_info.get('chunks_processed', 0),
1156
- "ready_for_charging": True,
1157
- "message": "Translation completed successfully"
1158
- }
1159
-
1160
- elif status_info['status'] == 'processing':
1161
- # هنوز در حال پردازش
1162
- progress = status_info.get('progress', {})
1163
- return {
1164
- "status": "processing",
1165
- "completed": False,
1166
- "ready_for_charging": False,
1167
- "message": f"Processing: {progress.get('completed_chunks', 0)}/{progress.get('total_chunks', 0)} chunks",
1168
- "progress_percentage": progress.get('progress_percentage', 0)
1169
- }
1170
-
1171
- elif status_info['status'] == 'failed':
1172
- # خطا در ترجمه
1173
- return {
1174
- "status": "failed",
1175
- "completed": False,
1176
- "ready_for_charging": False,
1177
- "message": status_info.get('error', 'Translation failed')
1178
- }
1179
-
1180
- else:
1181
- # یافت نشد
1182
- return {
1183
- "status": "not_found",
1184
- "completed": False,
1185
- "ready_for_charging": False,
1186
- "message": "Translation request not found"
1187
- }
1188
-
1189
- except Exception as e:
1190
- logger.error(f"[STATUS CHECK] Error: {str(e)}")
1191
- raise HTTPException(status_code=500, detail=f"Status check error: {str(e)}")
1192
-
1193
- @app.post("/api/check-completion")
1194
- async def check_completion_status(request: Request):
1195
- """بررسی وضعیت تکمیل ترجمه برای کسر اعتبار"""
1196
- try:
1197
- data = await request.json()
1198
- request_id = data.get("request_id")
1199
-
1200
- if not request_id:
1201
- raise HTTPException(status_code=400, detail="Request ID required")
1202
-
1203
- # اینجا باید منطق بررسی وضعیت ترجمه بر اساس request_id پیاده‌سازی شود
1204
- # برای سادگی، فرض می‌کنیم که اگر request_id در background_tasks وجود دارد،
1205
- # ولی تسک کامل شده، می‌توانیم وضعیت completed را برگردانیم
1206
-
1207
- if request_id in translator.background_tasks:
1208
- task = translator.background_tasks[request_id]
1209
-
1210
- if task.done():
1211
- try:
1212
- translation, processing_time, chunks_count = await task
1213
- return {
1214
- "status": "completed",
1215
- "completed": True,
1216
- "message": "Translation completed successfully"
1217
- }
1218
- except Exception as e:
1219
- return {
1220
- "status": "failed",
1221
- "completed": False,
1222
- "message": f"Translation failed: {str(e)}"
1223
- }
1224
- else:
1225
- return {
1226
- "status": "processing",
1227
- "completed": False,
1228
- "message": "Translation in progress"
1229
- }
1230
-
1231
- # اگر در background_tasks نیست، ممکن است در حال پردازش باشد یا کامل شده
1232
- progress = translator.get_translation_progress(request_id)
1233
- if progress:
1234
- return {
1235
- "status": "processing",
1236
- "completed": False,
1237
- "message": f"Processing in progress: {progress['completed_chunks']}/{progress['total_chunks']} chunks"
1238
- }
1239
-
1240
- # بررسی آیا قبلاً کامل شده و در کش ذخیره شده
1241
- # این بخش نیاز به پیاده‌سازی دارد
1242
-
1243
- return {
1244
- "status": "unknown",
1245
- "completed": False,
1246
- "message": "Request ID not found in active translations"
1247
- }
1248
-
1249
- except Exception as e:
1250
- logger.error(f"[COMPLETION CHECK] Error: {str(e)}")
1251
- return {
1252
- "status": "error",
1253
- "completed": False,
1254
- "message": f"Error checking completion: {str(e)}"
1255
- }
1256
 
1257
- @app.post("/api/notify-completion")
1258
- async def notify_translation_completion(request: Request):
1259
- """Endpoint برای اطلاع‌رسانی خودکار تکمیل ترجمه به وردپرس"""
1260
- try:
1261
- data = await request.json()
1262
- request_id = data.get("request_id")
1263
- translation = data.get("translation")
1264
- processing_time = data.get("processing_time", 0)
1265
-
1266
- if not request_id or not translation:
1267
- raise HTTPException(status_code=400, detail="Request ID and translation required")
1268
-
1269
- logger.info(f"[NOTIFY] Translation completed for request_id: {request_id}")
1270
-
1271
- # اینجا باید به وردپرس اطلاع دهید که ترجمه کامل شده
1272
- # می‌توانید از webhook یا درخواست HTTP به وردپرس استفاده کنید
1273
-
1274
- # مثال: ارسال درخواست به وردپرس
1275
- wp_url = "https://your-wordpress-site.com/wp-admin/admin-ajax.php"
1276
- payload = {
1277
- 'action': 'amt_completion_notification',
1278
- 'request_id': request_id,
1279
- 'translation': translation,
1280
- 'processing_time': processing_time,
1281
- 'secret_key': 'your_secret_key_here' # برای امنیت
1282
- }
1283
-
1284
  try:
1285
- async with aiohttp.ClientSession() as session:
1286
- async with session.post(wp_url, data=payload) as response:
1287
- if response.status == 200:
1288
- logger.info(f"[NOTIFY] Successfully notified WordPress for request_id: {request_id}")
1289
- else:
1290
- logger.warning(f"[NOTIFY] WordPress notification failed: {response.status}")
1291
  except Exception as e:
1292
- logger.error(f"[NOTIFY] Error notifying WordPress: {str(e)}")
1293
-
1294
- return {"status": "success", "message": "Notification sent"}
1295
-
1296
- except Exception as e:
1297
- logger.error(f"[NOTIFY] Error: {str(e)}")
1298
- raise HTTPException(status_code=500, detail=f"Notification error: {str(e)}")
1299
-
1300
  if __name__ == "__main__":
 
1301
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
12
  from queue import Queue
13
  import logging
14
  from typing import Dict, List, Tuple, Optional
15
+ from fastapi import FastAPI, HTTPException, Request, Form
16
  from fastapi.middleware.cors import CORSMiddleware
17
  from pydantic import BaseModel
18
  import uvicorn
19
+ import uuid
20
 
21
  # Enhanced logging configuration
22
  logging.basicConfig(
 
29
  )
30
  logger = logging.getLogger(__name__)
31
 
32
+ # Global storage for translation requests (WordPress integration)
33
+ translation_requests = {}
34
+ completed_translations = {}
35
+ translation_requests_lock = threading.Lock()
36
+
37
  # Pydantic models for request/response
38
  class TranslationRequest(BaseModel):
39
  text: str
 
125
  thread.start()
126
 
127
  class TextChunker:
128
+ """کلاس برای تقسیم متن طولانی به بخش‌های کوچک‌تر"""
129
 
130
  @staticmethod
131
  def split_text_smart(text: str, max_chunk_size: int = 400) -> List[str]:
132
+ """تقسیم هوشمند متن بر اساس جملات و پاراگراف‌ها"""
133
  logger.info(f"[CHUNKER] Starting smart text splitting | Text length: {len(text)} chars | Max chunk size: {max_chunk_size}")
134
 
135
  if len(text) <= max_chunk_size:
 
138
 
139
  chunks = []
140
 
141
+ # تقسیم بر اساس پاراگراف‌ها
142
  paragraphs = text.split('\n\n')
143
  current_chunk = ""
144
 
145
  for i, paragraph in enumerate(paragraphs):
146
  logger.debug(f"[CHUNKER] Processing paragraph {i+1}/{len(paragraphs)} | Length: {len(paragraph)}")
147
 
148
+ # اگر پاراگراف خودش بزرگ است آن را تقسیم کن
149
  if len(paragraph) > max_chunk_size:
150
+ # ذخیره قسمت فعلی اگر وجود دارد
151
  if current_chunk.strip():
152
  chunks.append(current_chunk.strip())
153
  logger.debug(f"[CHUNKER] Added chunk from accumulated paragraphs | Length: {len(current_chunk.strip())}")
154
  current_chunk = ""
155
 
156
+ # تقسیم پاراگراف بزرگ
157
  sub_chunks = TextChunker._split_paragraph(paragraph, max_chunk_size)
158
  chunks.extend(sub_chunks)
159
  logger.debug(f"[CHUNKER] Split large paragraph into {len(sub_chunks)} sub-chunks")
160
  else:
161
+ # بررسی اینکه آیا اضافه کردن این پاراگراف از حد تجاوز می‌کند
162
  if len(current_chunk) + len(paragraph) + 2 > max_chunk_size:
163
  if current_chunk.strip():
164
  chunks.append(current_chunk.strip())
 
170
  else:
171
  current_chunk = paragraph
172
 
173
+ # اضافه کردن آخرین قسمت
174
  if current_chunk.strip():
175
  chunks.append(current_chunk.strip())
176
  logger.debug(f"[CHUNKER] Added final chunk | Length: {len(current_chunk.strip())}")
 
180
 
181
  @staticmethod
182
  def _split_paragraph(paragraph: str, max_chunk_size: int) -> List[str]:
183
+ """تقسیم پاراگراف بزرگ به جملات"""
184
  logger.debug(f"[CHUNKER] Splitting large paragraph | Length: {len(paragraph)}")
185
 
186
+ # تقسیم بر اساس جملات
187
  sentences = re.split(r'[.!?]+\s+', paragraph)
188
  chunks = []
189
  current_chunk = ""
 
192
  if not sentence.strip():
193
  continue
194
 
195
+ # اضافه کردن علامت نقطه اگر حذف شده
196
  if not sentence.endswith(('.', '!', '?')):
197
  sentence += '.'
198
 
199
  if len(sentence) > max_chunk_size:
200
+ # جمله خودش خیلی بلند است - تقسیم بر اساس کاما
201
  if current_chunk.strip():
202
  chunks.append(current_chunk.strip())
203
  current_chunk = ""
 
223
 
224
  @staticmethod
225
  def _split_by_comma(sentence: str, max_chunk_size: int) -> List[str]:
226
+ """تقسیم جمله طولانی بر اساس کاما"""
227
  logger.debug(f"[CHUNKER] Splitting long sentence by comma | Length: {len(sentence)}")
228
 
229
  parts = sentence.split(', ')
 
232
 
233
  for part in parts:
234
  if len(part) > max_chunk_size:
235
+ # قسمت خودش خیلی بلند است - تقسیم اجباری
236
  if current_chunk.strip():
237
  chunks.append(current_chunk.strip())
238
  current_chunk = ""
239
 
240
+ # تقسیم اجباری بر اساس طول
241
  while len(part) > max_chunk_size:
242
  chunks.append(part[:max_chunk_size].strip())
243
  part = part[max_chunk_size:].strip()
 
265
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
266
  logger.info(f"[INIT] Using device: {self.device}")
267
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  # Initialize cache and queue
269
  self.cache = TranslationCache(cache_duration_minutes)
270
  self.queue = TranslationQueue()
 
288
  logger.error(f"[INIT] Error loading model: {e}")
289
  raise
290
 
291
+ # تنظیمات بهینه برای ترجمه متن‌های بلند
292
+ self.max_chunk_size = 350 # حداکثر طول هر قسمت
293
+ self.min_chunk_overlap = 20 # همپوشانی بین قسمت‌ها
294
 
295
  # Track translation progress
296
  self.current_translation = {}
 
299
  logger.info(f"[INIT] Translator initialized | Max chunk size: {self.max_chunk_size} chars")
300
 
301
  def translate_chunk(self, text: str, source_lang: str, target_lang: str, chunk_index: int = 0, total_chunks: int = 1) -> str:
302
+ """ترجمه یک قسمت کوچک از متن"""
303
  try:
304
+ logger.info(f"[TRANSLATE] Starting chunk translation [{chunk_index+1}/{total_chunks}] | {source_lang} {target_lang} | Length: {len(text)} chars")
305
 
306
  # Set source language for tokenizer
307
  self.tokenizer.src_lang = source_lang
 
315
  generated_tokens = self.model.generate(
316
  **encoded,
317
  forced_bos_token_id=self.tokenizer.get_lang_id(target_lang),
318
+ max_length=1024, # افزایش طول خروجی
319
+ min_length=10, # حداقل طول خروجی
320
+ num_beams=5, # افزایش تعداد beam ها برای کیفیت بهتر
321
  early_stopping=True,
322
+ no_repeat_ngram_size=3, # جلوگیری از تکرار
323
+ length_penalty=1.0, # تنظیم جریمه طول
324
+ repetition_penalty=1.2, # جلوگیری از تکرار کلمات
325
+ do_sample=False, # استفاده از روش قطعی
326
+ temperature=0.7, # کنترل تنوع
327
  pad_token_id=self.tokenizer.pad_token_id,
328
  eos_token_id=self.tokenizer.eos_token_id
329
  )
 
332
  # Decode result
333
  translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
334
 
335
+ # پاک‌سازی ترجمه از کاراکترهای اضافی
336
  translation = translation.strip()
337
 
338
  logger.info(f"[TRANSLATE] Chunk translation completed [{chunk_index+1}/{total_chunks}] | Generation time: {generation_time:.2f}s | Output length: {len(translation)} chars")
 
344
  return f"[Translation Error: {str(e)}]"
345
 
346
  def translate_text(self, text: str, source_lang: str, target_lang: str, session_id: str = None) -> Tuple[str, float, int]:
347
+ """ترجمه متن با پشتیبانی از متن‌های طولانی و لاگ‌های مفصل"""
348
  start_time = time.time()
349
 
350
  if not session_id:
 
356
  cached_result = self.cache.get(text, source_lang, target_lang)
357
  if cached_result:
358
  logger.info(f"[SESSION:{session_id}] Translation completed from cache | Time: {time.time() - start_time:.2f}s")
 
 
 
 
 
 
359
  return cached_result, time.time() - start_time, 1
360
 
361
  try:
362
+ # اگر متن کوتاه است مستقیماً ترجمه کن
363
  if len(text) <= self.max_chunk_size:
364
  logger.info(f"[SESSION:{session_id}] Processing as short text")
365
  translation = self.translate_chunk(text, source_lang, target_lang, 0, 1)
 
369
  processing_time = time.time() - start_time
370
  logger.info(f"[SESSION:{session_id}] Short text translation completed | Total time: {processing_time:.2f}s")
371
 
 
 
 
 
 
372
  return translation, processing_time, 1
373
 
374
+ # تقسیم متن طولانی به قسمت‌های کوچکتر
375
  logger.info(f"[SESSION:{session_id}] Processing as long text - starting chunking")
376
  chunks = TextChunker.split_text_smart(text, self.max_chunk_size)
377
  logger.info(f"[SESSION:{session_id}] Text split into {len(chunks)} chunks")
 
386
  'target_lang': target_lang
387
  }
388
 
389
+ # ترجمه هر قسمت
390
  translated_chunks = []
391
  for i, chunk in enumerate(chunks):
392
  chunk_start_time = time.time()
393
  logger.info(f"[SESSION:{session_id}] Starting chunk {i+1}/{len(chunks)} | Chunk length: {len(chunk)} chars")
394
 
395
+ # بررسی کش برای هر قسمت
396
  chunk_translation = self.cache.get(chunk, source_lang, target_lang)
397
 
398
  if not chunk_translation:
399
+ # Estimate remaining time
400
  if i > 0:
401
  elapsed_time = time.time() - start_time
402
  avg_time_per_chunk = elapsed_time / i
 
404
  logger.info(f"[SESSION:{session_id}] Progress: {i}/{len(chunks)} | Avg time per chunk: {avg_time_per_chunk:.1f}s | Estimated remaining: {estimated_remaining:.1f}s")
405
 
406
  chunk_translation = self.translate_chunk(chunk, source_lang, target_lang, i, len(chunks))
407
+ # ذخیره قسمت در کش
408
  self.cache.set(chunk, source_lang, target_lang, chunk_translation)
409
 
410
  chunk_time = time.time() - chunk_start_time
 
419
  if session_id in self.current_translation:
420
  self.current_translation[session_id]['completed_chunks'] = i + 1
421
 
422
+ # کمی استراحت بین ترجمه‌ها برای جلوگیری از بارگذاری زیاد
423
  if i < len(chunks) - 1:
424
  time.sleep(0.1)
425
 
426
+ # ترکیب قسمت‌های ترجمه شده
427
  logger.info(f"[SESSION:{session_id}] Combining translated chunks")
428
  final_translation = self._combine_translations(translated_chunks, text)
429
 
 
431
  self.cache.set(text, source_lang, target_lang, final_translation)
432
 
433
  processing_time = time.time() - start_time
434
+
435
+ # Mark as completed for WordPress integration
436
  logger.info(f"[SESSION:{session_id}] Long text translation completed | Total time: {processing_time:.2f}s | Chunks: {len(chunks)} | Final length: {len(final_translation)} chars")
437
 
438
+ # Store in completed_translations for WordPress to check
439
+ with translation_requests_lock:
440
+ completed_translations[session_id] = {
441
+ 'translation': final_translation,
442
+ 'processing_time': processing_time,
443
+ 'character_count': len(text),
444
+ 'source_lang': source_lang,
445
+ 'target_lang': target_lang,
446
+ 'completed_at': datetime.now().isoformat(),
447
+ 'request_id': session_id,
448
+ 'status': 'completed'
449
+ }
450
+
451
+ # Remove from processing requests if exists
452
+ if session_id in translation_requests:
453
+ del translation_requests[session_id]
454
 
455
  # Clean up progress tracking
456
  with self.translation_lock:
 
464
  with self.translation_lock:
465
  self.current_translation.pop(session_id, None)
466
  return f"Translation error: {str(e)}", time.time() - start_time, 0
467
+
468
  def get_translation_progress(self, session_id: str) -> Dict:
469
  """Get current translation progress"""
470
  with self.translation_lock:
 
488
  'estimated_remaining': estimated_remaining,
489
  'progress_percentage': (progress['completed_chunks'] / progress['total_chunks']) * 100
490
  }
491
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  def _combine_translations(self, translated_chunks: List[str], original_text: str) -> str:
493
+ """ترکیب قسمت‌های ترجمه شده به یک متن یکپارچه"""
494
  if not translated_chunks:
495
  return ""
496
 
 
499
 
500
  logger.debug(f"[COMBINER] Combining {len(translated_chunks)} translated chunks")
501
 
502
+ # ترکیب قسمت‌ها با در نظر گیری ساختار اصلی متن
503
  combined = []
504
 
505
  for i, chunk in enumerate(translated_chunks):
506
+ # پاک‌سازی قسمت
507
  chunk = chunk.strip()
508
 
509
  if not chunk:
510
  continue
511
 
512
+ # اضافه کردن فاصله مناسب بین قسمت‌ها
513
  if i > 0 and combined:
514
+ # اگر قسمت قبلی با نقطه تمام نمی‌شود فاصله اضافه کن
515
+ if not combined[-1].rstrip().endswith(('.', '!', '?', ':', '۔', '.')):
516
  combined[-1] += '.'
517
 
518
+ # بررسی اینکه آیا نیاز به پاراگراف جدید داریم
519
  if '\n\n' in original_text:
520
  combined.append('\n\n' + chunk)
521
  else:
 
525
 
526
  result = ''.join(combined)
527
 
528
+ # پاک‌سازی نهایی
529
+ result = re.sub(r'\s+', ' ', result) # حذف فاصله‌های اضافی
530
+ result = re.sub(r'\.+', '.', result) # حذف نقطه‌های تکراری
531
  result = result.strip()
532
 
533
  logger.debug(f"[COMBINER] Combined translation length: {len(result)} chars")
534
  return result
535
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
536
  async def translate_text_async(self, text: str, source_lang: str, target_lang: str, session_id: str = None):
537
  """Async wrapper for translate_text"""
538
  loop = asyncio.get_event_loop()
 
542
  text, source_lang, target_lang, session_id
543
  )
544
 
545
def process_heavy_translation_background(request_id: str, text: str, source_lang: str, target_lang: str):
    """Process a long-text translation in a background thread.

    Used for the WordPress integration: the HTTP endpoint returns
    immediately with ``request_id`` while this function performs the
    translation and records the outcome in the module-level
    ``completed_translations`` dict (removing the entry from
    ``translation_requests``), so the client can poll for the result.

    Args:
        request_id: Identifier the client uses to poll for the result.
        text: Full source text to translate.
        source_lang: Source language code (M2M100 code, e.g. "en").
        target_lang: Target language code (M2M100 code).
    """
    # Initialize before the try block so the except handler can always
    # compute an elapsed time (avoids the fragile `'start_time' in locals()`
    # check the previous version relied on).
    start_time = time.time()
    try:
        logger.info(f"[HF Server] Background processing started for request: {request_id}")

        # Mark some initial progress so status polls show movement.
        with translation_requests_lock:
            if request_id in translation_requests:
                translation_requests[request_id]['progress'] = 10

        # Perform the actual translation; translate_text handles chunking,
        # caching and progress tracking internally. Its own timing value is
        # discarded in favor of this task's wall-clock time below.
        translation, _, chunks_count = translator.translate_text(
            text, source_lang, target_lang, request_id
        )

        # Wall-clock time for the whole background task.
        processing_time = time.time() - start_time

        # Publish the result for the polling endpoints. Both operations are
        # done under the same lock so a poller never sees the request as
        # simultaneously "processing" and "completed".
        with translation_requests_lock:
            completed_translations[request_id] = {
                'translation': translation,
                'processing_time': processing_time,
                'character_count': len(text),
                'source_lang': source_lang,
                'target_lang': target_lang,
                'completed_at': datetime.now().isoformat(),
                'request_id': request_id,
                'status': 'completed'
            }
            # The request is no longer in flight.
            if request_id in translation_requests:
                del translation_requests[request_id]

        logger.info(f"[HF Server] Long text translation completed for request: {request_id} in {processing_time:.2f}s")

    except Exception as e:
        logger.error(f"[HF Server] Background processing error for {request_id}: {str(e)}")

        # Record the failure so pollers get a definitive answer instead of
        # waiting forever on a request that will never complete.
        with translation_requests_lock:
            completed_translations[request_id] = {
                'translation': '',
                'error': str(e),
                'status': 'failed',
                'processing_time': time.time() - start_time,
                'completed_at': datetime.now().isoformat(),
                'request_id': request_id
            }
            if request_id in translation_requests:
                del translation_requests[request_id]
+
604
def perform_translation_internal(text: str, source_lang: str, target_lang: str) -> str:
    """Translate *text* and return only the translated string.

    Thin convenience wrapper around ``translator.translate_text`` that
    drops the timing/chunk metadata; on failure it logs the problem and
    returns an error message instead of raising.
    """
    try:
        result = translator.translate_text(text, source_lang, target_lang)
    except Exception as e:
        logger.error(f"[INTERNAL] Translation error: {str(e)}")
        return f"Translation error: {str(e)}"
    return result[0]
614
+
615
  # Language mappings for M2M100 model
616
  LANGUAGE_MAP = {
617
  "English": "en",
 
698
  allow_headers=["*"],
699
  )
700
 
701
+ # ========== NEW WORDPRESS INTEGRATION ENDPOINTS ==========
 
 
 
 
 
 
 
 
 
 
 
 
702
 
703
@app.post("/api/check-completion")
async def check_completion(request: Request):
    """Verify whether a translation request has finished.

    WordPress calls this endpoint to confirm completion before charging
    the user's credits. Reads ``request_id`` from the form body and
    reports completed / processing / not_found.
    """
    try:
        form = await request.form()
        rid = form.get('request_id', '').strip()

        # Reject calls that carry no request identifier.
        if not rid:
            return {
                'status': 'error',
                'message': 'Request ID is required'
            }

        logger.info(f"[HF Server] Completion verification requested for: {rid}")

        with translation_requests_lock:
            # Finished requests live in completed_translations.
            if rid in completed_translations:
                done = completed_translations[rid]
                logger.info(f"[HF Server] Completion verification for {rid}: COMPLETED")
                return {
                    'status': 'completed',
                    'request_id': rid,
                    'completed_at': done.get('completed_at'),
                    'processing_time': done.get('processing_time', 0),
                    'verified': True
                }

            # In-flight requests live in translation_requests.
            if rid in translation_requests:
                logger.info(f"[HF Server] Completion verification for {rid}: STILL PROCESSING")
                return {
                    'status': 'processing',
                    'request_id': rid,
                    'verified': False
                }

            # Unknown identifier.
            logger.info(f"[HF Server] Completion verification for {rid}: NOT FOUND")
            return {
                'status': 'not_found',
                'request_id': rid,
                'message': 'Request ID not found'
            }

    except Exception as e:
        logger.error(f"[HF Server] Error in check_completion: {str(e)}")
        return {
            'status': 'error',
            'message': 'Server error occurred'
        }
761
+
762
@app.post("/api/check-translation-status")
async def check_translation_status(request: Request):
    """Report the current status of a translation request.

    Reads ``request_id`` from the form body. If the translation has
    completed, the full translated text plus its metadata is returned;
    otherwise a processing or not_found status is reported.
    """
    try:
        form = await request.form()
        rid = form.get('request_id', '').strip()

        # A request identifier is mandatory.
        if not rid:
            return {
                'status': 'error',
                'message': 'Request ID is required'
            }

        logger.info(f"[HF Server] Translation status check for: {rid}")

        with translation_requests_lock:
            # Completed: hand back the translation and its metadata.
            if rid in completed_translations:
                result = completed_translations[rid]
                logger.info(f"[HF Server] Translation status check for {rid}: COMPLETED - returning translation")
                return {
                    'status': 'completed',
                    'request_id': rid,
                    'translation': result.get('translation', ''),
                    'processing_time': result.get('processing_time', 0),
                    'character_count': result.get('character_count', 0),
                    'completed_at': result.get('completed_at'),
                    'source_lang': result.get('source_lang', ''),
                    'target_lang': result.get('target_lang', '')
                }

            # Still in flight: report progress info from the request record.
            if rid in translation_requests:
                pending = translation_requests[rid]
                logger.info(f"[HF Server] Translation status check for {rid}: STILL PROCESSING")
                return {
                    'status': 'processing',
                    'request_id': rid,
                    'started_at': pending.get('started_at'),
                    'progress': pending.get('progress', 0)
                }

            # Unknown identifier.
            logger.info(f"[HF Server] Translation status check for {rid}: NOT FOUND")
            return {
                'status': 'not_found',
                'request_id': rid,
                'message': 'Translation request not found'
            }

    except Exception as e:
        logger.error(f"[HF Server] Error in check_translation_status: {str(e)}")
        return {
            'status': 'error',
            'message': 'Server error occurred'
        }
826
+
827
+ # ========== UPDATED MAIN TRANSLATION ENDPOINT ==========
828
 
 
829
  @app.post("/api/translate/form")
830
  async def api_translate_form(request: Request):
831
+ """
832
+ Enhanced translation endpoint that handles both short and long texts.
833
+ For long texts, returns immediately with request_id for background processing.
834
+ """
835
  try:
836
  form_data = await request.form()
837
  text = form_data.get("text", "")
 
846
  target_lang = json_data.get("target_lang", "")
847
  api_key = json_data.get("api_key", None)
848
  except:
849
+ return {"status": "error", "message": "Invalid request format"}
 
 
850
 
851
  if not text.strip():
852
  logger.error("[FORM API] No text provided")
853
+ return {"status": "error", "message": "Text, source language, and target language are required"}
854
 
855
  source_code = LANGUAGE_MAP.get(source_lang)
856
  target_code = LANGUAGE_MAP.get(target_lang)
 
859
  logger.error(f"[FORM API] Invalid language codes: {source_lang} -> {target_lang}")
860
  return {"status": "error", "message": "Invalid language codes"}
861
 
862
+ char_count = len(text)
863
+ is_heavy_text = char_count > 1000 # Same threshold as WordPress
864
+
865
+ logger.info(f"[FORM API] Translation request: {char_count} chars, {source_lang} → {target_lang}, Heavy: {is_heavy_text}")
866
 
867
+ if is_heavy_text:
868
+ # Generate request ID for background processing
869
+ request_id = str(uuid.uuid4())
870
+
871
+ # First check cache for immediate return
872
  cached_result = translator.cache.get(text, source_code, target_code)
873
  if cached_result:
874
+ logger.info(f"[FORM API] Returning cached translation immediately for request: {request_id}")
875
  return {
876
  "translation": cached_result,
877
  "source_language": source_lang,
878
  "target_language": target_lang,
879
  "processing_time": 0.0,
880
+ "character_count": char_count,
881
  "status": "success",
882
  "chunks_processed": None,
883
+ "request_id": request_id,
 
884
  "cached": True
885
  }
886
+
887
+ # Store request for processing
888
+ with translation_requests_lock:
889
+ translation_requests[request_id] = {
890
+ 'text': text,
891
+ 'source_lang': source_code,
892
+ 'target_lang': target_code,
893
+ 'started_at': datetime.now().isoformat(),
894
+ 'character_count': char_count,
895
+ 'progress': 0
896
+ }
897
+
898
+ # Start background processing
899
+ thread = threading.Thread(
900
+ target=process_heavy_translation_background,
901
+ args=(request_id, text, source_code, target_code)
902
  )
903
+ thread.daemon = True
904
+ thread.start()
905
 
906
+ logger.info(f"[FORM API] Started background processing for request: {request_id}")
907
 
908
  return {
909
+ 'is_background': True,
910
+ 'session_id': request_id,
911
+ 'request_id': request_id,
912
+ 'status': 'processing',
913
+ 'message': f'Long text ({char_count} characters) is being processed in background. Use the request ID to check status.',
914
+ 'character_count': char_count
 
915
  }
916
+
917
  else:
918
  # Process short text immediately
919
  try:
920
+ start_time = time.time()
921
+
922
+ translation, processing_time, chunks_count = translator.translate_text(
923
+ text, source_code, target_code
924
  )
925
 
926
+ # Check translation content
927
  if not translation or not translation.strip() or translation.startswith("Translation error"):
928
  logger.error(f"[FORM API] Invalid translation result: {translation[:100] if translation else 'None'}")
929
  return {
930
  "status": "error",
931
+ "message": "Translation failed - empty or invalid result"
 
932
  }
933
 
934
+ logger.info(f"[FORM API] Short text translation completed in {processing_time:.2f}s")
935
+
936
  return {
937
+ 'status': 'success',
938
+ 'translation': translation,
939
+ 'processing_time': processing_time,
940
+ 'character_count': char_count,
941
+ 'source_lang': source_lang,
942
+ 'target_lang': target_lang
 
 
943
  }
944
+
945
  except Exception as e:
946
  logger.error(f"[FORM API] Translation error: {str(e)}")
947
+ return {"status": "error", "message": f"Translation failed: {str(e)}"}
948
+
949
+ # ========== EXISTING ENDPOINTS (UPDATED) ==========
950
+
951
@app.get("/")
async def root():
    """Service banner endpoint: reports API identity, liveness, and feature flags."""
    feature_flags = [
        "enhanced_logging",
        "progress_tracking",
        "long_text_support",
        "smart_chunking",
        "cache_optimization",
        "wordpress_integration",
        "delayed_charging_support"
    ]
    return {
        "message": "Enhanced Multilingual Translation API v2.1 with WordPress Integration",
        "status": "active",
        "features": feature_flags
    }
966
+
967
@app.post("/api/translate")
async def api_translate(request: TranslationRequest):
    """API endpoint for translation with enhanced logging and progress tracking"""
    # Reject empty payloads before doing any work.
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="No text provided")

    # Resolve human-readable language names to internal model codes.
    src = LANGUAGE_MAP.get(request.source_lang)
    tgt = LANGUAGE_MAP.get(request.target_lang)
    if not src or not tgt:
        raise HTTPException(status_code=400, detail="Invalid language codes")

    try:
        # Short hash of the text prefix plus a timestamp acts as a
        # session ID so progress can be polled while translating.
        tracking_seed = f"{request.text[:100]}{time.time()}"
        session_id = hashlib.md5(tracking_seed.encode()).hexdigest()[:8]

        translated_text, elapsed, num_chunks = translator.translate_text(
            request.text, src, tgt, session_id
        )

        return TranslationResponse(
            translation=translated_text,
            source_language=request.source_lang,
            target_language=request.target_lang,
            processing_time=elapsed,
            character_count=len(request.text),
            status="success",
            chunks_processed=num_chunks
        )
    except Exception as e:
        # Any failure inside translation is surfaced as a 500 with the cause.
        logger.error(f"[API] Translation error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}")
999
 
1000
  @app.get("/api/progress/{session_id}")
1001
  async def get_translation_progress(session_id: str):
 
1021
  @app.get("/api/health")
1022
  async def health_check():
1023
  """Health check endpoint"""
1024
+ with translation_requests_lock:
1025
+ active_requests = len(translation_requests)
1026
+ completed_cache = len(completed_translations)
1027
+
1028
  return {
1029
  "status": "healthy",
1030
  "device": str(translator.device),
 
1032
  "cache_size": len(translator.cache.cache),
1033
  "max_chunk_size": translator.max_chunk_size,
1034
  "active_translations": len(translator.current_translation),
1035
+ "active_requests": active_requests,
1036
+ "completed_cache": completed_cache,
1037
  "version": "2.1.0"
1038
  }
1039
 
 
1099
 
1100
  @app.get("/api/server-status")
1101
  async def get_server_status():
1102
+ """Get current server status - enhanced for WordPress integration"""
1103
  active_sessions = []
1104
+
1105
+ with translation_requests_lock:
1106
+ background_tasks_count = len(translation_requests)
1107
+ completed_count = len(completed_translations)
1108
 
1109
  with translator.translation_lock:
1110
  for session_id, progress in translator.current_translation.items():
 
1127
  'estimated_remaining': estimated_remaining
1128
  })
1129
 
1130
+ total_active = len(active_sessions) + background_tasks_count
1131
+
1132
+ if total_active > 0:
1133
  if active_sessions:
1134
  latest_session = active_sessions[-1]
1135
+ message = f"Processing chunk {latest_session['completed_chunks']}/{latest_session['total_chunks']} | {latest_session['source_lang']} {latest_session['target_lang']}"
1136
  else:
1137
  message = f"{background_tasks_count} translation(s) in background queue"
1138
 
 
1142
  "message": message,
1143
  "active_sessions": len(active_sessions),
1144
  "background_tasks": background_tasks_count,
1145
+ "total_active": total_active,
1146
+ "completed_cache": completed_count
1147
  }
1148
  else:
1149
  return {
 
1151
  "status": "idle",
1152
  "message": "Server is ready for new translations",
1153
  "active_sessions": 0,
1154
+ "background_tasks": 0,
1155
+ "completed_cache": completed_count
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1156
  }
1157
 
1158
+ # ========== CLEANUP AND MAINTENANCE FUNCTIONS ==========
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1159
 
1160
def cleanup_old_requests():
    """
    Clean up old completed translations and stuck processing requests.

    Removes completed translations older than 2 hours and processing
    requests started more than 1 hour ago. Entries whose timestamps are
    missing or malformed are treated as stale and removed as well.
    Should be called periodically.

    Returns:
        Tuple[int, int]: (completed entries removed, stuck entries removed).
    """
    current_time = datetime.now()
    COMPLETED_MAX_AGE = 7200   # 2 hours, in seconds
    PROCESSING_MAX_AGE = 3600  # 1 hour, in seconds

    with translation_requests_lock:
        # Clean completed translations older than 2 hours.
        to_remove_completed = []
        for req_id, data in completed_translations.items():
            try:
                completed_time = datetime.fromisoformat(data.get('completed_at', ''))
                if (current_time - completed_time).total_seconds() > COMPLETED_MAX_AGE:
                    to_remove_completed.append(req_id)
            # fromisoformat raises ValueError on a bad string and TypeError on a
            # non-string; a bare `except:` here would also swallow SystemExit and
            # KeyboardInterrupt, so catch only the parse failures.
            except (TypeError, ValueError):
                to_remove_completed.append(req_id)  # Remove invalid entries

        for req_id in to_remove_completed:
            del completed_translations[req_id]

        # Clean stuck processing requests older than 1 hour.
        to_remove_processing = []
        for req_id, data in translation_requests.items():
            try:
                started_time = datetime.fromisoformat(data.get('started_at', ''))
                if (current_time - started_time).total_seconds() > PROCESSING_MAX_AGE:
                    to_remove_processing.append(req_id)
            except (TypeError, ValueError):
                to_remove_processing.append(req_id)  # Remove invalid entries

        for req_id in to_remove_processing:
            del translation_requests[req_id]

    # Log/return outside the lock: only the dict mutations need protection.
    logger.info(f"[HF Server] Cleanup: Removed {len(to_remove_completed)} completed, {len(to_remove_processing)} stuck requests")
    return len(to_remove_completed), len(to_remove_processing)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1196
 
1197
# Schedule periodic cleanup (runs every hour)
def periodic_cleanup():
    """Background loop: wait an hour, purge stale translation records, repeat forever."""
    ONE_HOUR = 3600  # seconds between cleanup passes
    while True:
        time.sleep(ONE_HOUR)
        try:
            cleanup_old_requests()
        except Exception as e:
            # The loop must survive any cleanup failure; log and keep running.
            logger.error(f"[CLEANUP] Error during periodic cleanup: {e}")
1206
+
1207
# Start cleanup thread
# Daemon thread so it never blocks interpreter shutdown; it runs
# periodic_cleanup's hourly purge for the life of the process.
cleanup_thread = threading.Thread(target=periodic_cleanup, daemon=True)
cleanup_thread.start()
1210
+
1211
# ========== SERVER STARTUP ==========

if __name__ == "__main__":
    # Bind on all interfaces; port 7860 is the Hugging Face Spaces convention.
    logger.info("[HF Server] Starting Enhanced Multilingual Translation API with WordPress Integration")
    uvicorn.run(app, host="0.0.0.0", port=7860)