danicor committed on
Commit
4f0df26
·
verified ·
1 Parent(s): 0fc2c25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -586
app.py CHANGED
@@ -42,10 +42,6 @@ class TranslationResponse(BaseModel):
42
  processing_time: float
43
  character_count: int
44
  status: str
45
- chunks_processed: Optional[int] = None
46
- estimated_time_remaining: Optional[float] = None
47
- current_chunk: Optional[int] = None
48
- total_chunks: Optional[int] = None
49
 
50
  class TranslationCache:
51
  def __init__(self, cache_duration_minutes: int = 60):
@@ -81,191 +77,16 @@ class TranslationCache:
81
  self.cache[key] = (translation, datetime.now())
82
  logger.info(f"[CACHE STORE] Cached translation for key: {key[:8]}... | Translation length: {len(translation)} chars")
83
 
84
- class TranslationQueue:
85
- def __init__(self, max_workers: int = 3):
86
- self.queue = Queue()
87
- self.max_workers = max_workers
88
- self.current_workers = 0
89
- self.lock = threading.Lock()
90
-
91
- def add_task(self, task_func, *args, **kwargs):
92
- """Add translation task to queue"""
93
- self.queue.put((task_func, args, kwargs))
94
- logger.info(f"[QUEUE] Added task to queue | Queue size: {self.queue.qsize()}")
95
-
96
- def process_queue(self):
97
- """Process tasks from queue"""
98
- while not self.queue.empty():
99
- with self.lock:
100
- if self.current_workers >= self.max_workers:
101
- time.sleep(0.1)
102
- continue
103
-
104
- if not self.queue.empty():
105
- task_func, args, kwargs = self.queue.get()
106
- self.current_workers += 1
107
- logger.info(f"[QUEUE] Starting worker | Current workers: {self.current_workers}")
108
-
109
- def worker():
110
- try:
111
- result = task_func(*args, **kwargs)
112
- return result
113
- finally:
114
- with self.lock:
115
- self.current_workers -= 1
116
- logger.info(f"[QUEUE] Worker finished | Current workers: {self.current_workers}")
117
-
118
- thread = threading.Thread(target=worker)
119
- thread.start()
120
-
121
- class TextChunker:
122
- """کلاس برای تقسیم متن طولانی به بخش‌های کوچکتر"""
123
-
124
- @staticmethod
125
- def split_text_smart(text: str, max_chunk_size: int = 400) -> List[str]:
126
- """تقسیم هوشمند متن بر اساس جملات و پاراگراف‌ها"""
127
- logger.info(f"[CHUNKER] Starting smart text splitting | Text length: {len(text)} chars | Max chunk size: {max_chunk_size}")
128
-
129
- if len(text) <= max_chunk_size:
130
- logger.info(f"[CHUNKER] Text is small, no chunking needed | Length: {len(text)}")
131
- return [text]
132
-
133
- chunks = []
134
-
135
- # تقسیم بر اساس پاراگراف‌ها
136
- paragraphs = text.split('\n\n')
137
- current_chunk = ""
138
-
139
- for i, paragraph in enumerate(paragraphs):
140
- logger.debug(f"[CHUNKER] Processing paragraph {i+1}/{len(paragraphs)} | Length: {len(paragraph)}")
141
-
142
- # اگر پاراگراف خودش بزرگ است، آن را تقسیم کن
143
- if len(paragraph) > max_chunk_size:
144
- # ذخیره قسمت فعلی اگر وجود دارد
145
- if current_chunk.strip():
146
- chunks.append(current_chunk.strip())
147
- logger.debug(f"[CHUNKER] Added chunk from accumulated paragraphs | Length: {len(current_chunk.strip())}")
148
- current_chunk = ""
149
-
150
- # تقسیم پاراگراف بزرگ
151
- sub_chunks = TextChunker._split_paragraph(paragraph, max_chunk_size)
152
- chunks.extend(sub_chunks)
153
- logger.debug(f"[CHUNKER] Split large paragraph into {len(sub_chunks)} sub-chunks")
154
- else:
155
- # بررسی اینکه آیا اضافه کردن این پاراگراف از حد تجاوز می‌کند
156
- if len(current_chunk) + len(paragraph) + 2 > max_chunk_size:
157
- if current_chunk.strip():
158
- chunks.append(current_chunk.strip())
159
- logger.debug(f"[CHUNKER] Added chunk | Length: {len(current_chunk.strip())}")
160
- current_chunk = paragraph
161
- else:
162
- if current_chunk:
163
- current_chunk += "\n\n" + paragraph
164
- else:
165
- current_chunk = paragraph
166
-
167
- # اضافه کردن آخرین قسمت
168
- if current_chunk.strip():
169
- chunks.append(current_chunk.strip())
170
- logger.debug(f"[CHUNKER] Added final chunk | Length: {len(current_chunk.strip())}")
171
-
172
- logger.info(f"[CHUNKER] Text splitting completed | Total chunks: {len(chunks)} | Average chunk size: {sum(len(c) for c in chunks) / len(chunks):.1f} chars")
173
- return chunks
174
-
175
- @staticmethod
176
- def _split_paragraph(paragraph: str, max_chunk_size: int) -> List[str]:
177
- """تقسیم پاراگراف بزرگ به جملات"""
178
- logger.debug(f"[CHUNKER] Splitting large paragraph | Length: {len(paragraph)}")
179
-
180
- # تقسیم بر اساس جملات
181
- sentences = re.split(r'[.!?]+\s+', paragraph)
182
- chunks = []
183
- current_chunk = ""
184
-
185
- for sentence in sentences:
186
- if not sentence.strip():
187
- continue
188
-
189
- # اضافه کردن علامت نقطه اگر حذف شده
190
- if not sentence.endswith(('.', '!', '?')):
191
- sentence += '.'
192
-
193
- if len(sentence) > max_chunk_size:
194
- # جمله خودش خیلی بلند است - تقسیم بر اساس کاما
195
- if current_chunk.strip():
196
- chunks.append(current_chunk.strip())
197
- current_chunk = ""
198
-
199
- sub_chunks = TextChunker._split_by_comma(sentence, max_chunk_size)
200
- chunks.extend(sub_chunks)
201
- else:
202
- if len(current_chunk) + len(sentence) + 1 > max_chunk_size:
203
- if current_chunk.strip():
204
- chunks.append(current_chunk.strip())
205
- current_chunk = sentence
206
- else:
207
- if current_chunk:
208
- current_chunk += " " + sentence
209
- else:
210
- current_chunk = sentence
211
-
212
- if current_chunk.strip():
213
- chunks.append(current_chunk.strip())
214
-
215
- logger.debug(f"[CHUNKER] Paragraph split into {len(chunks)} sentence chunks")
216
- return chunks
217
-
218
- @staticmethod
219
- def _split_by_comma(sentence: str, max_chunk_size: int) -> List[str]:
220
- """تقسیم جمله طولانی بر اساس کاما"""
221
- logger.debug(f"[CHUNKER] Splitting long sentence by comma | Length: {len(sentence)}")
222
-
223
- parts = sentence.split(', ')
224
- chunks = []
225
- current_chunk = ""
226
-
227
- for part in parts:
228
- if len(part) > max_chunk_size:
229
- # قسمت خودش خیلی بلند است - تقسیم اجباری
230
- if current_chunk.strip():
231
- chunks.append(current_chunk.strip())
232
- current_chunk = ""
233
-
234
- # تقسیم اجباری بر اساس طول
235
- while len(part) > max_chunk_size:
236
- chunks.append(part[:max_chunk_size].strip())
237
- part = part[max_chunk_size:].strip()
238
-
239
- if part:
240
- current_chunk = part
241
- else:
242
- if len(current_chunk) + len(part) + 2 > max_chunk_size:
243
- if current_chunk.strip():
244
- chunks.append(current_chunk.strip())
245
- current_chunk = part
246
- else:
247
- if current_chunk:
248
- current_chunk += ", " + part
249
- else:
250
- current_chunk = part
251
-
252
- if current_chunk.strip():
253
- chunks.append(current_chunk.strip())
254
-
255
- return chunks
256
-
257
  class MultilingualTranslator:
258
  def __init__(self, cache_duration_minutes: int = 60):
259
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
260
  logger.info(f"[INIT] Using device: {self.device}")
261
 
262
- # Initialize cache and queue
263
  self.cache = TranslationCache(cache_duration_minutes)
264
- self.queue = TranslationQueue()
265
 
266
  # Add thread pool for parallel processing
267
  self.executor = ThreadPoolExecutor(max_workers=3)
268
- self.background_tasks = {}
269
 
270
  logger.info(f"[INIT] Thread pool initialized with 3 workers")
271
 
@@ -282,231 +103,65 @@ class MultilingualTranslator:
282
  logger.error(f"[INIT] Error loading model: {e}")
283
  raise
284
 
285
- # تنظیمات بهینه برای ترجمه متن‌های بلند
286
- self.max_chunk_size = 350 # حداکثر طول هر قسمت
287
- self.min_chunk_overlap = 20 # همپوشانی بین قسمت‌ها
 
 
288
 
289
- # Track translation progress
290
- self.current_translation = {}
291
- self.translation_lock = threading.Lock()
 
 
 
 
 
 
 
292
 
293
- logger.info(f"[INIT] Translator initialized | Max chunk size: {self.max_chunk_size} chars")
294
-
295
- def translate_chunk(self, text: str, source_lang: str, target_lang: str, chunk_index: int = 0, total_chunks: int = 1) -> str:
296
- """ترجمه یک قسمت کوچک از متن"""
297
  try:
298
- logger.info(f"[TRANSLATE] Starting chunk translation [{chunk_index+1}/{total_chunks}] | {source_lang} → {target_lang} | Length: {len(text)} chars")
299
-
300
  # Set source language for tokenizer
301
  self.tokenizer.src_lang = source_lang
302
 
303
  # Encode input
304
  encoded = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(self.device)
305
- logger.debug(f"[TRANSLATE] Text encoded | Input tokens: {encoded.input_ids.shape[1]}")
306
 
307
  # Generate translation with optimized parameters
308
- start_time = time.time()
309
  generated_tokens = self.model.generate(
310
  **encoded,
311
  forced_bos_token_id=self.tokenizer.get_lang_id(target_lang),
312
- max_length=1024, # افزایش طول خروجی
313
- min_length=10, # حداقل طول خروجی
314
- num_beams=5, # افزایش تعداد beam ها برای کیفیت بهتر
315
  early_stopping=True,
316
- no_repeat_ngram_size=3, # جلوگیری از تکرار
317
- length_penalty=1.0, # تنظیم جریمه طول
318
- repetition_penalty=1.2, # جلوگیری از تکرار کلمات
319
- do_sample=False, # استفاده از روش قطعی
320
- temperature=0.7, # کنترل تنوع
321
  pad_token_id=self.tokenizer.pad_token_id,
322
  eos_token_id=self.tokenizer.eos_token_id
323
  )
324
- generation_time = time.time() - start_time
325
 
326
  # Decode result
327
  translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
328
 
329
- # پاک‌سازی ترجمه از کاراکترهای اضافی
330
  translation = translation.strip()
331
 
332
- logger.info(f"[TRANSLATE] Chunk translation completed [{chunk_index+1}/{total_chunks}] | Generation time: {generation_time:.2f}s | Output length: {len(translation)} chars")
333
-
334
- return translation
335
-
336
- except Exception as e:
337
- logger.error(f"[TRANSLATE] Chunk translation error [{chunk_index+1}/{total_chunks}]: {e}")
338
- return f"[Translation Error: {str(e)}]"
339
-
340
- def translate_text(self, text: str, source_lang: str, target_lang: str, session_id: str = None) -> Tuple[str, float, int]:
341
- """ترجمه متن با پشتیبانی از متن‌های طولانی و لاگ‌های مفصل"""
342
- start_time = time.time()
343
-
344
- if not session_id:
345
- session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]
346
-
347
- logger.info(f"[SESSION:{session_id}] Starting translation | {source_lang} → {target_lang} | Text length: {len(text)} chars")
348
-
349
- # بررسی کش برای کل متن
350
- cached_result = self.cache.get(text, source_lang, target_lang)
351
- if cached_result:
352
- logger.info(f"[SESSION:{session_id}] Translation completed from cache | Time: {time.time() - start_time:.2f}s")
353
- return cached_result, time.time() - start_time, 1
354
-
355
- try:
356
- # اگر متن کوتاه است، مستقیماً ترجمه کن
357
- if len(text) <= self.max_chunk_size:
358
- logger.info(f"[SESSION:{session_id}] Processing as short text")
359
- translation = self.translate_chunk(text, source_lang, target_lang, 0, 1)
360
-
361
- # ذخیره در کش
362
- self.cache.set(text, source_lang, target_lang, translation)
363
- processing_time = time.time() - start_time
364
- logger.info(f"[SESSION:{session_id}] Short text translation completed | Total time: {processing_time:.2f}s")
365
-
366
- return translation, processing_time, 1
367
-
368
- # تقسیم متن طولانی به قسمت‌های کوچکتر
369
- logger.info(f"[SESSION:{session_id}] Processing as long text - starting chunking")
370
- chunks = TextChunker.split_text_smart(text, self.max_chunk_size)
371
- logger.info(f"[SESSION:{session_id}] Text split into {len(chunks)} chunks")
372
-
373
- # Initialize progress tracking
374
- with self.translation_lock:
375
- self.current_translation[session_id] = {
376
- 'total_chunks': len(chunks),
377
- 'completed_chunks': 0,
378
- 'start_time': start_time,
379
- 'source_lang': source_lang,
380
- 'target_lang': target_lang
381
- }
382
-
383
- # ترجمه هر قسمت
384
- translated_chunks = []
385
- for i, chunk in enumerate(chunks):
386
- chunk_start_time = time.time()
387
- logger.info(f"[SESSION:{session_id}] Starting chunk {i+1}/{len(chunks)} | Chunk length: {len(chunk)} chars")
388
-
389
- # بررسی کش برای هر قسمت
390
- chunk_translation = self.cache.get(chunk, source_lang, target_lang)
391
-
392
- if not chunk_translation:
393
- # Estimate remaining time
394
- if i > 0:
395
- elapsed_time = time.time() - start_time
396
- avg_time_per_chunk = elapsed_time / i
397
- estimated_remaining = avg_time_per_chunk * (len(chunks) - i)
398
- logger.info(f"[SESSION:{session_id}] Progress: {i}/{len(chunks)} | Avg time per chunk: {avg_time_per_chunk:.1f}s | Estimated remaining: {estimated_remaining:.1f}s")
399
-
400
- chunk_translation = self.translate_chunk(chunk, source_lang, target_lang, i, len(chunks))
401
- # ذخیره قسمت در کش
402
- self.cache.set(chunk, source_lang, target_lang, chunk_translation)
403
-
404
- chunk_time = time.time() - chunk_start_time
405
- logger.info(f"[SESSION:{session_id}] Chunk {i+1}/{len(chunks)} translated in {chunk_time:.2f}s")
406
- else:
407
- logger.info(f"[SESSION:{session_id}] Chunk {i+1}/{len(chunks)} retrieved from cache")
408
-
409
- translated_chunks.append(chunk_translation)
410
-
411
- # Update progress
412
- with self.translation_lock:
413
- if session_id in self.current_translation:
414
- self.current_translation[session_id]['completed_chunks'] = i + 1
415
-
416
- # کمی استراحت بین ترجمه‌ها برای جلوگیری از بارگذاری زیاد
417
- if i < len(chunks) - 1:
418
- time.sleep(0.1)
419
-
420
- # ترکیب قسمت‌های ترجمه شده
421
- logger.info(f"[SESSION:{session_id}] Combining translated chunks")
422
- final_translation = self._combine_translations(translated_chunks, text)
423
-
424
- # ذخیره نتیجه نهایی در کش
425
- self.cache.set(text, source_lang, target_lang, final_translation)
426
 
427
  processing_time = time.time() - start_time
428
- logger.info(f"[SESSION:{session_id}] Long text translation completed | Total time: {processing_time:.2f}s | Chunks: {len(chunks)} | Final length: {len(final_translation)} chars")
429
-
430
- # Clean up progress tracking
431
- with self.translation_lock:
432
- self.current_translation.pop(session_id, None)
433
 
434
- return final_translation, processing_time, len(chunks)
435
 
436
  except Exception as e:
437
  logger.error(f"[SESSION:{session_id}] Translation error: {e}")
438
- # Clean up progress tracking
439
- with self.translation_lock:
440
- self.current_translation.pop(session_id, None)
441
- return f"Translation error: {str(e)}", time.time() - start_time, 0
442
-
443
- def get_translation_progress(self, session_id: str) -> Dict:
444
- """Get current translation progress"""
445
- with self.translation_lock:
446
- if session_id not in self.current_translation:
447
- return None
448
-
449
- progress = self.current_translation[session_id].copy()
450
- elapsed_time = time.time() - progress['start_time']
451
-
452
- if progress['completed_chunks'] > 0:
453
- avg_time_per_chunk = elapsed_time / progress['completed_chunks']
454
- remaining_chunks = progress['total_chunks'] - progress['completed_chunks']
455
- estimated_remaining = avg_time_per_chunk * remaining_chunks
456
- else:
457
- estimated_remaining = None
458
-
459
- return {
460
- 'total_chunks': progress['total_chunks'],
461
- 'completed_chunks': progress['completed_chunks'],
462
- 'elapsed_time': elapsed_time,
463
- 'estimated_remaining': estimated_remaining,
464
- 'progress_percentage': (progress['completed_chunks'] / progress['total_chunks']) * 100
465
- }
466
-
467
- def _combine_translations(self, translated_chunks: List[str], original_text: str) -> str:
468
- """ترکیب قسمت‌های ترجمه شده به یک متن یکپارچه"""
469
- if not translated_chunks:
470
- return ""
471
-
472
- if len(translated_chunks) == 1:
473
- return translated_chunks[0]
474
-
475
- logger.debug(f"[COMBINER] Combining {len(translated_chunks)} translated chunks")
476
-
477
- # ترکیب قسمت‌ها با در نظر گیری ساختار اصلی متن
478
- combined = []
479
-
480
- for i, chunk in enumerate(translated_chunks):
481
- # پاک‌سازی قسمت
482
- chunk = chunk.strip()
483
-
484
- if not chunk:
485
- continue
486
-
487
- # اضافه کردن فاصله مناسب بین قسمت‌ها
488
- if i > 0 and combined:
489
- # اگر قسمت قبلی با نقطه تمام نمی‌شود، نقطه اضافه کن
490
- if not combined[-1].rstrip().endswith(('.', '!', '?', ':', '؛', '.')):
491
- combined[-1] += '.'
492
-
493
- # بررسی اینکه آیا نیاز به پاراگراف جدید داریم
494
- if '\n\n' in original_text:
495
- combined.append('\n\n' + chunk)
496
- else:
497
- combined.append(' ' + chunk)
498
- else:
499
- combined.append(chunk)
500
-
501
- result = ''.join(combined)
502
-
503
- # پاک‌سازی نهایی
504
- result = re.sub(r'\s+', ' ', result) # حذف فاصله‌های اضافی
505
- result = re.sub(r'\.+', '.', result) # حذف نقطه‌های تکراری
506
- result = result.strip()
507
-
508
- logger.debug(f"[COMBINER] Combined translation length: {len(result)} chars")
509
- return result
510
 
511
  async def translate_text_async(self, text: str, source_lang: str, target_lang: str, session_id: str = None):
512
  """Async wrapper for translate_text"""
@@ -592,7 +247,7 @@ LANGUAGE_MAP = {
592
  translator = MultilingualTranslator(60)
593
 
594
  # Create FastAPI app
595
- app = FastAPI(title="Enhanced Multilingual Translation API", version="2.1.0")
596
 
597
  # Add CORS middleware
598
  app.add_middleware(
@@ -606,20 +261,18 @@ app.add_middleware(
606
  @app.get("/")
607
  async def root():
608
  return {
609
- "message": "Enhanced Multilingual Translation API v2.1",
610
  "status": "active",
611
  "features": [
612
- "enhanced_logging",
613
- "progress_tracking",
614
- "long_text_support",
615
- "smart_chunking",
616
- "cache_optimization"
617
  ]
618
  }
619
 
620
  @app.post("/api/translate")
621
  async def api_translate(request: TranslationRequest):
622
- """API endpoint for translation with enhanced logging and progress tracking"""
623
  if not request.text.strip():
624
  raise HTTPException(status_code=400, detail="No text provided")
625
 
@@ -633,7 +286,7 @@ async def api_translate(request: TranslationRequest):
633
  # Generate session ID for tracking
634
  session_id = hashlib.md5(f"{request.text[:100]}{time.time()}".encode()).hexdigest()[:8]
635
 
636
- translation, processing_time, chunks_count = translator.translate_text(
637
  request.text, source_code, target_code, session_id
638
  )
639
 
@@ -643,8 +296,7 @@ async def api_translate(request: TranslationRequest):
643
  target_language=request.target_lang,
644
  processing_time=processing_time,
645
  character_count=len(request.text),
646
- status="success",
647
- chunks_processed=chunks_count
648
  )
649
  except Exception as e:
650
  logger.error(f"[API] Translation error: {str(e)}")
@@ -653,7 +305,7 @@ async def api_translate(request: TranslationRequest):
653
  # Alternative endpoint for form data (compatibility with WordPress)
654
  @app.post("/api/translate/form")
655
  async def api_translate_form(request: Request):
656
- """Non-blocking translation endpoint with enhanced error handling"""
657
  try:
658
  form_data = await request.form()
659
  text = form_data.get("text", "")
@@ -686,83 +338,33 @@ async def api_translate_form(request: Request):
686
  # Generate session ID for tracking
687
  session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]
688
 
689
- # Check if it's a long text that should be processed in background
690
- if len(text) > translator.max_chunk_size:
691
- # 🔹 اول بررسی کن آیا نتیجه در کش وجود دارد یا نه
692
- cached_result = translator.cache.get(text, source_code, target_code)
693
- if cached_result:
694
- logger.info(f"[FORM API] Returning cached translation immediately for session: {session_id}")
695
- return {
696
- "translation": cached_result,
697
- "source_language": source_lang,
698
- "target_language": target_lang,
699
- "processing_time": 0.0,
700
- "character_count": len(text),
701
- "status": "success",
702
- "chunks_processed": None,
703
- "session_id": session_id,
704
- "is_heavy_text": False,
705
- "cached": True
706
- }
707
- # 🔹 اگر در کش نبود → پس بفرست به background
708
- task = asyncio.create_task(
709
- translator.translate_text_async(text, source_code, target_code, session_id)
710
  )
711
- translator.background_tasks[session_id] = task
712
 
713
- logger.info(f"[FORM API] Started background translation for session: {session_id}")
 
 
 
 
 
 
 
714
 
 
715
  return {
716
- "session_id": session_id,
717
- "request_id": session_id,
718
- "status": "processing",
719
- "message": "Translation started in background. Use CHECK RESULT to get your translation.",
720
  "character_count": len(text),
721
- "is_background": True,
722
- "is_heavy_text": True
723
  }
724
- else:
725
- # Process short text immediately
726
- try:
727
- translation, processing_time, chunks_count = await translator.translate_text_async(
728
- text, source_code, target_code, session_id
729
- )
730
-
731
- # بررسی محتوای ترجمه
732
- if not translation or not translation.strip() or translation.startswith("Translation error"):
733
- logger.error(f"[FORM API] Invalid translation result: {translation[:100] if translation else 'None'}")
734
- return {
735
- "status": "error",
736
- "message": "Translation failed - empty or invalid result",
737
- "session_id": session_id
738
- }
739
-
740
- logger.info(f"[FORM API] Translation successful | Length: {len(translation)} chars")
741
- return {
742
- "translation": translation,
743
- "source_language": source_lang,
744
- "target_language": target_lang,
745
- "processing_time": processing_time,
746
- "character_count": len(text),
747
- "status": "success",
748
- "chunks_processed": chunks_count,
749
- "session_id": session_id
750
- }
751
- except Exception as e:
752
- logger.error(f"[FORM API] Translation error: {str(e)}")
753
- return {"status": "error", "message": f"Translation error: {str(e)}"}
754
-
755
- @app.get("/api/progress/{session_id}")
756
- async def get_translation_progress(session_id: str):
757
- """Get translation progress for a session"""
758
- progress = translator.get_translation_progress(session_id)
759
- if progress is None:
760
- raise HTTPException(status_code=404, detail="Session not found or completed")
761
-
762
- return {
763
- "status": "success",
764
- "progress": progress
765
- }
766
 
767
  @app.get("/api/languages")
768
  async def get_languages():
@@ -781,137 +383,19 @@ async def health_check():
781
  "device": str(translator.device),
782
  "model": translator.model_name,
783
  "cache_size": len(translator.cache.cache),
784
- "max_chunk_size": translator.max_chunk_size,
785
- "active_translations": len(translator.current_translation),
786
- "version": "2.1.0"
787
- }
788
-
789
- @app.get("/api/status/{session_id}")
790
- async def get_session_status(session_id: str):
791
- """Get translation status - non-blocking"""
792
-
793
- # Check if task is in background tasks
794
- if session_id in translator.background_tasks:
795
- task = translator.background_tasks[session_id]
796
-
797
- if task.done():
798
- try:
799
- translation, processing_time, chunks_count = await task
800
- # Clean up completed task
801
- del translator.background_tasks[session_id]
802
-
803
- return {
804
- "status": "completed",
805
- "translation": translation,
806
- "processing_time": processing_time,
807
- "chunks_processed": chunks_count,
808
- "message": "Translation completed successfully"
809
- }
810
- except Exception as e:
811
- del translator.background_tasks[session_id]
812
- return {
813
- "status": "failed",
814
- "message": f"Translation failed: {str(e)}"
815
- }
816
- else:
817
- # Task still running - get progress
818
- progress = translator.get_translation_progress(session_id)
819
-
820
- if progress:
821
- return {
822
- "status": "processing",
823
- "progress": progress,
824
- "message": f"Processing chunk {progress['completed_chunks']}/{progress['total_chunks']}",
825
- "estimated_remaining": progress.get('estimated_remaining', 0)
826
- }
827
- else:
828
- return {
829
- "status": "processing",
830
- "message": "Translation in progress...",
831
- "progress": None
832
- }
833
-
834
- # Check current active translations
835
- progress = translator.get_translation_progress(session_id)
836
- if progress:
837
- return {
838
- "status": "processing",
839
- "progress": progress,
840
- "message": f"Processing chunk {progress['completed_chunks']}/{progress['total_chunks']}",
841
- "estimated_remaining": progress.get('estimated_remaining', 0)
842
- }
843
-
844
- return {
845
- "status": "not_found",
846
- "message": "Session not found or completed"
847
  }
848
 
849
  @app.get("/api/server-status")
850
  async def get_server_status():
851
- """Get current server status - non-blocking"""
852
- active_sessions = []
853
- background_tasks_count = len(translator.background_tasks)
854
-
855
- with translator.translation_lock:
856
- for session_id, progress in translator.current_translation.items():
857
- elapsed_time = time.time() - progress['start_time']
858
- if progress['completed_chunks'] > 0:
859
- avg_time_per_chunk = elapsed_time / progress['completed_chunks']
860
- remaining_chunks = progress['total_chunks'] - progress['completed_chunks']
861
- estimated_remaining = avg_time_per_chunk * remaining_chunks
862
- else:
863
- estimated_remaining = None
864
-
865
- active_sessions.append({
866
- 'session_id': session_id,
867
- 'source_lang': progress['source_lang'],
868
- 'target_lang': progress['target_lang'],
869
- 'total_chunks': progress['total_chunks'],
870
- 'completed_chunks': progress['completed_chunks'],
871
- 'progress_percentage': (progress['completed_chunks'] / progress['total_chunks']) * 100,
872
- 'elapsed_time': elapsed_time,
873
- 'estimated_remaining': estimated_remaining
874
- })
875
-
876
- if active_sessions or background_tasks_count > 0:
877
- if active_sessions:
878
- latest_session = active_sessions[-1]
879
- message = f"Processing chunk {latest_session['completed_chunks']}/{latest_session['total_chunks']} | {latest_session['source_lang']} → {latest_session['target_lang']}"
880
- else:
881
- message = f"{background_tasks_count} translation(s) in background queue"
882
-
883
- return {
884
- "has_active_translation": True,
885
- "status": "processing",
886
- "message": message,
887
- "active_sessions": len(active_sessions),
888
- "background_tasks": background_tasks_count,
889
- "total_active": len(active_sessions) + background_tasks_count
890
- }
891
- else:
892
- return {
893
- "has_active_translation": False,
894
- "status": "idle",
895
- "message": "Server is ready for new translations",
896
- "active_sessions": 0,
897
- "background_tasks": 0
898
- }
899
-
900
- if active_sessions:
901
- # Return the most recent active session
902
- latest_session = active_sessions[-1]
903
- return {
904
- "has_active_translation": True,
905
- "status": "processing",
906
- "message": f"Processing chunk {latest_session['completed_chunks']}/{latest_session['total_chunks']} | {latest_session['source_lang']} → {latest_session['target_lang']}",
907
- "session_data": latest_session
908
- }
909
- else:
910
- return {
911
- "has_active_translation": False,
912
- "status": "no_active_translation",
913
- "message": "No active translation on server"
914
- }
915
 
916
  if __name__ == "__main__":
917
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
42
  processing_time: float
43
  character_count: int
44
  status: str
 
 
 
 
45
 
46
  class TranslationCache:
47
  def __init__(self, cache_duration_minutes: int = 60):
 
77
  self.cache[key] = (translation, datetime.now())
78
  logger.info(f"[CACHE STORE] Cached translation for key: {key[:8]}... | Translation length: {len(translation)} chars")
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  class MultilingualTranslator:
81
  def __init__(self, cache_duration_minutes: int = 60):
82
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
83
  logger.info(f"[INIT] Using device: {self.device}")
84
 
85
+ # Initialize cache
86
  self.cache = TranslationCache(cache_duration_minutes)
 
87
 
88
  # Add thread pool for parallel processing
89
  self.executor = ThreadPoolExecutor(max_workers=3)
 
90
 
91
  logger.info(f"[INIT] Thread pool initialized with 3 workers")
92
 
 
103
  logger.error(f"[INIT] Error loading model: {e}")
104
  raise
105
 
106
+ logger.info(f"[INIT] Translator initialized successfully")
107
+
108
+ def translate_text(self, text: str, source_lang: str, target_lang: str, session_id: str = None) -> Tuple[str, float]:
109
+ """ترجمه متن با پشتیبانی از کش"""
110
+ start_time = time.time()
111
 
112
+ if not session_id:
113
+ session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]
114
+
115
+ logger.info(f"[SESSION:{session_id}] Starting translation | {source_lang} → {target_lang} | Text length: {len(text)} chars")
116
+
117
+ # بررسی کش برای کل متن
118
+ cached_result = self.cache.get(text, source_lang, target_lang)
119
+ if cached_result:
120
+ logger.info(f"[SESSION:{session_id}] Translation completed from cache | Time: {time.time() - start_time:.2f}s")
121
+ return cached_result, time.time() - start_time
122
 
 
 
 
 
123
  try:
 
 
124
  # Set source language for tokenizer
125
  self.tokenizer.src_lang = source_lang
126
 
127
  # Encode input
128
  encoded = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(self.device)
129
+ logger.debug(f"[SESSION:{session_id}] Text encoded | Input tokens: {encoded.input_ids.shape[1]}")
130
 
131
  # Generate translation with optimized parameters
 
132
  generated_tokens = self.model.generate(
133
  **encoded,
134
  forced_bos_token_id=self.tokenizer.get_lang_id(target_lang),
135
+ max_length=1024,
136
+ min_length=10,
137
+ num_beams=5,
138
  early_stopping=True,
139
+ no_repeat_ngram_size=3,
140
+ length_penalty=1.0,
141
+ repetition_penalty=1.2,
142
+ do_sample=False,
143
+ temperature=0.7,
144
  pad_token_id=self.tokenizer.pad_token_id,
145
  eos_token_id=self.tokenizer.eos_token_id
146
  )
 
147
 
148
  # Decode result
149
  translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
150
 
151
+ # پاک‌سازی ترجمه از کاراکترهای اضافی
152
  translation = translation.strip()
153
 
154
+ # ذخیره در کش
155
+ self.cache.set(text, source_lang, target_lang, translation)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
  processing_time = time.time() - start_time
158
+ logger.info(f"[SESSION:{session_id}] Translation completed | Total time: {processing_time:.2f}s | Output length: {len(translation)} chars")
 
 
 
 
159
 
160
+ return translation, processing_time
161
 
162
  except Exception as e:
163
  logger.error(f"[SESSION:{session_id}] Translation error: {e}")
164
+ return f"Translation error: {str(e)}", time.time() - start_time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
  async def translate_text_async(self, text: str, source_lang: str, target_lang: str, session_id: str = None):
167
  """Async wrapper for translate_text"""
 
247
  translator = MultilingualTranslator(60)
248
 
249
  # Create FastAPI app
250
+ app = FastAPI(title="Simplified Multilingual Translation API", version="2.0.0")
251
 
252
  # Add CORS middleware
253
  app.add_middleware(
 
261
  @app.get("/")
262
  async def root():
263
  return {
264
+ "message": "Simplified Multilingual Translation API v2.0",
265
  "status": "active",
266
  "features": [
267
+ "simplified_processing",
268
+ "cache_optimization",
269
+ "direct_translation"
 
 
270
  ]
271
  }
272
 
273
  @app.post("/api/translate")
274
  async def api_translate(request: TranslationRequest):
275
+ """API endpoint for translation"""
276
  if not request.text.strip():
277
  raise HTTPException(status_code=400, detail="No text provided")
278
 
 
286
  # Generate session ID for tracking
287
  session_id = hashlib.md5(f"{request.text[:100]}{time.time()}".encode()).hexdigest()[:8]
288
 
289
+ translation, processing_time = translator.translate_text(
290
  request.text, source_code, target_code, session_id
291
  )
292
 
 
296
  target_language=request.target_lang,
297
  processing_time=processing_time,
298
  character_count=len(request.text),
299
+ status="success"
 
300
  )
301
  except Exception as e:
302
  logger.error(f"[API] Translation error: {str(e)}")
 
305
  # Alternative endpoint for form data (compatibility with WordPress)
306
  @app.post("/api/translate/form")
307
  async def api_translate_form(request: Request):
308
+ """Simplified translation endpoint"""
309
  try:
310
  form_data = await request.form()
311
  text = form_data.get("text", "")
 
338
  # Generate session ID for tracking
339
  session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]
340
 
341
+ try:
342
+ translation, processing_time = await translator.translate_text_async(
343
+ text, source_code, target_code, session_id
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  )
 
345
 
346
+ # بررسی محتوای ترجمه
347
+ if not translation or not translation.strip() or translation.startswith("Translation error"):
348
+ logger.error(f"[FORM API] Invalid translation result: {translation[:100] if translation else 'None'}")
349
+ return {
350
+ "status": "error",
351
+ "message": "Translation failed - empty or invalid result",
352
+ "session_id": session_id
353
+ }
354
 
355
+ logger.info(f"[FORM API] Translation successful | Length: {len(translation)} chars")
356
  return {
357
+ "translation": translation,
358
+ "source_language": source_lang,
359
+ "target_language": target_lang,
360
+ "processing_time": processing_time,
361
  "character_count": len(text),
362
+ "status": "success",
363
+ "session_id": session_id
364
  }
365
+ except Exception as e:
366
+ logger.error(f"[FORM API] Translation error: {str(e)}")
367
+ return {"status": "error", "message": f"Translation error: {str(e)}"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
 
369
  @app.get("/api/languages")
370
  async def get_languages():
 
383
  "device": str(translator.device),
384
  "model": translator.model_name,
385
  "cache_size": len(translator.cache.cache),
386
+ "version": "2.0.0"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
  }
388
 
389
  @app.get("/api/server-status")
390
  async def get_server_status():
391
+ """Get current server status"""
392
+ return {
393
+ "has_active_translation": False,
394
+ "status": "idle",
395
+ "message": "Server is ready for new translations",
396
+ "active_sessions": 0,
397
+ "background_tasks": 0
398
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
  if __name__ == "__main__":
401
  uvicorn.run(app, host="0.0.0.0", port=7860)