danicor committed on
Commit d036146 · verified · 1 Parent(s): 0e92f6e

Update app.py

Files changed (1)
  1. app.py +178 -27
app.py CHANGED
@@ -14,8 +14,15 @@ from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 import uvicorn
 
-# Set up logging
-logging.basicConfig(level=logging.INFO)
+# Enhanced logging configuration
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.StreamHandler(),
+        logging.FileHandler('translation.log')
+    ]
+)
 logger = logging.getLogger(__name__)
 
 # Pydantic models for request/response
@@ -33,6 +40,9 @@ class TranslationResponse(BaseModel):
     character_count: int
     status: str
     chunks_processed: Optional[int] = None
+    estimated_time_remaining: Optional[float] = None
+    current_chunk: Optional[int] = None
+    total_chunks: Optional[int] = None
 
 class TranslationCache:
     def __init__(self, cache_duration_minutes: int = 60):
@@ -52,11 +62,13 @@ class TranslationCache:
             if key in self.cache:
                 translation, timestamp = self.cache[key]
                 if datetime.now() - timestamp < self.cache_duration:
-                    logger.info(f"Cache hit for key: {key[:8]}...")
+                    logger.info(f"[CACHE HIT] Retrieved cached translation for key: {key[:8]}... | Length: {len(translation)} chars")
                     return translation
                 else:
                     # Remove expired entry
                     del self.cache[key]
+                    logger.info(f"[CACHE EXPIRED] Removed expired cache entry for key: {key[:8]}...")
+            logger.info(f"[CACHE MISS] No cached translation found for key: {key[:8]}...")
             return None
 
     def set(self, text: str, source_lang: str, target_lang: str, translation: str):
@@ -64,7 +76,7 @@ class TranslationCache:
         with self.lock:
             key = self._generate_key(text, source_lang, target_lang)
             self.cache[key] = (translation, datetime.now())
-            logger.info(f"Cached translation for key: {key[:8]}...")
+            logger.info(f"[CACHE STORE] Cached translation for key: {key[:8]}... | Translation length: {len(translation)} chars")
 
 class TranslationQueue:
     def __init__(self, max_workers: int = 3):
@@ -76,6 +88,7 @@ class TranslationQueue:
     def add_task(self, task_func, *args, **kwargs):
         """Add translation task to queue"""
         self.queue.put((task_func, args, kwargs))
+        logger.info(f"[QUEUE] Added task to queue | Queue size: {self.queue.qsize()}")
 
     def process_queue(self):
         """Process tasks from queue"""
@@ -88,6 +101,7 @@ class TranslationQueue:
             if not self.queue.empty():
                 task_func, args, kwargs = self.queue.get()
                 self.current_workers += 1
+                logger.info(f"[QUEUE] Starting worker | Current workers: {self.current_workers}")
 
                 def worker():
                     try:
@@ -96,6 +110,7 @@ class TranslationQueue:
                     finally:
                         with self.lock:
                             self.current_workers -= 1
+                            logger.info(f"[QUEUE] Worker finished | Current workers: {self.current_workers}")
 
                 thread = threading.Thread(target=worker)
                 thread.start()
@@ -106,7 +121,10 @@ class TextChunker:
     @staticmethod
     def split_text_smart(text: str, max_chunk_size: int = 400) -> List[str]:
         """Smart text splitting based on sentences and paragraphs"""
+        logger.info(f"[CHUNKER] Starting smart text splitting | Text length: {len(text)} chars | Max chunk size: {max_chunk_size}")
+
         if len(text) <= max_chunk_size:
+            logger.info(f"[CHUNKER] Text is small, no chunking needed | Length: {len(text)}")
             return [text]
 
         chunks = []
@@ -115,22 +133,27 @@ class TextChunker:
         paragraphs = text.split('\n\n')
         current_chunk = ""
 
-        for paragraph in paragraphs:
+        for i, paragraph in enumerate(paragraphs):
+            logger.debug(f"[CHUNKER] Processing paragraph {i+1}/{len(paragraphs)} | Length: {len(paragraph)}")
+
             # If the paragraph itself is too large, split it
             if len(paragraph) > max_chunk_size:
                 # Save the current chunk if there is one
                 if current_chunk.strip():
                     chunks.append(current_chunk.strip())
+                    logger.debug(f"[CHUNKER] Added chunk from accumulated paragraphs | Length: {len(current_chunk.strip())}")
                     current_chunk = ""
 
                 # Split the large paragraph
                 sub_chunks = TextChunker._split_paragraph(paragraph, max_chunk_size)
                 chunks.extend(sub_chunks)
+                logger.debug(f"[CHUNKER] Split large paragraph into {len(sub_chunks)} sub-chunks")
             else:
                 # Check whether adding this paragraph would exceed the limit
                 if len(current_chunk) + len(paragraph) + 2 > max_chunk_size:
                     if current_chunk.strip():
                         chunks.append(current_chunk.strip())
+                        logger.debug(f"[CHUNKER] Added chunk | Length: {len(current_chunk.strip())}")
                     current_chunk = paragraph
                 else:
                     if current_chunk:
@@ -141,12 +164,16 @@ class TextChunker:
         # Add the last chunk
         if current_chunk.strip():
             chunks.append(current_chunk.strip())
+            logger.debug(f"[CHUNKER] Added final chunk | Length: {len(current_chunk.strip())}")
 
+        logger.info(f"[CHUNKER] Text splitting completed | Total chunks: {len(chunks)} | Average chunk size: {sum(len(c) for c in chunks) / len(chunks):.1f} chars")
         return chunks
 
     @staticmethod
     def _split_paragraph(paragraph: str, max_chunk_size: int) -> List[str]:
         """Split a large paragraph into sentences"""
+        logger.debug(f"[CHUNKER] Splitting large paragraph | Length: {len(paragraph)}")
+
         # Split on sentence boundaries
         sentences = re.split(r'[.!?]+\s+', paragraph)
         chunks = []
@@ -182,11 +209,14 @@ class TextChunker:
         if current_chunk.strip():
             chunks.append(current_chunk.strip())
 
+        logger.debug(f"[CHUNKER] Paragraph split into {len(chunks)} sentence chunks")
         return chunks
 
     @staticmethod
     def _split_by_comma(sentence: str, max_chunk_size: int) -> List[str]:
         """Split a long sentence on commas"""
+        logger.debug(f"[CHUNKER] Splitting long sentence by comma | Length: {len(sentence)}")
+
         parts = sentence.split(', ')
         chunks = []
         current_chunk = ""
@@ -224,7 +254,7 @@ class TextChunker:
 class MultilingualTranslator:
     def __init__(self, cache_duration_minutes: int = 60):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        logger.info(f"Using device: {self.device}")
+        logger.info(f"[INIT] Using device: {self.device}")
 
         # Initialize cache and queue
         self.cache = TranslationCache(cache_duration_minutes)
@@ -232,31 +262,41 @@ class MultilingualTranslator:
 
         # Load model - using a powerful multilingual model
         self.model_name = "facebook/m2m100_1.2B"
-        logger.info(f"Loading model: {self.model_name}")
+        logger.info(f"[INIT] Loading model: {self.model_name}")
 
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
             self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
             self.model.to(self.device)
-            logger.info("Model loaded successfully!")
+            logger.info(f"[INIT] Model loaded successfully on {self.device}!")
         except Exception as e:
-            logger.error(f"Error loading model: {e}")
+            logger.error(f"[INIT] Error loading model: {e}")
             raise
 
         # Optimized settings for translating long texts
         self.max_chunk_size = 350  # maximum length of each chunk
         self.min_chunk_overlap = 20  # overlap between chunks
+
+        # Track translation progress
+        self.current_translation = {}
+        self.translation_lock = threading.Lock()
+
+        logger.info(f"[INIT] Translator initialized | Max chunk size: {self.max_chunk_size} chars")
 
-    def translate_chunk(self, text: str, source_lang: str, target_lang: str) -> str:
+    def translate_chunk(self, text: str, source_lang: str, target_lang: str, chunk_index: int = 0, total_chunks: int = 1) -> str:
         """Translate a small chunk of text"""
         try:
+            logger.info(f"[TRANSLATE] Starting chunk translation [{chunk_index+1}/{total_chunks}] | {source_lang} → {target_lang} | Length: {len(text)} chars")
+
             # Set source language for tokenizer
             self.tokenizer.src_lang = source_lang
 
             # Encode input
             encoded = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(self.device)
+            logger.debug(f"[TRANSLATE] Text encoded | Input tokens: {encoded.input_ids.shape[1]}")
 
             # Generate translation with optimized parameters
+            start_time = time.time()
             generated_tokens = self.model.generate(
                 **encoded,
                 forced_bos_token_id=self.tokenizer.get_lang_id(target_lang),
@@ -279,71 +320,141 @@ class MultilingualTranslator:
             # Clean extra characters from the translation
             translation = translation.strip()
 
+            logger.info(f"[TRANSLATE] Chunk translation completed [{chunk_index+1}/{total_chunks}] | Generation time: {generation_time:.2f}s | Output length: {len(translation)} chars")
+
             return translation
 
         except Exception as e:
-            logger.error(f"Chunk translation error: {e}")
+            logger.error(f"[TRANSLATE] Chunk translation error [{chunk_index+1}/{total_chunks}]: {e}")
             return f"[Translation Error: {str(e)}]"
 
-    def translate_text(self, text: str, source_lang: str, target_lang: str) -> Tuple[str, float, int]:
-        """Translate text with support for long texts"""
+    def translate_text(self, text: str, source_lang: str, target_lang: str, session_id: str = None) -> Tuple[str, float, int]:
+        """Translate text with support for long texts and detailed logging"""
         start_time = time.time()
 
+        if not session_id:
+            session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]
+
+        logger.info(f"[SESSION:{session_id}] Starting translation | {source_lang} → {target_lang} | Text length: {len(text)} chars")
+
         # Check the cache for the whole text
         cached_result = self.cache.get(text, source_lang, target_lang)
         if cached_result:
+            logger.info(f"[SESSION:{session_id}] Translation completed from cache | Time: {time.time() - start_time:.2f}s")
             return cached_result, time.time() - start_time, 1
 
         try:
             # If the text is short, translate it directly
             if len(text) <= self.max_chunk_size:
-                translation = self.translate_chunk(text, source_lang, target_lang)
+                logger.info(f"[SESSION:{session_id}] Processing as short text")
+                translation = self.translate_chunk(text, source_lang, target_lang, 0, 1)
 
                 # Store in cache
                 self.cache.set(text, source_lang, target_lang, translation)
                 processing_time = time.time() - start_time
-                logger.info(f"Short text translation completed in {processing_time:.2f} seconds")
+                logger.info(f"[SESSION:{session_id}] Short text translation completed | Total time: {processing_time:.2f}s")
 
                 return translation, processing_time, 1
 
             # Split the long text into smaller chunks
+            logger.info(f"[SESSION:{session_id}] Processing as long text - starting chunking")
             chunks = TextChunker.split_text_smart(text, self.max_chunk_size)
-            logger.info(f"Split long text into {len(chunks)} chunks")
+            logger.info(f"[SESSION:{session_id}] Text split into {len(chunks)} chunks")
+
+            # Initialize progress tracking
+            with self.translation_lock:
+                self.current_translation[session_id] = {
+                    'total_chunks': len(chunks),
+                    'completed_chunks': 0,
+                    'start_time': start_time,
+                    'source_lang': source_lang,
+                    'target_lang': target_lang
+                }
 
             # Translate each chunk
             translated_chunks = []
             for i, chunk in enumerate(chunks):
-                logger.info(f"Translating chunk {i+1}/{len(chunks)} (length: {len(chunk)})")
+                chunk_start_time = time.time()
+                logger.info(f"[SESSION:{session_id}] Starting chunk {i+1}/{len(chunks)} | Chunk length: {len(chunk)} chars")
 
                 # Check the cache for each chunk
                 chunk_translation = self.cache.get(chunk, source_lang, target_lang)
 
                 if not chunk_translation:
-                    chunk_translation = self.translate_chunk(chunk, source_lang, target_lang)
+                    # Estimate remaining time
+                    if i > 0:
+                        elapsed_time = time.time() - start_time
+                        avg_time_per_chunk = elapsed_time / i
+                        estimated_remaining = avg_time_per_chunk * (len(chunks) - i)
+                        logger.info(f"[SESSION:{session_id}] Progress: {i}/{len(chunks)} | Avg time per chunk: {avg_time_per_chunk:.1f}s | Estimated remaining: {estimated_remaining:.1f}s")
+
+                    chunk_translation = self.translate_chunk(chunk, source_lang, target_lang, i, len(chunks))
                     # Store the chunk in cache
                     self.cache.set(chunk, source_lang, target_lang, chunk_translation)
+
+                    chunk_time = time.time() - chunk_start_time
+                    logger.info(f"[SESSION:{session_id}] Chunk {i+1}/{len(chunks)} translated in {chunk_time:.2f}s")
+                else:
+                    logger.info(f"[SESSION:{session_id}] Chunk {i+1}/{len(chunks)} retrieved from cache")
 
                 translated_chunks.append(chunk_translation)
 
+                # Update progress
+                with self.translation_lock:
+                    if session_id in self.current_translation:
+                        self.current_translation[session_id]['completed_chunks'] = i + 1
+
                 # Brief pause between translations to avoid overload
                 if i < len(chunks) - 1:
                     time.sleep(0.1)
 
             # Combine the translated chunks
+            logger.info(f"[SESSION:{session_id}] Combining translated chunks")
             final_translation = self._combine_translations(translated_chunks, text)
 
             # Store the final result in cache
             self.cache.set(text, source_lang, target_lang, final_translation)
 
             processing_time = time.time() - start_time
-            logger.info(f"Long text translation completed in {processing_time:.2f} seconds ({len(chunks)} chunks)")
+            logger.info(f"[SESSION:{session_id}] Long text translation completed | Total time: {processing_time:.2f}s | Chunks: {len(chunks)} | Final length: {len(final_translation)} chars")
+
+            # Clean up progress tracking
+            with self.translation_lock:
+                self.current_translation.pop(session_id, None)
 
             return final_translation, processing_time, len(chunks)
 
         except Exception as e:
-            logger.error(f"Translation error: {e}")
+            logger.error(f"[SESSION:{session_id}] Translation error: {e}")
+            # Clean up progress tracking
+            with self.translation_lock:
+                self.current_translation.pop(session_id, None)
             return f"Translation error: {str(e)}", time.time() - start_time, 0
 
+    def get_translation_progress(self, session_id: str) -> Dict:
+        """Get current translation progress"""
+        with self.translation_lock:
+            if session_id not in self.current_translation:
+                return None
+
+            progress = self.current_translation[session_id].copy()
+            elapsed_time = time.time() - progress['start_time']
+
+            if progress['completed_chunks'] > 0:
+                avg_time_per_chunk = elapsed_time / progress['completed_chunks']
+                remaining_chunks = progress['total_chunks'] - progress['completed_chunks']
+                estimated_remaining = avg_time_per_chunk * remaining_chunks
+            else:
+                estimated_remaining = None
+
+            return {
+                'total_chunks': progress['total_chunks'],
+                'completed_chunks': progress['completed_chunks'],
+                'elapsed_time': elapsed_time,
+                'estimated_remaining': estimated_remaining,
+                'progress_percentage': (progress['completed_chunks'] / progress['total_chunks']) * 100
+            }
+
     def _combine_translations(self, translated_chunks: List[str], original_text: str) -> str:
         """Combine the translated chunks into a single coherent text"""
         if not translated_chunks:
@@ -352,6 +463,8 @@ class MultilingualTranslator:
         if len(translated_chunks) == 1:
             return translated_chunks[0]
 
+        logger.debug(f"[COMBINER] Combining {len(translated_chunks)} translated chunks")
+
         # Combine the chunks taking the original text structure into account
         combined = []
 
@@ -383,6 +496,7 @@ class MultilingualTranslator:
         result = re.sub(r'\.+', '.', result)  # remove repeated periods
         result = result.strip()
 
+        logger.debug(f"[COMBINER] Combined translation length: {len(result)} chars")
         return result
 
 # Language mappings for M2M100 model
@@ -460,7 +574,7 @@ LANGUAGE_MAP = {
 translator = MultilingualTranslator(60)
 
 # Create FastAPI app
-app = FastAPI(title="Multilingual Translation API", version="2.0.0")
+app = FastAPI(title="Enhanced Multilingual Translation API", version="2.1.0")
 
 # Add CORS middleware
 app.add_middleware(
@@ -473,11 +587,21 @@ app.add_middleware(
 
 @app.get("/")
 async def root():
-    return {"message": "Multilingual Translation API v2.0", "status": "active", "features": ["long_text_support", "smart_chunking", "cache_optimization"]}
+    return {
+        "message": "Enhanced Multilingual Translation API v2.1",
+        "status": "active",
+        "features": [
+            "enhanced_logging",
+            "progress_tracking",
+            "long_text_support",
+            "smart_chunking",
+            "cache_optimization"
+        ]
+    }
 
 @app.post("/api/translate")
 async def api_translate(request: TranslationRequest):
-    """API endpoint for translation with long text support"""
+    """API endpoint for translation with enhanced logging and progress tracking"""
     if not request.text.strip():
         raise HTTPException(status_code=400, detail="No text provided")
 
@@ -488,7 +612,12 @@ async def api_translate(request: TranslationRequest):
         raise HTTPException(status_code=400, detail="Invalid language codes")
 
     try:
-        translation, processing_time, chunks_count = translator.translate_text(request.text, source_code, target_code)
+        # Generate session ID for tracking
+        session_id = hashlib.md5(f"{request.text[:100]}{time.time()}".encode()).hexdigest()[:8]
+
+        translation, processing_time, chunks_count = translator.translate_text(
+            request.text, source_code, target_code, session_id
+        )
 
         return TranslationResponse(
             translation=translation,
@@ -500,12 +629,13 @@ async def api_translate(request: TranslationRequest):
             chunks_processed=chunks_count
         )
     except Exception as e:
+        logger.error(f"[API] Translation error: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}")
 
 # Alternative endpoint for form data (compatibility with WordPress)
 @app.post("/api/translate/form")
 async def api_translate_form(request: Request):
-    """Alternative endpoint that accepts form data with long text support"""
+    """Alternative endpoint that accepts form data with enhanced logging"""
     try:
         form_data = await request.form()
         text = form_data.get("text", "")
@@ -523,6 +653,8 @@ async def api_translate_form(request: Request):
     except:
         raise HTTPException(status_code=400, detail="Invalid request format")
 
+    logger.info(f"[FORM API] Translation request | {source_lang} → {target_lang} | Length: {len(text)} chars")
+
     if not text.strip():
         raise HTTPException(status_code=400, detail="No text provided")
 
@@ -533,7 +665,12 @@ async def api_translate_form(request: Request):
         raise HTTPException(status_code=400, detail="Invalid language codes")
 
     try:
-        translation, processing_time, chunks_count = translator.translate_text(text, source_code, target_code)
+        # Generate session ID for tracking
+        session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]
+
+        translation, processing_time, chunks_count = translator.translate_text(
+            text, source_code, target_code, session_id
+        )
 
         return {
             "translation": translation,
@@ -545,8 +682,21 @@ async def api_translate_form(request: Request):
             "chunks_processed": chunks_count
         }
     except Exception as e:
+        logger.error(f"[FORM API] Translation error: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}")
 
+@app.get("/api/progress/{session_id}")
+async def get_translation_progress(session_id: str):
+    """Get translation progress for a session"""
+    progress = translator.get_translation_progress(session_id)
+    if progress is None:
+        raise HTTPException(status_code=404, detail="Session not found or completed")
+
+    return {
+        "status": "success",
+        "progress": progress
+    }
+
 @app.get("/api/languages")
 async def get_languages():
     """Get supported languages"""
@@ -565,7 +715,8 @@ async def health_check():
         "model": translator.model_name,
         "cache_size": len(translator.cache.cache),
         "max_chunk_size": translator.max_chunk_size,
-        "version": "2.0.0"
+        "active_translations": len(translator.current_translation),
+        "version": "2.1.0"
     }
 
 if __name__ == "__main__":
 
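For quick manual testing of the endpoints touched by this commit, a minimal client sketch follows. It is only a sketch: the base URL/port, the source_lang/target_lang request fields, and the sample language values are assumptions (the TranslationRequest model and the uvicorn.run(...) call are not shown in this diff), and the session id used for progress polling is the 8-character hex id the server writes in its [SESSION:...] log lines, since the translate responses do not return it.

# Client sketch (assumed: base URL, request field names, language values).
import requests

BASE_URL = "http://localhost:8000"  # adjust to the host/port the app actually runs on

# Health check now also reports active_translations and version 2.1.0
print(requests.get(f"{BASE_URL}/health").json())

# JSON endpoint; generation can take minutes for long texts, hence the long timeout
payload = {
    "text": "A long article to translate ...",
    "source_lang": "en",   # assumed field names and values
    "target_lang": "fa",
}
resp = requests.post(f"{BASE_URL}/api/translate", json=payload, timeout=600)
print(resp.json().get("translation"), resp.json().get("chunks_processed"))

# Form endpoint (WordPress compatibility); only the "text" field is confirmed by the diff
form = {"text": "Short text", "source_lang": "en", "target_lang": "fa"}
print(requests.post(f"{BASE_URL}/api/translate/form", data=form, timeout=600).json())

# Progress polling for a translation that is still running; the id below is a placeholder
session_id = "0a1b2c3d"
r = requests.get(f"{BASE_URL}/api/progress/{session_id}")
print(r.json() if r.status_code == 200 else "session not found or completed")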
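The chunking logic can also be sanity-checked on its own. Note, as a caveat rather than a confirmed usage pattern, that importing app executes the module-level translator = MultilingualTranslator(60) line, so the import itself downloads and loads facebook/m2m100_1.2B; the sample text below is purely illustrative.

# Sketch: exercise TextChunker.split_text_smart directly (importing app also
# loads the M2M100 model, because `translator` is constructed at module level).
from app import TextChunker

sample = (
    "First paragraph. It has a couple of short sentences.\n\n"
    + "This second paragraph is deliberately long. " * 20
)
chunks = TextChunker.split_text_smart(sample, max_chunk_size=350)
for i, chunk in enumerate(chunks, 1):
    print(f"chunk {i}: {len(chunk)} chars")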