danicor commited on
Commit
d207ff4
·
verified ·
1 Parent(s): 8b787b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1087 -641
app.py CHANGED
@@ -1,47 +1,40 @@
1
- # server.py
2
  import asyncio
3
  from concurrent.futures import ThreadPoolExecutor
4
  import threading
 
 
5
  import time
6
  import json
7
  import hashlib
8
  import re
9
  from datetime import datetime, timedelta
 
10
  from queue import Queue
11
  import logging
12
- from typing import Dict, List, Tuple, Optional, Any
13
- from fastapi import FastAPI, HTTPException, Request
14
  from fastapi.middleware.cors import CORSMiddleware
15
  from pydantic import BaseModel
16
  import uvicorn
17
- import os
18
-
19
- # Optional: Transformers (if you want local model)
20
- # If you don't plan to run a local transformer, you can still keep API and adapt.
21
- try:
22
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
23
- TRANSFORMERS_AVAILABLE = True
24
- except Exception:
25
- TRANSFORMERS_AVAILABLE = False
26
-
27
- # ----------------------- Configuration -----------------------
28
- LOG_FILE = os.environ.get("TRANSLATION_LOG", "translation.log")
29
- HF_MODEL = os.environ.get("HF_MODEL", "facebook/m2m100_418M") # change to 1.2B if you have resources
30
- MAX_WORKERS = int(os.environ.get("MAX_WORKERS", "3"))
31
- CACHE_MINUTES = int(os.environ.get("CACHE_MINUTES", "60"))
32
- MAX_CHUNK_SIZE = int(os.environ.get("MAX_CHUNK_SIZE", "350"))
33
- SERVER_HOST = os.environ.get("SERVER_HOST", "0.0.0.0")
34
- SERVER_PORT = int(os.environ.get("SERVER_PORT", "7860"))
35
-
36
- # ----------------------- Logging -----------------------
37
  logging.basicConfig(
38
  level=logging.INFO,
39
- format="%(asctime)s - %(levelname)s - %(message)s",
40
- handlers=[logging.StreamHandler(), logging.FileHandler(LOG_FILE)]
 
 
 
41
  )
42
- logger = logging.getLogger("translator-server")
 
 
 
 
 
43
 
44
- # ----------------------- Pydantic Models -----------------------
45
  class TranslationRequest(BaseModel):
46
  text: str
47
  source_lang: str
@@ -59,13 +52,592 @@ class TranslationResponse(BaseModel):
59
  estimated_time_remaining: Optional[float] = None
60
  current_chunk: Optional[int] = None
61
  total_chunks: Optional[int] = None
62
- session_id: Optional[str] = None
63
 
64
- # ----------------------- Language Map -----------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  LANGUAGE_MAP = {
66
  "English": "en",
67
- "Persian (Farsi)": "fa",
68
- "Persian": "fa",
69
  "Arabic": "ar",
70
  "French": "fr",
71
  "German": "de",
@@ -74,7 +646,6 @@ LANGUAGE_MAP = {
74
  "Portuguese": "pt",
75
  "Russian": "ru",
76
  "Chinese (Simplified)": "zh",
77
- "Chinese": "zh",
78
  "Japanese": "ja",
79
  "Korean": "ko",
80
  "Hindi": "hi",
@@ -134,676 +705,551 @@ LANGUAGE_MAP = {
134
  "Zulu": "zu"
135
  }
136
 
137
- # ----------------------- Helpers -----------------------
138
- def generate_session_id(prefix: str = "") -> str:
139
- return hashlib.md5(f"{prefix}_{time.time()}_{os.urandom(8)}".encode()).hexdigest()[:12]
140
-
141
- # ----------------------- Cache -----------------------
142
- class TranslationCache:
143
- def __init__(self, cache_duration_minutes: int = CACHE_MINUTES):
144
- self.cache: Dict[str, Tuple[str, datetime]] = {}
145
- self.cache_duration = timedelta(minutes=cache_duration_minutes)
146
- self.lock = threading.Lock()
147
-
148
- def _generate_key(self, text: str, source_lang: str, target_lang: str) -> str:
149
- content = f"{text}__{source_lang}__{target_lang}"
150
- return hashlib.sha256(content.encode()).hexdigest()
151
-
152
- def get(self, text: str, source_lang: str, target_lang: str) -> Optional[str]:
153
- with self.lock:
154
- key = self._generate_key(text, source_lang, target_lang)
155
- entry = self.cache.get(key)
156
- if entry:
157
- translation, ts = entry
158
- if datetime.utcnow() - ts < self.cache_duration:
159
- logger.info(f"[CACHE HIT] {key[:8]} len={len(translation)}")
160
- return translation
161
- else:
162
- del self.cache[key]
163
- logger.info(f"[CACHE EXPIRED] {key[:8]}")
164
- logger.debug(f"[CACHE MISS] {key[:8]}")
165
- return None
166
-
167
- def set(self, text: str, source_lang: str, target_lang: str, translation: str):
168
- with self.lock:
169
- key = self._generate_key(text, source_lang, target_lang)
170
- self.cache[key] = (translation, datetime.utcnow())
171
- logger.info(f"[CACHE SET] {key[:8]} len={len(translation)}")
172
-
173
- # ----------------------- Smart Chunker -----------------------
174
- class TextChunker:
175
- """Smart splitting: paragraphs -> sentences -> commas fallback."""
176
-
177
- @staticmethod
178
- def split_text_smart(text: str, max_chunk_size: int = MAX_CHUNK_SIZE) -> List[str]:
179
- text = text.strip()
180
- if not text:
181
- return []
182
- if len(text) <= max_chunk_size:
183
- return [text]
184
-
185
- # First split by paragraphs to preserve structure
186
- paragraphs = [p.strip() for p in re.split(r'\n{2,}', text) if p.strip()]
187
- chunks: List[str] = []
188
- current = ""
189
-
190
- for p in paragraphs:
191
- if len(p) <= max_chunk_size:
192
- if not current:
193
- current = p
194
- else:
195
- if len(current) + 2 + len(p) <= max_chunk_size:
196
- current += "\n\n" + p
197
- else:
198
- chunks.append(current.strip())
199
- current = p
200
- else:
201
- # paragraph too large -> split to sentences
202
- if current:
203
- chunks.append(current.strip())
204
- current = ""
205
- parts = TextChunker._split_paragraph(p, max_chunk_size)
206
- chunks.extend(parts)
207
-
208
- if current:
209
- chunks.append(current.strip())
210
-
211
- # Safety: merge very small chunks
212
- merged: List[str] = []
213
- for c in chunks:
214
- if not merged:
215
- merged.append(c)
216
- else:
217
- if len(merged[-1]) + 1 + len(c) <= max_chunk_size:
218
- merged[-1] = merged[-1] + "\n\n" + c
219
- else:
220
- merged.append(c)
221
-
222
- logger.info(f"[CHUNKER] split into {len(merged)} chunks (avg {sum(len(x) for x in merged)/len(merged):.1f} chars)")
223
- return merged
224
-
225
- @staticmethod
226
- def _split_paragraph(paragraph: str, max_chunk_size: int) -> List[str]:
227
- sentences = re.split(r'(?<=[.!?])\s+', paragraph)
228
- chunks: List[str] = []
229
- current = ""
230
- for s in sentences:
231
- s = s.strip()
232
- if not s:
233
- continue
234
- if len(s) > max_chunk_size:
235
- # fallback: split by commas
236
- parts = TextChunker._split_by_comma(s, max_chunk_size)
237
- if current:
238
- chunks.append(current.strip()); current = ""
239
- chunks.extend(parts)
240
- else:
241
- if not current:
242
- current = s
243
- elif len(current) + 1 + len(s) <= max_chunk_size:
244
- current += " " + s
245
- else:
246
- chunks.append(current.strip())
247
- current = s
248
- if current:
249
- chunks.append(current.strip())
250
- return chunks
251
-
252
- @staticmethod
253
- def _split_by_comma(sentence: str, max_chunk_size: int) -> List[str]:
254
- parts = [p.strip() for p in sentence.split(',') if p.strip()]
255
- chunks: List[str] = []
256
- current = ""
257
- for p in parts:
258
- if len(p) > max_chunk_size:
259
- # hard cut
260
- i = 0
261
- while i < len(p):
262
- slice_ = p[i:i+max_chunk_size].strip()
263
- if slice_:
264
- chunks.append(slice_)
265
- i += max_chunk_size
266
- else:
267
- if not current:
268
- current = p
269
- elif len(current) + 2 + len(p) <= max_chunk_size:
270
- current += ", " + p
271
- else:
272
- chunks.append(current.strip())
273
- current = p
274
- if current:
275
- chunks.append(current.strip())
276
- return chunks
277
-
278
- # ----------------------- Translator Core -----------------------
279
- class MultilingualTranslator:
280
- def __init__(self, cache_minutes: int = CACHE_MINUTES, max_workers: int = MAX_WORKERS):
281
- self.device = "cpu"
282
- self.model_name = HF_MODEL
283
- self.tokenizer = None
284
- self.model = None
285
- self.generation_lock = threading.Lock() # ensure model.generate serialized
286
- self.executor = ThreadPoolExecutor(max_workers=max_workers)
287
- self.background_tasks: Dict[str, asyncio.Task] = {}
288
- self.cache = TranslationCache(cache_minutes)
289
- self.current_translation: Dict[str, Dict[str, Any]] = {}
290
- self.translation_lock = threading.Lock()
291
- self.max_chunk_size = MAX_CHUNK_SIZE
292
-
293
- if TRANSFORMERS_AVAILABLE:
294
- try:
295
- # prefer GPU if available
296
- import torch as _torch
297
- self.device = "cuda" if _torch.cuda.is_available() else "cpu"
298
- logger.info(f"[MODEL] Loading {self.model_name} on {self.device} (this may take time)...")
299
- self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_fast=False)
300
- self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
301
- if self.device == "cuda":
302
- self.model.to("cuda")
303
- logger.info("[MODEL] Model loaded successfully.")
304
- except Exception as e:
305
- logger.exception(f"[MODEL] Failed to load model '{self.model_name}': {e}")
306
- self.model = None
307
- self.tokenizer = None
308
- else:
309
- logger.warning("[MODEL] transformers not available — running in mock mode (no local model).")
310
-
311
- # internal chunk translation executed in threadpool (but generation uses generation_lock)
312
- def _translate_chunk_sync(self, text: str, src_code: str, tgt_code: str, chunk_index: int = 0, total_chunks: int = 1) -> str:
313
- """Synchronous chunk translation (called in executor)."""
314
- if not text:
315
- return ""
316
- if self.model is None or self.tokenizer is None:
317
- # mock: prefix target language code if no model
318
- logger.warning("[TRANSLATE] No model available, returning mock translation.")
319
- return f"[{tgt_code}] {text}"
320
 
321
- try:
322
- # set tokenizer language if model supports
323
- with self.generation_lock:
324
- # some tokenizers use .src_lang (M2M100)
325
- try:
326
- if hasattr(self.tokenizer, "src_lang"):
327
- self.tokenizer.src_lang = src_code
328
- except Exception:
329
- pass
330
-
331
- inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
332
- if hasattr(inputs, "to"):
333
- # pyright typing
334
- pass
335
- # Move tensors to device if cuda
336
- import torch as _torch
337
- if self.device == "cuda":
338
- for k, v in inputs.items():
339
- if isinstance(v, _torch.Tensor):
340
- inputs[k] = v.to("cuda")
341
-
342
- # determine forced_bos_token_id if available
343
- forced_bos = None
344
- try:
345
- if hasattr(self.tokenizer, "get_lang_id"):
346
- forced_bos = self.tokenizer.get_lang_id(tgt_code)
347
- except Exception:
348
- forced_bos = None
349
-
350
- gen_kwargs = dict(
351
- **inputs,
352
- max_length=1024,
353
- num_beams=4,
354
- early_stopping=True
355
- )
356
- if forced_bos is not None:
357
- gen_kwargs["forced_bos_token_id"] = forced_bos
358
-
359
- t0 = time.time()
360
- outputs = self.model.generate(**gen_kwargs)
361
- gen_time = time.time() - t0
362
-
363
- # move to cpu if needed for decode
364
- decoded = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0].strip()
365
- logger.info(f"[GEN] chunk {chunk_index+1}/{total_chunks} generated in {gen_time:.2f}s len={len(decoded)}")
366
- return decoded
367
- except Exception as e:
368
- logger.exception(f"[TRANSLATE] Error generating chunk: {e}")
369
- return f"[Translation Error: {str(e)}]"
370
-
371
- async def translate_chunk_async(self, text: str, src_code: str, tgt_code: str, chunk_index: int = 0, total_chunks: int = 1) -> str:
372
- loop = asyncio.get_event_loop()
373
- return await loop.run_in_executor(self.executor, self._translate_chunk_sync, text, src_code, tgt_code, chunk_index, total_chunks)
374
-
375
- async def translate_text_async(self, text: str, src_code: str, tgt_code: str, session_id: Optional[str] = None) -> Tuple[str, float, int]:
376
- """Full translation (async wrapper). Returns (translation, processing_time, chunks_count)"""
377
- start_time = time.time()
378
- if not session_id:
379
- session_id = generate_session_id("sess")
380
-
381
- # check cache full text
382
- cached_full = self.cache.get(text, src_code, tgt_code)
383
- if cached_full:
384
- return cached_full, time.time() - start_time, 1
385
-
386
- # short text
387
- if len(text) <= self.max_chunk_size:
388
- # update progress
389
- with self.translation_lock:
390
- self.current_translation[session_id] = {
391
- "total_chunks": 1,
392
- "completed_chunks": 0,
393
- "start_time": start_time,
394
- "source_lang": src_code,
395
- "target_lang": tgt_code
396
- }
397
- chunk_result = await self.translate_chunk_async(text, src_code, tgt_code, 0, 1)
398
- self.cache.set(text, src_code, tgt_code, chunk_result)
399
- elapsed = time.time() - start_time
400
- with self.translation_lock:
401
- self.current_translation.pop(session_id, None)
402
- return chunk_result, elapsed, 1
403
-
404
- # long text -> chunking
405
- chunks = TextChunker.split_text_smart(text, self.max_chunk_size)
406
- total = len(chunks)
407
- translated_chunks: List[str] = []
408
-
409
- with self.translation_lock:
410
- self.current_translation[session_id] = {
411
- "total_chunks": total,
412
- "completed_chunks": 0,
413
- "start_time": start_time,
414
- "source_lang": src_code,
415
- "target_lang": tgt_code
416
- }
417
-
418
- for i, chunk in enumerate(chunks):
419
- # check cached per-chunk
420
- c_cached = self.cache.get(chunk, src_code, tgt_code)
421
- if c_cached:
422
- translated_chunks.append(c_cached)
423
- with self.translation_lock:
424
- self.current_translation[session_id]["completed_chunks"] = i + 1
425
- logger.debug(f"[SESSION {session_id}] chunk {i+1}/{total} from cache")
426
- continue
427
-
428
- # translate chunk
429
- chunk_translation = await self.translate_chunk_async(chunk, src_code, tgt_code, i, total)
430
- translated_chunks.append(chunk_translation)
431
- self.cache.set(chunk, src_code, tgt_code, chunk_translation)
432
- with self.translation_lock:
433
- self.current_translation[session_id]["completed_chunks"] = i + 1
434
-
435
- # short pause to yield CPU
436
- await asyncio.sleep(0.01)
437
-
438
- # combine
439
- final = self._combine_translations(translated_chunks, text)
440
-
441
- # set full-text cache
442
- self.cache.set(text, src_code, tgt_code, final)
443
-
444
- elapsed = time.time() - start_time
445
- with self.translation_lock:
446
- self.current_translation.pop(session_id, None)
447
-
448
- return final, elapsed, total
449
-
450
- def submit_background(self, text: str, src_code: str, tgt_code: str, session_id: Optional[str] = None) -> str:
451
- """Schedule background translation and return session_id immediately"""
452
- if not session_id:
453
- session_id = generate_session_id("bg")
454
- loop = asyncio.get_event_loop()
455
- task = loop.create_task(self._bg_task_wrapper(text, src_code, tgt_code, session_id))
456
- self.background_tasks[session_id] = task
457
- logger.info(f"[BG] Scheduled background task {session_id}")
458
- return session_id
459
-
460
- async def _bg_task_wrapper(self, text: str, src_code: str, tgt_code: str, session_id: str):
461
- """Wrapper executed in background to run translate_text_async and keep result accessible"""
462
- try:
463
- result, elapsed, chunks = await self.translate_text_async(text, src_code, tgt_code, session_id)
464
- # store result for retrieval
465
- with self.translation_lock:
466
- # we can store result in background_tasks as result property or a separate dict
467
- # here, we'll attach attributes to task for simplicity
468
- task = self.background_tasks.get(session_id)
469
- if task is not None:
470
- # monkeypatch result
471
- setattr(task, "result_data", {
472
- "translation": result,
473
- "processing_time": elapsed,
474
- "chunks": chunks,
475
- "character_count": len(text),
476
- "status": "completed"
477
- })
478
- logger.info(f"[BG] Completed background {session_id} len={len(result)}")
479
- except Exception as e:
480
- logger.exception(f"[BG] Error in background task {session_id}: {e}")
481
- task = self.background_tasks.get(session_id)
482
- if task is not None:
483
- setattr(task, "result_data", {
484
- "translation": None,
485
- "processing_time": 0.0,
486
- "chunks": 0,
487
- "character_count": len(text),
488
- "status": "failed",
489
- "error": str(e)
490
- })
491
-
492
- def get_background_result(self, session_id: str) -> Optional[Dict]:
493
- task = self.background_tasks.get(session_id)
494
- if not task:
495
- return None
496
- if task.done():
497
- # if result_data present, return it
498
- res = getattr(task, "result_data", None)
499
- # cleanup
500
- try:
501
- del self.background_tasks[session_id]
502
- except KeyError:
503
- pass
504
- return res
505
- else:
506
- return {
507
- "status": "processing",
508
- "progress": self.get_translation_progress(session_id)
509
- }
510
-
511
- def get_translation_progress(self, session_id: str) -> Optional[Dict]:
512
- with self.translation_lock:
513
- if session_id not in self.current_translation:
514
- return None
515
- p = self.current_translation[session_id].copy()
516
- elapsed = time.time() - p['start_time']
517
- completed = p.get('completed_chunks', 0)
518
- total = p.get('total_chunks', 1)
519
- estimated_remaining = None
520
- if completed > 0:
521
- avg = elapsed / completed
522
- estimated_remaining = avg * (total - completed)
523
- return {
524
- "total_chunks": total,
525
- "completed_chunks": completed,
526
- "elapsed_time": elapsed,
527
- "estimated_remaining": estimated_remaining,
528
- "progress_percentage": (completed / total) * 100 if total else 0
529
- }
530
-
531
- def _combine_translations(self, translated_chunks: List[str], original_text: str) -> str:
532
- # simple join preserving paragraph breaks if existed
533
- if not translated_chunks:
534
- return ""
535
- # if original had paragraphs
536
- if "\n\n" in original_text:
537
- sep = "\n\n"
538
- else:
539
- sep = " "
540
- combined = sep.join([c.strip() for c in translated_chunks if c and c.strip()])
541
- # normalize whitespace
542
- combined = re.sub(r'\s+', ' ', combined).strip()
543
- return combined
544
-
545
- # ----------------------- Translator Initialization -----------------------
546
- translator = MultilingualTranslator(cache_minutes=CACHE_MINUTES, max_workers=MAX_WORKERS)
547
-
548
- # ----------------------- FastAPI App -----------------------
549
  app = FastAPI(title="Enhanced Multilingual Translation API", version="2.1.0")
 
 
550
  app.add_middleware(
551
  CORSMiddleware,
552
- allow_origins=["*"], # in production, set your WP domain(s)
553
  allow_credentials=True,
554
  allow_methods=["*"],
555
  allow_headers=["*"],
556
  )
557
 
558
- # ----------------------- Routes -----------------------
559
- @app.get("/")
560
- async def root():
561
- return {
562
- "message": "Enhanced Multilingual Translation API v2.1",
563
- "status": "active",
564
- "model": translator.model_name,
565
- "device": getattr(translator, "device", "cpu"),
566
- "features": ["cache", "background_tasks", "progress_tracking", "chunking"]
567
- }
568
 
569
- @app.post("/api/translate", response_model=TranslationResponse)
570
- async def api_translate(request: TranslationRequest):
571
  """
572
- JSON endpoint for synchronous translation. Waits until translation completes.
573
- (Suitable for short texts)
574
  """
575
- text = request.text or ""
576
- if not text.strip():
577
- raise HTTPException(status_code=400, detail="No text provided")
578
-
579
- # map language names to codes if needed
580
- src_code = LANGUAGE_MAP.get(request.source_lang, request.source_lang)
581
- tgt_code = LANGUAGE_MAP.get(request.target_lang, request.target_lang)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582
 
583
- # Run translation (async)
 
 
 
 
584
  try:
585
- translation, processing_time, chunks_count = await translator.translate_text_async(text, src_code, tgt_code)
586
- return TranslationResponse(
587
- translation=translation,
588
- source_language=request.source_lang,
589
- target_language=request.target_lang,
590
- processing_time=float(processing_time),
591
- character_count=len(text),
592
- status="success",
593
- chunks_processed=chunks_count,
594
- estimated_time_remaining=0.0,
595
- current_chunk=chunks_count,
596
- total_chunks=chunks_count,
597
- session_id=None
598
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
599
  except Exception as e:
600
- logger.exception("[API] translate error")
601
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
602
 
603
  @app.post("/api/translate/form")
604
  async def api_translate_form(request: Request):
605
  """
606
- Compatibility endpoint for form-data (used by WP plugin's call).
607
- Accepts either form-encoded or JSON payload.
608
- Behavior:
609
- - If short text -> translate immediately and return translation
610
- - If long text -> check cache; if cached return result; else schedule background and return session info
611
  """
612
  try:
613
- form = await request.form()
614
- # form() returns a starlette.datastructures.FormData object; fallback if empty
615
- data = dict(form) if form else {}
616
- # prefer form fields
617
- text = data.get("text") or (await request.json()).get("text") if request.headers.get("content-type", "").startswith("application/json") else None
618
- source_lang = data.get("source_lang") or (await request.json()).get("source_lang") if text is None else data.get("source_lang")
619
- target_lang = data.get("target_lang") or (await request.json()).get("target_lang") if text is None else data.get("target_lang")
620
- api_key = data.get("api_key") or None
621
- except Exception:
622
- # fallback: try json directly
623
  try:
624
- payload = await request.json()
625
- text = payload.get("text", "")
626
- source_lang = payload.get("source_lang", "")
627
- target_lang = payload.get("target_lang", "")
628
- api_key = payload.get("api_key", None)
629
- except Exception:
630
- raise HTTPException(status_code=400, detail="Invalid request format")
631
-
632
- text = text or ""
633
- source_lang = source_lang or ""
634
- target_lang = target_lang or ""
635
-
636
- logger.info(f"[FORM API] Request: {len(text)} chars | {source_lang} -> {target_lang}")
637
-
638
  if not text.strip():
639
- return {"status": "error", "message": "No text provided"}
640
-
641
- src_code = LANGUAGE_MAP.get(source_lang, source_lang)
642
- tgt_code = LANGUAGE_MAP.get(target_lang, target_lang)
643
-
644
- # Generate session id
645
- session_id = generate_session_id("req")
646
-
647
- # If long text -> background
648
- if len(text) > translator.max_chunk_size:
649
- # Check full-text cache first
650
- cached_full = translator.cache.get(text, src_code, tgt_code)
651
- if cached_full:
652
- logger.info(f"[FORM API] returning cached full result for session {session_id}")
 
 
 
 
 
 
 
 
 
 
 
653
  return {
654
- "translation": cached_full,
 
 
655
  "processing_time": 0.0,
656
- "character_count": len(text),
657
  "status": "success",
658
  "chunks_processed": None,
659
- "session_id": session_id,
660
  "cached": True
661
  }
662
-
663
- # schedule background translation
664
- # ensure we schedule within event loop
665
- loop = asyncio.get_event_loop()
666
- task = loop.create_task(translator._bg_task_wrapper(text, src_code, tgt_code, session_id))
667
- translator.background_tasks[session_id] = task
668
- logger.info(f"[FORM API] background scheduled session {session_id}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
669
  return {
670
- "session_id": session_id,
671
- "request_id": session_id,
672
- "status": "processing",
673
- "message": "Translation started in background. Use /api/status/{session_id} or /api/progress/{session_id} to check.",
674
- "character_count": len(text),
675
- "is_background": True,
676
- "is_heavy_text": True
 
 
 
677
  }
 
678
  else:
679
- # short text - translate immediately
680
  try:
681
- translation, processing_time, chunks_count = await translator.translate_text_async(text, src_code, tgt_code, session_id)
682
- # validate
683
- if not translation or (isinstance(translation, str) and translation.lower().startswith("translation error")):
684
- logger.error("[FORM API] Invalid translation result")
685
- return {"status": "error", "message": "Translation failed - empty or invalid result", "session_id": session_id}
 
 
 
 
 
 
 
 
 
 
 
686
  return {
687
- "translation": translation,
688
- "source_language": source_lang,
689
- "target_language": target_lang,
690
- "processing_time": processing_time,
691
- "character_count": len(text),
692
- "status": "success",
693
- "chunks_processed": chunks_count,
694
- "session_id": session_id,
695
- "cached": False
696
  }
 
697
  except Exception as e:
698
- logger.exception("[FORM API] translation error")
699
- return {"status": "error", "message": f"Translation error: {str(e)}"}
700
 
701
- @app.get("/api/status/{session_id}")
702
- async def get_session_status(session_id: str):
703
- """Return completed result if available, or processing state."""
704
- # check background tasks dict first
705
- bg = translator.background_tasks.get(session_id)
706
- if bg:
707
- if bg.done():
708
- res = getattr(bg, "result_data", None)
709
- if res:
710
- return {
711
- "status": "completed",
712
- "translation": res.get("translation"),
713
- "processing_time": res.get("processing_time"),
714
- "chunks_processed": res.get("chunks"),
715
- "character_count": res.get("character_count")
716
- }
717
- else:
718
- return {"status": "completed", "message": "Completed but no data"}
719
- else:
720
- progress = translator.get_translation_progress(session_id)
721
- return {"status": "processing", "progress": progress}
722
 
723
- # else check current_translation (in-progress immediate)
724
- prog = translator.get_translation_progress(session_id)
725
- if prog:
726
- return {"status": "processing", "progress": prog}
727
- return {"status": "not_found", "message": "Session not found or already cleaned up"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
728
 
729
  @app.get("/api/progress/{session_id}")
730
  async def get_translation_progress(session_id: str):
731
- p = translator.get_translation_progress(session_id)
732
- if p is None:
 
733
  raise HTTPException(status_code=404, detail="Session not found or completed")
734
- return {"status": "success", "progress": p}
735
-
736
- @app.get("/api/result/{session_id}")
737
- async def get_result(session_id: str):
738
- # check background
739
- bg = translator.background_tasks.get(session_id)
740
- if bg and bg.done():
741
- res = getattr(bg, "result_data", None)
742
- if res:
743
- return {
744
- "status": "success",
745
- "translation": res.get("translation"),
746
- "processing_time": res.get("processing_time"),
747
- "character_count": res.get("character_count"),
748
- "chunks_processed": res.get("chunks"),
749
- "session_id": session_id
750
- }
751
- else:
752
- return {"status": "error", "message": "Completed but no result data"}
753
-
754
- # if still processing
755
- prog = translator.get_translation_progress(session_id)
756
- if prog:
757
- return {"status": "processing", "progress": prog}
758
-
759
- # maybe not found
760
- raise HTTPException(status_code=404, detail="Session not found")
761
 
762
  @app.get("/api/languages")
763
  async def get_languages():
764
- return {"languages": list(LANGUAGE_MAP.keys()), "language_codes": LANGUAGE_MAP, "status": "success"}
 
 
 
 
 
765
 
766
  @app.get("/api/health")
767
  async def health_check():
 
 
 
 
 
768
  return {
769
  "status": "healthy",
770
- "device": getattr(translator, "device", "cpu"),
771
- "model": getattr(translator, "model_name", None),
772
  "cache_size": len(translator.cache.cache),
773
  "max_chunk_size": translator.max_chunk_size,
774
  "active_translations": len(translator.current_translation),
775
- "background_tasks": len(translator.background_tasks),
 
776
  "version": "2.1.0"
777
  }
778
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
779
  @app.get("/api/server-status")
780
  async def get_server_status():
 
 
 
781
  active_sessions = []
 
 
 
 
 
782
  with translator.translation_lock:
783
- for sid, progress in translator.current_translation.items():
784
- elapsed = time.time() - progress['start_time']
785
- completed = progress.get('completed_chunks', 0)
786
- total = progress.get('total_chunks', 1)
787
- percent = (completed / total) * 100 if total else 0
 
 
 
 
788
  active_sessions.append({
789
- "session_id": sid,
790
- "source_lang": progress.get('source_lang'),
791
- "target_lang": progress.get('target_lang'),
792
- "total_chunks": total,
793
- "completed_chunks": completed,
794
- "progress_percentage": percent,
795
- "elapsed_time": elapsed
 
796
  })
797
- bg_count = len(translator.background_tasks)
798
- return {
799
- "has_active_translation": bool(active_sessions) or bg_count > 0,
800
- "active_sessions": active_sessions,
801
- "background_tasks": bg_count,
802
- "message": f"{len(active_sessions)} active, {bg_count} in background"
803
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
804
 
805
- # ----------------------- Run -----------------------
806
  if __name__ == "__main__":
807
- # uvicorn.run(app, host="0.0.0.0", port=7860)
808
- logger.info(f"Starting server on {SERVER_HOST}:{SERVER_PORT} (model={translator.model_name})")
809
- uvicorn.run("app:app", host=SERVER_HOST, port=SERVER_PORT, log_level="info", reload=False)
 
 
1
  import asyncio
2
  from concurrent.futures import ThreadPoolExecutor
3
  import threading
4
+ import torch
5
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
6
  import time
7
  import json
8
  import hashlib
9
  import re
10
  from datetime import datetime, timedelta
11
+ import threading
12
  from queue import Queue
13
  import logging
14
+ from typing import Dict, List, Tuple, Optional
15
+ from fastapi import FastAPI, HTTPException, Request, Form
16
  from fastapi.middleware.cors import CORSMiddleware
17
  from pydantic import BaseModel
18
  import uvicorn
19
+ import uuid
20
+
21
+ # Enhanced logging configuration
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  logging.basicConfig(
23
  level=logging.INFO,
24
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
25
+ handlers=[
26
+ logging.StreamHandler(),
27
+ logging.FileHandler('translation.log')
28
+ ]
29
  )
30
+ logger = logging.getLogger(__name__)
31
+
32
+ # Global storage for translation requests (WordPress integration)
33
+ translation_requests = {}
34
+ completed_translations = {}
35
+ translation_requests_lock = threading.Lock()
36
 
37
+ # Pydantic models for request/response
38
  class TranslationRequest(BaseModel):
39
  text: str
40
  source_lang: str
 
52
  estimated_time_remaining: Optional[float] = None
53
  current_chunk: Optional[int] = None
54
  total_chunks: Optional[int] = None
 
55
 
56
class TranslationCache:
    """In-memory TTL cache for finished translations.

    Entries are keyed by an MD5 of (text, source_lang, target_lang) and
    expire lazily: a stale entry is evicted the next time it is looked up.
    Thread-safe via an internal lock.
    """

    def __init__(self, cache_duration_minutes: int = 60):
        self.cache = {}  # key -> (translation, stored_at datetime)
        self.cache_duration = timedelta(minutes=cache_duration_minutes)
        self.lock = threading.Lock()

    def _generate_key(self, text: str, source_lang: str, target_lang: str) -> str:
        """Generate cache key from text and languages."""
        content = f"{text}_{source_lang}_{target_lang}"
        # MD5 is fine here: the key is a cache fingerprint, not a security hash.
        return hashlib.md5(content.encode()).hexdigest()

    def get(self, text: str, source_lang: str, target_lang: str) -> Optional[str]:
        """Return the cached translation, or None on a miss or expiry.

        FIX: return annotation was `str` although the miss path returns None.
        """
        with self.lock:
            key = self._generate_key(text, source_lang, target_lang)
            if key in self.cache:
                translation, timestamp = self.cache[key]
                if datetime.now() - timestamp < self.cache_duration:
                    logger.info(f"[CACHE HIT] Retrieved cached translation for key: {key[:8]}... | Length: {len(translation)} chars")
                    return translation
                # Evict the stale entry so the dict does not grow unbounded.
                del self.cache[key]
                logger.info(f"[CACHE EXPIRED] Removed expired cache entry for key: {key[:8]}...")
            logger.info(f"[CACHE MISS] No cached translation found for key: {key[:8]}...")
            return None

    def set(self, text: str, source_lang: str, target_lang: str, translation: str):
        """Store translation in cache, stamping it with the current time."""
        with self.lock:
            key = self._generate_key(text, source_lang, target_lang)
            self.cache[key] = (translation, datetime.now())
            logger.info(f"[CACHE STORE] Cached translation for key: {key[:8]}... | Translation length: {len(translation)} chars")
89
+
90
class TranslationQueue:
    """FIFO task queue that runs at most `max_workers` tasks concurrently."""

    def __init__(self, max_workers: int = 3):
        self.queue = Queue()
        self.max_workers = max_workers
        self.current_workers = 0
        self.lock = threading.Lock()

    def add_task(self, task_func, *args, **kwargs):
        """Add translation task to queue."""
        self.queue.put((task_func, args, kwargs))
        logger.info(f"[QUEUE] Added task to queue | Queue size: {self.queue.qsize()}")

    def process_queue(self):
        """Drain the queue, spawning a worker thread per task.

        FIX 1: the capacity check used to call time.sleep(0.1) while still
        holding self.lock, which blocked finished workers from decrementing
        current_workers.  The sleep now happens outside the lock.
        FIX 2: the worker closure captured task_func/args/kwargs late, so a
        fast loop iteration could rebind them before the thread started;
        they are now bound eagerly via default arguments.
        """
        while not self.queue.empty():
            with self.lock:
                at_capacity = self.current_workers >= self.max_workers
            if at_capacity:
                time.sleep(0.1)  # wait for a slot without holding the lock
                continue

            with self.lock:
                if self.queue.empty():
                    break
                task_func, args, kwargs = self.queue.get()
                self.current_workers += 1
                logger.info(f"[QUEUE] Starting worker | Current workers: {self.current_workers}")

            def worker(task_func=task_func, args=args, kwargs=kwargs):
                try:
                    return task_func(*args, **kwargs)
                finally:
                    with self.lock:
                        self.current_workers -= 1
                        logger.info(f"[QUEUE] Worker finished | Current workers: {self.current_workers}")

            threading.Thread(target=worker).start()
126
+
127
class TextChunker:
    """Splits long texts into translation-sized pieces.

    Strategy: split on paragraphs first, then sentences, then commas, and as
    a last resort hard-split by length, so each chunk stays below the limit.
    """

    @staticmethod
    def split_text_smart(text: str, max_chunk_size: int = 400) -> List[str]:
        """Split `text` into chunks of at most ~`max_chunk_size` characters."""
        logger.info(f"[CHUNKER] Starting smart text splitting | Text length: {len(text)} chars | Max chunk size: {max_chunk_size}")

        if len(text) <= max_chunk_size:
            logger.info(f"[CHUNKER] Text is small, no chunking needed | Length: {len(text)}")
            return [text]

        chunks = []

        # First pass: split on blank-line paragraph boundaries.
        paragraphs = text.split('\n\n')
        current_chunk = ""

        for i, paragraph in enumerate(paragraphs):
            logger.debug(f"[CHUNKER] Processing paragraph {i+1}/{len(paragraphs)} | Length: {len(paragraph)}")

            if len(paragraph) > max_chunk_size:
                # Flush the accumulated chunk before handling the big paragraph.
                if current_chunk.strip():
                    chunks.append(current_chunk.strip())
                    logger.debug(f"[CHUNKER] Added chunk from accumulated paragraphs | Length: {len(current_chunk.strip())}")
                    current_chunk = ""

                # The paragraph itself is oversized: split it into sentences.
                sub_chunks = TextChunker._split_paragraph(paragraph, max_chunk_size)
                chunks.extend(sub_chunks)
                logger.debug(f"[CHUNKER] Split large paragraph into {len(sub_chunks)} sub-chunks")
            else:
                # Would appending this paragraph overflow the current chunk?
                if len(current_chunk) + len(paragraph) + 2 > max_chunk_size:
                    if current_chunk.strip():
                        chunks.append(current_chunk.strip())
                        logger.debug(f"[CHUNKER] Added chunk | Length: {len(current_chunk.strip())}")
                    current_chunk = paragraph
                else:
                    if current_chunk:
                        current_chunk += "\n\n" + paragraph
                    else:
                        current_chunk = paragraph

        # Append whatever is left over.
        if current_chunk.strip():
            chunks.append(current_chunk.strip())
            logger.debug(f"[CHUNKER] Added final chunk | Length: {len(current_chunk.strip())}")

        # FIX: guard the average computation — a whitespace-only input longer
        # than max_chunk_size can produce zero chunks, which previously raised
        # ZeroDivisionError inside the log statement.
        avg_size = (sum(len(c) for c in chunks) / len(chunks)) if chunks else 0.0
        logger.info(f"[CHUNKER] Text splitting completed | Total chunks: {len(chunks)} | Average chunk size: {avg_size:.1f} chars")
        return chunks

    @staticmethod
    def _split_paragraph(paragraph: str, max_chunk_size: int) -> List[str]:
        """Split an oversized paragraph on sentence boundaries."""
        logger.debug(f"[CHUNKER] Splitting large paragraph | Length: {len(paragraph)}")

        sentences = re.split(r'[.!?]+\s+', paragraph)
        chunks = []
        current_chunk = ""

        for sentence in sentences:
            if not sentence.strip():
                continue

            # re.split consumed the terminator; restore one so sentences stay
            # well-formed for the translator.
            if not sentence.endswith(('.', '!', '?')):
                sentence += '.'

            if len(sentence) > max_chunk_size:
                # The sentence alone is too long — fall back to comma splits.
                if current_chunk.strip():
                    chunks.append(current_chunk.strip())
                    current_chunk = ""

                sub_chunks = TextChunker._split_by_comma(sentence, max_chunk_size)
                chunks.extend(sub_chunks)
            else:
                if len(current_chunk) + len(sentence) + 1 > max_chunk_size:
                    if current_chunk.strip():
                        chunks.append(current_chunk.strip())
                    current_chunk = sentence
                else:
                    if current_chunk:
                        current_chunk += " " + sentence
                    else:
                        current_chunk = sentence

        if current_chunk.strip():
            chunks.append(current_chunk.strip())

        logger.debug(f"[CHUNKER] Paragraph split into {len(chunks)} sentence chunks")
        return chunks

    @staticmethod
    def _split_by_comma(sentence: str, max_chunk_size: int) -> List[str]:
        """Split an oversized sentence on commas, hard-splitting as a last resort."""
        logger.debug(f"[CHUNKER] Splitting long sentence by comma | Length: {len(sentence)}")

        parts = sentence.split(', ')
        chunks = []
        current_chunk = ""

        for part in parts:
            if len(part) > max_chunk_size:
                # The fragment itself is too long — forced split by length.
                if current_chunk.strip():
                    chunks.append(current_chunk.strip())
                    current_chunk = ""

                while len(part) > max_chunk_size:
                    chunks.append(part[:max_chunk_size].strip())
                    part = part[max_chunk_size:].strip()

                if part:
                    current_chunk = part
            else:
                if len(current_chunk) + len(part) + 2 > max_chunk_size:
                    if current_chunk.strip():
                        chunks.append(current_chunk.strip())
                    current_chunk = part
                else:
                    if current_chunk:
                        current_chunk += ", " + part
                    else:
                        current_chunk = part

        if current_chunk.strip():
            chunks.append(current_chunk.strip())

        return chunks
262
+
263
class MultilingualTranslator:
    """M2M100-based multilingual translator.

    Combines a TTL translation cache, smart chunking for long inputs,
    per-session progress tracking, and an async wrapper backed by a thread
    pool.  Long-text results are also published into the module-level
    `completed_translations` dict for the WordPress polling endpoints.
    """

    def __init__(self, cache_duration_minutes: int = 60):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info(f"[INIT] Using device: {self.device}")

        # Initialize cache and queue.
        self.cache = TranslationCache(cache_duration_minutes)
        self.queue = TranslationQueue()

        # Thread pool used by translate_text_async.
        self.executor = ThreadPoolExecutor(max_workers=3)
        self.background_tasks = {}

        logger.info(f"[INIT] Thread pool initialized with 3 workers")

        # Load the multilingual seq2seq model (downloads on first run).
        self.model_name = "facebook/m2m100_1.2B"
        logger.info(f"[INIT] Loading model: {self.model_name}")

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
            self.model.to(self.device)
            logger.info(f"[INIT] Model loaded successfully on {self.device}!")
        except Exception as e:
            logger.error(f"[INIT] Error loading model: {e}")
            raise

        # Tuning for long-text translation.
        self.max_chunk_size = 350    # maximum characters per chunk
        self.min_chunk_overlap = 20  # overlap between chunks (reserved, unused)

        # Per-session progress tracking, guarded by translation_lock.
        self.current_translation = {}
        self.translation_lock = threading.Lock()

        logger.info(f"[INIT] Translator initialized | Max chunk size: {self.max_chunk_size} chars")

    def translate_chunk(self, text: str, source_lang: str, target_lang: str, chunk_index: int = 0, total_chunks: int = 1) -> str:
        """Translate one small piece of text.

        Returns the translation, or an `[Translation Error: ...]` marker
        string on failure (never raises).
        """
        try:
            logger.info(f"[TRANSLATE] Starting chunk translation [{chunk_index+1}/{total_chunks}] | {source_lang} → {target_lang} | Length: {len(text)} chars")

            # Set source language for tokenizer.
            self.tokenizer.src_lang = source_lang

            # Encode input.
            encoded = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(self.device)
            logger.debug(f"[TRANSLATE] Text encoded | Input tokens: {encoded.input_ids.shape[1]}")

            # Generate translation with tuned parameters.
            # FIX: removed temperature=0.7 — transformers ignores (and warns
            # about) temperature when do_sample=False, so decoding output is
            # unchanged.
            start_time = time.time()
            generated_tokens = self.model.generate(
                **encoded,
                forced_bos_token_id=self.tokenizer.get_lang_id(target_lang),
                max_length=1024,          # allow long outputs
                min_length=10,            # minimum output length
                num_beams=5,              # more beams for better quality
                early_stopping=True,
                no_repeat_ngram_size=3,   # discourage phrase repetition
                length_penalty=1.0,
                repetition_penalty=1.2,   # discourage word repetition
                do_sample=False,          # deterministic beam search
                pad_token_id=self.tokenizer.pad_token_id,
                eos_token_id=self.tokenizer.eos_token_id
            )
            generation_time = time.time() - start_time

            # Decode and clean up.
            translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
            translation = translation.strip()

            logger.info(f"[TRANSLATE] Chunk translation completed [{chunk_index+1}/{total_chunks}] | Generation time: {generation_time:.2f}s | Output length: {len(translation)} chars")
            return translation

        except Exception as e:
            logger.error(f"[TRANSLATE] Chunk translation error [{chunk_index+1}/{total_chunks}]: {e}")
            return f"[Translation Error: {str(e)}]"

    def translate_text(self, text: str, source_lang: str, target_lang: str, session_id: str = None) -> Tuple[str, float, int]:
        """Translate text of any length, chunking when necessary.

        Returns (translation, elapsed_seconds, chunk_count); on failure the
        first element is an error message and chunk_count is 0.
        """
        start_time = time.time()

        if not session_id:
            session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]

        logger.info(f"[SESSION:{session_id}] Starting translation | {source_lang} → {target_lang} | Text length: {len(text)} chars")

        # Whole-text cache lookup first.
        cached_result = self.cache.get(text, source_lang, target_lang)
        if cached_result:
            logger.info(f"[SESSION:{session_id}] Translation completed from cache | Time: {time.time() - start_time:.2f}s")
            return cached_result, time.time() - start_time, 1

        try:
            # Short texts are translated in a single pass.
            if len(text) <= self.max_chunk_size:
                logger.info(f"[SESSION:{session_id}] Processing as short text")
                translation = self.translate_chunk(text, source_lang, target_lang, 0, 1)

                self.cache.set(text, source_lang, target_lang, translation)
                processing_time = time.time() - start_time
                logger.info(f"[SESSION:{session_id}] Short text translation completed | Total time: {processing_time:.2f}s")

                return translation, processing_time, 1

            # Long texts: split into chunks and translate piece by piece.
            logger.info(f"[SESSION:{session_id}] Processing as long text - starting chunking")
            chunks = TextChunker.split_text_smart(text, self.max_chunk_size)
            logger.info(f"[SESSION:{session_id}] Text split into {len(chunks)} chunks")

            # Initialize progress tracking for get_translation_progress().
            with self.translation_lock:
                self.current_translation[session_id] = {
                    'total_chunks': len(chunks),
                    'completed_chunks': 0,
                    'start_time': start_time,
                    'source_lang': source_lang,
                    'target_lang': target_lang
                }

            translated_chunks = []
            for i, chunk in enumerate(chunks):
                chunk_start_time = time.time()
                logger.info(f"[SESSION:{session_id}] Starting chunk {i+1}/{len(chunks)} | Chunk length: {len(chunk)} chars")

                # Per-chunk cache lookup.
                chunk_translation = self.cache.get(chunk, source_lang, target_lang)

                if not chunk_translation:
                    # Log an ETA based on the average time per chunk so far.
                    if i > 0:
                        elapsed_time = time.time() - start_time
                        avg_time_per_chunk = elapsed_time / i
                        estimated_remaining = avg_time_per_chunk * (len(chunks) - i)
                        logger.info(f"[SESSION:{session_id}] Progress: {i}/{len(chunks)} | Avg time per chunk: {avg_time_per_chunk:.1f}s | Estimated remaining: {estimated_remaining:.1f}s")

                    chunk_translation = self.translate_chunk(chunk, source_lang, target_lang, i, len(chunks))
                    self.cache.set(chunk, source_lang, target_lang, chunk_translation)

                    chunk_time = time.time() - chunk_start_time
                    logger.info(f"[SESSION:{session_id}] Chunk {i+1}/{len(chunks)} translated in {chunk_time:.2f}s")
                else:
                    logger.info(f"[SESSION:{session_id}] Chunk {i+1}/{len(chunks)} retrieved from cache")

                translated_chunks.append(chunk_translation)

                # Update progress.
                with self.translation_lock:
                    if session_id in self.current_translation:
                        self.current_translation[session_id]['completed_chunks'] = i + 1

                # Brief pause between chunks to avoid overloading the device.
                if i < len(chunks) - 1:
                    time.sleep(0.1)

            # Combine the translated pieces.
            logger.info(f"[SESSION:{session_id}] Combining translated chunks")
            final_translation = self._combine_translations(translated_chunks, text)

            # Cache the final result.
            self.cache.set(text, source_lang, target_lang, final_translation)

            processing_time = time.time() - start_time
            logger.info(f"[SESSION:{session_id}] Long text translation completed | Total time: {processing_time:.2f}s | Chunks: {len(chunks)} | Final length: {len(final_translation)} chars")

            # Publish the result for the WordPress polling endpoints.
            with translation_requests_lock:
                completed_translations[session_id] = {
                    'translation': final_translation,
                    'processing_time': processing_time,
                    'character_count': len(text),
                    'source_lang': source_lang,
                    'target_lang': target_lang,
                    'completed_at': datetime.now().isoformat(),
                    'request_id': session_id,
                    'status': 'completed'
                }
                if session_id in translation_requests:
                    del translation_requests[session_id]

            # Clean up progress tracking.
            with self.translation_lock:
                self.current_translation.pop(session_id, None)

            return final_translation, processing_time, len(chunks)

        except Exception as e:
            logger.error(f"[SESSION:{session_id}] Translation error: {e}")
            with self.translation_lock:
                self.current_translation.pop(session_id, None)
            return f"Translation error: {str(e)}", time.time() - start_time, 0

    def get_translation_progress(self, session_id: str) -> Dict:
        """Return progress info for an in-flight session, or None if unknown."""
        with self.translation_lock:
            if session_id not in self.current_translation:
                return None

            progress = self.current_translation[session_id].copy()
            elapsed_time = time.time() - progress['start_time']

            if progress['completed_chunks'] > 0:
                avg_time_per_chunk = elapsed_time / progress['completed_chunks']
                remaining_chunks = progress['total_chunks'] - progress['completed_chunks']
                estimated_remaining = avg_time_per_chunk * remaining_chunks
            else:
                estimated_remaining = None

            return {
                'total_chunks': progress['total_chunks'],
                'completed_chunks': progress['completed_chunks'],
                'elapsed_time': elapsed_time,
                'estimated_remaining': estimated_remaining,
                'progress_percentage': (progress['completed_chunks'] / progress['total_chunks']) * 100
            }

    def _combine_translations(self, translated_chunks: List[str], original_text: str) -> str:
        """Join translated chunks back into one coherent text."""
        if not translated_chunks:
            return ""

        if len(translated_chunks) == 1:
            return translated_chunks[0]

        logger.debug(f"[COMBINER] Combining {len(translated_chunks)} translated chunks")

        combined = []
        for i, chunk in enumerate(translated_chunks):
            chunk = chunk.strip()
            if not chunk:
                continue

            if i > 0 and combined:
                # Ensure the previous piece ends with a sentence terminator.
                # FIX: mojibake 'Û”' restored to the Urdu full stop '۔';
                # duplicate '.' removed (endswith-tuple semantics unchanged).
                if not combined[-1].rstrip().endswith(('.', '!', '?', ':', '۔')):
                    combined[-1] += '.'

                # Mirror the paragraph structure of the source text.
                if '\n\n' in original_text:
                    combined.append('\n\n' + chunk)
                else:
                    combined.append(' ' + chunk)
            else:
                combined.append(chunk)

        result = ''.join(combined)

        # FIX: collapse runs of spaces/tabs only — the previous r'\s+' pattern
        # also flattened the '\n\n' paragraph separators inserted just above.
        result = re.sub(r'[ \t]+', ' ', result)
        result = re.sub(r'\.+', '.', result)  # collapse repeated periods
        result = result.strip()

        logger.debug(f"[COMBINER] Combined translation length: {len(result)} chars")
        return result

    async def translate_text_async(self, text: str, source_lang: str, target_lang: str, session_id: str = None):
        """Async wrapper: run translate_text on the instance's thread pool."""
        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(
            self.executor,
            self.translate_text,
            text, source_lang, target_lang, session_id
        )
544
+
545
def process_heavy_translation_background(request_id: str, text: str, source_lang: str, target_lang: str):
    """Run a heavy translation in a worker thread and publish its result.

    On success the result is stored in `completed_translations`; on failure a
    'failed' record is stored instead.  Either way the entry is removed from
    `translation_requests`.

    FIX 1: translator.translate_text() itself deletes the entry from
    translation_requests when it finishes, so the display-language names are
    snapshotted BEFORE translating — the old code read
    translation_requests[request_id] afterwards, raised KeyError, and wrongly
    overwrote a successful result as 'failed'.
    FIX 2: start_time is assigned before the try block, removing the fragile
    `'start_time' in locals()` check in the error path.
    """
    start_time = time.time()
    try:
        logger.info(f"[HF Server] Background processing started for request: {request_id}")

        # Snapshot display names and mark the request as processing.
        with translation_requests_lock:
            req = translation_requests.get(request_id, {})
            source_lang_display = req.get('source_lang_display', source_lang)
            target_lang_display = req.get('target_lang_display', target_lang)
            if request_id in translation_requests:
                translation_requests[request_id]['progress'] = 10
                translation_requests[request_id]['status'] = 'processing'

        # Perform actual translation.
        translation, processing_time, chunks_count = translator.translate_text(
            text, source_lang, target_lang, request_id
        )

        total_processing_time = time.time() - start_time

        # Validate translation result.
        if not translation or not translation.strip():
            logger.error(f"[HF Server] Empty translation result for request: {request_id}")

            with translation_requests_lock:
                completed_translations[request_id] = {
                    'translation': '',
                    'error': 'Translation completed but result is empty',
                    'status': 'failed',
                    'processing_time': total_processing_time,
                    'completed_at': datetime.now().isoformat(),
                    'request_id': request_id
                }
                translation_requests.pop(request_id, None)
            return

        # Store completed translation for the polling endpoints.
        with translation_requests_lock:
            completed_translations[request_id] = {
                'translation': translation,
                'processing_time': total_processing_time,
                'character_count': len(text),
                'source_lang': source_lang,
                'target_lang': target_lang,
                'completed_at': datetime.now().isoformat(),
                'request_id': request_id,
                'status': 'completed',
                'chunks_processed': chunks_count,
                'source_lang_display': source_lang_display,
                'target_lang_display': target_lang_display
            }
            translation_requests.pop(request_id, None)

        logger.info(f"[HF Server] Heavy text translation completed for request: {request_id} in {total_processing_time:.2f}s with {len(translation)} chars")

    except Exception as e:
        logger.error(f"[HF Server] Background processing error for {request_id}: {str(e)}")

        # Mark as failed and remove from the processing queue.
        with translation_requests_lock:
            completed_translations[request_id] = {
                'translation': '',
                'error': str(e),
                'status': 'failed',
                'processing_time': time.time() - start_time,
                'completed_at': datetime.now().isoformat(),
                'request_id': request_id
            }
            translation_requests.pop(request_id, None)
625
+
626
def perform_translation_internal(text: str, source_lang: str, target_lang: str) -> str:
    """Thin wrapper around translator.translate_text that returns only the text.

    Timing and chunk-count information from the translator is discarded; any
    exception is converted into an error-message string.
    """
    try:
        result, _elapsed, _chunks = translator.translate_text(text, source_lang, target_lang)
    except Exception as e:
        logger.error(f"[INTERNAL] Translation error: {str(e)}")
        return f"Translation error: {str(e)}"
    return result
636
+
637
+ # Language mappings for M2M100 model
638
  LANGUAGE_MAP = {
639
  "English": "en",
640
+ "Persian (Farsi)": "fa",
 
641
  "Arabic": "ar",
642
  "French": "fr",
643
  "German": "de",
 
646
  "Portuguese": "pt",
647
  "Russian": "ru",
648
  "Chinese (Simplified)": "zh",
 
649
  "Japanese": "ja",
650
  "Korean": "ko",
651
  "Hindi": "hi",
 
705
  "Zulu": "zu"
706
  }
707
 
708
# Initialize the shared translator (translations are cached for 60 minutes).
translator = MultilingualTranslator(cache_duration_minutes=60)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
710
 
711
+ # Create FastAPI app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
712
# Create the FastAPI app and open it up to cross-origin callers so the
# WordPress frontend can reach it.
app = FastAPI(title="Enhanced Multilingual Translation API", version="2.1.0")

app.add_middleware(
    CORSMiddleware,
    # NOTE(review): wildcard origins combined with allow_credentials=True is
    # permissive — consider restricting origins in production.
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
722
 
723
+ # ========== NEW WORDPRESS INTEGRATION ENDPOINTS ==========
 
 
 
 
 
 
 
 
 
724
 
725
@app.post("/api/check-completion")
async def check_completion(request: Request):
    """
    Verify whether a background translation request has finished.

    Expects a form field `request_id`; answers with one of the statuses
    'completed', 'processing', 'not_found', or 'error' (metadata only — the
    translation text itself is served by /api/check-translation-status).
    """
    try:
        form_data = await request.form()
        request_id = form_data.get('request_id', '').strip()

        if not request_id:
            return {
                'status': 'error',
                'message': 'Request ID is required'
            }

        logger.info(f"[HF Server] Completion verification requested for: {request_id}")

        with translation_requests_lock:
            finished = completed_translations.get(request_id)
            if finished is not None:
                logger.info(f"[HF Server] Completion verification for {request_id}: COMPLETED")
                return {
                    'status': 'completed',
                    'request_id': request_id,
                    'completed_at': finished.get('completed_at'),
                    'processing_time': finished.get('processing_time', 0),
                    'character_count': finished.get('character_count', 0),
                    'translation_length': len(finished.get('translation', '')),
                    'verified': True
                }

            pending = translation_requests.get(request_id)
            if pending is not None:
                logger.info(f"[HF Server] Completion verification for {request_id}: STILL PROCESSING")
                return {
                    'status': 'processing',
                    'request_id': request_id,
                    'progress': pending.get('progress', 0),
                    'verified': False
                }

            logger.info(f"[HF Server] Completion verification for {request_id}: NOT FOUND")
            return {
                'status': 'not_found',
                'request_id': request_id,
                'message': 'Request ID not found'
            }

    except Exception as e:
        logger.error(f"[HF Server] Error in check_completion: {str(e)}")
        return {
            'status': 'error',
            'message': 'Server error occurred'
        }
784
+ }
785
 
786
@app.post("/api/check-translation-status")
async def check_translation_status(request: Request):
    """
    Poll endpoint for background translations.

    Returns the full translation payload once the request has completed, the
    current progress while it is still running, or 'not_found'/'error'.
    """
    try:
        form_data = await request.form()
        request_id = form_data.get('request_id', '').strip()

        if not request_id:
            return {
                'status': 'error',
                'message': 'Request ID is required'
            }

        logger.info(f"[HF Server] Translation status check for: {request_id}")

        with translation_requests_lock:
            result = completed_translations.get(request_id)
            if result is not None:
                logger.info(f"[HF Server] Translation status check for {request_id}: COMPLETED - returning full data")
                translated_text = result.get('translation', '')
                return {
                    'status': 'completed',
                    'request_id': request_id,
                    'translation': translated_text,
                    'processing_time': result.get('processing_time', 0),
                    'character_count': result.get('character_count', 0),
                    'completed_at': result.get('completed_at'),
                    'source_lang': result.get('source_lang_display', result.get('source_lang', '')),
                    'target_lang': result.get('target_lang_display', result.get('target_lang', '')),
                    'chunks_processed': result.get('chunks_processed', 1),
                    'translation_length': len(translated_text)
                }

            req_data = translation_requests.get(request_id)
            if req_data is not None:
                logger.info(f"[HF Server] Translation status check for {request_id}: STILL PROCESSING")
                return {
                    'status': 'processing',
                    'request_id': request_id,
                    'started_at': req_data.get('started_at'),
                    'progress': req_data.get('progress', 0),
                    'character_count': req_data.get('character_count', 0),
                    'source_lang': req_data.get('source_lang_display', req_data.get('source_lang', '')),
                    'target_lang': req_data.get('target_lang_display', req_data.get('target_lang', ''))
                }

            logger.info(f"[HF Server] Translation status check for {request_id}: NOT FOUND")
            return {
                'status': 'not_found',
                'request_id': request_id,
                'message': 'Translation request not found'
            }

    except Exception as e:
        logger.error(f"[HF Server] Error in check_translation_status: {str(e)}")
        return {
            'status': 'error',
            'message': 'Server error occurred'
        }
854
+
855
+ # ========== UPDATED MAIN TRANSLATION ENDPOINT ==========
856
 
857
  @app.post("/api/translate/form")
858
  async def api_translate_form(request: Request):
859
  """
860
+ FIXED: Enhanced translation endpoint with better heavy text handling
 
 
 
 
861
  """
862
  try:
863
+ form_data = await request.form()
864
+ text = form_data.get("text", "")
865
+ source_lang = form_data.get("source_lang", "")
866
+ target_lang = form_data.get("target_lang", "")
867
+ api_key = form_data.get("api_key", None)
868
+ except:
 
 
 
 
869
  try:
870
+ json_data = await request.json()
871
+ text = json_data.get("text", "")
872
+ source_lang = json_data.get("source_lang", "")
873
+ target_lang = json_data.get("target_lang", "")
874
+ api_key = json_data.get("api_key", None)
875
+ except:
876
+ return {"status": "error", "message": "Invalid request format"}
877
+
 
 
 
 
 
 
878
  if not text.strip():
879
+ logger.error("[FORM API] No text provided")
880
+ return {"status": "error", "message": "Text, source language, and target language are required"}
881
+
882
+ source_code = LANGUAGE_MAP.get(source_lang)
883
+ target_code = LANGUAGE_MAP.get(target_lang)
884
+
885
+ if not source_code or not target_code:
886
+ logger.error(f"[FORM API] Invalid language codes: {source_lang} -> {target_lang}")
887
+ return {"status": "error", "message": "Invalid language codes"}
888
+
889
+ char_count = len(text)
890
+ # FIXED: Correct heavy text threshold detection
891
+ is_heavy_text = char_count > 1000 # Same as WordPress threshold
892
+
893
+ logger.info(f"[FORM API] Translation request: {char_count} chars, {source_lang} → {target_lang}, Heavy: {is_heavy_text}")
894
+
895
+ # FIXED: Always use background processing for heavy texts
896
+ if is_heavy_text:
897
+ # Generate request ID for background processing
898
+ request_id = str(uuid.uuid4())
899
+
900
+ # First check cache for immediate return
901
+ cached_result = translator.cache.get(text, source_code, target_code)
902
+ if cached_result:
903
+ logger.info(f"[FORM API] Returning cached translation immediately for request: {request_id}")
904
  return {
905
+ "translation": cached_result,
906
+ "source_language": source_lang,
907
+ "target_language": target_lang,
908
  "processing_time": 0.0,
909
+ "character_count": char_count,
910
  "status": "success",
911
  "chunks_processed": None,
912
+ "request_id": request_id,
913
  "cached": True
914
  }
915
+
916
+ # Store request for processing
917
+ with translation_requests_lock:
918
+ translation_requests[request_id] = {
919
+ 'text': text,
920
+ 'source_lang': source_code,
921
+ 'target_lang': target_code,
922
+ 'started_at': datetime.now().isoformat(),
923
+ 'character_count': char_count,
924
+ 'progress': 0,
925
+ 'source_lang_display': source_lang,
926
+ 'target_lang_display': target_lang
927
+ }
928
+
929
+ # Start background processing
930
+ thread = threading.Thread(
931
+ target=process_heavy_translation_background,
932
+ args=(request_id, text, source_code, target_code)
933
+ )
934
+ thread.daemon = True
935
+ thread.start()
936
+
937
+ logger.info(f"[FORM API] Started background processing for heavy text - request: {request_id}")
938
+
939
+ # FIXED: Return proper background response for WordPress
940
  return {
941
+ 'is_background': True,
942
+ 'session_id': request_id,
943
+ 'request_id': request_id,
944
+ 'server_request_id': request_id, # Added for compatibility
945
+ 'status': 'processing',
946
+ 'is_heavy_text': True,
947
+ 'message': f'Heavy text ({char_count} characters) is being processed in background. Translation will appear automatically.',
948
+ 'character_count': char_count,
949
+ 'source_lang': source_lang,
950
+ 'target_lang': target_lang
951
  }
952
+
953
  else:
954
+ # Process short text immediately
955
  try:
956
+ start_time = time.time()
957
+
958
+ translation, processing_time, chunks_count = translator.translate_text(
959
+ text, source_code, target_code
960
+ )
961
+
962
+ # Check translation content
963
+ if not translation or not translation.strip() or translation.startswith("Translation error"):
964
+ logger.error(f"[FORM API] Invalid translation result: {translation[:100] if translation else 'None'}")
965
+ return {
966
+ "status": "error",
967
+ "message": "Translation failed - empty or invalid result"
968
+ }
969
+
970
+ logger.info(f"[FORM API] Short text translation completed in {processing_time:.2f}s")
971
+
972
  return {
973
+ 'status': 'success',
974
+ 'translation': translation,
975
+ 'processing_time': processing_time,
976
+ 'character_count': char_count,
977
+ 'source_lang': source_lang,
978
+ 'target_lang': target_lang,
979
+ 'is_heavy_text': False,
980
+ 'chunks_processed': chunks_count
 
981
  }
982
+
983
  except Exception as e:
984
+ logger.error(f"[FORM API] Translation error: {str(e)}")
985
+ return {"status": "error", "message": f"Translation failed: {str(e)}"}
986
 
987
+ # ========== EXISTING ENDPOINTS (UPDATED) ==========
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
988
 
989
+ @app.get("/")
990
+ async def root():
991
+ return {
992
+ "message": "Enhanced Multilingual Translation API v2.1 with WordPress Integration",
993
+ "status": "active",
994
+ "features": [
995
+ "enhanced_logging",
996
+ "progress_tracking",
997
+ "long_text_support",
998
+ "smart_chunking",
999
+ "cache_optimization",
1000
+ "wordpress_integration",
1001
+ "delayed_charging_support"
1002
+ ]
1003
+ }
1004
+
1005
+ @app.post("/api/translate")
1006
+ async def api_translate(request: TranslationRequest):
1007
+ """API endpoint for translation with enhanced logging and progress tracking"""
1008
+ if not request.text.strip():
1009
+ raise HTTPException(status_code=400, detail="No text provided")
1010
+
1011
+ source_code = LANGUAGE_MAP.get(request.source_lang)
1012
+ target_code = LANGUAGE_MAP.get(request.target_lang)
1013
+
1014
+ if not source_code or not target_code:
1015
+ raise HTTPException(status_code=400, detail="Invalid language codes")
1016
+
1017
+ try:
1018
+ # Generate session ID for tracking
1019
+ session_id = hashlib.md5(f"{request.text[:100]}{time.time()}".encode()).hexdigest()[:8]
1020
+
1021
+ translation, processing_time, chunks_count = translator.translate_text(
1022
+ request.text, source_code, target_code, session_id
1023
+ )
1024
+
1025
+ return TranslationResponse(
1026
+ translation=translation,
1027
+ source_language=request.source_lang,
1028
+ target_language=request.target_lang,
1029
+ processing_time=processing_time,
1030
+ character_count=len(request.text),
1031
+ status="success",
1032
+ chunks_processed=chunks_count
1033
+ )
1034
+ except Exception as e:
1035
+ logger.error(f"[API] Translation error: {str(e)}")
1036
+ raise HTTPException(status_code=500, detail=f"Translation error: {str(e)}")
1037
 
1038
  @app.get("/api/progress/{session_id}")
1039
  async def get_translation_progress(session_id: str):
1040
+ """Get translation progress for a session"""
1041
+ progress = translator.get_translation_progress(session_id)
1042
+ if progress is None:
1043
  raise HTTPException(status_code=404, detail="Session not found or completed")
1044
+
1045
+ return {
1046
+ "status": "success",
1047
+ "progress": progress
1048
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1049
 
1050
  @app.get("/api/languages")
1051
  async def get_languages():
1052
+ """Get supported languages"""
1053
+ return {
1054
+ "languages": list(LANGUAGE_MAP.keys()),
1055
+ "language_codes": LANGUAGE_MAP,
1056
+ "status": "success"
1057
+ }
1058
 
1059
  @app.get("/api/health")
1060
  async def health_check():
1061
+ """Health check endpoint"""
1062
+ with translation_requests_lock:
1063
+ active_requests = len(translation_requests)
1064
+ completed_cache = len(completed_translations)
1065
+
1066
  return {
1067
  "status": "healthy",
1068
+ "device": str(translator.device),
1069
+ "model": translator.model_name,
1070
  "cache_size": len(translator.cache.cache),
1071
  "max_chunk_size": translator.max_chunk_size,
1072
  "active_translations": len(translator.current_translation),
1073
+ "active_requests": active_requests,
1074
+ "completed_cache": completed_cache,
1075
  "version": "2.1.0"
1076
  }
1077
 
1078
+ @app.get("/api/status/{session_id}")
1079
+ async def get_session_status(session_id: str):
1080
+ """Get translation status - non-blocking"""
1081
+
1082
+ # Check if task is in background tasks
1083
+ if session_id in translator.background_tasks:
1084
+ task = translator.background_tasks[session_id]
1085
+
1086
+ if task.done():
1087
+ try:
1088
+ translation, processing_time, chunks_count = await task
1089
+ # Clean up completed task
1090
+ del translator.background_tasks[session_id]
1091
+
1092
+ return {
1093
+ "status": "completed",
1094
+ "translation": translation,
1095
+ "processing_time": processing_time,
1096
+ "chunks_processed": chunks_count,
1097
+ "message": "Translation completed successfully"
1098
+ }
1099
+ except Exception as e:
1100
+ del translator.background_tasks[session_id]
1101
+ return {
1102
+ "status": "failed",
1103
+ "message": f"Translation failed: {str(e)}"
1104
+ }
1105
+ else:
1106
+ # Task still running - get progress
1107
+ progress = translator.get_translation_progress(session_id)
1108
+
1109
+ if progress:
1110
+ return {
1111
+ "status": "processing",
1112
+ "progress": progress,
1113
+ "message": f"Processing chunk {progress['completed_chunks']}/{progress['total_chunks']}",
1114
+ "estimated_remaining": progress.get('estimated_remaining', 0)
1115
+ }
1116
+ else:
1117
+ return {
1118
+ "status": "processing",
1119
+ "message": "Translation in progress...",
1120
+ "progress": None
1121
+ }
1122
+
1123
+ # Check current active translations
1124
+ progress = translator.get_translation_progress(session_id)
1125
+ if progress:
1126
+ return {
1127
+ "status": "processing",
1128
+ "progress": progress,
1129
+ "message": f"Processing chunk {progress['completed_chunks']}/{progress['total_chunks']}",
1130
+ "estimated_remaining": progress.get('estimated_remaining', 0)
1131
+ }
1132
+
1133
+ return {
1134
+ "status": "not_found",
1135
+ "message": "Session not found or completed"
1136
+ }
1137
+
1138
  @app.get("/api/server-status")
1139
  async def get_server_status():
1140
+ """
1141
+ FIXED: Enhanced server status with better information
1142
+ """
1143
  active_sessions = []
1144
+
1145
+ with translation_requests_lock:
1146
+ background_tasks_count = len(translation_requests)
1147
+ completed_count = len(completed_translations)
1148
+
1149
  with translator.translation_lock:
1150
+ for session_id, progress in translator.current_translation.items():
1151
+ elapsed_time = time.time() - progress['start_time']
1152
+ if progress['completed_chunks'] > 0:
1153
+ avg_time_per_chunk = elapsed_time / progress['completed_chunks']
1154
+ remaining_chunks = progress['total_chunks'] - progress['completed_chunks']
1155
+ estimated_remaining = avg_time_per_chunk * remaining_chunks
1156
+ else:
1157
+ estimated_remaining = None
1158
+
1159
  active_sessions.append({
1160
+ 'session_id': session_id,
1161
+ 'source_lang': progress['source_lang'],
1162
+ 'target_lang': progress['target_lang'],
1163
+ 'total_chunks': progress['total_chunks'],
1164
+ 'completed_chunks': progress['completed_chunks'],
1165
+ 'progress_percentage': (progress['completed_chunks'] / progress['total_chunks']) * 100,
1166
+ 'elapsed_time': elapsed_time,
1167
+ 'estimated_remaining': estimated_remaining
1168
  })
1169
+
1170
+ total_active = len(active_sessions) + background_tasks_count
1171
+
1172
+ if total_active > 0:
1173
+ if active_sessions:
1174
+ latest_session = active_sessions[-1]
1175
+ message = f"Processing chunk {latest_session['completed_chunks']}/{latest_session['total_chunks']} | {latest_session['source_lang']} → {latest_session['target_lang']}"
1176
+ else:
1177
+ message = f"{background_tasks_count} translation(s) in background queue"
1178
+
1179
+ return {
1180
+ "has_active_translation": True,
1181
+ "status": "processing",
1182
+ "message": message,
1183
+ "active_sessions": len(active_sessions),
1184
+ "background_tasks": background_tasks_count,
1185
+ "total_active": total_active,
1186
+ "completed_cache": completed_count,
1187
+ "active_session_details": active_sessions[:3] if active_sessions else [] # Return first 3 for details
1188
+ }
1189
+ else:
1190
+ return {
1191
+ "has_active_translation": False,
1192
+ "status": "idle",
1193
+ "message": "Server is ready for new translations",
1194
+ "active_sessions": 0,
1195
+ "background_tasks": 0,
1196
+ "completed_cache": completed_count
1197
+ }
1198
+
1199
+ # ========== CLEANUP AND MAINTENANCE FUNCTIONS ==========
1200
+
1201
def cleanup_old_requests():
    """
    FIXED: Enhanced cleanup with better time handling.

    Removes completed translations older than 2 hours and processing requests
    stuck for more than 1 hour. Entries whose timestamps are missing or
    unparseable are treated as invalid and removed as well.

    Returns:
        tuple[int, int]: (completed entries removed, stuck requests removed)
    """
    current_time = datetime.now()

    def _expired(entry, key, max_age_seconds):
        # True when the entry's timestamp is older than max_age_seconds, or
        # when the timestamp is absent/malformed (invalid entry → remove it).
        try:
            stamp = datetime.fromisoformat(entry.get(key, ''))
        except (TypeError, ValueError):
            # FIXED: this was a bare `except:` which would also swallow
            # KeyboardInterrupt/SystemExit; only the parse errors that
            # fromisoformat actually raises are expected here.
            return True
        return (current_time - stamp).total_seconds() > max_age_seconds

    with translation_requests_lock:
        # Clean completed translations older than 2 hours.
        to_remove_completed = [
            req_id for req_id, data in completed_translations.items()
            if _expired(data, 'completed_at', 7200)
        ]
        for req_id in to_remove_completed:
            del completed_translations[req_id]

        # Clean stuck processing requests older than 1 hour.
        to_remove_processing = [
            req_id for req_id, data in translation_requests.items()
            if _expired(data, 'started_at', 3600)
        ]
        for req_id in to_remove_processing:
            del translation_requests[req_id]

    logger.info(f"[HF Server] Cleanup: Removed {len(to_remove_completed)} completed, {len(to_remove_processing)} stuck requests")
    return len(to_remove_completed), len(to_remove_processing)
1236
+
1237
+ # Schedule periodic cleanup (runs every hour)
1238
def periodic_cleanup():
    """Run cleanup every hour.

    Daemon-thread loop: sleeps one hour, then invokes cleanup_old_requests();
    any exception is logged and the loop keeps running so a single failure
    never stops future cleanups.
    """
    while True:
        time.sleep(3600)  # 1 hour
        try:
            cleanup_old_requests()
        except Exception as e:
            logger.error(f"[CLEANUP] Error during periodic cleanup: {e}")
1246
+
1247
+ # Start cleanup thread
1248
# Start cleanup thread
# daemon=True so the hourly loop never blocks interpreter shutdown.
cleanup_thread = threading.Thread(target=periodic_cleanup, daemon=True)
cleanup_thread.start()
1250
+
1251
+ # ========== SERVER STARTUP ==========
1252
 
 
1253
  if __name__ == "__main__":
1254
+ logger.info("[HF Server] Starting Enhanced Multilingual Translation API with WordPress Integration")
1255
+ uvicorn.run(app, host="0.0.0.0", port=7860)