Update app.py
Browse files
app.py
CHANGED
|
@@ -125,11 +125,11 @@ class TranslationQueue:
|
|
| 125 |
thread.start()
|
| 126 |
|
| 127 |
class TextChunker:
|
| 128 |
-
"""
|
| 129 |
|
| 130 |
@staticmethod
|
| 131 |
def split_text_smart(text: str, max_chunk_size: int = 400) -> List[str]:
|
| 132 |
-
"""
|
| 133 |
logger.info(f"[CHUNKER] Starting smart text splitting | Text length: {len(text)} chars | Max chunk size: {max_chunk_size}")
|
| 134 |
|
| 135 |
if len(text) <= max_chunk_size:
|
|
@@ -138,27 +138,27 @@ class TextChunker:
|
|
| 138 |
|
| 139 |
chunks = []
|
| 140 |
|
| 141 |
-
#
|
| 142 |
paragraphs = text.split('\n\n')
|
| 143 |
current_chunk = ""
|
| 144 |
|
| 145 |
for i, paragraph in enumerate(paragraphs):
|
| 146 |
logger.debug(f"[CHUNKER] Processing paragraph {i+1}/{len(paragraphs)} | Length: {len(paragraph)}")
|
| 147 |
|
| 148 |
-
#
|
| 149 |
if len(paragraph) > max_chunk_size:
|
| 150 |
-
#
|
| 151 |
if current_chunk.strip():
|
| 152 |
chunks.append(current_chunk.strip())
|
| 153 |
logger.debug(f"[CHUNKER] Added chunk from accumulated paragraphs | Length: {len(current_chunk.strip())}")
|
| 154 |
current_chunk = ""
|
| 155 |
|
| 156 |
-
#
|
| 157 |
sub_chunks = TextChunker._split_paragraph(paragraph, max_chunk_size)
|
| 158 |
chunks.extend(sub_chunks)
|
| 159 |
logger.debug(f"[CHUNKER] Split large paragraph into {len(sub_chunks)} sub-chunks")
|
| 160 |
else:
|
| 161 |
-
#
|
| 162 |
if len(current_chunk) + len(paragraph) + 2 > max_chunk_size:
|
| 163 |
if current_chunk.strip():
|
| 164 |
chunks.append(current_chunk.strip())
|
|
@@ -170,7 +170,7 @@ class TextChunker:
|
|
| 170 |
else:
|
| 171 |
current_chunk = paragraph
|
| 172 |
|
| 173 |
-
#
|
| 174 |
if current_chunk.strip():
|
| 175 |
chunks.append(current_chunk.strip())
|
| 176 |
logger.debug(f"[CHUNKER] Added final chunk | Length: {len(current_chunk.strip())}")
|
|
@@ -180,10 +180,10 @@ class TextChunker:
|
|
| 180 |
|
| 181 |
@staticmethod
|
| 182 |
def _split_paragraph(paragraph: str, max_chunk_size: int) -> List[str]:
|
| 183 |
-
"""
|
| 184 |
logger.debug(f"[CHUNKER] Splitting large paragraph | Length: {len(paragraph)}")
|
| 185 |
|
| 186 |
-
#
|
| 187 |
sentences = re.split(r'[.!?]+\s+', paragraph)
|
| 188 |
chunks = []
|
| 189 |
current_chunk = ""
|
|
@@ -192,12 +192,12 @@ class TextChunker:
|
|
| 192 |
if not sentence.strip():
|
| 193 |
continue
|
| 194 |
|
| 195 |
-
#
|
| 196 |
if not sentence.endswith(('.', '!', '?')):
|
| 197 |
sentence += '.'
|
| 198 |
|
| 199 |
if len(sentence) > max_chunk_size:
|
| 200 |
-
#
|
| 201 |
if current_chunk.strip():
|
| 202 |
chunks.append(current_chunk.strip())
|
| 203 |
current_chunk = ""
|
|
@@ -223,7 +223,7 @@ class TextChunker:
|
|
| 223 |
|
| 224 |
@staticmethod
|
| 225 |
def _split_by_comma(sentence: str, max_chunk_size: int) -> List[str]:
|
| 226 |
-
"""
|
| 227 |
logger.debug(f"[CHUNKER] Splitting long sentence by comma | Length: {len(sentence)}")
|
| 228 |
|
| 229 |
parts = sentence.split(', ')
|
|
@@ -232,12 +232,12 @@ class TextChunker:
|
|
| 232 |
|
| 233 |
for part in parts:
|
| 234 |
if len(part) > max_chunk_size:
|
| 235 |
-
#
|
| 236 |
if current_chunk.strip():
|
| 237 |
chunks.append(current_chunk.strip())
|
| 238 |
current_chunk = ""
|
| 239 |
|
| 240 |
-
#
|
| 241 |
while len(part) > max_chunk_size:
|
| 242 |
chunks.append(part[:max_chunk_size].strip())
|
| 243 |
part = part[max_chunk_size:].strip()
|
|
@@ -288,9 +288,9 @@ class MultilingualTranslator:
|
|
| 288 |
logger.error(f"[INIT] Error loading model: {e}")
|
| 289 |
raise
|
| 290 |
|
| 291 |
-
#
|
| 292 |
-
self.max_chunk_size = 350 #
|
| 293 |
-
self.min_chunk_overlap = 20 #
|
| 294 |
|
| 295 |
# Track translation progress
|
| 296 |
self.current_translation = {}
|
|
@@ -299,9 +299,9 @@ class MultilingualTranslator:
|
|
| 299 |
logger.info(f"[INIT] Translator initialized | Max chunk size: {self.max_chunk_size} chars")
|
| 300 |
|
| 301 |
def translate_chunk(self, text: str, source_lang: str, target_lang: str, chunk_index: int = 0, total_chunks: int = 1) -> str:
|
| 302 |
-
"""
|
| 303 |
try:
|
| 304 |
-
logger.info(f"[TRANSLATE] Starting chunk translation [{chunk_index+1}/{total_chunks}] | {source_lang}
|
| 305 |
|
| 306 |
# Set source language for tokenizer
|
| 307 |
self.tokenizer.src_lang = source_lang
|
|
@@ -315,15 +315,15 @@ class MultilingualTranslator:
|
|
| 315 |
generated_tokens = self.model.generate(
|
| 316 |
**encoded,
|
| 317 |
forced_bos_token_id=self.tokenizer.get_lang_id(target_lang),
|
| 318 |
-
max_length=1024, #
|
| 319 |
-
min_length=10, #
|
| 320 |
-
num_beams=5, #
|
| 321 |
early_stopping=True,
|
| 322 |
-
no_repeat_ngram_size=3, #
|
| 323 |
-
length_penalty=1.0, #
|
| 324 |
-
repetition_penalty=1.2, #
|
| 325 |
-
do_sample=False, #
|
| 326 |
-
temperature=0.7, #
|
| 327 |
pad_token_id=self.tokenizer.pad_token_id,
|
| 328 |
eos_token_id=self.tokenizer.eos_token_id
|
| 329 |
)
|
|
@@ -332,7 +332,7 @@ class MultilingualTranslator:
|
|
| 332 |
# Decode result
|
| 333 |
translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
| 334 |
|
| 335 |
-
#
|
| 336 |
translation = translation.strip()
|
| 337 |
|
| 338 |
logger.info(f"[TRANSLATE] Chunk translation completed [{chunk_index+1}/{total_chunks}] | Generation time: {generation_time:.2f}s | Output length: {len(translation)} chars")
|
|
@@ -344,34 +344,34 @@ class MultilingualTranslator:
|
|
| 344 |
return f"[Translation Error: {str(e)}]"
|
| 345 |
|
| 346 |
def translate_text(self, text: str, source_lang: str, target_lang: str, session_id: str = None) -> Tuple[str, float, int]:
|
| 347 |
-
"""
|
| 348 |
start_time = time.time()
|
| 349 |
|
| 350 |
if not session_id:
|
| 351 |
session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]
|
| 352 |
|
| 353 |
-
logger.info(f"[SESSION:{session_id}] Starting translation | {source_lang}
|
| 354 |
|
| 355 |
-
#
|
| 356 |
cached_result = self.cache.get(text, source_lang, target_lang)
|
| 357 |
if cached_result:
|
| 358 |
logger.info(f"[SESSION:{session_id}] Translation completed from cache | Time: {time.time() - start_time:.2f}s")
|
| 359 |
return cached_result, time.time() - start_time, 1
|
| 360 |
|
| 361 |
try:
|
| 362 |
-
#
|
| 363 |
if len(text) <= self.max_chunk_size:
|
| 364 |
logger.info(f"[SESSION:{session_id}] Processing as short text")
|
| 365 |
translation = self.translate_chunk(text, source_lang, target_lang, 0, 1)
|
| 366 |
|
| 367 |
-
#
|
| 368 |
self.cache.set(text, source_lang, target_lang, translation)
|
| 369 |
processing_time = time.time() - start_time
|
| 370 |
logger.info(f"[SESSION:{session_id}] Short text translation completed | Total time: {processing_time:.2f}s")
|
| 371 |
|
| 372 |
return translation, processing_time, 1
|
| 373 |
|
| 374 |
-
#
|
| 375 |
logger.info(f"[SESSION:{session_id}] Processing as long text - starting chunking")
|
| 376 |
chunks = TextChunker.split_text_smart(text, self.max_chunk_size)
|
| 377 |
logger.info(f"[SESSION:{session_id}] Text split into {len(chunks)} chunks")
|
|
@@ -386,13 +386,13 @@ class MultilingualTranslator:
|
|
| 386 |
'target_lang': target_lang
|
| 387 |
}
|
| 388 |
|
| 389 |
-
#
|
| 390 |
translated_chunks = []
|
| 391 |
for i, chunk in enumerate(chunks):
|
| 392 |
chunk_start_time = time.time()
|
| 393 |
logger.info(f"[SESSION:{session_id}] Starting chunk {i+1}/{len(chunks)} | Chunk length: {len(chunk)} chars")
|
| 394 |
|
| 395 |
-
#
|
| 396 |
chunk_translation = self.cache.get(chunk, source_lang, target_lang)
|
| 397 |
|
| 398 |
if not chunk_translation:
|
|
@@ -404,7 +404,7 @@ class MultilingualTranslator:
|
|
| 404 |
logger.info(f"[SESSION:{session_id}] Progress: {i}/{len(chunks)} | Avg time per chunk: {avg_time_per_chunk:.1f}s | Estimated remaining: {estimated_remaining:.1f}s")
|
| 405 |
|
| 406 |
chunk_translation = self.translate_chunk(chunk, source_lang, target_lang, i, len(chunks))
|
| 407 |
-
#
|
| 408 |
self.cache.set(chunk, source_lang, target_lang, chunk_translation)
|
| 409 |
|
| 410 |
chunk_time = time.time() - chunk_start_time
|
|
@@ -419,15 +419,15 @@ class MultilingualTranslator:
|
|
| 419 |
if session_id in self.current_translation:
|
| 420 |
self.current_translation[session_id]['completed_chunks'] = i + 1
|
| 421 |
|
| 422 |
-
#
|
| 423 |
if i < len(chunks) - 1:
|
| 424 |
time.sleep(0.1)
|
| 425 |
|
| 426 |
-
#
|
| 427 |
logger.info(f"[SESSION:{session_id}] Combining translated chunks")
|
| 428 |
final_translation = self._combine_translations(translated_chunks, text)
|
| 429 |
|
| 430 |
-
#
|
| 431 |
self.cache.set(text, source_lang, target_lang, final_translation)
|
| 432 |
|
| 433 |
processing_time = time.time() - start_time
|
|
@@ -490,7 +490,7 @@ class MultilingualTranslator:
|
|
| 490 |
}
|
| 491 |
|
| 492 |
def _combine_translations(self, translated_chunks: List[str], original_text: str) -> str:
|
| 493 |
-
"""
|
| 494 |
if not translated_chunks:
|
| 495 |
return ""
|
| 496 |
|
|
@@ -499,23 +499,23 @@ class MultilingualTranslator:
|
|
| 499 |
|
| 500 |
logger.debug(f"[COMBINER] Combining {len(translated_chunks)} translated chunks")
|
| 501 |
|
| 502 |
-
#
|
| 503 |
combined = []
|
| 504 |
|
| 505 |
for i, chunk in enumerate(translated_chunks):
|
| 506 |
-
#
|
| 507 |
chunk = chunk.strip()
|
| 508 |
|
| 509 |
if not chunk:
|
| 510 |
continue
|
| 511 |
|
| 512 |
-
#
|
| 513 |
if i > 0 and combined:
|
| 514 |
-
#
|
| 515 |
-
if not combined[-1].rstrip().endswith(('.', '!', '?', ':', '
|
| 516 |
combined[-1] += '.'
|
| 517 |
|
| 518 |
-
#
|
| 519 |
if '\n\n' in original_text:
|
| 520 |
combined.append('\n\n' + chunk)
|
| 521 |
else:
|
|
@@ -525,9 +525,9 @@ class MultilingualTranslator:
|
|
| 525 |
|
| 526 |
result = ''.join(combined)
|
| 527 |
|
| 528 |
-
#
|
| 529 |
-
result = re.sub(r'\s+', ' ', result) #
|
| 530 |
-
result = re.sub(r'\.+', '.', result) #
|
| 531 |
result = result.strip()
|
| 532 |
|
| 533 |
logger.debug(f"[COMBINER] Combined translation length: {len(result)} chars")
|
|
@@ -544,7 +544,8 @@ class MultilingualTranslator:
|
|
| 544 |
|
| 545 |
def process_heavy_translation_background(request_id: str, text: str, source_lang: str, target_lang: str):
|
| 546 |
"""
|
| 547 |
-
|
|
|
|
| 548 |
"""
|
| 549 |
try:
|
| 550 |
logger.info(f"[HF Server] Background processing started for request: {request_id}")
|
|
@@ -555,55 +556,45 @@ def process_heavy_translation_background(request_id: str, text: str, source_lang
|
|
| 555 |
with translation_requests_lock:
|
| 556 |
if request_id in translation_requests:
|
| 557 |
translation_requests[request_id]['progress'] = 10
|
| 558 |
-
translation_requests[request_id]['status'] = 'processing'
|
| 559 |
|
| 560 |
# Perform actual translation
|
| 561 |
translation, processing_time, chunks_count = translator.translate_text(
|
| 562 |
text, source_lang, target_lang, request_id
|
| 563 |
)
|
| 564 |
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
# FIXED: Validate translation result
|
| 568 |
-
if not translation or not translation.strip():
|
| 569 |
-
logger.error(f"[HF Server] Empty translation result for request: {request_id}")
|
| 570 |
-
|
| 571 |
-
# Store failed translation
|
| 572 |
-
with translation_requests_lock:
|
| 573 |
-
completed_translations[request_id] = {
|
| 574 |
-
'translation': '',
|
| 575 |
-
'error': 'Translation completed but result is empty',
|
| 576 |
-
'status': 'failed',
|
| 577 |
-
'processing_time': total_processing_time,
|
| 578 |
-
'completed_at': datetime.now().isoformat(),
|
| 579 |
-
'request_id': request_id
|
| 580 |
-
}
|
| 581 |
-
|
| 582 |
-
if request_id in translation_requests:
|
| 583 |
-
del translation_requests[request_id]
|
| 584 |
-
return
|
| 585 |
|
| 586 |
# Store completed translation
|
| 587 |
with translation_requests_lock:
|
| 588 |
completed_translations[request_id] = {
|
| 589 |
'translation': translation,
|
| 590 |
-
'processing_time':
|
| 591 |
'character_count': len(text),
|
| 592 |
'source_lang': source_lang,
|
| 593 |
'target_lang': target_lang,
|
| 594 |
'completed_at': datetime.now().isoformat(),
|
| 595 |
'request_id': request_id,
|
| 596 |
'status': 'completed',
|
| 597 |
-
'
|
| 598 |
-
'source_lang_display': translation_requests[request_id].get('source_lang_display', source_lang),
|
| 599 |
-
'target_lang_display': translation_requests[request_id].get('target_lang_display', target_lang)
|
| 600 |
}
|
| 601 |
|
| 602 |
# Remove from processing queue
|
| 603 |
if request_id in translation_requests:
|
| 604 |
del translation_requests[request_id]
|
| 605 |
|
| 606 |
-
logger.info(f"[HF Server]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 607 |
|
| 608 |
except Exception as e:
|
| 609 |
logger.error(f"[HF Server] Background processing error for {request_id}: {str(e)}")
|
|
@@ -616,13 +607,80 @@ def process_heavy_translation_background(request_id: str, text: str, source_lang
|
|
| 616 |
'status': 'failed',
|
| 617 |
'processing_time': time.time() - start_time if 'start_time' in locals() else 0,
|
| 618 |
'completed_at': datetime.now().isoformat(),
|
| 619 |
-
'request_id': request_id
|
|
|
|
| 620 |
}
|
| 621 |
|
| 622 |
# Remove from processing queue
|
| 623 |
if request_id in translation_requests:
|
| 624 |
del translation_requests[request_id]
|
| 625 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 626 |
def perform_translation_internal(text: str, source_lang: str, target_lang: str) -> str:
|
| 627 |
"""
|
| 628 |
Internal translation function - wrapper for translator.translate_text
|
|
@@ -725,7 +783,8 @@ app.add_middleware(
|
|
| 725 |
@app.post("/api/check-completion")
|
| 726 |
async def check_completion(request: Request):
|
| 727 |
"""
|
| 728 |
-
|
|
|
|
| 729 |
"""
|
| 730 |
try:
|
| 731 |
form_data = await request.form()
|
|
@@ -751,8 +810,6 @@ async def check_completion(request: Request):
|
|
| 751 |
'request_id': request_id,
|
| 752 |
'completed_at': completion_data.get('completed_at'),
|
| 753 |
'processing_time': completion_data.get('processing_time', 0),
|
| 754 |
-
'character_count': completion_data.get('character_count', 0),
|
| 755 |
-
'translation_length': len(completion_data.get('translation', '')),
|
| 756 |
'verified': True
|
| 757 |
}
|
| 758 |
|
|
@@ -763,7 +820,6 @@ async def check_completion(request: Request):
|
|
| 763 |
return {
|
| 764 |
'status': 'processing',
|
| 765 |
'request_id': request_id,
|
| 766 |
-
'progress': translation_requests[request_id].get('progress', 0),
|
| 767 |
'verified': False
|
| 768 |
}
|
| 769 |
|
|
@@ -786,7 +842,8 @@ async def check_completion(request: Request):
|
|
| 786 |
@app.post("/api/check-translation-status")
|
| 787 |
async def check_translation_status(request: Request):
|
| 788 |
"""
|
| 789 |
-
|
|
|
|
| 790 |
"""
|
| 791 |
try:
|
| 792 |
form_data = await request.form()
|
|
@@ -805,7 +862,7 @@ async def check_translation_status(request: Request):
|
|
| 805 |
if request_id in completed_translations:
|
| 806 |
result = completed_translations[request_id]
|
| 807 |
|
| 808 |
-
logger.info(f"[HF Server] Translation status check for {request_id}: COMPLETED - returning
|
| 809 |
|
| 810 |
return {
|
| 811 |
'status': 'completed',
|
|
@@ -814,10 +871,8 @@ async def check_translation_status(request: Request):
|
|
| 814 |
'processing_time': result.get('processing_time', 0),
|
| 815 |
'character_count': result.get('character_count', 0),
|
| 816 |
'completed_at': result.get('completed_at'),
|
| 817 |
-
'source_lang': result.get('
|
| 818 |
-
'target_lang': result.get('
|
| 819 |
-
'chunks_processed': result.get('chunks_processed', 1),
|
| 820 |
-
'translation_length': len(result.get('translation', ''))
|
| 821 |
}
|
| 822 |
|
| 823 |
# Check if still processing
|
|
@@ -830,10 +885,7 @@ async def check_translation_status(request: Request):
|
|
| 830 |
'status': 'processing',
|
| 831 |
'request_id': request_id,
|
| 832 |
'started_at': req_data.get('started_at'),
|
| 833 |
-
'progress': req_data.get('progress', 0)
|
| 834 |
-
'character_count': req_data.get('character_count', 0),
|
| 835 |
-
'source_lang': req_data.get('source_lang_display', req_data.get('source_lang', '')),
|
| 836 |
-
'target_lang': req_data.get('target_lang_display', req_data.get('target_lang', ''))
|
| 837 |
}
|
| 838 |
|
| 839 |
else:
|
|
@@ -857,7 +909,8 @@ async def check_translation_status(request: Request):
|
|
| 857 |
@app.post("/api/translate/form")
|
| 858 |
async def api_translate_form(request: Request):
|
| 859 |
"""
|
| 860 |
-
|
|
|
|
| 861 |
"""
|
| 862 |
try:
|
| 863 |
form_data = await request.form()
|
|
@@ -887,12 +940,10 @@ async def api_translate_form(request: Request):
|
|
| 887 |
return {"status": "error", "message": "Invalid language codes"}
|
| 888 |
|
| 889 |
char_count = len(text)
|
| 890 |
-
|
| 891 |
-
is_heavy_text = char_count > 1000 # Same as WordPress threshold
|
| 892 |
|
| 893 |
logger.info(f"[FORM API] Translation request: {char_count} chars, {source_lang} → {target_lang}, Heavy: {is_heavy_text}")
|
| 894 |
|
| 895 |
-
# FIXED: Always use background processing for heavy texts
|
| 896 |
if is_heavy_text:
|
| 897 |
# Generate request ID for background processing
|
| 898 |
request_id = str(uuid.uuid4())
|
|
@@ -921,9 +972,7 @@ async def api_translate_form(request: Request):
|
|
| 921 |
'target_lang': target_code,
|
| 922 |
'started_at': datetime.now().isoformat(),
|
| 923 |
'character_count': char_count,
|
| 924 |
-
'progress': 0
|
| 925 |
-
'source_lang_display': source_lang,
|
| 926 |
-
'target_lang_display': target_lang
|
| 927 |
}
|
| 928 |
|
| 929 |
# Start background processing
|
|
@@ -934,20 +983,15 @@ async def api_translate_form(request: Request):
|
|
| 934 |
thread.daemon = True
|
| 935 |
thread.start()
|
| 936 |
|
| 937 |
-
logger.info(f"[FORM API] Started background processing for
|
| 938 |
|
| 939 |
-
# FIXED: Return proper background response for WordPress
|
| 940 |
return {
|
| 941 |
'is_background': True,
|
| 942 |
'session_id': request_id,
|
| 943 |
'request_id': request_id,
|
| 944 |
-
'server_request_id': request_id, # Added for compatibility
|
| 945 |
'status': 'processing',
|
| 946 |
-
'
|
| 947 |
-
'
|
| 948 |
-
'character_count': char_count,
|
| 949 |
-
'source_lang': source_lang,
|
| 950 |
-
'target_lang': target_lang
|
| 951 |
}
|
| 952 |
|
| 953 |
else:
|
|
@@ -975,9 +1019,7 @@ async def api_translate_form(request: Request):
|
|
| 975 |
'processing_time': processing_time,
|
| 976 |
'character_count': char_count,
|
| 977 |
'source_lang': source_lang,
|
| 978 |
-
'target_lang': target_lang
|
| 979 |
-
'is_heavy_text': False,
|
| 980 |
-
'chunks_processed': chunks_count
|
| 981 |
}
|
| 982 |
|
| 983 |
except Exception as e:
|
|
@@ -1135,11 +1177,51 @@ async def get_session_status(session_id: str):
|
|
| 1135 |
"message": "Session not found or completed"
|
| 1136 |
}
|
| 1137 |
|
| 1138 |
-
|
| 1139 |
-
|
|
|
|
| 1140 |
"""
|
| 1141 |
-
|
| 1142 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1143 |
active_sessions = []
|
| 1144 |
|
| 1145 |
with translation_requests_lock:
|
|
@@ -1183,8 +1265,7 @@ async def get_server_status():
|
|
| 1183 |
"active_sessions": len(active_sessions),
|
| 1184 |
"background_tasks": background_tasks_count,
|
| 1185 |
"total_active": total_active,
|
| 1186 |
-
"completed_cache": completed_count
|
| 1187 |
-
"active_session_details": active_sessions[:3] if active_sessions else [] # Return first 3 for details
|
| 1188 |
}
|
| 1189 |
else:
|
| 1190 |
return {
|
|
@@ -1200,7 +1281,8 @@ async def get_server_status():
|
|
| 1200 |
|
| 1201 |
def cleanup_old_requests():
|
| 1202 |
"""
|
| 1203 |
-
|
|
|
|
| 1204 |
"""
|
| 1205 |
current_time = datetime.now()
|
| 1206 |
|
|
|
|
| 125 |
thread.start()
|
| 126 |
|
| 127 |
class TextChunker:
|
| 128 |
+
"""کلاس برای تقسیم متن طولانی به بخشهای کوچکتر"""
|
| 129 |
|
| 130 |
@staticmethod
|
| 131 |
def split_text_smart(text: str, max_chunk_size: int = 400) -> List[str]:
|
| 132 |
+
"""تقسیم هوشمند متن بر اساس جملات و پاراگرافها"""
|
| 133 |
logger.info(f"[CHUNKER] Starting smart text splitting | Text length: {len(text)} chars | Max chunk size: {max_chunk_size}")
|
| 134 |
|
| 135 |
if len(text) <= max_chunk_size:
|
|
|
|
| 138 |
|
| 139 |
chunks = []
|
| 140 |
|
| 141 |
+
# تقسیم بر اساس پاراگرافها
|
| 142 |
paragraphs = text.split('\n\n')
|
| 143 |
current_chunk = ""
|
| 144 |
|
| 145 |
for i, paragraph in enumerate(paragraphs):
|
| 146 |
logger.debug(f"[CHUNKER] Processing paragraph {i+1}/{len(paragraphs)} | Length: {len(paragraph)}")
|
| 147 |
|
| 148 |
+
# اگر پاراگراف خودش بزرگ است آن را تقسیم کن
|
| 149 |
if len(paragraph) > max_chunk_size:
|
| 150 |
+
# ذخیره قسمت فعلی اگر وجود دارد
|
| 151 |
if current_chunk.strip():
|
| 152 |
chunks.append(current_chunk.strip())
|
| 153 |
logger.debug(f"[CHUNKER] Added chunk from accumulated paragraphs | Length: {len(current_chunk.strip())}")
|
| 154 |
current_chunk = ""
|
| 155 |
|
| 156 |
+
# تقسیم پاراگراف بزرگ
|
| 157 |
sub_chunks = TextChunker._split_paragraph(paragraph, max_chunk_size)
|
| 158 |
chunks.extend(sub_chunks)
|
| 159 |
logger.debug(f"[CHUNKER] Split large paragraph into {len(sub_chunks)} sub-chunks")
|
| 160 |
else:
|
| 161 |
+
# بررسی اینکه آیا اضافه کردن این پاراگراف از حد تجاوز میکند
|
| 162 |
if len(current_chunk) + len(paragraph) + 2 > max_chunk_size:
|
| 163 |
if current_chunk.strip():
|
| 164 |
chunks.append(current_chunk.strip())
|
|
|
|
| 170 |
else:
|
| 171 |
current_chunk = paragraph
|
| 172 |
|
| 173 |
+
# اضافه کردن آخرین قسمت
|
| 174 |
if current_chunk.strip():
|
| 175 |
chunks.append(current_chunk.strip())
|
| 176 |
logger.debug(f"[CHUNKER] Added final chunk | Length: {len(current_chunk.strip())}")
|
|
|
|
| 180 |
|
| 181 |
@staticmethod
|
| 182 |
def _split_paragraph(paragraph: str, max_chunk_size: int) -> List[str]:
|
| 183 |
+
"""تقسیم پاراگراف بزرگ به جملات"""
|
| 184 |
logger.debug(f"[CHUNKER] Splitting large paragraph | Length: {len(paragraph)}")
|
| 185 |
|
| 186 |
+
# تقسیم بر اساس جملات
|
| 187 |
sentences = re.split(r'[.!?]+\s+', paragraph)
|
| 188 |
chunks = []
|
| 189 |
current_chunk = ""
|
|
|
|
| 192 |
if not sentence.strip():
|
| 193 |
continue
|
| 194 |
|
| 195 |
+
# اضافه کردن علامت نقطه اگر حذف شده
|
| 196 |
if not sentence.endswith(('.', '!', '?')):
|
| 197 |
sentence += '.'
|
| 198 |
|
| 199 |
if len(sentence) > max_chunk_size:
|
| 200 |
+
# جمله خودش خیلی بلند است - تقسیم بر اساس کاما
|
| 201 |
if current_chunk.strip():
|
| 202 |
chunks.append(current_chunk.strip())
|
| 203 |
current_chunk = ""
|
|
|
|
| 223 |
|
| 224 |
@staticmethod
|
| 225 |
def _split_by_comma(sentence: str, max_chunk_size: int) -> List[str]:
|
| 226 |
+
"""تقسیم جمله طولانی بر اساس کاما"""
|
| 227 |
logger.debug(f"[CHUNKER] Splitting long sentence by comma | Length: {len(sentence)}")
|
| 228 |
|
| 229 |
parts = sentence.split(', ')
|
|
|
|
| 232 |
|
| 233 |
for part in parts:
|
| 234 |
if len(part) > max_chunk_size:
|
| 235 |
+
# قسمت خودش خیلی بلند است - تقسیم اجباری
|
| 236 |
if current_chunk.strip():
|
| 237 |
chunks.append(current_chunk.strip())
|
| 238 |
current_chunk = ""
|
| 239 |
|
| 240 |
+
# تقسیم اجباری بر اساس طول
|
| 241 |
while len(part) > max_chunk_size:
|
| 242 |
chunks.append(part[:max_chunk_size].strip())
|
| 243 |
part = part[max_chunk_size:].strip()
|
|
|
|
| 288 |
logger.error(f"[INIT] Error loading model: {e}")
|
| 289 |
raise
|
| 290 |
|
| 291 |
+
# تنظیمات بهینه برای ترجمه متنهای بلند
|
| 292 |
+
self.max_chunk_size = 350 # حداکثر طول هر قسمت
|
| 293 |
+
self.min_chunk_overlap = 20 # همپوشانی بین قسمتها
|
| 294 |
|
| 295 |
# Track translation progress
|
| 296 |
self.current_translation = {}
|
|
|
|
| 299 |
logger.info(f"[INIT] Translator initialized | Max chunk size: {self.max_chunk_size} chars")
|
| 300 |
|
| 301 |
def translate_chunk(self, text: str, source_lang: str, target_lang: str, chunk_index: int = 0, total_chunks: int = 1) -> str:
|
| 302 |
+
"""ترجمه یک قسمت کوچک از متن"""
|
| 303 |
try:
|
| 304 |
+
logger.info(f"[TRANSLATE] Starting chunk translation [{chunk_index+1}/{total_chunks}] | {source_lang} → {target_lang} | Length: {len(text)} chars")
|
| 305 |
|
| 306 |
# Set source language for tokenizer
|
| 307 |
self.tokenizer.src_lang = source_lang
|
|
|
|
| 315 |
generated_tokens = self.model.generate(
|
| 316 |
**encoded,
|
| 317 |
forced_bos_token_id=self.tokenizer.get_lang_id(target_lang),
|
| 318 |
+
max_length=1024, # افزایش طول خروجی
|
| 319 |
+
min_length=10, # حداقل طول خروجی
|
| 320 |
+
num_beams=5, # افزایش تعداد beam ها برای کیفیت بهتر
|
| 321 |
early_stopping=True,
|
| 322 |
+
no_repeat_ngram_size=3, # جلوگیری از تکرار
|
| 323 |
+
length_penalty=1.0, # تنظیم جریمه طول
|
| 324 |
+
repetition_penalty=1.2, # جلوگیری از تکرار کلمات
|
| 325 |
+
do_sample=False, # استفاده از روش قطعی
|
| 326 |
+
temperature=0.7, # کنترل تنوع
|
| 327 |
pad_token_id=self.tokenizer.pad_token_id,
|
| 328 |
eos_token_id=self.tokenizer.eos_token_id
|
| 329 |
)
|
|
|
|
| 332 |
# Decode result
|
| 333 |
translation = self.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
| 334 |
|
| 335 |
+
# پاکسازی ترجمه از کاراکترهای اضافی
|
| 336 |
translation = translation.strip()
|
| 337 |
|
| 338 |
logger.info(f"[TRANSLATE] Chunk translation completed [{chunk_index+1}/{total_chunks}] | Generation time: {generation_time:.2f}s | Output length: {len(translation)} chars")
|
|
|
|
| 344 |
return f"[Translation Error: {str(e)}]"
|
| 345 |
|
| 346 |
def translate_text(self, text: str, source_lang: str, target_lang: str, session_id: str = None) -> Tuple[str, float, int]:
|
| 347 |
+
"""ترجمه متن با پشتیبانی از متنهای طولانی و لاگهای مفصل"""
|
| 348 |
start_time = time.time()
|
| 349 |
|
| 350 |
if not session_id:
|
| 351 |
session_id = hashlib.md5(f"{text[:100]}{time.time()}".encode()).hexdigest()[:8]
|
| 352 |
|
| 353 |
+
logger.info(f"[SESSION:{session_id}] Starting translation | {source_lang} → {target_lang} | Text length: {len(text)} chars")
|
| 354 |
|
| 355 |
+
# بررسی کش برای کل متن
|
| 356 |
cached_result = self.cache.get(text, source_lang, target_lang)
|
| 357 |
if cached_result:
|
| 358 |
logger.info(f"[SESSION:{session_id}] Translation completed from cache | Time: {time.time() - start_time:.2f}s")
|
| 359 |
return cached_result, time.time() - start_time, 1
|
| 360 |
|
| 361 |
try:
|
| 362 |
+
# اگر متن کوتاه است مستقیماً ترجمه کن
|
| 363 |
if len(text) <= self.max_chunk_size:
|
| 364 |
logger.info(f"[SESSION:{session_id}] Processing as short text")
|
| 365 |
translation = self.translate_chunk(text, source_lang, target_lang, 0, 1)
|
| 366 |
|
| 367 |
+
# ذخیره در کش
|
| 368 |
self.cache.set(text, source_lang, target_lang, translation)
|
| 369 |
processing_time = time.time() - start_time
|
| 370 |
logger.info(f"[SESSION:{session_id}] Short text translation completed | Total time: {processing_time:.2f}s")
|
| 371 |
|
| 372 |
return translation, processing_time, 1
|
| 373 |
|
| 374 |
+
# تقسیم متن طولانی به قسمتهای کوچکتر
|
| 375 |
logger.info(f"[SESSION:{session_id}] Processing as long text - starting chunking")
|
| 376 |
chunks = TextChunker.split_text_smart(text, self.max_chunk_size)
|
| 377 |
logger.info(f"[SESSION:{session_id}] Text split into {len(chunks)} chunks")
|
|
|
|
| 386 |
'target_lang': target_lang
|
| 387 |
}
|
| 388 |
|
| 389 |
+
# ترجمه هر قسمت
|
| 390 |
translated_chunks = []
|
| 391 |
for i, chunk in enumerate(chunks):
|
| 392 |
chunk_start_time = time.time()
|
| 393 |
logger.info(f"[SESSION:{session_id}] Starting chunk {i+1}/{len(chunks)} | Chunk length: {len(chunk)} chars")
|
| 394 |
|
| 395 |
+
# بررسی کش برای هر قسمت
|
| 396 |
chunk_translation = self.cache.get(chunk, source_lang, target_lang)
|
| 397 |
|
| 398 |
if not chunk_translation:
|
|
|
|
| 404 |
logger.info(f"[SESSION:{session_id}] Progress: {i}/{len(chunks)} | Avg time per chunk: {avg_time_per_chunk:.1f}s | Estimated remaining: {estimated_remaining:.1f}s")
|
| 405 |
|
| 406 |
chunk_translation = self.translate_chunk(chunk, source_lang, target_lang, i, len(chunks))
|
| 407 |
+
# ذخیره قسمت در کش
|
| 408 |
self.cache.set(chunk, source_lang, target_lang, chunk_translation)
|
| 409 |
|
| 410 |
chunk_time = time.time() - chunk_start_time
|
|
|
|
| 419 |
if session_id in self.current_translation:
|
| 420 |
self.current_translation[session_id]['completed_chunks'] = i + 1
|
| 421 |
|
| 422 |
+
# کمی استراحت بین ترجمهها برای جلوگیری از بارذاری زیاد
|
| 423 |
if i < len(chunks) - 1:
|
| 424 |
time.sleep(0.1)
|
| 425 |
|
| 426 |
+
# ترکیب قسمتهای ترجمه شده
|
| 427 |
logger.info(f"[SESSION:{session_id}] Combining translated chunks")
|
| 428 |
final_translation = self._combine_translations(translated_chunks, text)
|
| 429 |
|
| 430 |
+
# ذخیره نتیجه نهایی در کش
|
| 431 |
self.cache.set(text, source_lang, target_lang, final_translation)
|
| 432 |
|
| 433 |
processing_time = time.time() - start_time
|
|
|
|
| 490 |
}
|
| 491 |
|
| 492 |
def _combine_translations(self, translated_chunks: List[str], original_text: str) -> str:
|
| 493 |
+
"""ترکیب قسمتهای ترجمه شده به یک متن یکپارچه"""
|
| 494 |
if not translated_chunks:
|
| 495 |
return ""
|
| 496 |
|
|
|
|
| 499 |
|
| 500 |
logger.debug(f"[COMBINER] Combining {len(translated_chunks)} translated chunks")
|
| 501 |
|
| 502 |
+
# ترکیب قسمتها با در نظر گیری ساختار اصلی متن
|
| 503 |
combined = []
|
| 504 |
|
| 505 |
for i, chunk in enumerate(translated_chunks):
|
| 506 |
+
# پاکسازی قسمت
|
| 507 |
chunk = chunk.strip()
|
| 508 |
|
| 509 |
if not chunk:
|
| 510 |
continue
|
| 511 |
|
| 512 |
+
# اضافه کردن فاصله مناسب بین قسمتها
|
| 513 |
if i > 0 and combined:
|
| 514 |
+
# اگر قسمت قبلی با نقطه تمام نمیشود فاصله اضافه کن
|
| 515 |
+
if not combined[-1].rstrip().endswith(('.', '!', '?', ':', '۔', '.')):
|
| 516 |
combined[-1] += '.'
|
| 517 |
|
| 518 |
+
# بررسی اینکه آیا نیاز به پاراگراف جدید دارکم
|
| 519 |
if '\n\n' in original_text:
|
| 520 |
combined.append('\n\n' + chunk)
|
| 521 |
else:
|
|
|
|
| 525 |
|
| 526 |
result = ''.join(combined)
|
| 527 |
|
| 528 |
+
# پاکسازی نهایی
|
| 529 |
+
result = re.sub(r'\s+', ' ', result) # حذف فاصلههای اضافی
|
| 530 |
+
result = re.sub(r'\.+', '.', result) # حذف نقطههای تکراری
|
| 531 |
result = result.strip()
|
| 532 |
|
| 533 |
logger.debug(f"[COMBINER] Combined translation length: {len(result)} chars")
|
|
|
|
| 544 |
|
| 545 |
def process_heavy_translation_background(request_id: str, text: str, source_lang: str, target_lang: str):
|
| 546 |
"""
|
| 547 |
+
Background function to process heavy text translations for WordPress integration.
|
| 548 |
+
Updates the completed_translations dict when done and automatically charges credits.
|
| 549 |
"""
|
| 550 |
try:
|
| 551 |
logger.info(f"[HF Server] Background processing started for request: {request_id}")
|
|
|
|
| 556 |
with translation_requests_lock:
|
| 557 |
if request_id in translation_requests:
|
| 558 |
translation_requests[request_id]['progress'] = 10
|
|
|
|
| 559 |
|
| 560 |
# Perform actual translation
|
| 561 |
translation, processing_time, chunks_count = translator.translate_text(
|
| 562 |
text, source_lang, target_lang, request_id
|
| 563 |
)
|
| 564 |
|
| 565 |
+
processing_time = time.time() - start_time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 566 |
|
| 567 |
# Store completed translation
|
| 568 |
with translation_requests_lock:
|
| 569 |
completed_translations[request_id] = {
|
| 570 |
'translation': translation,
|
| 571 |
+
'processing_time': processing_time,
|
| 572 |
'character_count': len(text),
|
| 573 |
'source_lang': source_lang,
|
| 574 |
'target_lang': target_lang,
|
| 575 |
'completed_at': datetime.now().isoformat(),
|
| 576 |
'request_id': request_id,
|
| 577 |
'status': 'completed',
|
| 578 |
+
'auto_charged': False # فلگ برای ردیابی کسر خودکار اعتبار
|
|
|
|
|
|
|
| 579 |
}
|
| 580 |
|
| 581 |
# Remove from processing queue
|
| 582 |
if request_id in translation_requests:
|
| 583 |
del translation_requests[request_id]
|
| 584 |
|
| 585 |
+
logger.info(f"[HF Server] Long text translation completed for request: {request_id} in {processing_time:.2f}s")
|
| 586 |
+
|
| 587 |
+
# NEW: اطلاعرسانی خودکار به ووردپرس برای کسر اعتبار
|
| 588 |
+
charge_success = notify_wordpress_completion_and_charge(request_id)
|
| 589 |
+
|
| 590 |
+
if charge_success:
|
| 591 |
+
# علامتگذاری به عنوان کسر شده
|
| 592 |
+
with translation_requests_lock:
|
| 593 |
+
if request_id in completed_translations:
|
| 594 |
+
completed_translations[request_id]['auto_charged'] = True
|
| 595 |
+
logger.info(f"[HF Server] Automatic charging completed for request: {request_id}")
|
| 596 |
+
else:
|
| 597 |
+
logger.warning(f"[HF Server] Automatic charging failed for request: {request_id}")
|
| 598 |
|
| 599 |
except Exception as e:
|
| 600 |
logger.error(f"[HF Server] Background processing error for {request_id}: {str(e)}")
|
|
|
|
| 607 |
'status': 'failed',
|
| 608 |
'processing_time': time.time() - start_time if 'start_time' in locals() else 0,
|
| 609 |
'completed_at': datetime.now().isoformat(),
|
| 610 |
+
'request_id': request_id,
|
| 611 |
+
'auto_charged': False
|
| 612 |
}
|
| 613 |
|
| 614 |
# Remove from processing queue
|
| 615 |
if request_id in translation_requests:
|
| 616 |
del translation_requests[request_id]
|
| 617 |
|
| 618 |
+
def notify_wordpress_completion_and_charge(request_id: str, wordpress_url: str = None) -> bool:
    """Notify WordPress that a translation finished and trigger automatic credit deduction.

    Looks up the completed translation in the local cache and POSTs its
    metadata to the WordPress ``admin-ajax.php`` endpoint so the plugin can
    deduct the user's credits.

    Args:
        request_id: Identifier of the completed translation request.
        wordpress_url: Base URL of the WordPress site; falls back to the
            ``WORDPRESS_URL`` environment variable when omitted.

    Returns:
        True when WordPress confirmed the charge, False on any failure.
    """
    try:
        if not wordpress_url:
            # The WordPress address should come from an environment variable or settings.
            wordpress_url = os.getenv('WORDPRESS_URL', 'https://your-wordpress-site.com')

        # Snapshot the completed-translation record while holding the lock so
        # later mutations (e.g. setting 'auto_charged') cannot race our reads.
        with translation_requests_lock:
            if request_id not in completed_translations:
                logger.error(f"[AUTO CHARGE] Translation not found in completed cache: {request_id}")
                return False
            translation_data = dict(completed_translations[request_id])

        # Ask WordPress to deduct credits for this completed translation.
        charge_url = f"{wordpress_url.rstrip('/')}/wp-admin/admin-ajax.php"

        charge_payload = {
            'action': 'amt_auto_charge_completed',
            'request_id': request_id,
            'character_count': translation_data.get('character_count', 0),
            'processing_time': translation_data.get('processing_time', 0),
            'translation_length': len(translation_data.get('translation', '')),
            'source_lang': translation_data.get('source_lang', ''),
            'target_lang': translation_data.get('target_lang', ''),
            'completed_at': translation_data.get('completed_at', ''),
            'nonce': 'auto_charge_nonce'  # TODO: should be obtained from WordPress
        }

        logger.info(f"[AUTO CHARGE] Notifying WordPress for automatic charging: {request_id}")

        # Imported lazily: only needed when a charge notification is sent.
        import requests
        response = requests.post(
            charge_url,
            data=charge_payload,
            timeout=30,
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'User-Agent': 'HuggingFace-Translation-Server/2.1.0'
            }
        )

        if response.status_code != 200:
            logger.error(f"[AUTO CHARGE] WordPress request failed with status: {response.status_code}")
            return False

        try:
            result = response.json()
        except ValueError:
            # requests raises ValueError (json.JSONDecodeError) on a malformed
            # body; the previous bare `except:` also swallowed SystemExit etc.
            logger.error("[AUTO CHARGE] Invalid JSON response from WordPress")
            return False

        if result.get('success'):
            logger.info(f"[AUTO CHARGE] WordPress automatic charging successful: {request_id} - Cost: {result.get('cost', 0)}")
            return True

        logger.error(f"[AUTO CHARGE] WordPress charging failed: {result.get('data', {}).get('message', 'Unknown error')}")
        return False

    except Exception as e:
        logger.error(f"[AUTO CHARGE] Error notifying WordPress: {str(e)}")
        return False
|
| 683 |
+
|
| 684 |
def perform_translation_internal(text: str, source_lang: str, target_lang: str) -> str:
|
| 685 |
"""
|
| 686 |
Internal translation function - wrapper for translator.translate_text
|
|
|
|
| 783 |
@app.post("/api/check-completion")
|
| 784 |
async def check_completion(request: Request):
|
| 785 |
"""
|
| 786 |
+
Endpoint to verify if a translation request has been completed.
|
| 787 |
+
WordPress calls this to confirm before charging credits.
|
| 788 |
"""
|
| 789 |
try:
|
| 790 |
form_data = await request.form()
|
|
|
|
| 810 |
'request_id': request_id,
|
| 811 |
'completed_at': completion_data.get('completed_at'),
|
| 812 |
'processing_time': completion_data.get('processing_time', 0),
|
|
|
|
|
|
|
| 813 |
'verified': True
|
| 814 |
}
|
| 815 |
|
|
|
|
| 820 |
return {
|
| 821 |
'status': 'processing',
|
| 822 |
'request_id': request_id,
|
|
|
|
| 823 |
'verified': False
|
| 824 |
}
|
| 825 |
|
|
|
|
| 842 |
@app.post("/api/check-translation-status")
|
| 843 |
async def check_translation_status(request: Request):
|
| 844 |
"""
|
| 845 |
+
Endpoint to get the current status and result of a translation request.
|
| 846 |
+
Returns translation content if completed.
|
| 847 |
"""
|
| 848 |
try:
|
| 849 |
form_data = await request.form()
|
|
|
|
| 862 |
if request_id in completed_translations:
|
| 863 |
result = completed_translations[request_id]
|
| 864 |
|
| 865 |
+
logger.info(f"[HF Server] Translation status check for {request_id}: COMPLETED - returning translation")
|
| 866 |
|
| 867 |
return {
|
| 868 |
'status': 'completed',
|
|
|
|
| 871 |
'processing_time': result.get('processing_time', 0),
|
| 872 |
'character_count': result.get('character_count', 0),
|
| 873 |
'completed_at': result.get('completed_at'),
|
| 874 |
+
'source_lang': result.get('source_lang', ''),
|
| 875 |
+
'target_lang': result.get('target_lang', '')
|
|
|
|
|
|
|
| 876 |
}
|
| 877 |
|
| 878 |
# Check if still processing
|
|
|
|
| 885 |
'status': 'processing',
|
| 886 |
'request_id': request_id,
|
| 887 |
'started_at': req_data.get('started_at'),
|
| 888 |
+
'progress': req_data.get('progress', 0)
|
|
|
|
|
|
|
|
|
|
| 889 |
}
|
| 890 |
|
| 891 |
else:
|
|
|
|
| 909 |
@app.post("/api/translate/form")
|
| 910 |
async def api_translate_form(request: Request):
|
| 911 |
"""
|
| 912 |
+
Enhanced translation endpoint that handles both short and long texts.
|
| 913 |
+
For long texts, returns immediately with request_id for background processing.
|
| 914 |
"""
|
| 915 |
try:
|
| 916 |
form_data = await request.form()
|
|
|
|
| 940 |
return {"status": "error", "message": "Invalid language codes"}
|
| 941 |
|
| 942 |
char_count = len(text)
|
| 943 |
+
is_heavy_text = char_count > 1000 # Same threshold as WordPress
|
|
|
|
| 944 |
|
| 945 |
logger.info(f"[FORM API] Translation request: {char_count} chars, {source_lang} → {target_lang}, Heavy: {is_heavy_text}")
|
| 946 |
|
|
|
|
| 947 |
if is_heavy_text:
|
| 948 |
# Generate request ID for background processing
|
| 949 |
request_id = str(uuid.uuid4())
|
|
|
|
| 972 |
'target_lang': target_code,
|
| 973 |
'started_at': datetime.now().isoformat(),
|
| 974 |
'character_count': char_count,
|
| 975 |
+
'progress': 0
|
|
|
|
|
|
|
| 976 |
}
|
| 977 |
|
| 978 |
# Start background processing
|
|
|
|
| 983 |
thread.daemon = True
|
| 984 |
thread.start()
|
| 985 |
|
| 986 |
+
logger.info(f"[FORM API] Started background processing for request: {request_id}")
|
| 987 |
|
|
|
|
| 988 |
return {
|
| 989 |
'is_background': True,
|
| 990 |
'session_id': request_id,
|
| 991 |
'request_id': request_id,
|
|
|
|
| 992 |
'status': 'processing',
|
| 993 |
+
'message': f'Long text ({char_count} characters) is being processed in background. Use the request ID to check status.',
|
| 994 |
+
'character_count': char_count
|
|
|
|
|
|
|
|
|
|
| 995 |
}
|
| 996 |
|
| 997 |
else:
|
|
|
|
| 1019 |
'processing_time': processing_time,
|
| 1020 |
'character_count': char_count,
|
| 1021 |
'source_lang': source_lang,
|
| 1022 |
+
'target_lang': target_lang
|
|
|
|
|
|
|
| 1023 |
}
|
| 1024 |
|
| 1025 |
except Exception as e:
|
|
|
|
| 1177 |
"message": "Session not found or completed"
|
| 1178 |
}
|
| 1179 |
|
| 1180 |
+
# New endpoint: check automatic credit-deduction status for a specific request
|
| 1181 |
+
@app.post("/api/check-auto-charge-status")
async def check_auto_charge_status(request: Request):
    """Report whether a completed translation has been automatically charged.

    Reads ``request_id`` from the posted form data and returns the charging
    status stored alongside the completed translation, if one exists.
    """
    try:
        form_data = await request.form()
        request_id = form_data.get('request_id', '').strip()

        # A request id is mandatory; bail out early when it is missing.
        if not request_id:
            return {
                'status': 'error',
                'message': 'Request ID is required'
            }

        with translation_requests_lock:
            record = completed_translations.get(request_id)

            if record is None:
                return {
                    'status': 'not_found',
                    'request_id': request_id,
                    'message': 'Translation not found'
                }

            return {
                'status': 'completed',
                'request_id': request_id,
                'auto_charged': record.get('auto_charged', False),
                'completed_at': record.get('completed_at'),
                'processing_time': record.get('processing_time', 0),
                'character_count': record.get('character_count', 0)
            }

    except Exception as e:
        logger.error(f"[HF Server] Error checking auto charge status: {str(e)}")
        return {
            'status': 'error',
            'message': 'Server error occurred'
        }
|
| 1221 |
+
|
| 1222 |
+
@app.get("/api/server-status")
|
| 1223 |
+
async def get_server_status():
|
| 1224 |
+
"""Get current server status - enhanced for WordPress integration"""
|
| 1225 |
active_sessions = []
|
| 1226 |
|
| 1227 |
with translation_requests_lock:
|
|
|
|
| 1265 |
"active_sessions": len(active_sessions),
|
| 1266 |
"background_tasks": background_tasks_count,
|
| 1267 |
"total_active": total_active,
|
| 1268 |
+
"completed_cache": completed_count
|
|
|
|
| 1269 |
}
|
| 1270 |
else:
|
| 1271 |
return {
|
|
|
|
| 1281 |
|
| 1282 |
def cleanup_old_requests():
|
| 1283 |
"""
|
| 1284 |
+
Clean up old completed translations and stuck processing requests.
|
| 1285 |
+
Should be called periodically.
|
| 1286 |
"""
|
| 1287 |
current_time = datetime.now()
|
| 1288 |
|