suprimedev committed on
Commit
d6d00a2
·
verified ·
1 Parent(s): ad2942a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -124
app.py CHANGED
@@ -112,7 +112,7 @@ def transcribe_audio_chunked(audio_path: str, language: str = "fa-IR") -> List[D
112
  recognizer.dynamic_energy_adjustment_damping = 0.15
113
  recognizer.dynamic_energy_adjustment_ratio = 1.5
114
  recognizer.pause_threshold = 0.8
115
- recognizer.non_speaking_duration = 1.5
116
 
117
  try:
118
  duration = get_video_duration(audio_path)
@@ -121,11 +121,12 @@ def transcribe_audio_chunked(audio_path: str, language: str = "fa-IR") -> List[D
121
  return []
122
 
123
  chunk_duration = 10 # ثانیه
124
- chunk_overlap = 1.5 # همپوشانی بین chunk ها (برای مثال 1.5 ثانیه)
125
 
126
- temp_chunk_dir = tempfile.mkdtemp() # دایرکتوری موقت برای chunk ها
127
 
128
- for start_time in range(0, int(duration), max(1, chunk_duration - int(chunk_overlap))): # Ensure step is at least 1 sec
 
129
  end_time = min(start_time + chunk_duration, duration)
130
 
131
  chunk_filename = f"chunk_{start_time}_{end_time}.wav"
@@ -262,73 +263,14 @@ def srt_string_to_segments(srt_string: str) -> List[Dict]:
262
 
263
  return segments
264
 
265
- def find_longest_suffix_prefix_overlap(text1: str, text2: str, min_overlap_chars: int = 5) -> int:
266
  """
267
- Finds the length of the longest common string that is a suffix of text1 and a prefix of text2.
268
- Returns the length of the overlap in characters.
269
- """
270
- # Iterate from the maximum possible overlap length down to min_overlap_chars
271
- for k in range(min(len(text1), len(text2)), min_overlap_chars - 1, -1):
272
- # Check if the suffix of text1 of length k matches the prefix of text2 of length k
273
- if text1[-k:] == text2[:k]:
274
- return k
275
-
276
- # If no overlap of at least min_overlap_chars is found
277
- return 0
278
-
279
- def deduplicate_segments(segments: List[Dict], min_overlap_chars: int = 5) -> List[Dict]:
280
- """
281
- Removes duplicate text at the boundaries of consecutive segments.
282
- """
283
- if not segments:
284
- return []
285
-
286
- # Ensure segments are sorted by start time
287
- segments.sort(key=lambda x: x['start'])
288
-
289
- deduplicated_segments = []
290
- if segments:
291
- deduplicated_segments.append(segments[0]) # Start with the first segment
292
-
293
- for i in range(1, len(segments)):
294
- current_segment = segments[i]
295
- # Use the last segment added to the deduplicated list for comparison
296
- previous_segment = deduplicated_segments[-1]
297
-
298
- prev_text = previous_segment['text']
299
- curr_text = current_segment['text']
300
-
301
- # Skip if current text is empty or previous text is empty
302
- if not curr_text.strip() or not prev_text.strip():
303
- deduplicated_segments.append(current_segment)
304
- continue
305
-
306
- overlap_length = find_longest_suffix_prefix_overlap(prev_text, curr_text, min_overlap_chars)
307
-
308
- if overlap_length > 0:
309
- # Trim the overlapping part from the beginning of the current segment's text
310
- current_segment['text'] = curr_text[overlap_length:].strip()
311
-
312
- # If the current segment's text becomes empty after trimming, it might be redundant.
313
- # It will be added with empty text, and clean_up_segments will ensure minimum duration.
314
-
315
- # Add the current segment (potentially modified) to the list
316
- deduplicated_segments.append(current_segment)
317
-
318
- # Re-apply clean_up_segments to ensure timing is valid and durations are met after text modification.
319
- # This also handles cases where text might have become empty or very short.
320
- return clean_up_segments(deduplicated_segments)
321
-
322
-
323
- def clean_up_segments(segments: List[Dict], min_duration: float = 1.0, min_gap: float = 0.1) -> List[Dict]:
324
- """
325
- Cleans up subtitle segments to ensure minimum duration and minimum gap between consecutive segments.
326
- This helps prevent overlapping subtitles and ensures they are visible for a sufficient time.
327
  """
328
  if not segments:
329
  return []
330
 
331
- # Sort segments by start time to ensure correct processing order
332
  segments.sort(key=lambda x: x['start'])
333
 
334
  processed_segments = []
@@ -338,32 +280,20 @@ def clean_up_segments(segments: List[Dict], min_duration: float = 1.0, min_gap:
338
  end = segment['end']
339
  text = segment['text']
340
 
341
- # 1. Ensure minimum duration for the segment itself
342
- # If the segment is too short, extend its end time.
343
- if end - start < min_duration:
344
- end = start + min_duration
345
-
346
- # 2. Ensure minimum gap *after* this segment and *before* the next one
347
- # This prevents consecutive subtitles from overlapping or appearing too close.
348
  if i < len(segments) - 1:
349
  next_segment_start = segments[i+1]['start']
350
- # If the current segment ends too close to the next one starts (considering the gap)
351
- if end + min_gap > next_segment_start:
352
- # Adjust the current segment's end time to create the minimum gap
353
  end = next_segment_start - min_gap
354
-
355
- # Re-check minimum duration constraint after adjusting end time
356
- # If adjusting end time made it too short, set end time based on min_duration
357
- if end - start < min_duration:
358
- end = start + min_duration
359
 
360
- # Final check: ensure end time is not before start time after all adjustments
361
- # This can happen in edge cases or if min_gap is too large relative to segment duration.
362
- if end < start:
363
- end = start + min_duration # Fallback to minimum duration
364
 
365
  processed_segments.append({
366
- "start": round(start, 3), # Round to milliseconds for SRT
367
  "end": round(end, 3),
368
  "text": text
369
  })
@@ -374,7 +304,6 @@ def translate_srt_content(srt_content_string: str, target_language_name: str) ->
374
  """ترجمه محتوای SRT با استفاده از API"""
375
  api_key = os.environ.get("API_KEY")
376
  if not api_key:
377
- # Raising a Gradio Error will display it nicely in the UI
378
  raise gr.Error("API key for translation not found. Please set it as an environment variable 'API_KEY'.")
379
 
380
  url = "https://api.ohmygpt.com/v1/chat/completions"
@@ -382,7 +311,6 @@ def translate_srt_content(srt_content_string: str, target_language_name: str) ->
382
  temperature = 0.7
383
  top_p = 0.9
384
 
385
- # پرامپت حرفه‌ای و دقیق برای ترجمه زیرنویس با تاکید بر حفظ زمان‌بندی
386
  system_prompt = f"""
387
  You are an AI assistant specialized in professional subtitle translation. Your task is to translate the provided SRT content from its original language into **{target_language_name}**.
388
 
@@ -392,7 +320,7 @@ You are an AI assistant specialized in professional subtitle translation. Your t
392
  2. **Translate to Target Language:** Translate the corrected source text into **{target_language_name}**.
393
  3. **Preserve SRT Format:** Maintain the **exact SRT format**:
394
  * Keep the original sequence numbers (e.g., 1, 2, 3...).
395
- * **Crucially, preserve the original timestamps** for each segment (start and end times). Do not change them unless absolutely necessary for grammatical correctness or natural flow in the target language, and only make minimal, justified adjustments. The primary goal is to have the translated subtitles appear on screen during the same time intervals as the original subtitles. If a character speaks slowly or with pauses, ensure the translated text still fits within the original time bounds as closely as possible.
396
  * Maintain line breaks within segments.
397
  4. **Natural and Accurate:** Ensure the translated subtitles sound natural, are culturally appropriate, and accurately convey the meaning of the original dialogue.
398
  5. **No Extraneous Text:** **Absolutely do not include any introductory phrases, concluding remarks, explanations, or any text outside of the standard SRT format.** Your output must be a complete and valid SRT file.
@@ -420,7 +348,7 @@ Here is the SRT content to process:
420
  }
421
 
422
  try:
423
- response = requests.post(url, headers=headers, json=payload, timeout=180) # Increased timeout for potentially long SRTs
424
  response.raise_for_status()
425
 
426
  data = response.json()
@@ -428,15 +356,12 @@ Here is the SRT content to process:
428
  if 'choices' in data and data['choices']:
429
  translated_text = data['choices'][0]['message']['content']
430
 
431
- # Basic validation of the output
432
  if not translated_text or not translated_text.strip():
433
  print("Error: Translation API returned empty content.")
434
  return "Error: Translation API returned empty content."
435
 
436
- # Check if it looks like SRT (starts with a number)
437
  first_line_check = translated_text.strip().split('\n')[0]
438
  if not first_line_check.isdigit():
439
- # Log the problematic response for debugging
440
  print(f"Warning: Translation API response might not be valid SRT. First line: '{first_line_check}'. Response snippet: {translated_text[:200]}...")
441
  return f"Error: Translation API returned unexpected format. Does not start with a sequence number. Response snippet: {translated_text[:200]}..."
442
 
@@ -483,7 +408,7 @@ def add_subtitles_to_video(video_path: str, srt_path: str, output_path: str) ->
483
  'ffmpeg',
484
  '-i', video_path,
485
  '-vf', f"subtitles={srt_path}:force_style='Fontsize=24,FontName=Arial,MarginV=10'",
486
- '-c:a', 'copy', # کپی کردن استریم صوتی بدون تغییر
487
  '-y',
488
  output_path
489
  ]
@@ -599,14 +524,17 @@ def process_video(video_input, url_input, language, embed_subtitles, translate_c
599
 
600
  final_srt_path = None
601
  status_message = ""
602
- processed_segments = [] # This will hold the final, cleaned, and deduplicated segments
603
 
604
  if translate_checkbox:
605
  target_language_name = translation_language_dropdown
606
  print(f"Attempting to translate to: {target_language_name}")
607
 
608
- # Convert initial segments to SRT string for translation
609
- original_srt_content_string = segments_to_srt_string(initial_segments)
 
 
 
610
 
611
  # Translate the SRT content
612
  translated_srt_content_string = translate_srt_content(original_srt_content_string, target_language_name)
@@ -620,11 +548,8 @@ def process_video(video_input, url_input, language, embed_subtitles, translate_c
620
  # Parse the translated SRT content back into segments
621
  parsed_translated_segments = srt_string_to_segments(translated_srt_content_string)
622
 
623
- # Deduplicate text from translated segments
624
- deduplicated_translated_segments = deduplicate_segments(parsed_translated_segments)
625
-
626
- # Clean up timing and ensure minimum durations/gaps
627
- processed_segments = clean_up_segments(deduplicated_translated_segments)
628
 
629
  # Save the final processed segments to a file
630
  translated_srt_path = os.path.join(temp_dir, "translated_subtitles.srt")
@@ -636,12 +561,8 @@ def process_video(video_input, url_input, language, embed_subtitles, translate_c
636
  status_message = f"زیرنویس به {target_language_name} ترجمه شد و پردازش تکمیل شد!"
637
 
638
  else:
639
- # If translation is not checked, use the initial segments
640
- # Deduplicate text from initial segments
641
- deduplicated_initial_segments = deduplicate_segments(initial_segments)
642
-
643
- # Clean up timing and ensure minimum durations/gaps
644
- processed_segments = clean_up_segments(deduplicated_initial_segments)
645
 
646
  # Save the final processed segments to a file
647
  original_srt_path = os.path.join(temp_dir, "subtitles.srt")
@@ -652,30 +573,20 @@ def process_video(video_input, url_input, language, embed_subtitles, translate_c
652
  sessions[session_hash]['status'] = 'completed'
653
  status_message = "پردازش با موفقیت انجام شد!"
654
 
655
- # Ensure final_srt_path is correctly set if not set above (e.g., if translation failed but we continued)
656
- if not final_srt_path and processed_segments:
657
- # This case should ideally not be reached if translation or initial SRT creation worked.
658
- # But as a safeguard:
659
- fallback_srt_path = os.path.join(temp_dir, "fallback_subtitles.srt")
660
- create_srt_file(processed_segments, fallback_srt_path)
661
- final_srt_path = fallback_srt_path
662
- print("Warning: final_srt_path was not set, created a fallback SRT.")
663
-
664
-
665
  output_video = None
666
  if embed_subtitles:
667
  output_video_path = os.path.join(temp_dir, "output_with_subtitles.mp4")
668
- if final_srt_path and os.path.exists(final_srt_path): # Ensure SRT file exists before embedding
669
  if add_subtitles_to_video(video_path, final_srt_path, output_video_path):
670
  output_video = output_video_path
671
  else:
672
- output_video = video_path # Fallback to original video if embedding fails
673
  status_message += " (خطا در چسباندن زیرنویس به ویدیو، ویدیوی اصلی برگردانده شد)"
674
  else:
675
- output_video = video_path # Cannot embed if no SRT file
676
  status_message += " (فایل زیرنویس برای چسباندن یافت نشد)"
677
  else:
678
- output_video = video_path # Return original video if not embedding
679
 
680
  sessions[session_hash]['final_srt_path'] = final_srt_path
681
  sessions[session_hash]['final_video_path'] = output_video
@@ -684,7 +595,6 @@ def process_video(video_input, url_input, language, embed_subtitles, translate_c
684
 
685
  except Exception as e:
686
  print(f"Unexpected error in process_video: {e}")
687
- # Clean up temporary directory and session on critical error
688
  if 'temp_dir' in locals() and os.path.exists(temp_dir):
689
  try:
690
  shutil.rmtree(temp_dir)
@@ -712,7 +622,6 @@ def track_subtitle(request: gr.Request):
712
  segments = session_data.get('segments', [])
713
  result = "زیرنویس ترجمه شده:\n\n"
714
  for i, seg in enumerate(segments, 1):
715
- # Format time to be more readable in tracking output
716
  start_display = f"{seg['start']:.1f}s"
717
  end_display = f"{seg['end']:.1f}s"
718
  result += f"{i}. [{start_display} - {end_display}]: {seg['text']}\n"
 
112
  recognizer.dynamic_energy_adjustment_damping = 0.15
113
  recognizer.dynamic_energy_adjustment_ratio = 1.5
114
  recognizer.pause_threshold = 0.8
115
+ recognizer.non_speaking_duration = 0.5
116
 
117
  try:
118
  duration = get_video_duration(audio_path)
 
121
  return []
122
 
123
  chunk_duration = 10 # ثانیه
124
+ chunk_overlap = 0 # حذف همپوشانی برای جلوگیری از تکرار
125
 
126
+ temp_chunk_dir = tempfile.mkdtemp()
127
 
128
+ # Process chunks without overlap
129
+ for start_time in range(0, int(duration), chunk_duration):
130
  end_time = min(start_time + chunk_duration, duration)
131
 
132
  chunk_filename = f"chunk_{start_time}_{end_time}.wav"
 
263
 
264
  return segments
265
 
266
+ def clean_up_segments(segments: List[Dict], min_gap: float = 0.05) -> List[Dict]:
267
  """
268
+ پاکسازی segment ها برای اطمینان از عدم همپوشانی
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  """
270
  if not segments:
271
  return []
272
 
273
+ # مرتب‌سازی بر اساس زمان شروع
274
  segments.sort(key=lambda x: x['start'])
275
 
276
  processed_segments = []
 
280
  end = segment['end']
281
  text = segment['text']
282
 
283
+ # اطمینان از اینکه segment بعدی شروع نمی‌شود تا قبلی تمام شود
 
 
 
 
 
 
284
  if i < len(segments) - 1:
285
  next_segment_start = segments[i+1]['start']
286
+ # اگر زیرنویس فعلی با بعدی همپوشانی دارد
287
+ if end > next_segment_start - min_gap:
288
+ # زمان پایان را کمی قبل از شروع بعدی تنظیم کن
289
  end = next_segment_start - min_gap
 
 
 
 
 
290
 
291
+ # اطمینان از اینکه زمان پایان بعد از زمان شروع است
292
+ if end <= start:
293
+ end = start + 0.5 # حداقل 0.5 ثانیه
 
294
 
295
  processed_segments.append({
296
+ "start": round(start, 3),
297
  "end": round(end, 3),
298
  "text": text
299
  })
 
304
  """ترجمه محتوای SRT با استفاده از API"""
305
  api_key = os.environ.get("API_KEY")
306
  if not api_key:
 
307
  raise gr.Error("API key for translation not found. Please set it as an environment variable 'API_KEY'.")
308
 
309
  url = "https://api.ohmygpt.com/v1/chat/completions"
 
311
  temperature = 0.7
312
  top_p = 0.9
313
 
 
314
  system_prompt = f"""
315
  You are an AI assistant specialized in professional subtitle translation. Your task is to translate the provided SRT content from its original language into **{target_language_name}**.
316
 
 
320
  2. **Translate to Target Language:** Translate the corrected source text into **{target_language_name}**.
321
  3. **Preserve SRT Format:** Maintain the **exact SRT format**:
322
  * Keep the original sequence numbers (e.g., 1, 2, 3...).
323
+ * **Crucially, preserve the original timestamps** for each segment (start and end times). Do not change them unless absolutely necessary for grammatical correctness or natural flow in the target language, and only make minimal, justified adjustments. The primary goal is to have the translated subtitles appear on screen during the same time intervals as the original subtitles.
324
  * Maintain line breaks within segments.
325
  4. **Natural and Accurate:** Ensure the translated subtitles sound natural, are culturally appropriate, and accurately convey the meaning of the original dialogue.
326
  5. **No Extraneous Text:** **Absolutely do not include any introductory phrases, concluding remarks, explanations, or any text outside of the standard SRT format.** Your output must be a complete and valid SRT file.
 
348
  }
349
 
350
  try:
351
+ response = requests.post(url, headers=headers, json=payload, timeout=180)
352
  response.raise_for_status()
353
 
354
  data = response.json()
 
356
  if 'choices' in data and data['choices']:
357
  translated_text = data['choices'][0]['message']['content']
358
 
 
359
  if not translated_text or not translated_text.strip():
360
  print("Error: Translation API returned empty content.")
361
  return "Error: Translation API returned empty content."
362
 
 
363
  first_line_check = translated_text.strip().split('\n')[0]
364
  if not first_line_check.isdigit():
 
365
  print(f"Warning: Translation API response might not be valid SRT. First line: '{first_line_check}'. Response snippet: {translated_text[:200]}...")
366
  return f"Error: Translation API returned unexpected format. Does not start with a sequence number. Response snippet: {translated_text[:200]}..."
367
 
 
408
  'ffmpeg',
409
  '-i', video_path,
410
  '-vf', f"subtitles={srt_path}:force_style='Fontsize=24,FontName=Arial,MarginV=10'",
411
+ '-c:a', 'copy',
412
  '-y',
413
  output_path
414
  ]
 
524
 
525
  final_srt_path = None
526
  status_message = ""
527
+ processed_segments = []
528
 
529
  if translate_checkbox:
530
  target_language_name = translation_language_dropdown
531
  print(f"Attempting to translate to: {target_language_name}")
532
 
533
+ # Clean up initial segments before translation
534
+ cleaned_initial_segments = clean_up_segments(initial_segments)
535
+
536
+ # Convert to SRT string for translation
537
+ original_srt_content_string = segments_to_srt_string(cleaned_initial_segments)
538
 
539
  # Translate the SRT content
540
  translated_srt_content_string = translate_srt_content(original_srt_content_string, target_language_name)
 
548
  # Parse the translated SRT content back into segments
549
  parsed_translated_segments = srt_string_to_segments(translated_srt_content_string)
550
 
551
+ # Clean up timing to ensure no overlaps
552
+ processed_segments = clean_up_segments(parsed_translated_segments)
 
 
 
553
 
554
  # Save the final processed segments to a file
555
  translated_srt_path = os.path.join(temp_dir, "translated_subtitles.srt")
 
561
  status_message = f"زیرنویس به {target_language_name} ترجمه شد و پردازش تکمیل شد!"
562
 
563
  else:
564
+ # Clean up timing to ensure no overlaps
565
+ processed_segments = clean_up_segments(initial_segments)
 
 
 
 
566
 
567
  # Save the final processed segments to a file
568
  original_srt_path = os.path.join(temp_dir, "subtitles.srt")
 
573
  sessions[session_hash]['status'] = 'completed'
574
  status_message = "پردازش با موفقیت انجام شد!"
575
 
 
 
 
 
 
 
 
 
 
 
576
  output_video = None
577
  if embed_subtitles:
578
  output_video_path = os.path.join(temp_dir, "output_with_subtitles.mp4")
579
+ if final_srt_path and os.path.exists(final_srt_path):
580
  if add_subtitles_to_video(video_path, final_srt_path, output_video_path):
581
  output_video = output_video_path
582
  else:
583
+ output_video = video_path
584
  status_message += " (خطا در چسباندن زیرنویس به ویدیو، ویدیوی اصلی برگردانده شد)"
585
  else:
586
+ output_video = video_path
587
  status_message += " (فایل زیرنویس برای چسباندن یافت نشد)"
588
  else:
589
+ output_video = video_path
590
 
591
  sessions[session_hash]['final_srt_path'] = final_srt_path
592
  sessions[session_hash]['final_video_path'] = output_video
 
595
 
596
  except Exception as e:
597
  print(f"Unexpected error in process_video: {e}")
 
598
  if 'temp_dir' in locals() and os.path.exists(temp_dir):
599
  try:
600
  shutil.rmtree(temp_dir)
 
622
  segments = session_data.get('segments', [])
623
  result = "زیرنویس ترجمه شده:\n\n"
624
  for i, seg in enumerate(segments, 1):
 
625
  start_display = f"{seg['start']:.1f}s"
626
  end_display = f"{seg['end']:.1f}s"
627
  result += f"{i}. [{start_display} - {end_display}]: {seg['text']}\n"