Spaces:

suprimedev
/

zir45

Sleeping

App Files Community

suprimedev committed on Nov 24, 2025

Commit

d6d00a2

verified ·

1 Parent(s): ad2942a

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -124

app.py CHANGED Viewed

@@ -112,7 +112,7 @@ def transcribe_audio_chunked(audio_path: str, language: str = "fa-IR") -> List[D
     recognizer.dynamic_energy_adjustment_damping = 0.15
     recognizer.dynamic_energy_adjustment_ratio = 1.5
     recognizer.pause_threshold = 0.8
-    recognizer.non_speaking_duration = 1.5
     try:
         duration = get_video_duration(audio_path)
@@ -121,11 +121,12 @@ def transcribe_audio_chunked(audio_path: str, language: str = "fa-IR") -> List[D
             return []
         chunk_duration = 10  # ثانیه
-        chunk_overlap = 1.5    # همپوشانی بین chunk ها (برای مثال 1.5 ثانیه)
-        temp_chunk_dir = tempfile.mkdtemp() # دایرکتوری موقت برای chunk ها
-        for start_time in range(0, int(duration), max(1, chunk_duration - int(chunk_overlap))): # Ensure step is at least 1 sec
             end_time = min(start_time + chunk_duration, duration)
             chunk_filename = f"chunk_{start_time}_{end_time}.wav"
@@ -262,73 +263,14 @@ def srt_string_to_segments(srt_string: str) -> List[Dict]:
     return segments
-def find_longest_suffix_prefix_overlap(text1: str, text2: str, min_overlap_chars: int = 5) -> int:
     """
-    Finds the length of the longest common string that is a suffix of text1 and a prefix of text2.
-    Returns the length of the overlap in characters.
-    """
-    # Iterate from the maximum possible overlap length down to min_overlap_chars
-    for k in range(min(len(text1), len(text2)), min_overlap_chars - 1, -1):
-        # Check if the suffix of text1 of length k matches the prefix of text2 of length k
-        if text1[-k:] == text2[:k]:
-            return k
-    # If no overlap of at least min_overlap_chars is found
-    return 0
-def deduplicate_segments(segments: List[Dict], min_overlap_chars: int = 5) -> List[Dict]:
-    """
-    Removes duplicate text at the boundaries of consecutive segments.
-    """
-    if not segments:
-        return []
-    # Ensure segments are sorted by start time
-    segments.sort(key=lambda x: x['start'])
-    deduplicated_segments = []
-    if segments:
-        deduplicated_segments.append(segments[0]) # Start with the first segment
-    for i in range(1, len(segments)):
-        current_segment = segments[i]
-        # Use the last segment added to the deduplicated list for comparison
-        previous_segment = deduplicated_segments[-1]
-        prev_text = previous_segment['text']
-        curr_text = current_segment['text']
-        # Skip if current text is empty or previous text is empty
-        if not curr_text.strip() or not prev_text.strip():
-            deduplicated_segments.append(current_segment)
-            continue
-        overlap_length = find_longest_suffix_prefix_overlap(prev_text, curr_text, min_overlap_chars)
-        if overlap_length > 0:
-            # Trim the overlapping part from the beginning of the current segment's text
-            current_segment['text'] = curr_text[overlap_length:].strip()
-            # If the current segment's text becomes empty after trimming, it might be redundant.
-            # It will be added with empty text, and clean_up_segments will ensure minimum duration.
-        # Add the current segment (potentially modified) to the list
-        deduplicated_segments.append(current_segment)
-    # Re-apply clean_up_segments to ensure timing is valid and durations are met after text modification.
-    # This also handles cases where text might have become empty or very short.
-    return clean_up_segments(deduplicated_segments)
-def clean_up_segments(segments: List[Dict], min_duration: float = 1.0, min_gap: float = 0.1) -> List[Dict]:
-    """
-    Cleans up subtitle segments to ensure minimum duration and minimum gap between consecutive segments.
-    This helps prevent overlapping subtitles and ensures they are visible for a sufficient time.
     """
     if not segments:
         return []
-    # Sort segments by start time to ensure correct processing order
     segments.sort(key=lambda x: x['start'])
     processed_segments = []
@@ -338,32 +280,20 @@ def clean_up_segments(segments: List[Dict], min_duration: float = 1.0, min_gap:
         end = segment['end']
         text = segment['text']
-        # 1. Ensure minimum duration for the segment itself
-        # If the segment is too short, extend its end time.
-        if end - start < min_duration:
-            end = start + min_duration
-        # 2. Ensure minimum gap *after* this segment and *before* the next one
-        # This prevents consecutive subtitles from overlapping or appearing too close.
         if i < len(segments) - 1:
             next_segment_start = segments[i+1]['start']
-            # If the current segment ends too close to the next one starts (considering the gap)
-            if end + min_gap > next_segment_start:
-                # Adjust the current segment's end time to create the minimum gap
                 end = next_segment_start - min_gap
-                # Re-check minimum duration constraint after adjusting end time
-                # If adjusting end time made it too short, set end time based on min_duration
-                if end - start < min_duration:
-                    end = start + min_duration
-        # Final check: ensure end time is not before start time after all adjustments
-        # This can happen in edge cases or if min_gap is too large relative to segment duration.
-        if end < start:
-            end = start + min_duration # Fallback to minimum duration
         processed_segments.append({
-            "start": round(start, 3), # Round to milliseconds for SRT
             "end": round(end, 3),
             "text": text
         })
@@ -374,7 +304,6 @@ def translate_srt_content(srt_content_string: str, target_language_name: str) ->
     """ترجمه محتوای SRT با استفاده از API"""
     api_key = os.environ.get("API_KEY")
     if not api_key:
-        # Raising a Gradio Error will display it nicely in the UI
         raise gr.Error("API key for translation not found. Please set it as an environment variable 'API_KEY'.")
     url = "https://api.ohmygpt.com/v1/chat/completions"
@@ -382,7 +311,6 @@ def translate_srt_content(srt_content_string: str, target_language_name: str) ->
     temperature = 0.7
     top_p = 0.9
-    # پرامپت حرفه‌ای و دقیق برای ترجمه زیرنویس با تاکید بر حفظ زمان‌بندی
     system_prompt = f"""
 You are an AI assistant specialized in professional subtitle translation. Your task is to translate the provided SRT content from its original language into **{target_language_name}**.
@@ -392,7 +320,7 @@ You are an AI assistant specialized in professional subtitle translation. Your t
 2.  **Translate to Target Language:** Translate the corrected source text into **{target_language_name}**.
 3.  **Preserve SRT Format:** Maintain the **exact SRT format**:
     *   Keep the original sequence numbers (e.g., 1, 2, 3...).
-    *   **Crucially, preserve the original timestamps** for each segment (start and end times). Do not change them unless absolutely necessary for grammatical correctness or natural flow in the target language, and only make minimal, justified adjustments. The primary goal is to have the translated subtitles appear on screen during the same time intervals as the original subtitles. If a character speaks slowly or with pauses, ensure the translated text still fits within the original time bounds as closely as possible.
     *   Maintain line breaks within segments.
 4.  **Natural and Accurate:** Ensure the translated subtitles sound natural, are culturally appropriate, and accurately convey the meaning of the original dialogue.
 5.  **No Extraneous Text:** **Absolutely do not include any introductory phrases, concluding remarks, explanations, or any text outside of the standard SRT format.** Your output must be a complete and valid SRT file.
@@ -420,7 +348,7 @@ Here is the SRT content to process:
     }
     try:
-        response = requests.post(url, headers=headers, json=payload, timeout=180) # Increased timeout for potentially long SRTs
         response.raise_for_status()
         data = response.json()
@@ -428,15 +356,12 @@ Here is the SRT content to process:
         if 'choices' in data and data['choices']:
             translated_text = data['choices'][0]['message']['content']
-            # Basic validation of the output
             if not translated_text or not translated_text.strip():
                  print("Error: Translation API returned empty content.")
                  return "Error: Translation API returned empty content."
-            # Check if it looks like SRT (starts with a number)
             first_line_check = translated_text.strip().split('\n')[0]
             if not first_line_check.isdigit():
-                 # Log the problematic response for debugging
                  print(f"Warning: Translation API response might not be valid SRT. First line: '{first_line_check}'. Response snippet: {translated_text[:200]}...")
                  return f"Error: Translation API returned unexpected format. Does not start with a sequence number. Response snippet: {translated_text[:200]}..."
@@ -483,7 +408,7 @@ def add_subtitles_to_video(video_path: str, srt_path: str, output_path: str) ->
             'ffmpeg',
             '-i', video_path,
             '-vf', f"subtitles={srt_path}:force_style='Fontsize=24,FontName=Arial,MarginV=10'",
-            '-c:a', 'copy', # کپی کردن استریم صوتی بدون تغییر
             '-y',
             output_path
         ]
@@ -599,14 +524,17 @@ def process_video(video_input, url_input, language, embed_subtitles, translate_c
         final_srt_path = None
         status_message = ""
-        processed_segments = [] # This will hold the final, cleaned, and deduplicated segments
         if translate_checkbox:
             target_language_name = translation_language_dropdown
             print(f"Attempting to translate to: {target_language_name}")
-            # Convert initial segments to SRT string for translation
-            original_srt_content_string = segments_to_srt_string(initial_segments)
             # Translate the SRT content
             translated_srt_content_string = translate_srt_content(original_srt_content_string, target_language_name)
@@ -620,11 +548,8 @@ def process_video(video_input, url_input, language, embed_subtitles, translate_c
                 # Parse the translated SRT content back into segments
                 parsed_translated_segments = srt_string_to_segments(translated_srt_content_string)
-                # Deduplicate text from translated segments
-                deduplicated_translated_segments = deduplicate_segments(parsed_translated_segments)
-                # Clean up timing and ensure minimum durations/gaps
-                processed_segments = clean_up_segments(deduplicated_translated_segments)
                 # Save the final processed segments to a file
                 translated_srt_path = os.path.join(temp_dir, "translated_subtitles.srt")
@@ -636,12 +561,8 @@ def process_video(video_input, url_input, language, embed_subtitles, translate_c
                 status_message = f"زیرنویس به {target_language_name} ترجمه شد و پردازش تکمیل شد!"
         else:
-            # If translation is not checked, use the initial segments
-            # Deduplicate text from initial segments
-            deduplicated_initial_segments = deduplicate_segments(initial_segments)
-            # Clean up timing and ensure minimum durations/gaps
-            processed_segments = clean_up_segments(deduplicated_initial_segments)
             # Save the final processed segments to a file
             original_srt_path = os.path.join(temp_dir, "subtitles.srt")
@@ -652,30 +573,20 @@ def process_video(video_input, url_input, language, embed_subtitles, translate_c
             sessions[session_hash]['status'] = 'completed'
             status_message = "پردازش با موفقیت انجام شد!"
-        # Ensure final_srt_path is correctly set if not set above (e.g., if translation failed but we continued)
-        if not final_srt_path and processed_segments:
-             # This case should ideally not be reached if translation or initial SRT creation worked.
-             # But as a safeguard:
-             fallback_srt_path = os.path.join(temp_dir, "fallback_subtitles.srt")
-             create_srt_file(processed_segments, fallback_srt_path)
-             final_srt_path = fallback_srt_path
-             print("Warning: final_srt_path was not set, created a fallback SRT.")
         output_video = None
         if embed_subtitles:
             output_video_path = os.path.join(temp_dir, "output_with_subtitles.mp4")
-            if final_srt_path and os.path.exists(final_srt_path): # Ensure SRT file exists before embedding
                 if add_subtitles_to_video(video_path, final_srt_path, output_video_path):
                     output_video = output_video_path
                 else:
-                    output_video = video_path # Fallback to original video if embedding fails
                     status_message += " (خطا در چسباندن زیرنویس به ویدیو، ویدیوی اصلی برگردانده شد)"
             else:
-                 output_video = video_path # Cannot embed if no SRT file
                  status_message += " (فایل زیرنویس برای چسباندن یافت نشد)"
         else:
-            output_video = video_path # Return original video if not embedding
         sessions[session_hash]['final_srt_path'] = final_srt_path
         sessions[session_hash]['final_video_path'] = output_video
@@ -684,7 +595,6 @@ def process_video(video_input, url_input, language, embed_subtitles, translate_c
     except Exception as e:
         print(f"Unexpected error in process_video: {e}")
-        # Clean up temporary directory and session on critical error
         if 'temp_dir' in locals() and os.path.exists(temp_dir):
             try:
                 shutil.rmtree(temp_dir)
@@ -712,7 +622,6 @@ def track_subtitle(request: gr.Request):
         segments = session_data.get('segments', [])
         result = "زیرنویس ترجمه شده:\n\n"
         for i, seg in enumerate(segments, 1):
-            # Format time to be more readable in tracking output
             start_display = f"{seg['start']:.1f}s"
             end_display = f"{seg['end']:.1f}s"
             result += f"{i}. [{start_display} - {end_display}]: {seg['text']}\n"

     recognizer.dynamic_energy_adjustment_damping = 0.15
     recognizer.dynamic_energy_adjustment_ratio = 1.5
     recognizer.pause_threshold = 0.8
+    recognizer.non_speaking_duration = 0.5
     try:
         duration = get_video_duration(audio_path)
             return []
         chunk_duration = 10  # ثانیه
+        chunk_overlap = 0  # حذف همپوشانی برای جلوگیری از تکرار
+        temp_chunk_dir = tempfile.mkdtemp()
+        # Process chunks without overlap
+        for start_time in range(0, int(duration), chunk_duration):
             end_time = min(start_time + chunk_duration, duration)
             chunk_filename = f"chunk_{start_time}_{end_time}.wav"
     return segments
+def clean_up_segments(segments: List[Dict], min_gap: float = 0.05) -> List[Dict]:
     """
+    پاکسازی segment ها برای اطمینان از عدم همپوشانی
     """
     if not segments:
         return []
+    # مرتب‌سازی بر اساس زمان شروع
     segments.sort(key=lambda x: x['start'])
     processed_segments = []
         end = segment['end']
         text = segment['text']
+        # اطمینان از اینکه segment بعدی شروع نمی‌شود تا قبلی تمام شود
         if i < len(segments) - 1:
             next_segment_start = segments[i+1]['start']
+            # اگر زیرنویس فعلی با بعدی همپوشانی دارد
+            if end > next_segment_start - min_gap:
+                # زمان پایان را کمی قبل از شروع بعدی تنظیم کن
                 end = next_segment_start - min_gap
+        # اطمینان از اینکه زمان پایان بعد از زمان شروع است
+        if end <= start:
+            end = start + 0.5  # حداقل 0.5 ثانیه
         processed_segments.append({
+            "start": round(start, 3),
             "end": round(end, 3),
             "text": text
         })
     """ترجمه محتوای SRT با استفاده از API"""
     api_key = os.environ.get("API_KEY")
     if not api_key:
         raise gr.Error("API key for translation not found. Please set it as an environment variable 'API_KEY'.")
     url = "https://api.ohmygpt.com/v1/chat/completions"
     temperature = 0.7
     top_p = 0.9
     system_prompt = f"""
 You are an AI assistant specialized in professional subtitle translation. Your task is to translate the provided SRT content from its original language into **{target_language_name}**.
 2.  **Translate to Target Language:** Translate the corrected source text into **{target_language_name}**.
 3.  **Preserve SRT Format:** Maintain the **exact SRT format**:
     *   Keep the original sequence numbers (e.g., 1, 2, 3...).
+    *   **Crucially, preserve the original timestamps** for each segment (start and end times). Do not change them unless absolutely necessary for grammatical correctness or natural flow in the target language, and only make minimal, justified adjustments. The primary goal is to have the translated subtitles appear on screen during the same time intervals as the original subtitles.
     *   Maintain line breaks within segments.
 4.  **Natural and Accurate:** Ensure the translated subtitles sound natural, are culturally appropriate, and accurately convey the meaning of the original dialogue.
 5.  **No Extraneous Text:** **Absolutely do not include any introductory phrases, concluding remarks, explanations, or any text outside of the standard SRT format.** Your output must be a complete and valid SRT file.
     }
     try:
+        response = requests.post(url, headers=headers, json=payload, timeout=180)
         response.raise_for_status()
         data = response.json()
         if 'choices' in data and data['choices']:
             translated_text = data['choices'][0]['message']['content']
             if not translated_text or not translated_text.strip():
                  print("Error: Translation API returned empty content.")
                  return "Error: Translation API returned empty content."
             first_line_check = translated_text.strip().split('\n')[0]
             if not first_line_check.isdigit():
                  print(f"Warning: Translation API response might not be valid SRT. First line: '{first_line_check}'. Response snippet: {translated_text[:200]}...")
                  return f"Error: Translation API returned unexpected format. Does not start with a sequence number. Response snippet: {translated_text[:200]}..."
             'ffmpeg',
             '-i', video_path,
             '-vf', f"subtitles={srt_path}:force_style='Fontsize=24,FontName=Arial,MarginV=10'",
+            '-c:a', 'copy',
             '-y',
             output_path
         ]
         final_srt_path = None
         status_message = ""
+        processed_segments = []
         if translate_checkbox:
             target_language_name = translation_language_dropdown
             print(f"Attempting to translate to: {target_language_name}")
+            # Clean up initial segments before translation
+            cleaned_initial_segments = clean_up_segments(initial_segments)
+            # Convert to SRT string for translation
+            original_srt_content_string = segments_to_srt_string(cleaned_initial_segments)
             # Translate the SRT content
             translated_srt_content_string = translate_srt_content(original_srt_content_string, target_language_name)
                 # Parse the translated SRT content back into segments
                 parsed_translated_segments = srt_string_to_segments(translated_srt_content_string)
+                # Clean up timing to ensure no overlaps
+                processed_segments = clean_up_segments(parsed_translated_segments)
                 # Save the final processed segments to a file
                 translated_srt_path = os.path.join(temp_dir, "translated_subtitles.srt")
                 status_message = f"زیرنویس به {target_language_name} ترجمه شد و پردازش تکمیل شد!"
         else:
+            # Clean up timing to ensure no overlaps
+            processed_segments = clean_up_segments(initial_segments)
             # Save the final processed segments to a file
             original_srt_path = os.path.join(temp_dir, "subtitles.srt")
             sessions[session_hash]['status'] = 'completed'
             status_message = "پردازش با موفقیت انجام شد!"
         output_video = None
         if embed_subtitles:
             output_video_path = os.path.join(temp_dir, "output_with_subtitles.mp4")
+            if final_srt_path and os.path.exists(final_srt_path):
                 if add_subtitles_to_video(video_path, final_srt_path, output_video_path):
                     output_video = output_video_path
                 else:
+                    output_video = video_path
                     status_message += " (خطا در چسباندن زیرنویس به ویدیو، ویدیوی اصلی برگردانده شد)"
             else:
+                 output_video = video_path
                  status_message += " (فایل زیرنویس برای چسباندن یافت نشد)"
         else:
+            output_video = video_path
         sessions[session_hash]['final_srt_path'] = final_srt_path
         sessions[session_hash]['final_video_path'] = output_video
     except Exception as e:
         print(f"Unexpected error in process_video: {e}")
         if 'temp_dir' in locals() and os.path.exists(temp_dir):
             try:
                 shutil.rmtree(temp_dir)
         segments = session_data.get('segments', [])
         result = "زیرنویس ترجمه شده:\n\n"
         for i, seg in enumerate(segments, 1):
             start_display = f"{seg['start']:.1f}s"
             end_display = f"{seg['end']:.1f}s"
             result += f"{i}. [{start_display} - {end_display}]: {seg['text']}\n"