audio-to-srt

Sleeping

App Files Files Community

sampleacc-3003 committed 18 days ago

Commit

8b505a3

verified ·

1 Parent(s): 5ea61b6

Update app.py

Browse files

Files changed (1) hide show

app.py +294 -47

app.py CHANGED Viewed

@@ -6,11 +6,11 @@ import gradio as gr
 import pysrt
 import requests
 import tempfile
 from faster_whisper import WhisperModel
 from datetime import timedelta
 from urllib.parse import urlparse
 # -----------------------------
 # Core subtitle generator
 # -----------------------------
@@ -45,27 +45,117 @@ class LinearSubtitleGenerator:
                 })
         return words
     def create_linear_subtitles(self, words):
         subs = pysrt.SubRipFile()
         total_words = len(words)
-        index = 0
         subtitle_index = 1
-        current_size = 1  # 1,2,3,4,...
         while index < total_words:
             planned_size = current_size
             remaining = total_words - (index + planned_size)
             next_size = current_size + 1
-            # absorb leftovers to avoid tiny last subtitle
             if remaining > 0 and remaining < next_size:
                 planned_size += remaining
             subtitle_words = []
             start_time = None
             end_time = None
             for _ in range(planned_size):
                 if index >= total_words:
                     break
@@ -75,7 +165,56 @@ class LinearSubtitleGenerator:
                     start_time = w["start"]
                 end_time = w["end"]
                 index += 1
             subs.append(
                 pysrt.SubRipItem(
                     index=subtitle_index,
@@ -85,12 +224,12 @@ class LinearSubtitleGenerator:
                 )
             )
             subtitle_index += 1
             if planned_size == current_size:
                 current_size += 1
             else:
                 break
         return subs
     def _to_time(self, seconds):
@@ -102,10 +241,9 @@ class LinearSubtitleGenerator:
             milliseconds=td.microseconds // 1000
         )
-# -----------------------------
-# Helper: download audio from URL
-# -----------------------------
 def download_audio(url: str) -> str:
     parsed = urlparse(url)
     if parsed.scheme not in ("http", "https"):
@@ -123,41 +261,130 @@ def download_audio(url: str) -> str:
     tmp.close()
     return tmp.name
-# -----------------------------
-# Gradio callable function
-# -----------------------------
-def generate_srt(audio_file, audio_url, model_size):
-    # exactly one input must be provided
-    if bool(audio_file) == bool(audio_url):
-        raise gr.Error(
-            "Please provide EITHER an audio file OR an audio URL (not both)."
-        )
-    if audio_url:
-        audio_path = download_audio(audio_url)
     else:
-        audio_path = audio_file
-    generator = LinearSubtitleGenerator(model_size)
-    segments = generator.transcribe(audio_path)
-    words = generator.extract_words(segments)
-    subs = generator.create_linear_subtitles(words)
-    out = tempfile.NamedTemporaryFile(delete=False, suffix=".srt")
-    subs.save(out.name, encoding="utf-8")
-    return out.name
-# -----------------------------
-# Gradio UI (UNCHANGED)
-# -----------------------------
 with gr.Blocks(title="Subtitle Generator") as demo:
     gr.Markdown(
         """
-        # srt generator
         """
     )
@@ -178,16 +405,36 @@ with gr.Blocks(title="Subtitle Generator") as demo:
         label="Whisper Model"
     )
-    generate_btn = gr.Button("Generate SRT")
     output_file = gr.File(label="Download SRT")
     generate_btn.click(
         fn=generate_srt,
         inputs=[audio_file, audio_url, model_choice],
-        outputs=output_file
     )
 if __name__ == "__main__":
-    demo.launch(mcp_server=True)

 import pysrt
 import requests
 import tempfile
+import time
 from faster_whisper import WhisperModel
 from datetime import timedelta
 from urllib.parse import urlparse
 # -----------------------------
 # Core subtitle generator
 # -----------------------------
                 })
         return words
+    def find_sentence_boundaries(self, words):
+        """
+        Find first and last sentence boundaries based on periods.
+        Returns: (first_period_idx, last_period_idx)
+        """
+        first_period_idx = None
+        last_period_idx = None
+        for idx, word_data in enumerate(words):
+            word = word_data["word"]
+            # Check if word ends with period (and not abbreviation)
+            if word.endswith('.') or word.endswith('!') or word.endswith('?'):
+                if first_period_idx is None:
+                    first_period_idx = idx
+                last_period_idx = idx
+        return first_period_idx, last_period_idx
     def create_linear_subtitles(self, words):
+        """
+        Create subtitles with:
+        - First sentence as first subtitle
+        - Middle content with linear pattern (1, 2, 3, 4... words)
+        - Last sentence as last subtitle
+        """
         subs = pysrt.SubRipFile()
+        if not words:
+            return subs
         total_words = len(words)
+        first_period_idx, last_period_idx = self.find_sentence_boundaries(words)
+        # Edge case: No periods found - use original linear pattern
+        if first_period_idx is None:
+            return self._create_basic_linear_subtitles(words)
+        # Edge case: Only one sentence (first = last)
+        if first_period_idx == last_period_idx:
+            # Single sentence becomes single subtitle
+            self._add_subtitle(subs, 1, words, 0, total_words)
+            return subs
         subtitle_index = 1
+        # 1. First sentence as first subtitle
+        first_sentence_words = words[0:first_period_idx + 1]
+        self._add_subtitle(subs, subtitle_index, first_sentence_words, 0, len(first_sentence_words))
+        subtitle_index += 1
+        # 2. Middle content with linear pattern
+        middle_start = first_period_idx + 1
+        middle_end = last_period_idx
+        if middle_start < middle_end:
+            middle_words = words[middle_start:middle_end]
+            subtitle_index = self._add_linear_pattern(subs, middle_words, subtitle_index)
+        # 3. Last sentence as last subtitle
+        last_sentence_words = words[last_period_idx:total_words]
+        if last_sentence_words:
+            self._add_subtitle(subs, subtitle_index, last_sentence_words, 0, len(last_sentence_words))
+        return subs
+    def _add_subtitle(self, subs, index, words, start_idx, end_idx):
+        """Helper to add a single subtitle from word range"""
+        if start_idx >= end_idx or start_idx >= len(words):
+            return
+        subtitle_words = []
+        start_time = None
+        end_time = None
+        for i in range(start_idx, min(end_idx, len(words))):
+            w = words[i]
+            subtitle_words.append(w["word"])
+            if start_time is None:
+                start_time = w["start"]
+            end_time = w["end"]
+        if subtitle_words:
+            subs.append(
+                pysrt.SubRipItem(
+                    index=index,
+                    start=self._to_time(start_time),
+                    end=self._to_time(end_time),
+                    text=" ".join(subtitle_words)
+                )
+            )
+    def _add_linear_pattern(self, subs, words, start_index):
+        """Apply linear pattern (1, 2, 3, 4... words) to words list"""
+        total_words = len(words)
+        index = 0
+        subtitle_index = start_index
+        current_size = 1
         while index < total_words:
             planned_size = current_size
             remaining = total_words - (index + planned_size)
             next_size = current_size + 1
+            # Absorb leftovers to avoid tiny last subtitle
             if remaining > 0 and remaining < next_size:
                 planned_size += remaining
             subtitle_words = []
             start_time = None
             end_time = None
             for _ in range(planned_size):
                 if index >= total_words:
                     break
                     start_time = w["start"]
                 end_time = w["end"]
                 index += 1
+            if subtitle_words:
+                subs.append(
+                    pysrt.SubRipItem(
+                        index=subtitle_index,
+                        start=self._to_time(start_time),
+                        end=self._to_time(end_time),
+                        text=" ".join(subtitle_words)
+                    )
+                )
+                subtitle_index += 1
+            # Progress to next size only if we didn't absorb leftovers
+            if planned_size == current_size:
+                current_size += 1
+            else:
+                break
+        return subtitle_index
+    def _create_basic_linear_subtitles(self, words):
+        """Fallback: Original linear pattern when no periods found"""
+        subs = pysrt.SubRipFile()
+        total_words = len(words)
+        index = 0
+        subtitle_index = 1
+        current_size = 1
+        while index < total_words:
+            planned_size = current_size
+            remaining = total_words - (index + planned_size)
+            next_size = current_size + 1
+            if remaining > 0 and remaining < next_size:
+                planned_size += remaining
+            subtitle_words = []
+            start_time = None
+            end_time = None
+            for _ in range(planned_size):
+                if index >= total_words:
+                    break
+                w = words[index]
+                subtitle_words.append(w["word"])
+                if start_time is None:
+                    start_time = w["start"]
+                end_time = w["end"]
+                index += 1
             subs.append(
                 pysrt.SubRipItem(
                     index=subtitle_index,
                 )
             )
             subtitle_index += 1
             if planned_size == current_size:
                 current_size += 1
             else:
                 break
         return subs
     def _to_time(self, seconds):
             milliseconds=td.microseconds // 1000
         )
+    # -----------------------------
+    # Helper: download audio from URL
+    # -----------------------------
 def download_audio(url: str) -> str:
     parsed = urlparse(url)
     if parsed.scheme not in ("http", "https"):
     tmp.close()
     return tmp.name
+    # -----------------------------
+    # Helper: format elapsed time
+    # -----------------------------
def format_time(seconds):
    """
    Render an elapsed duration in seconds as a short human-readable string.

    - under a minute:  one decimal place, e.g. "12.3s"
    - under an hour:   whole minutes and seconds, e.g. "2m 5s"
    - an hour or more: whole hours and minutes, e.g. "1h 1m"
    """
    if seconds < 60:
        return f"{seconds:.1f}s"

    if seconds < 3600:
        whole_minutes, leftover_seconds = divmod(seconds, 60)
        return f"{int(whole_minutes)}m {int(leftover_seconds)}s"

    whole_hours, leftover = divmod(seconds, 3600)
    return f"{int(whole_hours)}h {int(leftover // 60)}m"
+    # -----------------------------
+    # Gradio callable function with status updates
+    # -----------------------------
def generate_srt(audio_file, audio_url, model_size):
    """
    Generate an SRT file from an uploaded audio file or an audio URL.

    This is a generator: it yields `(srt_path, status_text)` tuples so the UI
    can show progress. `srt_path` is None until the final successful yield,
    and `status_text` is the log accumulated so far (or an error message).

    Args:
        audio_file: path of the uploaded audio file, or a falsy value.
        audio_url: URL of the audio to download, or a falsy value.
        model_size: Whisper model size passed to LinearSubtitleGenerator.

    Exactly one of `audio_file` / `audio_url` must be provided. Errors are
    reported through the yielded status text rather than raised.
    """
    import os  # local import: only needed for the temp-file cleanup below

    start_time = time.time()
    status_messages = []
    downloaded_path = None  # set only when this call downloaded the audio

    def log(message):
        """Record one status line and return the full status text so far."""
        status_messages.append(message)
        return "\n".join(status_messages)

    try:
        # Validation. A plain `return value` in a generator never reaches the
        # consumer, so the error must be yielded before returning.
        if bool(audio_file) == bool(audio_url):
            error_msg = "❌ Error: Please provide EITHER an audio file OR an audio URL (not both)."
            yield None, error_msg
            return

        yield None, log("🚀 Starting subtitle generation...")

        # Step 1: Get audio file
        if audio_url:
            yield None, log("📥 Downloading audio from URL...")
            download_start = time.time()
            audio_path = download_audio(audio_url)
            downloaded_path = audio_path
            download_time = time.time() - download_start
            yield None, log(f"✓ Download completed in {format_time(download_time)}")
        else:
            audio_path = audio_file
            yield None, log("✓ Audio file loaded")

        # Step 2: Load model
        yield None, log(f"🧠 Loading Whisper model ({model_size})...")
        model_start = time.time()
        generator = LinearSubtitleGenerator(model_size)
        model_time = time.time() - model_start
        yield None, log(f"✓ Model loaded in {format_time(model_time)}")

        # Step 3: Transcribe
        yield None, log("🎤 Transcribing audio (this may take a while)...")
        transcribe_start = time.time()
        segments = generator.transcribe(audio_path)
        words = generator.extract_words(segments)
        transcribe_time = time.time() - transcribe_start
        status_messages.append(f"✓ Transcription completed in {format_time(transcribe_time)}")
        yield None, log(f"📊 Extracted {len(words)} words")

        # Step 4: Generate subtitles
        yield None, log("📝 Generating SRT subtitles...")
        srt_start = time.time()
        subs = generator.create_linear_subtitles(words)
        srt_time = time.time() - srt_start
        yield None, log(f"✓ Created {len(subs)} subtitle segments in {format_time(srt_time)}")

        # Step 5: Save file
        yield None, log("💾 Saving SRT file...")
        # Reserve a unique path and close the handle before pysrt writes to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as out:
            srt_path = out.name
        subs.save(srt_path, encoding="utf-8")

        # Final success message
        total_time = time.time() - start_time
        status_messages.append(f"✅ SUCCESS! Total time: {format_time(total_time)}")
        yield srt_path, log("📁 SRT file ready for download")

    except requests.RequestException as e:
        error_msg = f"❌ Network Error: Failed to download audio\nDetails: {str(e)}"
        yield None, error_msg
    except ValueError as e:
        error_msg = f"❌ Validation Error: {str(e)}"
        yield None, error_msg
    except Exception as e:
        # Top-level UI boundary: surface any failure as a status message.
        total_time = time.time() - start_time
        error_msg = f"❌ Error occurred after {format_time(total_time)}\nDetails: {str(e)}"
        yield None, error_msg
    finally:
        # Best-effort removal of the audio this call downloaded; the user's
        # uploaded file is never touched.
        if downloaded_path is not None:
            try:
                os.remove(downloaded_path)
            except OSError:
                pass
+    # -----------------------------
+    # Gradio UI with Status Bar
+    # -----------------------------
 with gr.Blocks(title="Subtitle Generator") as demo:
     gr.Markdown(
         """
+        # SRT Generator with Smart Sentence Handling
+        **Features:**
+        - First sentence → First subtitle
+        - Middle content → Linear pattern (1, 2, 3, 4... words)
+        - Last sentence → Last subtitle
         """
     )
         label="Whisper Model"
     )
+    generate_btn = gr.Button("Generate SRT", variant="primary")
+    # Status display
+    status_box = gr.Textbox(
+        label="Status",
+        placeholder="Status updates will appear here...",
+        lines=10,
+        max_lines=15,
+        interactive=False
+    )
     output_file = gr.File(label="Download SRT")
+    # Event handler
     generate_btn.click(
         fn=generate_srt,
         inputs=[audio_file, audio_url, model_choice],
+        outputs=[output_file, status_box]
     )
+    gr.Markdown(
+        """
+        ---
+        **Tips:**
+        - Larger models (small/medium) are more accurate but slower
+        - For best results, use clear audio with minimal background noise
+        - Processing time depends on audio length and model size
+        """
+    )
 if __name__ == "__main__":
+    demo.launch()