audio-to-srt

Running

App Files Community

sampleacc-3003 committed on Feb 10

Commit

7ac3eba

verified ·

1 Parent(s): 8b505a3

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -12

app.py CHANGED Viewed

@@ -11,6 +11,9 @@ from faster_whisper import WhisperModel
 from datetime import timedelta
 from urllib.parse import urlparse
 # -----------------------------
 # Core subtitle generator
 # -----------------------------
@@ -63,7 +66,7 @@ class LinearSubtitleGenerator:
         return first_period_idx, last_period_idx
-    def create_linear_subtitles(self, words):
         """
         Create subtitles with:
         - First sentence as first subtitle
@@ -80,7 +83,7 @@ class LinearSubtitleGenerator:
         # Edge case: No periods found - use original linear pattern
         if first_period_idx is None:
-            return self._create_basic_linear_subtitles(words)
         # Edge case: Only one sentence (first = last)
         if first_period_idx == last_period_idx:
@@ -101,7 +104,9 @@ class LinearSubtitleGenerator:
         if middle_start < middle_end:
             middle_words = words[middle_start:middle_end]
-            subtitle_index = self._add_linear_pattern(subs, middle_words, subtitle_index)
         # 3. Last sentence as last subtitle
         last_sentence_words = words[last_period_idx:total_words]
@@ -136,8 +141,13 @@ class LinearSubtitleGenerator:
                 )
             )
-    def _add_linear_pattern(self, subs, words, start_index):
-        """Apply linear pattern (1, 2, 3, 4... words) to words list"""
         total_words = len(words)
         index = 0
         subtitle_index = start_index
@@ -145,6 +155,8 @@ class LinearSubtitleGenerator:
         while index < total_words:
             planned_size = current_size
             remaining = total_words - (index + planned_size)
             next_size = current_size + 1
@@ -178,15 +190,23 @@ class LinearSubtitleGenerator:
                 subtitle_index += 1
             # Progress to next size only if we didn't absorb leftovers
             if planned_size == current_size:
-                current_size += 1
             else:
                 break
         return subtitle_index
-    def _create_basic_linear_subtitles(self, words):
-        """Fallback: Original linear pattern when no periods found"""
         subs = pysrt.SubRipFile()
         total_words = len(words)
         index = 0
@@ -195,6 +215,8 @@ class LinearSubtitleGenerator:
         while index < total_words:
             planned_size = current_size
             remaining = total_words - (index + planned_size)
             next_size = current_size + 1
@@ -226,7 +248,10 @@ class LinearSubtitleGenerator:
             subtitle_index += 1
             if planned_size == current_size:
-                current_size += 1
             else:
                 break
@@ -338,7 +363,7 @@ def generate_srt(audio_file, audio_url, model_size):
         yield None, "\n".join(status_messages)
         srt_start = time.time()
-        subs = generator.create_linear_subtitles(words)
         srt_time = time.time() - srt_start
         status_messages.append(f"✓ Created {len(subs)} subtitle segments in {format_time(srt_time)}")
@@ -436,5 +461,4 @@ with gr.Blocks(title="Subtitle Generator") as demo:
     )
 if __name__ == "__main__":
-    demo.launch()

 from datetime import timedelta
 from urllib.parse import urlparse
+# Maximum words per subtitle (set to None to disable)
+DEFAULT_MAX_WORDS = 18
 # -----------------------------
 # Core subtitle generator
 # -----------------------------
         return first_period_idx, last_period_idx
+    def create_linear_subtitles(self, words, max_words=None):
         """
         Create subtitles with:
         - First sentence as first subtitle
         # Edge case: No periods found - use original linear pattern
         if first_period_idx is None:
+            return self._create_basic_linear_subtitles(words, max_words=max_words)
         # Edge case: Only one sentence (first = last)
         if first_period_idx == last_period_idx:
         if middle_start < middle_end:
             middle_words = words[middle_start:middle_end]
+            subtitle_index = self._add_linear_pattern(
+                subs, middle_words, subtitle_index, max_words=max_words
+            )
         # 3. Last sentence as last subtitle
         last_sentence_words = words[last_period_idx:total_words]
                 )
             )
+    def _add_linear_pattern(self, subs, words, start_index, max_words=None):
+        """Apply linear pattern (1, 2, 3, 4... words) to words list
+        If `max_words` is provided, no subtitle will contain more than
+        `max_words` words. Once the linear size reaches `max_words` it
+        will remain at that size for subsequent subtitles.
+        """
         total_words = len(words)
         index = 0
         subtitle_index = start_index
         while index < total_words:
             planned_size = current_size
+            if max_words is not None:
+                planned_size = min(planned_size, max_words)
             remaining = total_words - (index + planned_size)
             next_size = current_size + 1
                 subtitle_index += 1
             # Progress to next size only if we didn't absorb leftovers
+            # and we're not already at the configured maximum.
             if planned_size == current_size:
+                if max_words is None or current_size < max_words:
+                    current_size += 1
+                else:
+                    # stay at max_words for following subtitles
+                    current_size = max_words
             else:
                 break
         return subtitle_index
+    def _create_basic_linear_subtitles(self, words, max_words=None):
+        """Fallback: Original linear pattern when no periods found
+        Honors `max_words` similarly to the linear pattern above.
+        """
         subs = pysrt.SubRipFile()
         total_words = len(words)
         index = 0
         while index < total_words:
             planned_size = current_size
+            if max_words is not None:
+                planned_size = min(planned_size, max_words)
             remaining = total_words - (index + planned_size)
             next_size = current_size + 1
             subtitle_index += 1
             if planned_size == current_size:
+                if max_words is None or current_size < max_words:
+                    current_size += 1
+                else:
+                    current_size = max_words
             else:
                 break
         yield None, "\n".join(status_messages)
         srt_start = time.time()
+        subs = generator.create_linear_subtitles(words, max_words=DEFAULT_MAX_WORDS)
         srt_time = time.time() - srt_start
         status_messages.append(f"✓ Created {len(subs)} subtitle segments in {format_time(srt_time)}")
     )
 if __name__ == "__main__":
+    demo.launch()