Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,6 +11,9 @@ from faster_whisper import WhisperModel
|
|
| 11 |
from datetime import timedelta
|
| 12 |
from urllib.parse import urlparse
|
| 13 |
|
|
|
|
|
|
|
|
|
|
| 14 |
# -----------------------------
|
| 15 |
# Core subtitle generator
|
| 16 |
# -----------------------------
|
|
@@ -63,7 +66,7 @@ class LinearSubtitleGenerator:
|
|
| 63 |
|
| 64 |
return first_period_idx, last_period_idx
|
| 65 |
|
| 66 |
-
def create_linear_subtitles(self, words):
|
| 67 |
"""
|
| 68 |
Create subtitles with:
|
| 69 |
- First sentence as first subtitle
|
|
@@ -80,7 +83,7 @@ class LinearSubtitleGenerator:
|
|
| 80 |
|
| 81 |
# Edge case: No periods found - use original linear pattern
|
| 82 |
if first_period_idx is None:
|
| 83 |
-
return self._create_basic_linear_subtitles(words)
|
| 84 |
|
| 85 |
# Edge case: Only one sentence (first = last)
|
| 86 |
if first_period_idx == last_period_idx:
|
|
@@ -101,7 +104,9 @@ class LinearSubtitleGenerator:
|
|
| 101 |
|
| 102 |
if middle_start < middle_end:
|
| 103 |
middle_words = words[middle_start:middle_end]
|
| 104 |
-
subtitle_index = self._add_linear_pattern(
|
|
|
|
|
|
|
| 105 |
|
| 106 |
# 3. Last sentence as last subtitle
|
| 107 |
last_sentence_words = words[last_period_idx:total_words]
|
|
@@ -136,8 +141,13 @@ class LinearSubtitleGenerator:
|
|
| 136 |
)
|
| 137 |
)
|
| 138 |
|
| 139 |
-
def _add_linear_pattern(self, subs, words, start_index):
|
| 140 |
-
"""Apply linear pattern (1, 2, 3, 4... words) to words list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
total_words = len(words)
|
| 142 |
index = 0
|
| 143 |
subtitle_index = start_index
|
|
@@ -145,6 +155,8 @@ class LinearSubtitleGenerator:
|
|
| 145 |
|
| 146 |
while index < total_words:
|
| 147 |
planned_size = current_size
|
|
|
|
|
|
|
| 148 |
remaining = total_words - (index + planned_size)
|
| 149 |
next_size = current_size + 1
|
| 150 |
|
|
@@ -178,15 +190,23 @@ class LinearSubtitleGenerator:
|
|
| 178 |
subtitle_index += 1
|
| 179 |
|
| 180 |
# Progress to next size only if we didn't absorb leftovers
|
|
|
|
| 181 |
if planned_size == current_size:
|
| 182 |
-
current_size
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
else:
|
| 184 |
break
|
| 185 |
|
| 186 |
return subtitle_index
|
| 187 |
|
| 188 |
-
def _create_basic_linear_subtitles(self, words):
|
| 189 |
-
"""Fallback: Original linear pattern when no periods found
|
|
|
|
|
|
|
|
|
|
| 190 |
subs = pysrt.SubRipFile()
|
| 191 |
total_words = len(words)
|
| 192 |
index = 0
|
|
@@ -195,6 +215,8 @@ class LinearSubtitleGenerator:
|
|
| 195 |
|
| 196 |
while index < total_words:
|
| 197 |
planned_size = current_size
|
|
|
|
|
|
|
| 198 |
remaining = total_words - (index + planned_size)
|
| 199 |
next_size = current_size + 1
|
| 200 |
|
|
@@ -226,7 +248,10 @@ class LinearSubtitleGenerator:
|
|
| 226 |
subtitle_index += 1
|
| 227 |
|
| 228 |
if planned_size == current_size:
|
| 229 |
-
current_size
|
|
|
|
|
|
|
|
|
|
| 230 |
else:
|
| 231 |
break
|
| 232 |
|
|
@@ -338,7 +363,7 @@ def generate_srt(audio_file, audio_url, model_size):
|
|
| 338 |
yield None, "\n".join(status_messages)
|
| 339 |
|
| 340 |
srt_start = time.time()
|
| 341 |
-
subs = generator.create_linear_subtitles(words)
|
| 342 |
srt_time = time.time() - srt_start
|
| 343 |
|
| 344 |
status_messages.append(f"✓ Created {len(subs)} subtitle segments in {format_time(srt_time)}")
|
|
@@ -436,5 +461,4 @@ with gr.Blocks(title="Subtitle Generator") as demo:
|
|
| 436 |
)
|
| 437 |
|
| 438 |
if __name__ == "__main__":
|
| 439 |
-
demo.launch()
|
| 440 |
-
|
|
|
|
| 11 |
from datetime import timedelta
|
| 12 |
from urllib.parse import urlparse
|
| 13 |
|
| 14 |
+
# Maximum words per linear-pattern subtitle, passed as `max_words` to create_linear_subtitles (set to None to disable the cap)
|
| 15 |
+
DEFAULT_MAX_WORDS = 18
|
| 16 |
+
|
| 17 |
# -----------------------------
|
| 18 |
# Core subtitle generator
|
| 19 |
# -----------------------------
|
|
|
|
| 66 |
|
| 67 |
return first_period_idx, last_period_idx
|
| 68 |
|
| 69 |
+
def create_linear_subtitles(self, words, max_words=None):
|
| 70 |
"""
|
| 71 |
Create subtitles with:
|
| 72 |
- First sentence as first subtitle
|
|
|
|
| 83 |
|
| 84 |
# Edge case: No periods found - use original linear pattern
|
| 85 |
if first_period_idx is None:
|
| 86 |
+
return self._create_basic_linear_subtitles(words, max_words=max_words)
|
| 87 |
|
| 88 |
# Edge case: Only one sentence (first = last)
|
| 89 |
if first_period_idx == last_period_idx:
|
|
|
|
| 104 |
|
| 105 |
if middle_start < middle_end:
|
| 106 |
middle_words = words[middle_start:middle_end]
|
| 107 |
+
subtitle_index = self._add_linear_pattern(
|
| 108 |
+
subs, middle_words, subtitle_index, max_words=max_words
|
| 109 |
+
)
|
| 110 |
|
| 111 |
# 3. Last sentence as last subtitle
|
| 112 |
last_sentence_words = words[last_period_idx:total_words]
|
|
|
|
| 141 |
)
|
| 142 |
)
|
| 143 |
|
| 144 |
+
def _add_linear_pattern(self, subs, words, start_index, max_words=None):
|
| 145 |
+
"""Apply linear pattern (1, 2, 3, 4... words) to words list
|
| 146 |
+
|
| 147 |
+
If `max_words` is provided, no subtitle will contain more than
|
| 148 |
+
`max_words` words. Once the linear size reaches `max_words` it
|
| 149 |
+
will remain at that size for subsequent subtitles.
|
| 150 |
+
"""
|
| 151 |
total_words = len(words)
|
| 152 |
index = 0
|
| 153 |
subtitle_index = start_index
|
|
|
|
| 155 |
|
| 156 |
while index < total_words:
|
| 157 |
planned_size = current_size
|
| 158 |
+
if max_words is not None:
|
| 159 |
+
planned_size = min(planned_size, max_words)
|
| 160 |
remaining = total_words - (index + planned_size)
|
| 161 |
next_size = current_size + 1
|
| 162 |
|
|
|
|
| 190 |
subtitle_index += 1
|
| 191 |
|
| 192 |
# Progress to next size only if we didn't absorb leftovers
|
| 193 |
+
# and we're not already at the configured maximum.
|
| 194 |
if planned_size == current_size:
|
| 195 |
+
if max_words is None or current_size < max_words:
|
| 196 |
+
current_size += 1
|
| 197 |
+
else:
|
| 198 |
+
# stay at max_words for following subtitles
|
| 199 |
+
current_size = max_words
|
| 200 |
else:
|
| 201 |
break
|
| 202 |
|
| 203 |
return subtitle_index
|
| 204 |
|
| 205 |
+
def _create_basic_linear_subtitles(self, words, max_words=None):
|
| 206 |
+
"""Fallback: Original linear pattern when no periods found
|
| 207 |
+
|
| 208 |
+
Honors `max_words` similarly to the linear pattern above.
|
| 209 |
+
"""
|
| 210 |
subs = pysrt.SubRipFile()
|
| 211 |
total_words = len(words)
|
| 212 |
index = 0
|
|
|
|
| 215 |
|
| 216 |
while index < total_words:
|
| 217 |
planned_size = current_size
|
| 218 |
+
if max_words is not None:
|
| 219 |
+
planned_size = min(planned_size, max_words)
|
| 220 |
remaining = total_words - (index + planned_size)
|
| 221 |
next_size = current_size + 1
|
| 222 |
|
|
|
|
| 248 |
subtitle_index += 1
|
| 249 |
|
| 250 |
if planned_size == current_size:
|
| 251 |
+
if max_words is None or current_size < max_words:
|
| 252 |
+
current_size += 1
|
| 253 |
+
else:
|
| 254 |
+
current_size = max_words
|
| 255 |
else:
|
| 256 |
break
|
| 257 |
|
|
|
|
| 363 |
yield None, "\n".join(status_messages)
|
| 364 |
|
| 365 |
srt_start = time.time()
|
| 366 |
+
subs = generator.create_linear_subtitles(words, max_words=DEFAULT_MAX_WORDS)
|
| 367 |
srt_time = time.time() - srt_start
|
| 368 |
|
| 369 |
status_messages.append(f"✓ Created {len(subs)} subtitle segments in {format_time(srt_time)}")
|
|
|
|
| 461 |
)
|
| 462 |
|
| 463 |
if __name__ == "__main__":
|
| 464 |
+
demo.launch()
|
|
|