sampleacc-3003 committed on
Commit
7ac3eba
·
verified ·
1 Parent(s): 8b505a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -12
app.py CHANGED
@@ -11,6 +11,9 @@ from faster_whisper import WhisperModel
11
  from datetime import timedelta
12
  from urllib.parse import urlparse
13
 
 
 
 
14
  # -----------------------------
15
  # Core subtitle generator
16
  # -----------------------------
@@ -63,7 +66,7 @@ class LinearSubtitleGenerator:
63
 
64
  return first_period_idx, last_period_idx
65
 
66
- def create_linear_subtitles(self, words):
67
  """
68
  Create subtitles with:
69
  - First sentence as first subtitle
@@ -80,7 +83,7 @@ class LinearSubtitleGenerator:
80
 
81
  # Edge case: No periods found - use original linear pattern
82
  if first_period_idx is None:
83
- return self._create_basic_linear_subtitles(words)
84
 
85
  # Edge case: Only one sentence (first = last)
86
  if first_period_idx == last_period_idx:
@@ -101,7 +104,9 @@ class LinearSubtitleGenerator:
101
 
102
  if middle_start < middle_end:
103
  middle_words = words[middle_start:middle_end]
104
- subtitle_index = self._add_linear_pattern(subs, middle_words, subtitle_index)
 
 
105
 
106
  # 3. Last sentence as last subtitle
107
  last_sentence_words = words[last_period_idx:total_words]
@@ -136,8 +141,13 @@ class LinearSubtitleGenerator:
136
  )
137
  )
138
 
139
- def _add_linear_pattern(self, subs, words, start_index):
140
- """Apply linear pattern (1, 2, 3, 4... words) to words list"""
 
 
 
 
 
141
  total_words = len(words)
142
  index = 0
143
  subtitle_index = start_index
@@ -145,6 +155,8 @@ class LinearSubtitleGenerator:
145
 
146
  while index < total_words:
147
  planned_size = current_size
 
 
148
  remaining = total_words - (index + planned_size)
149
  next_size = current_size + 1
150
 
@@ -178,15 +190,23 @@ class LinearSubtitleGenerator:
178
  subtitle_index += 1
179
 
180
  # Progress to next size only if we didn't absorb leftovers
 
181
  if planned_size == current_size:
182
- current_size += 1
 
 
 
 
183
  else:
184
  break
185
 
186
  return subtitle_index
187
 
188
- def _create_basic_linear_subtitles(self, words):
189
- """Fallback: Original linear pattern when no periods found"""
 
 
 
190
  subs = pysrt.SubRipFile()
191
  total_words = len(words)
192
  index = 0
@@ -195,6 +215,8 @@ class LinearSubtitleGenerator:
195
 
196
  while index < total_words:
197
  planned_size = current_size
 
 
198
  remaining = total_words - (index + planned_size)
199
  next_size = current_size + 1
200
 
@@ -226,7 +248,10 @@ class LinearSubtitleGenerator:
226
  subtitle_index += 1
227
 
228
  if planned_size == current_size:
229
- current_size += 1
 
 
 
230
  else:
231
  break
232
 
@@ -338,7 +363,7 @@ def generate_srt(audio_file, audio_url, model_size):
338
  yield None, "\n".join(status_messages)
339
 
340
  srt_start = time.time()
341
- subs = generator.create_linear_subtitles(words)
342
  srt_time = time.time() - srt_start
343
 
344
  status_messages.append(f"✓ Created {len(subs)} subtitle segments in {format_time(srt_time)}")
@@ -436,5 +461,4 @@ with gr.Blocks(title="Subtitle Generator") as demo:
436
  )
437
 
438
  if __name__ == "__main__":
439
- demo.launch()
440
-
 
11
  from datetime import timedelta
12
  from urllib.parse import urlparse
13
 
14
+ # Maximum words per subtitle (set to None to disable)
15
+ DEFAULT_MAX_WORDS = 18
16
+
17
  # -----------------------------
18
  # Core subtitle generator
19
  # -----------------------------
 
66
 
67
  return first_period_idx, last_period_idx
68
 
69
+ def create_linear_subtitles(self, words, max_words=None):
70
  """
71
  Create subtitles with:
72
  - First sentence as first subtitle
 
83
 
84
  # Edge case: No periods found - use original linear pattern
85
  if first_period_idx is None:
86
+ return self._create_basic_linear_subtitles(words, max_words=max_words)
87
 
88
  # Edge case: Only one sentence (first = last)
89
  if first_period_idx == last_period_idx:
 
104
 
105
  if middle_start < middle_end:
106
  middle_words = words[middle_start:middle_end]
107
+ subtitle_index = self._add_linear_pattern(
108
+ subs, middle_words, subtitle_index, max_words=max_words
109
+ )
110
 
111
  # 3. Last sentence as last subtitle
112
  last_sentence_words = words[last_period_idx:total_words]
 
141
  )
142
  )
143
 
144
+ def _add_linear_pattern(self, subs, words, start_index, max_words=None):
145
+ """Apply linear pattern (1, 2, 3, 4... words) to words list
146
+
147
+ If `max_words` is provided, no subtitle will contain more than
148
+ `max_words` words. Once the linear size reaches `max_words` it
149
+ will remain at that size for subsequent subtitles.
150
+ """
151
  total_words = len(words)
152
  index = 0
153
  subtitle_index = start_index
 
155
 
156
  while index < total_words:
157
  planned_size = current_size
158
+ if max_words is not None:
159
+ planned_size = min(planned_size, max_words)
160
  remaining = total_words - (index + planned_size)
161
  next_size = current_size + 1
162
 
 
190
  subtitle_index += 1
191
 
192
  # Progress to next size only if we didn't absorb leftovers
193
+ # and we're not already at the configured maximum.
194
  if planned_size == current_size:
195
+ if max_words is None or current_size < max_words:
196
+ current_size += 1
197
+ else:
198
+ # stay at max_words for following subtitles
199
+ current_size = max_words
200
  else:
201
  break
202
 
203
  return subtitle_index
204
 
205
+ def _create_basic_linear_subtitles(self, words, max_words=None):
206
+ """Fallback: Original linear pattern when no periods found
207
+
208
+ Honors `max_words` similarly to the linear pattern above.
209
+ """
210
  subs = pysrt.SubRipFile()
211
  total_words = len(words)
212
  index = 0
 
215
 
216
  while index < total_words:
217
  planned_size = current_size
218
+ if max_words is not None:
219
+ planned_size = min(planned_size, max_words)
220
  remaining = total_words - (index + planned_size)
221
  next_size = current_size + 1
222
 
 
248
  subtitle_index += 1
249
 
250
  if planned_size == current_size:
251
+ if max_words is None or current_size < max_words:
252
+ current_size += 1
253
+ else:
254
+ current_size = max_words
255
  else:
256
  break
257
 
 
363
  yield None, "\n".join(status_messages)
364
 
365
  srt_start = time.time()
366
+ subs = generator.create_linear_subtitles(words, max_words=DEFAULT_MAX_WORDS)
367
  srt_time = time.time() - srt_start
368
 
369
  status_messages.append(f"✓ Created {len(subs)} subtitle segments in {format_time(srt_time)}")
 
461
  )
462
 
463
  if __name__ == "__main__":
464
+ demo.launch()