sampleacc-3003 committed on
Commit
8b505a3
·
verified ·
1 Parent(s): 5ea61b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +294 -47
app.py CHANGED
@@ -6,11 +6,11 @@ import gradio as gr
6
  import pysrt
7
  import requests
8
  import tempfile
 
9
  from faster_whisper import WhisperModel
10
  from datetime import timedelta
11
  from urllib.parse import urlparse
12
 
13
-
14
  # -----------------------------
15
  # Core subtitle generator
16
  # -----------------------------
@@ -45,27 +45,117 @@ class LinearSubtitleGenerator:
45
  })
46
  return words
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def create_linear_subtitles(self, words):
 
 
 
 
 
 
49
  subs = pysrt.SubRipFile()
50
-
 
 
 
51
  total_words = len(words)
52
- index = 0
 
 
 
 
 
 
 
 
 
 
 
53
  subtitle_index = 1
54
- current_size = 1 # 1,2,3,4,...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  while index < total_words:
57
  planned_size = current_size
58
  remaining = total_words - (index + planned_size)
59
  next_size = current_size + 1
60
-
61
- # absorb leftovers to avoid tiny last subtitle
62
  if remaining > 0 and remaining < next_size:
63
  planned_size += remaining
64
-
65
  subtitle_words = []
66
  start_time = None
67
  end_time = None
68
-
69
  for _ in range(planned_size):
70
  if index >= total_words:
71
  break
@@ -75,7 +165,56 @@ class LinearSubtitleGenerator:
75
  start_time = w["start"]
76
  end_time = w["end"]
77
  index += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  subs.append(
80
  pysrt.SubRipItem(
81
  index=subtitle_index,
@@ -85,12 +224,12 @@ class LinearSubtitleGenerator:
85
  )
86
  )
87
  subtitle_index += 1
88
-
89
  if planned_size == current_size:
90
  current_size += 1
91
  else:
92
  break
93
-
94
  return subs
95
 
96
  def _to_time(self, seconds):
@@ -102,10 +241,9 @@ class LinearSubtitleGenerator:
102
  milliseconds=td.microseconds // 1000
103
  )
104
 
105
-
106
- # -----------------------------
107
- # Helper: download audio from URL
108
- # -----------------------------
109
  def download_audio(url: str) -> str:
110
  parsed = urlparse(url)
111
  if parsed.scheme not in ("http", "https"):
@@ -123,41 +261,130 @@ def download_audio(url: str) -> str:
123
  tmp.close()
124
  return tmp.name
125
 
126
-
127
- # -----------------------------
128
- # Gradio callable function
129
- # -----------------------------
130
- def generate_srt(audio_file, audio_url, model_size):
131
- # exactly one input must be provided
132
- if bool(audio_file) == bool(audio_url):
133
- raise gr.Error(
134
- "Please provide EITHER an audio file OR an audio URL (not both)."
135
- )
136
-
137
- if audio_url:
138
- audio_path = download_audio(audio_url)
139
  else:
140
- audio_path = audio_file
141
-
142
- generator = LinearSubtitleGenerator(model_size)
143
-
144
- segments = generator.transcribe(audio_path)
145
- words = generator.extract_words(segments)
146
- subs = generator.create_linear_subtitles(words)
147
-
148
- out = tempfile.NamedTemporaryFile(delete=False, suffix=".srt")
149
- subs.save(out.name, encoding="utf-8")
150
-
151
- return out.name
152
-
153
 
154
- # -----------------------------
155
- # Gradio UI (UNCHANGED)
156
- # -----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  with gr.Blocks(title="Subtitle Generator") as demo:
158
  gr.Markdown(
159
  """
160
- # srt generator
 
 
 
 
 
161
  """
162
  )
163
 
@@ -178,16 +405,36 @@ with gr.Blocks(title="Subtitle Generator") as demo:
178
  label="Whisper Model"
179
  )
180
 
181
- generate_btn = gr.Button("Generate SRT")
 
 
 
 
 
 
 
 
 
182
 
183
  output_file = gr.File(label="Download SRT")
184
 
 
185
  generate_btn.click(
186
  fn=generate_srt,
187
  inputs=[audio_file, audio_url, model_choice],
188
- outputs=output_file
189
  )
190
 
 
 
 
 
 
 
 
 
 
191
 
192
  if __name__ == "__main__":
193
- demo.launch(mcp_server=True)
 
 
6
  import pysrt
7
  import requests
8
  import tempfile
9
+ import time
10
  from faster_whisper import WhisperModel
11
  from datetime import timedelta
12
  from urllib.parse import urlparse
13
 
 
14
  # -----------------------------
15
  # Core subtitle generator
16
  # -----------------------------
 
45
  })
46
  return words
47
 
48
+ def find_sentence_boundaries(self, words):
49
+ """
50
+ Find first and last sentence boundaries based on periods.
51
+ Returns: (first_period_idx, last_period_idx)
52
+ """
53
+ first_period_idx = None
54
+ last_period_idx = None
55
+
56
+ for idx, word_data in enumerate(words):
57
+ word = word_data["word"]
58
+ # Check if word ends with period (and not abbreviation)
59
+ if word.endswith('.') or word.endswith('!') or word.endswith('?'):
60
+ if first_period_idx is None:
61
+ first_period_idx = idx
62
+ last_period_idx = idx
63
+
64
+ return first_period_idx, last_period_idx
65
+
66
  def create_linear_subtitles(self, words):
67
+ """
68
+ Create subtitles with:
69
+ - First sentence as first subtitle
70
+ - Middle content with linear pattern (1, 2, 3, 4... words)
71
+ - Last sentence as last subtitle
72
+ """
73
  subs = pysrt.SubRipFile()
74
+
75
+ if not words:
76
+ return subs
77
+
78
  total_words = len(words)
79
+ first_period_idx, last_period_idx = self.find_sentence_boundaries(words)
80
+
81
+ # Edge case: No periods found - use original linear pattern
82
+ if first_period_idx is None:
83
+ return self._create_basic_linear_subtitles(words)
84
+
85
+ # Edge case: Only one sentence (first = last)
86
+ if first_period_idx == last_period_idx:
87
+ # Single sentence becomes single subtitle
88
+ self._add_subtitle(subs, 1, words, 0, total_words)
89
+ return subs
90
+
91
  subtitle_index = 1
92
+
93
+ # 1. First sentence as first subtitle
94
+ first_sentence_words = words[0:first_period_idx + 1]
95
+ self._add_subtitle(subs, subtitle_index, first_sentence_words, 0, len(first_sentence_words))
96
+ subtitle_index += 1
97
+
98
+ # 2. Middle content with linear pattern
99
+ middle_start = first_period_idx + 1
100
+ middle_end = last_period_idx
101
+
102
+ if middle_start < middle_end:
103
+ middle_words = words[middle_start:middle_end]
104
+ subtitle_index = self._add_linear_pattern(subs, middle_words, subtitle_index)
105
+
106
+ # 3. Last sentence as last subtitle
107
+ last_sentence_words = words[last_period_idx:total_words]
108
+ if last_sentence_words:
109
+ self._add_subtitle(subs, subtitle_index, last_sentence_words, 0, len(last_sentence_words))
110
+
111
+ return subs
112
 
113
+ def _add_subtitle(self, subs, index, words, start_idx, end_idx):
114
+ """Helper to add a single subtitle from word range"""
115
+ if start_idx >= end_idx or start_idx >= len(words):
116
+ return
117
+
118
+ subtitle_words = []
119
+ start_time = None
120
+ end_time = None
121
+
122
+ for i in range(start_idx, min(end_idx, len(words))):
123
+ w = words[i]
124
+ subtitle_words.append(w["word"])
125
+ if start_time is None:
126
+ start_time = w["start"]
127
+ end_time = w["end"]
128
+
129
+ if subtitle_words:
130
+ subs.append(
131
+ pysrt.SubRipItem(
132
+ index=index,
133
+ start=self._to_time(start_time),
134
+ end=self._to_time(end_time),
135
+ text=" ".join(subtitle_words)
136
+ )
137
+ )
138
+
139
+ def _add_linear_pattern(self, subs, words, start_index):
140
+ """Apply linear pattern (1, 2, 3, 4... words) to words list"""
141
+ total_words = len(words)
142
+ index = 0
143
+ subtitle_index = start_index
144
+ current_size = 1
145
+
146
  while index < total_words:
147
  planned_size = current_size
148
  remaining = total_words - (index + planned_size)
149
  next_size = current_size + 1
150
+
151
+ # Absorb leftovers to avoid tiny last subtitle
152
  if remaining > 0 and remaining < next_size:
153
  planned_size += remaining
154
+
155
  subtitle_words = []
156
  start_time = None
157
  end_time = None
158
+
159
  for _ in range(planned_size):
160
  if index >= total_words:
161
  break
 
165
  start_time = w["start"]
166
  end_time = w["end"]
167
  index += 1
168
+
169
+ if subtitle_words:
170
+ subs.append(
171
+ pysrt.SubRipItem(
172
+ index=subtitle_index,
173
+ start=self._to_time(start_time),
174
+ end=self._to_time(end_time),
175
+ text=" ".join(subtitle_words)
176
+ )
177
+ )
178
+ subtitle_index += 1
179
+
180
+ # Progress to next size only if we didn't absorb leftovers
181
+ if planned_size == current_size:
182
+ current_size += 1
183
+ else:
184
+ break
185
+
186
+ return subtitle_index
187
 
188
+ def _create_basic_linear_subtitles(self, words):
189
+ """Fallback: Original linear pattern when no periods found"""
190
+ subs = pysrt.SubRipFile()
191
+ total_words = len(words)
192
+ index = 0
193
+ subtitle_index = 1
194
+ current_size = 1
195
+
196
+ while index < total_words:
197
+ planned_size = current_size
198
+ remaining = total_words - (index + planned_size)
199
+ next_size = current_size + 1
200
+
201
+ if remaining > 0 and remaining < next_size:
202
+ planned_size += remaining
203
+
204
+ subtitle_words = []
205
+ start_time = None
206
+ end_time = None
207
+
208
+ for _ in range(planned_size):
209
+ if index >= total_words:
210
+ break
211
+ w = words[index]
212
+ subtitle_words.append(w["word"])
213
+ if start_time is None:
214
+ start_time = w["start"]
215
+ end_time = w["end"]
216
+ index += 1
217
+
218
  subs.append(
219
  pysrt.SubRipItem(
220
  index=subtitle_index,
 
224
  )
225
  )
226
  subtitle_index += 1
227
+
228
  if planned_size == current_size:
229
  current_size += 1
230
  else:
231
  break
232
+
233
  return subs
234
 
235
  def _to_time(self, seconds):
 
241
  milliseconds=td.microseconds // 1000
242
  )
243
 
244
+ # -----------------------------
245
+ # Helper: download audio from URL
246
+ # -----------------------------
 
247
  def download_audio(url: str) -> str:
248
  parsed = urlparse(url)
249
  if parsed.scheme not in ("http", "https"):
 
261
  tmp.close()
262
  return tmp.name
263
 
264
+ # -----------------------------
265
+ # Helper: format elapsed time
266
+ # -----------------------------
267
def format_time(seconds):
    """Render an elapsed duration (in seconds) as a short readable string.

    - under a minute: one decimal place, e.g. ``"12.3s"``
    - under an hour: whole minutes and seconds, e.g. ``"2m 5s"``
    - otherwise: whole hours and minutes, e.g. ``"1h 2m"``
    """
    if seconds < 60:
        return f"{seconds:.1f}s"

    if seconds < 3600:
        whole_minutes, leftover_seconds = divmod(seconds, 60)
        return f"{int(whole_minutes)}m {int(leftover_seconds)}s"

    whole_hours, leftover = divmod(seconds, 3600)
    return f"{int(whole_hours)}h {int(leftover // 60)}m"
 
 
 
 
 
 
 
 
 
 
279
 
280
+ # -----------------------------
281
+ # Gradio callable function with status updates
282
+ # -----------------------------
283
def generate_srt(audio_file, audio_url, model_size):
    """Generate an SRT file while streaming progress to the Gradio UI.

    This is a generator. Every item it yields is a ``(srt_path, status)``
    pair matching the ``[output_file, status_box]`` outputs: ``srt_path`` is
    ``None`` until the final successful step, and ``status`` is the text to
    show in the status box.

    Args:
        audio_file: path of an uploaded audio file, or a falsy value.
        audio_url: http(s) URL of an audio file, or a falsy value.
        model_size: Whisper model name passed to ``LinearSubtitleGenerator``.

    Exactly one of ``audio_file`` / ``audio_url`` must be provided. Failures
    are reported as a yielded error message rather than raised.
    """
    import os  # local import: only needed for temp-file cleanup below

    start_time = time.time()
    status_messages = []
    downloaded_path = None  # set only when we created a temp audio file

    def progress(message):
        """Record a status line and return the full log text so far."""
        status_messages.append(message)
        return "\n".join(status_messages)

    try:
        # Validation: exactly one input source.
        if bool(audio_file) == bool(audio_url):
            # Must be yielded: a `return value` inside a generator is
            # discarded by the consumer, so the UI would show nothing.
            yield None, "❌ Error: Please provide EITHER an audio file OR an audio URL (not both)."
            return

        yield None, progress("🚀 Starting subtitle generation...")

        # Step 1: Get audio file
        if audio_url:
            yield None, progress("📥 Downloading audio from URL...")

            download_start = time.time()
            downloaded_path = download_audio(audio_url)
            audio_path = downloaded_path
            download_time = time.time() - download_start

            yield None, progress(f"✓ Download completed in {format_time(download_time)}")
        else:
            audio_path = audio_file
            yield None, progress("✓ Audio file loaded")

        # Step 2: Load model
        yield None, progress(f"🧠 Loading Whisper model ({model_size})...")

        model_start = time.time()
        generator = LinearSubtitleGenerator(model_size)
        model_time = time.time() - model_start

        yield None, progress(f"✓ Model loaded in {format_time(model_time)}")

        # Step 3: Transcribe
        yield None, progress("🎤 Transcribing audio (this may take a while)...")

        transcribe_start = time.time()
        segments = generator.transcribe(audio_path)
        words = generator.extract_words(segments)
        transcribe_time = time.time() - transcribe_start

        progress(f"✓ Transcription completed in {format_time(transcribe_time)}")
        yield None, progress(f"📊 Extracted {len(words)} words")

        # Step 4: Generate subtitles
        yield None, progress("📝 Generating SRT subtitles...")

        srt_start = time.time()
        subs = generator.create_linear_subtitles(words)
        srt_time = time.time() - srt_start

        yield None, progress(
            f"✓ Created {len(subs)} subtitle segments in {format_time(srt_time)}"
        )

        # Step 5: Save file
        yield None, progress("💾 Saving SRT file...")

        # Reserve a persistent temp path and close the handle before
        # writing, so no descriptor is leaked.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as out:
            srt_path = out.name
        subs.save(srt_path, encoding="utf-8")

        total_time = time.time() - start_time

        progress(f"✅ SUCCESS! Total time: {format_time(total_time)}")
        yield srt_path, progress("📁 SRT file ready for download")

    except requests.RequestException as e:
        yield None, f"❌ Network Error: Failed to download audio\nDetails: {e}"

    except ValueError as e:
        yield None, f"❌ Validation Error: {e}"

    except Exception as e:
        # Top-level boundary: surface any other failure in the status box.
        total_time = time.time() - start_time
        yield None, f"❌ Error occurred after {format_time(total_time)}\nDetails: {e}"

    finally:
        # Best-effort removal of the audio we downloaded ourselves; an
        # uploaded file is left untouched.
        if downloaded_path is not None:
            try:
                os.remove(downloaded_path)
            except OSError:
                pass
375
+
376
+ # -----------------------------
377
+ # Gradio UI with Status Bar
378
+ # -----------------------------
379
  with gr.Blocks(title="Subtitle Generator") as demo:
380
  gr.Markdown(
381
  """
382
+ # SRT Generator with Smart Sentence Handling
383
+
384
+ **Features:**
385
+ - First sentence β†’ First subtitle
386
+ - Middle content β†’ Linear pattern (1, 2, 3, 4... words)
387
+ - Last sentence β†’ Last subtitle
388
  """
389
  )
390
 
 
405
  label="Whisper Model"
406
  )
407
 
408
+ generate_btn = gr.Button("Generate SRT", variant="primary")
409
+
410
+ # Status display
411
+ status_box = gr.Textbox(
412
+ label="Status",
413
+ placeholder="Status updates will appear here...",
414
+ lines=10,
415
+ max_lines=15,
416
+ interactive=False
417
+ )
418
 
419
  output_file = gr.File(label="Download SRT")
420
 
421
+ # Event handler
422
  generate_btn.click(
423
  fn=generate_srt,
424
  inputs=[audio_file, audio_url, model_choice],
425
+ outputs=[output_file, status_box]
426
  )
427
 
428
+ gr.Markdown(
429
+ """
430
+ ---
431
+ **Tips:**
432
+ - Larger models (small/medium) are more accurate but slower
433
+ - For best results, use clear audio with minimal background noise
434
+ - Processing time depends on audio length and model size
435
+ """
436
+ )
437
 
438
  if __name__ == "__main__":
439
+ demo.launch()
440
+