xTHExBEASTx committed on
Commit
95e5f5e
·
verified ·
1 Parent(s): b4a8b32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -120
app.py CHANGED
@@ -70,7 +70,6 @@ def srt_to_vtt(srt_path):
70
  vtt_content = "WEBVTT\n\n"
71
 
72
  # Replace comma timestamps (00:00:01,000) with dot (00:00:01.000)
73
- # And copy the rest
74
  vtt_content += re.sub(r'(\d{2}:\d{2}:\d{2}),(\d{3})', r'\1.\2', content)
75
 
76
  with open(vtt_path, 'w', encoding='utf-8') as f:
@@ -113,7 +112,8 @@ def create_srt_segments(chunks, total_video_duration):
113
  else:
114
  start_time, end_time = 0.0, None
115
 
116
- if end_time is None: end_time = total_video_duration
 
117
 
118
  lines = split_text_into_lines(text, max_chars=80)
119
  duration = end_time - start_time
@@ -149,12 +149,12 @@ def batch_translate(texts, src_lang, tgt_lang, batch_size=8, progress=gr.Progres
149
  return results
150
 
151
  def process_translation(filepath, src_lang_code, tgt_lang_code):
152
- if filepath is None: return None, None
153
  try:
154
  with open(filepath, 'r', encoding='utf-8') as f:
155
  subtitles = list(srt.parse(f.read()))
156
  except Exception as e:
157
- return f"Error: {str(e)}", None
158
 
159
  texts = [sub.content for sub in subtitles]
160
  translated = batch_translate(texts, src_lang_code, tgt_lang_code)
@@ -199,7 +199,7 @@ def video_to_srt(video_path, progress=gr.Progress()):
199
  # 4. Create Preview (HTML + VTT)
200
  vtt_path = srt_to_vtt(srt_path)
201
 
202
- # We use Gradio's /file/ route to serve local files
203
  html_preview = f"""
204
  <h3>Video Preview</h3>
205
  <video controls width="100%" height="400px" style="background:black">
@@ -226,9 +226,7 @@ with gr.Blocks(title="SRT Master Tool") as demo:
226
  video_input = gr.Video(label="Upload Video", sources=["upload"])
227
 
228
  with gr.Column():
229
- # The new Preview Player
230
  preview_output = gr.HTML(label="Preview Player")
231
- # The download button
232
  srt_output_gen = gr.File(label="Download Generated SRT")
233
 
234
  btn1 = gr.Button("Generate SRT & Preview", variant="primary")
@@ -248,116 +246,4 @@ with gr.Blocks(title="SRT Master Tool") as demo:
248
  btn2.click(process_translation, inputs=[srt_input, src_l, tgt_l], outputs=srt_output_trans)
249
 
250
  if __name__ == "__main__":
251
- demo.launch()
252
- for line in lines:
253
- current_end = current_start + duration_per_line
254
-
255
- srt_subtitles.append(
256
- srt.Subtitle(
257
- index=index_counter,
258
- start=timedelta(seconds=current_start),
259
- end=timedelta(seconds=current_end),
260
- content=line
261
- )
262
- )
263
- index_counter += 1
264
- current_start = current_end # Next line starts where this one ended
265
-
266
- return srt_subtitles
267
-
268
- # ---------------------------------------------------------
269
- # Logic 1: Translation (NLLB)
270
- # ---------------------------------------------------------
271
- def batch_translate(texts, src_lang, tgt_lang, batch_size=8, progress=gr.Progress()):
272
- results = []
273
- tokenizer_nllb.src_lang = src_lang
274
-
275
- for i, start_idx in enumerate(range(0, len(texts), batch_size)):
276
- batch = texts[start_idx : start_idx + batch_size]
277
- inputs = tokenizer_nllb(batch, return_tensors="pt", padding=True, truncation=True, max_length=512)
278
- forced_bos_token_id = tokenizer_nllb.convert_tokens_to_ids(tgt_lang)
279
-
280
- with torch.no_grad():
281
- generated_tokens = model_nllb.generate(**inputs, forced_bos_token_id=forced_bos_token_id, max_length=512)
282
-
283
- results.extend(tokenizer_nllb.batch_decode(generated_tokens, skip_special_tokens=True))
284
- return results
285
-
286
- def process_translation(filepath, src_lang_code, tgt_lang_code):
287
- if filepath is None: return None
288
- try:
289
- with open(filepath, 'r', encoding='utf-8') as f:
290
- subtitles = list(srt.parse(f.read()))
291
- except Exception as e:
292
- return f"Error: {str(e)}"
293
-
294
- texts = [sub.content for sub in subtitles]
295
- translated = batch_translate(texts, src_lang_code, tgt_lang_code)
296
-
297
- for sub, trans in zip(subtitles, translated):
298
- sub.content = trans
299
-
300
- out_path = "translated_subtitles.srt"
301
- with open(out_path, 'w', encoding='utf-8') as f:
302
- f.write(srt.compose(subtitles))
303
- return out_path
304
-
305
- # ---------------------------------------------------------
306
- # Logic 2: Video to SRT (Whisper)
307
- # ---------------------------------------------------------
308
- def video_to_srt(video_path, progress=gr.Progress()):
309
- if video_path is None: return None
310
-
311
- progress(0.1, desc="Extracting Audio...")
312
- try:
313
- audio_path = extract_audio(video_path)
314
- except Exception as e:
315
- return f"Error extracting audio: {str(e)}"
316
-
317
- progress(0.3, desc="Transcribing...")
318
- # We enable return_timestamps=True to get segment-level timing
319
- outputs = whisper_pipe(audio_path, return_timestamps=True, generate_kwargs={"language": "english"})
320
-
321
- chunks = outputs.get("chunks", [])
322
- if not chunks:
323
- chunks = [{"text": outputs.get("text", ""), "timestamp": (0.0, 5.0)}]
324
-
325
- progress(0.8, desc="Formatting SRT...")
326
-
327
- # Use the new Smart Splitter function
328
- srt_subtitles = create_srt_segments(chunks)
329
-
330
- out_path = "generated_captions.srt"
331
- with open(out_path, 'w', encoding='utf-8') as f:
332
- f.write(srt.compose(srt_subtitles))
333
-
334
- return out_path
335
-
336
- # ---------------------------------------------------------
337
- # Gradio Interface
338
- # ---------------------------------------------------------
339
- with gr.Blocks(title="SRT Master Tool") as demo:
340
- gr.Markdown("# 🎬 Auto Subtitle & Translator")
341
-
342
- with gr.Tabs():
343
- with gr.TabItem("Step 1: Video to SRT"):
344
- gr.Markdown("### Convert Video to English Subtitles")
345
- with gr.Row():
346
- video_input = gr.Video(label="Upload Video")
347
- srt_output_gen = gr.File(label="Generated SRT")
348
- btn1 = gr.Button("Generate SRT", variant="primary")
349
- btn1.click(video_to_srt, inputs=video_input, outputs=srt_output_gen)
350
-
351
- with gr.TabItem("Step 2: Translate SRT"):
352
- gr.Markdown("### Translate Subtitles to Arabic")
353
- with gr.Row():
354
- srt_input = gr.File(label="Upload SRT")
355
- with gr.Column():
356
- src_l = gr.Dropdown(["eng_Latn", "fra_Latn"], label="From", value="eng_Latn")
357
- tgt_l = gr.Dropdown(["arb_Arab", "arz_Arab"], label="To", value="arb_Arab")
358
- srt_output_trans = gr.File(label="Translated SRT")
359
- btn2 = gr.Button("Translate", variant="primary")
360
- btn2.click(process_translation, inputs=[srt_input, src_l, tgt_l], outputs=srt_output_trans)
361
-
362
- if __name__ == "__main__":
363
- demo.launch()
 
70
  vtt_content = "WEBVTT\n\n"
71
 
72
  # Replace comma timestamps (00:00:01,000) with dot (00:00:01.000)
 
73
  vtt_content += re.sub(r'(\d{2}:\d{2}:\d{2}),(\d{3})', r'\1.\2', content)
74
 
75
  with open(vtt_path, 'w', encoding='utf-8') as f:
 
112
  else:
113
  start_time, end_time = 0.0, None
114
 
115
+ if end_time is None:
116
+ end_time = total_video_duration
117
 
118
  lines = split_text_into_lines(text, max_chars=80)
119
  duration = end_time - start_time
 
149
  return results
150
 
151
  def process_translation(filepath, src_lang_code, tgt_lang_code):
152
+ if filepath is None: return None
153
  try:
154
  with open(filepath, 'r', encoding='utf-8') as f:
155
  subtitles = list(srt.parse(f.read()))
156
  except Exception as e:
157
+ return f"Error: {str(e)}"
158
 
159
  texts = [sub.content for sub in subtitles]
160
  translated = batch_translate(texts, src_lang_code, tgt_lang_code)
 
199
  # 4. Create Preview (HTML + VTT)
200
  vtt_path = srt_to_vtt(srt_path)
201
 
202
+ # Create the HTML player
203
  html_preview = f"""
204
  <h3>Video Preview</h3>
205
  <video controls width="100%" height="400px" style="background:black">
 
226
  video_input = gr.Video(label="Upload Video", sources=["upload"])
227
 
228
  with gr.Column():
 
229
  preview_output = gr.HTML(label="Preview Player")
 
230
  srt_output_gen = gr.File(label="Download Generated SRT")
231
 
232
  btn1 = gr.Button("Generate SRT & Preview", variant="primary")
 
246
  btn2.click(process_translation, inputs=[srt_input, src_l, tgt_l], outputs=srt_output_trans)
247
 
248
  if __name__ == "__main__":
249
+ demo.launch()