xTHExBEASTx committed on
Commit
b4a8b32
·
verified ·
1 Parent(s): 0a1e2fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +170 -20
app.py CHANGED
@@ -6,11 +6,10 @@ import os
6
  import math
7
  from datetime import timedelta
8
  import subprocess
 
9
 
10
  # --- Configuration ---
11
  TRANSLATION_MODEL = "facebook/nllb-200-distilled-1.3B"
12
- # We use OpenAI's original small model for better segmentation on CPU
13
- # It is often better at splitting sentences than Distil-Large for subtitles
14
  WHISPER_MODEL = "openai/whisper-small"
15
 
16
  print("Loading Models...")
@@ -32,8 +31,19 @@ whisper_pipe = pipeline(
32
  print("Models Loaded Successfully!")
33
 
34
  # ---------------------------------------------------------
35
- # Helper: Extract Audio
36
  # ---------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
37
  def extract_audio(video_path):
38
  output_audio_path = "temp_audio.mp3"
39
  if os.path.exists(output_audio_path):
@@ -48,10 +58,30 @@ def extract_audio(video_path):
48
  return output_audio_path
49
 
50
  # ---------------------------------------------------------
51
- # Helper: Smart SRT Splitter (The Fix!)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  # ---------------------------------------------------------
53
  def split_text_into_lines(text, max_chars=80):
54
- """Breaks long text into smaller lines based on character limit."""
55
  words = text.split()
56
  lines = []
57
  current_line = []
@@ -70,11 +100,7 @@ def split_text_into_lines(text, max_chars=80):
70
  lines.append(" ".join(current_line))
71
  return lines
72
 
73
- def create_srt_segments(chunks):
74
- """
75
- Takes raw Whisper chunks and breaks them down into clean SRT subtitles.
76
- Distributes time proportionally if a chunk is split into multiple lines.
77
- """
78
  srt_subtitles = []
79
  index_counter = 1
80
 
@@ -82,23 +108,147 @@ def create_srt_segments(chunks):
82
  text = chunk['text'].strip()
83
  timestamp = chunk['timestamp']
84
 
85
- # Safe unpacking of timestamps
86
  if isinstance(timestamp, (list, tuple)):
87
  start_time, end_time = timestamp
88
  else:
89
- continue # Skip bad chunks
90
-
91
- if end_time is None: end_time = start_time + 5.0
92
-
93
- # Smart Split: If text is too long (>80 chars), split it
94
  lines = split_text_into_lines(text, max_chars=80)
 
 
95
 
96
- # Calculate duration per line (Proportional split)
97
- total_duration = end_time - start_time
98
- duration_per_line = total_duration / len(lines) if lines else 0
99
-
100
  current_start = start_time
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  for line in lines:
103
  current_end = current_start + duration_per_line
104
 
 
6
  import math
7
  from datetime import timedelta
8
  import subprocess
9
+ import re
10
 
11
  # --- Configuration ---
12
  TRANSLATION_MODEL = "facebook/nllb-200-distilled-1.3B"
 
 
13
  WHISPER_MODEL = "openai/whisper-small"
14
 
15
  print("Loading Models...")
 
31
  print("Models Loaded Successfully!")
32
 
33
  # ---------------------------------------------------------
34
+ # Helper: Extract Audio & Duration
35
  # ---------------------------------------------------------
36
def get_media_duration(filename):
    """Return the duration of a media file in seconds, probed with ffprobe.

    Falls back to 30.0 seconds when ffprobe is not installed, the probe
    fails, or its output cannot be parsed as a number.
    """
    try:
        result = subprocess.run(
            [
                "ffprobe", "-v", "error",
                "-show_entries", "format=duration",
                "-of", "default=noprint_wrappers=1:nokey=1",
                filename,
            ],
            stdout=subprocess.PIPE,
            # Keep stderr separate: with stderr=STDOUT any ffprobe warning
            # or error text lands in stdout and corrupts the float() parse.
            stderr=subprocess.PIPE,
        )
        return float(result.stdout)
    except (OSError, ValueError, subprocess.SubprocessError):
        # ffprobe missing (OSError), probe failed, or non-numeric output.
        # Narrow exceptions instead of the original bare `except:`, which
        # also swallowed KeyboardInterrupt/SystemExit.
        return 30.0
46
+
47
  def extract_audio(video_path):
48
  output_audio_path = "temp_audio.mp3"
49
  if os.path.exists(output_audio_path):
 
58
  return output_audio_path
59
 
60
  # ---------------------------------------------------------
61
+ # Helper: VTT Converter (For Browser Preview)
62
+ # ---------------------------------------------------------
63
def srt_to_vtt(srt_path):
    """Convert an SRT file to WebVTT for the HTML5 video player.

    Writes a sibling ``.vtt`` file next to *srt_path* and returns its path.
    """
    # Use splitext rather than str.replace(".srt", ".vtt"): replace() hits
    # the FIRST ".srt" anywhere in the path (e.g. a ".srt" directory name)
    # and, for a non-.srt input, would overwrite the source file in place.
    root, _ext = os.path.splitext(srt_path)
    vtt_path = root + ".vtt"

    with open(srt_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # WebVTT requires this magic header...
    vtt_content = "WEBVTT\n\n"

    # ...and dot-separated millisecond timestamps: SRT's 00:00:01,000
    # becomes 00:00:01.000. Everything else is copied through unchanged.
    vtt_content += re.sub(r'(\d{2}:\d{2}:\d{2}),(\d{3})', r'\1.\2', content)

    with open(vtt_path, 'w', encoding='utf-8') as f:
        f.write(vtt_content)

    return vtt_path
80
+
81
+ # ---------------------------------------------------------
82
+ # Helper: Smart Splitter logic
83
  # ---------------------------------------------------------
84
  def split_text_into_lines(text, max_chars=80):
 
85
  words = text.split()
86
  lines = []
87
  current_line = []
 
100
  lines.append(" ".join(current_line))
101
  return lines
102
 
103
+ def create_srt_segments(chunks, total_video_duration):
 
 
 
 
104
  srt_subtitles = []
105
  index_counter = 1
106
 
 
108
  text = chunk['text'].strip()
109
  timestamp = chunk['timestamp']
110
 
 
111
  if isinstance(timestamp, (list, tuple)):
112
  start_time, end_time = timestamp
113
  else:
114
+ start_time, end_time = 0.0, None
115
+
116
+ if end_time is None: end_time = total_video_duration
117
+
 
118
  lines = split_text_into_lines(text, max_chars=80)
119
+ duration = end_time - start_time
120
+ if duration <= 0: duration = 5.0
121
 
122
+ step = duration / len(lines) if lines else 0
 
 
 
123
  current_start = start_time
124
 
125
+ for line in lines:
126
+ current_end = current_start + step
127
+ srt_subtitles.append(
128
+ srt.Subtitle(index=index_counter, start=timedelta(seconds=current_start), end=timedelta(seconds=current_end), content=line)
129
+ )
130
+ index_counter += 1
131
+ current_start = current_end
132
+
133
+ return srt_subtitles
134
+
135
+ # ---------------------------------------------------------
136
+ # Logic 1: Translation (NLLB)
137
+ # ---------------------------------------------------------
138
def batch_translate(texts, src_lang, tgt_lang, batch_size=8, progress=gr.Progress()):
    """Translate *texts* with the NLLB model, `batch_size` lines at a time.

    Args:
        texts: list of source-language strings.
        src_lang / tgt_lang: NLLB language codes (e.g. "eng_Latn", "arb_Arab").
        batch_size: lines per forward pass; trades memory for speed.
        progress: Gradio progress tracker (Gradio injects this at call time).

    Returns:
        List of translated strings, aligned with *texts*.
    """
    results = []
    tokenizer_nllb.src_lang = src_lang
    # Loop-invariant: the forced BOS token depends only on tgt_lang, so
    # compute it once instead of once per batch.
    forced_bos_token_id = tokenizer_nllb.convert_tokens_to_ids(tgt_lang)

    # (Dropped the unused enumerate() index from the original loop.)
    for start_idx in range(0, len(texts), batch_size):
        batch = texts[start_idx : start_idx + batch_size]
        inputs = tokenizer_nllb(batch, return_tensors="pt", padding=True, truncation=True, max_length=512)
        with torch.no_grad():
            generated_tokens = model_nllb.generate(**inputs, forced_bos_token_id=forced_bos_token_id, max_length=512)
        results.extend(tokenizer_nllb.batch_decode(generated_tokens, skip_special_tokens=True))
    return results
150
+
151
def process_translation(filepath, src_lang_code, tgt_lang_code):
    """Translate every cue of an uploaded SRT file and save the result.

    Returns a single value — the output file path, or None on failure —
    because the UI wires this handler to exactly one output component.
    (The original mixed 2-tuple error returns with a 1-value success
    return, which mismatched the single gr.File output.)
    """
    if filepath is None:
        return None
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            subtitles = list(srt.parse(f.read()))
    except Exception:
        # Unreadable or unparsable SRT: nothing to offer for download.
        return None

    texts = [sub.content for sub in subtitles]
    translated = batch_translate(texts, src_lang_code, tgt_lang_code)

    # Swap each cue's text for its translation; timing is preserved.
    for sub, trans in zip(subtitles, translated):
        sub.content = trans

    out_path = "translated_subtitles.srt"
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(srt.compose(subtitles))

    return out_path
170
+
171
+ # ---------------------------------------------------------
172
+ # Logic 2: Video to SRT + Preview
173
+ # ---------------------------------------------------------
174
def video_to_srt(video_path, progress=gr.Progress()):
    """Transcribe a video with Whisper and return (srt_path, preview_html).

    On failure returns (None, error string) so both wired outputs
    (gr.File, gr.HTML) are always filled and the error is visible in the
    preview pane.
    """
    if video_path is None:
        return None, None

    # 1. Audio & duration (duration later caps open-ended Whisper timestamps)
    progress(0.1, desc="Extracting Audio...")
    try:
        audio_path = extract_audio(video_path)
        duration = get_media_duration(audio_path)
    except Exception as e:
        return None, f"Error: {str(e)}"

    # 2. Transcribe — guarded the same way as extraction, so a decode
    # failure surfaces in the UI instead of crashing the request.
    progress(0.3, desc="Transcribing...")
    try:
        outputs = whisper_pipe(audio_path, return_timestamps=True, generate_kwargs={"language": "english"})
    except Exception as e:
        return None, f"Error: {str(e)}"
    chunks = outputs.get("chunks", [])
    if not chunks:
        # No chunked timestamps: fall back to one chunk spanning the clip.
        chunks = [{"text": outputs.get("text", ""), "timestamp": (0.0, None)}]

    # 3. Format SRT
    progress(0.8, desc="Formatting...")
    srt_subtitles = create_srt_segments(chunks, duration)

    srt_path = "generated_captions.srt"
    with open(srt_path, 'w', encoding='utf-8') as f:
        f.write(srt.compose(srt_subtitles))

    # 4. Create preview (HTML5 video + VTT caption track)
    vtt_path = srt_to_vtt(srt_path)

    # We use Gradio's /file/ route to serve local files
    html_preview = f"""
    <h3>Video Preview</h3>
    <video controls width="100%" height="400px" style="background:black">
        <source src="/file={video_path}" type="video/mp4">
        <track kind="captions" src="/file={vtt_path}" srclang="en" label="English" default>
        Your browser does not support the video tag.
    </video>
    <p style="margin-top:10px; color: #666;">Note: Subtitles are overlaid for preview only. They are not burned into the video.</p>
    """

    return srt_path, html_preview
214
+
215
+ # ---------------------------------------------------------
216
+ # Gradio Interface
217
+ # ---------------------------------------------------------
218
# ---------------------------------------------------------
# Gradio Interface
# ---------------------------------------------------------
with gr.Blocks(title="SRT Master Tool") as demo:
    gr.Markdown("# 🎬 Auto Subtitle & Translator")

    with gr.Tabs():
        # --- TAB 1: video -> captions + browser preview ---
        with gr.TabItem("Step 1: Video to SRT"):
            gr.Markdown("### 1. Upload Video -> 2. Check Preview -> 3. Download SRT")
            with gr.Row():
                video_input = gr.Video(label="Upload Video", sources=["upload"])

                with gr.Column():
                    # HTML5 player with the generated VTT track overlaid
                    preview_output = gr.HTML(label="Preview Player")
                    # Downloadable .srt produced by video_to_srt
                    srt_output_gen = gr.File(label="Download Generated SRT")

            generate_btn = gr.Button("Generate SRT & Preview", variant="primary")
            generate_btn.click(video_to_srt, inputs=video_input, outputs=[srt_output_gen, preview_output])

        # --- TAB 2: captions -> Arabic translation ---
        with gr.TabItem("Step 2: Translate SRT"):
            gr.Markdown("### Translate Subtitles to Arabic")
            with gr.Row():
                srt_input = gr.File(label="Upload SRT")
                with gr.Column():
                    src_l = gr.Dropdown(["eng_Latn", "fra_Latn"], label="From", value="eng_Latn")
                    tgt_l = gr.Dropdown(["arb_Arab", "arz_Arab"], label="To", value="arb_Arab")
                    srt_output_trans = gr.File(label="Translated SRT")

            translate_btn = gr.Button("Translate", variant="primary")
            translate_btn.click(process_translation, inputs=[srt_input, src_l, tgt_l], outputs=srt_output_trans)

if __name__ == "__main__":
    demo.launch()
252
  for line in lines:
253
  current_end = current_start + duration_per_line
254