Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -70,7 +70,6 @@ def srt_to_vtt(srt_path):
|
|
| 70 |
vtt_content = "WEBVTT\n\n"
|
| 71 |
|
| 72 |
# Replace comma timestamps (00:00:01,000) with dot (00:00:01.000)
|
| 73 |
-
# And copy the rest
|
| 74 |
vtt_content += re.sub(r'(\d{2}:\d{2}:\d{2}),(\d{3})', r'\1.\2', content)
|
| 75 |
|
| 76 |
with open(vtt_path, 'w', encoding='utf-8') as f:
|
|
@@ -113,7 +112,8 @@ def create_srt_segments(chunks, total_video_duration):
|
|
| 113 |
else:
|
| 114 |
start_time, end_time = 0.0, None
|
| 115 |
|
| 116 |
-
if end_time is None:
|
|
|
|
| 117 |
|
| 118 |
lines = split_text_into_lines(text, max_chars=80)
|
| 119 |
duration = end_time - start_time
|
|
@@ -149,12 +149,12 @@ def batch_translate(texts, src_lang, tgt_lang, batch_size=8, progress=gr.Progres
|
|
| 149 |
return results
|
| 150 |
|
| 151 |
def process_translation(filepath, src_lang_code, tgt_lang_code):
|
| 152 |
-
if filepath is None: return None
|
| 153 |
try:
|
| 154 |
with open(filepath, 'r', encoding='utf-8') as f:
|
| 155 |
subtitles = list(srt.parse(f.read()))
|
| 156 |
except Exception as e:
|
| 157 |
-
return f"Error: {str(e)}"
|
| 158 |
|
| 159 |
texts = [sub.content for sub in subtitles]
|
| 160 |
translated = batch_translate(texts, src_lang_code, tgt_lang_code)
|
|
@@ -199,7 +199,7 @@ def video_to_srt(video_path, progress=gr.Progress()):
|
|
| 199 |
# 4. Create Preview (HTML + VTT)
|
| 200 |
vtt_path = srt_to_vtt(srt_path)
|
| 201 |
|
| 202 |
-
#
|
| 203 |
html_preview = f"""
|
| 204 |
<h3>Video Preview</h3>
|
| 205 |
<video controls width="100%" height="400px" style="background:black">
|
|
@@ -226,9 +226,7 @@ with gr.Blocks(title="SRT Master Tool") as demo:
|
|
| 226 |
video_input = gr.Video(label="Upload Video", sources=["upload"])
|
| 227 |
|
| 228 |
with gr.Column():
|
| 229 |
-
# The new Preview Player
|
| 230 |
preview_output = gr.HTML(label="Preview Player")
|
| 231 |
-
# The download button
|
| 232 |
srt_output_gen = gr.File(label="Download Generated SRT")
|
| 233 |
|
| 234 |
btn1 = gr.Button("Generate SRT & Preview", variant="primary")
|
|
@@ -248,116 +246,4 @@ with gr.Blocks(title="SRT Master Tool") as demo:
|
|
| 248 |
btn2.click(process_translation, inputs=[srt_input, src_l, tgt_l], outputs=srt_output_trans)
|
| 249 |
|
| 250 |
if __name__ == "__main__":
|
| 251 |
-
demo.launch()
|
| 252 |
-
for line in lines:
|
| 253 |
-
current_end = current_start + duration_per_line
|
| 254 |
-
|
| 255 |
-
srt_subtitles.append(
|
| 256 |
-
srt.Subtitle(
|
| 257 |
-
index=index_counter,
|
| 258 |
-
start=timedelta(seconds=current_start),
|
| 259 |
-
end=timedelta(seconds=current_end),
|
| 260 |
-
content=line
|
| 261 |
-
)
|
| 262 |
-
)
|
| 263 |
-
index_counter += 1
|
| 264 |
-
current_start = current_end # Next line starts where this one ended
|
| 265 |
-
|
| 266 |
-
return srt_subtitles
|
| 267 |
-
|
| 268 |
-
# ---------------------------------------------------------
|
| 269 |
-
# Logic 1: Translation (NLLB)
|
| 270 |
-
# ---------------------------------------------------------
|
| 271 |
-
def batch_translate(texts, src_lang, tgt_lang, batch_size=8, progress=gr.Progress()):
|
| 272 |
-
results = []
|
| 273 |
-
tokenizer_nllb.src_lang = src_lang
|
| 274 |
-
|
| 275 |
-
for i, start_idx in enumerate(range(0, len(texts), batch_size)):
|
| 276 |
-
batch = texts[start_idx : start_idx + batch_size]
|
| 277 |
-
inputs = tokenizer_nllb(batch, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
| 278 |
-
forced_bos_token_id = tokenizer_nllb.convert_tokens_to_ids(tgt_lang)
|
| 279 |
-
|
| 280 |
-
with torch.no_grad():
|
| 281 |
-
generated_tokens = model_nllb.generate(**inputs, forced_bos_token_id=forced_bos_token_id, max_length=512)
|
| 282 |
-
|
| 283 |
-
results.extend(tokenizer_nllb.batch_decode(generated_tokens, skip_special_tokens=True))
|
| 284 |
-
return results
|
| 285 |
-
|
| 286 |
-
def process_translation(filepath, src_lang_code, tgt_lang_code):
|
| 287 |
-
if filepath is None: return None
|
| 288 |
-
try:
|
| 289 |
-
with open(filepath, 'r', encoding='utf-8') as f:
|
| 290 |
-
subtitles = list(srt.parse(f.read()))
|
| 291 |
-
except Exception as e:
|
| 292 |
-
return f"Error: {str(e)}"
|
| 293 |
-
|
| 294 |
-
texts = [sub.content for sub in subtitles]
|
| 295 |
-
translated = batch_translate(texts, src_lang_code, tgt_lang_code)
|
| 296 |
-
|
| 297 |
-
for sub, trans in zip(subtitles, translated):
|
| 298 |
-
sub.content = trans
|
| 299 |
-
|
| 300 |
-
out_path = "translated_subtitles.srt"
|
| 301 |
-
with open(out_path, 'w', encoding='utf-8') as f:
|
| 302 |
-
f.write(srt.compose(subtitles))
|
| 303 |
-
return out_path
|
| 304 |
-
|
| 305 |
-
# ---------------------------------------------------------
|
| 306 |
-
# Logic 2: Video to SRT (Whisper)
|
| 307 |
-
# ---------------------------------------------------------
|
| 308 |
-
def video_to_srt(video_path, progress=gr.Progress()):
|
| 309 |
-
if video_path is None: return None
|
| 310 |
-
|
| 311 |
-
progress(0.1, desc="Extracting Audio...")
|
| 312 |
-
try:
|
| 313 |
-
audio_path = extract_audio(video_path)
|
| 314 |
-
except Exception as e:
|
| 315 |
-
return f"Error extracting audio: {str(e)}"
|
| 316 |
-
|
| 317 |
-
progress(0.3, desc="Transcribing...")
|
| 318 |
-
# We enable return_timestamps=True to get segment-level timing
|
| 319 |
-
outputs = whisper_pipe(audio_path, return_timestamps=True, generate_kwargs={"language": "english"})
|
| 320 |
-
|
| 321 |
-
chunks = outputs.get("chunks", [])
|
| 322 |
-
if not chunks:
|
| 323 |
-
chunks = [{"text": outputs.get("text", ""), "timestamp": (0.0, 5.0)}]
|
| 324 |
-
|
| 325 |
-
progress(0.8, desc="Formatting SRT...")
|
| 326 |
-
|
| 327 |
-
# Use the new Smart Splitter function
|
| 328 |
-
srt_subtitles = create_srt_segments(chunks)
|
| 329 |
-
|
| 330 |
-
out_path = "generated_captions.srt"
|
| 331 |
-
with open(out_path, 'w', encoding='utf-8') as f:
|
| 332 |
-
f.write(srt.compose(srt_subtitles))
|
| 333 |
-
|
| 334 |
-
return out_path
|
| 335 |
-
|
| 336 |
-
# ---------------------------------------------------------
|
| 337 |
-
# Gradio Interface
|
| 338 |
-
# ---------------------------------------------------------
|
| 339 |
-
with gr.Blocks(title="SRT Master Tool") as demo:
|
| 340 |
-
gr.Markdown("# 🎬 Auto Subtitle & Translator")
|
| 341 |
-
|
| 342 |
-
with gr.Tabs():
|
| 343 |
-
with gr.TabItem("Step 1: Video to SRT"):
|
| 344 |
-
gr.Markdown("### Convert Video to English Subtitles")
|
| 345 |
-
with gr.Row():
|
| 346 |
-
video_input = gr.Video(label="Upload Video")
|
| 347 |
-
srt_output_gen = gr.File(label="Generated SRT")
|
| 348 |
-
btn1 = gr.Button("Generate SRT", variant="primary")
|
| 349 |
-
btn1.click(video_to_srt, inputs=video_input, outputs=srt_output_gen)
|
| 350 |
-
|
| 351 |
-
with gr.TabItem("Step 2: Translate SRT"):
|
| 352 |
-
gr.Markdown("### Translate Subtitles to Arabic")
|
| 353 |
-
with gr.Row():
|
| 354 |
-
srt_input = gr.File(label="Upload SRT")
|
| 355 |
-
with gr.Column():
|
| 356 |
-
src_l = gr.Dropdown(["eng_Latn", "fra_Latn"], label="From", value="eng_Latn")
|
| 357 |
-
tgt_l = gr.Dropdown(["arb_Arab", "arz_Arab"], label="To", value="arb_Arab")
|
| 358 |
-
srt_output_trans = gr.File(label="Translated SRT")
|
| 359 |
-
btn2 = gr.Button("Translate", variant="primary")
|
| 360 |
-
btn2.click(process_translation, inputs=[srt_input, src_l, tgt_l], outputs=srt_output_trans)
|
| 361 |
-
|
| 362 |
-
if __name__ == "__main__":
|
| 363 |
-
demo.launch()
|
|
|
|
| 70 |
vtt_content = "WEBVTT\n\n"
|
| 71 |
|
| 72 |
# Replace comma timestamps (00:00:01,000) with dot (00:00:01.000)
|
|
|
|
| 73 |
vtt_content += re.sub(r'(\d{2}:\d{2}:\d{2}),(\d{3})', r'\1.\2', content)
|
| 74 |
|
| 75 |
with open(vtt_path, 'w', encoding='utf-8') as f:
|
|
|
|
| 112 |
else:
|
| 113 |
start_time, end_time = 0.0, None
|
| 114 |
|
| 115 |
+
if end_time is None:
|
| 116 |
+
end_time = total_video_duration
|
| 117 |
|
| 118 |
lines = split_text_into_lines(text, max_chars=80)
|
| 119 |
duration = end_time - start_time
|
|
|
|
| 149 |
return results
|
| 150 |
|
| 151 |
def process_translation(filepath, src_lang_code, tgt_lang_code):
|
| 152 |
+
if filepath is None: return None
|
| 153 |
try:
|
| 154 |
with open(filepath, 'r', encoding='utf-8') as f:
|
| 155 |
subtitles = list(srt.parse(f.read()))
|
| 156 |
except Exception as e:
|
| 157 |
+
return f"Error: {str(e)}"
|
| 158 |
|
| 159 |
texts = [sub.content for sub in subtitles]
|
| 160 |
translated = batch_translate(texts, src_lang_code, tgt_lang_code)
|
|
|
|
| 199 |
# 4. Create Preview (HTML + VTT)
|
| 200 |
vtt_path = srt_to_vtt(srt_path)
|
| 201 |
|
| 202 |
+
# Create the HTML player
|
| 203 |
html_preview = f"""
|
| 204 |
<h3>Video Preview</h3>
|
| 205 |
<video controls width="100%" height="400px" style="background:black">
|
|
|
|
| 226 |
video_input = gr.Video(label="Upload Video", sources=["upload"])
|
| 227 |
|
| 228 |
with gr.Column():
|
|
|
|
| 229 |
preview_output = gr.HTML(label="Preview Player")
|
|
|
|
| 230 |
srt_output_gen = gr.File(label="Download Generated SRT")
|
| 231 |
|
| 232 |
btn1 = gr.Button("Generate SRT & Preview", variant="primary")
|
|
|
|
| 246 |
btn2.click(process_translation, inputs=[srt_input, src_l, tgt_l], outputs=srt_output_trans)
|
| 247 |
|
| 248 |
if __name__ == "__main__":
|
| 249 |
+
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|