Spaces:

isaac-talb
/

srt-generator

Sleeping

App Files Files Community

isaac-talb committed on Sep 10, 2025

Commit

3df92ba

1 Parent(s): 983432f

Add application file

Browse files

Files changed (4) hide show

app.py +69 -0
generate.py +51 -0
requirements.txt +3 -0
worker.py +30 -0

app.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import os
+import gradio as gr
+import whisper
+import tempfile
+# Cache models so they don’t reload each time
+loaded_models = {}
+def load_model(model_name):
+    if model_name not in loaded_models:
+        loaded_models[model_name] = whisper.load_model(model_name)
+    return loaded_models[model_name]
+def transcribe(video, model_name, language):
+    # Save uploaded video to temp file
+    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+    temp_file.write(video.read())
+    temp_file.close()
+    # Load selected model
+    model = load_model(model_name)
+    # Transcribe with Whisper
+    result = model.transcribe(temp_file.name, language=language, fp16=False)
+    # Generate SRT text
+    srt_text = []
+    for i, segment in enumerate(result["segments"], start=1):
+        start = segment["start"]
+        end = segment["end"]
+        text = segment["text"]
+        srt_text.append(f"{i}")
+        srt_text.append(f"{format_timestamp(start)} --> {format_timestamp(end)}")
+        srt_text.append(text.strip())
+        srt_text.append("")
+    srt_output = "\n".join(srt_text)
+    # Save as .srt file
+    srt_file_path = tempfile.NamedTemporaryFile(delete=False, suffix=".srt").name
+    with open(srt_file_path, "w", encoding="utf-8") as f:
+        f.write(srt_output)
+    # Delete uploaded video to save space
+    os.remove(temp_file.name)
+    return srt_file_path
+def format_timestamp(seconds: float) -> str:
+    """Convert seconds to SRT timestamp format."""
+    millisec = int((seconds - int(seconds)) * 1000)
+    return f"{int(seconds // 3600):02}:{int((seconds % 3600) // 60):02}:{int(seconds % 60):02},{millisec:03}"
+# Gradio interface
+demo = gr.Interface(
+    fn=transcribe,
+    inputs=[
+        gr.File(type="file", label="Upload Video"),
+        gr.Dropdown(choices=["tiny", "base", "small"], value="tiny", label="Choose Whisper Model"),
+        gr.Dropdown(choices=["en", "my", "fr", "es", "de", "ja", "zh"], value="en", label="Language Code (ISO)")
+    ],
+    outputs=gr.File(label="Download SRT File"),
+    title="Video → SRT Generator",
+    description="Upload a video, select Whisper model and language, then download auto-generated subtitles. Video is deleted after processing."
+)
+if __name__ == "__main__":
+    demo.launch()

generate.py ADDED Viewed

	@@ -0,0 +1,51 @@

+from moviepy.editor import VideoFileClip
+import whisper
+import torch
+# Path to your video
+video_path = "C:/Users/path/Downloads/movie.mp4"
+clip = VideoFileClip(video_path)
+duration = clip.duration
+chunk_length = 60  # 1 minute chunks
+# Load whisper model
+model = whisper.load_model("tiny.en")
+model = model.to(torch.device("cpu"))  # force CPU
+# Store all segments here
+all_segments = []
+for i, start in enumerate(range(0, int(duration), chunk_length)):
+    end = min(start + chunk_length, duration)
+    print(f"Processing chunk {i+1}: {start}-{end} seconds")
+    # Create subclip
+    subclip = clip.subclip(start, end)
+    subclip_path = f"chunk_{i}.mp4"
+    subclip.write_videofile(subclip_path, audio_codec="aac", verbose=False, logger=None)
+    # Transcribe subclip
+    result = model.transcribe(subclip_path, language="English")
+    # Add segments to main list
+    all_segments.extend(result["segments"])
+# Save all segments to a single SRT file
+with open("output.srt", "w", encoding="utf-8") as f:
+    for i, segment in enumerate(all_segments, start=1):
+        start = segment["start"]
+        end = segment["end"]
+        text = segment["text"].strip()
+        # Format SRT timestamp
+        def srt_time(seconds):
+            ms = int((seconds % 1) * 1000)
+            h = int(seconds // 3600)
+            m = int((seconds % 3600) // 60)
+            s = int(seconds % 60)
+            return f"{h:02}:{m:02}:{s:02},{ms:03}"
+        f.write(f"{i}\n{srt_time(start)} --> {srt_time(end)}\n{text}\n\n")
+print("✅ SRT file generated: output.srt")

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+openai-whisper
+torch

worker.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import whisper
+def transcribe_video(video_path, model_size="tiny"):
+    """
+    Transcribe video into SRT file using OpenAI Whisper.
+    """
+    # Load model (tiny, base, or small)
+    model = whisper.load_model(model_size)
+    # Run transcription
+    result = model.transcribe(video_path, language="English")
+    # Save as .srt
+    srt_path = video_path + ".srt"
+    with open(srt_path, "w", encoding="utf-8") as f:
+        for i, segment in enumerate(result["segments"], start=1):
+            start = segment["start"]
+            end = segment["end"]
+            text = segment["text"].strip()
+            def srt_time(seconds):
+                ms = int((seconds % 1) * 1000)
+                h = int(seconds // 3600)
+                m = int((seconds % 3600) // 60)
+                s = int(seconds % 60)
+                return f"{h:02}:{m:02}:{s:02},{ms:03}"
+            f.write(f"{i}\n{srt_time(start)} --> {srt_time(end)}\n{text}\n\n")
+    return srt_path