isaac-talb committed on
Commit
3df92ba
·
1 Parent(s): 983432f

Add application file

Browse files
Files changed (4) hide show
  1. app.py +69 -0
  2. generate.py +51 -0
  3. requirements.txt +3 -0
  4. worker.py +30 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import whisper
4
+ import tempfile
5
+
6
# Whisper models already loaded by this process, keyed by model name, so
# repeated requests for the same model reuse one loaded instance.
loaded_models = {}


def load_model(model_name):
    """Return the Whisper model named ``model_name``, loading it on first use.

    The first call for a given name loads the model through
    ``whisper.load_model`` and stores it in ``loaded_models``; every later
    call with the same name returns that stored object.
    """
    cached = loaded_models.get(model_name)
    if cached is None:
        cached = whisper.load_model(model_name)
        loaded_models[model_name] = cached
    return cached
13
+
14
def transcribe(video, model_name, language):
    """Transcribe an uploaded video with Whisper and write the result as SRT.

    Args:
        video: The upload. May be a filesystem path (``str`` or
            ``os.PathLike``), raw ``bytes``, or a file-like object. For a
            file-like object whose ``name`` is an existing file, that file is
            transcribed directly; otherwise the object's ``read()`` result is
            copied to a temporary ``.mp4`` file first.
        model_name: Whisper model name, forwarded to ``load_model``.
        language: Language forwarded to the model's ``transcribe`` call.

    Returns:
        Path of a newly created ``.srt`` temporary file. The file is left on
        disk for the caller to serve or delete.
    """
    source_path, scratch_copy = _resolve_video_path(video)
    try:
        model = load_model(model_name)
        # fp16=False asks Whisper to decode in 32-bit floats
        # (presumably because this app is expected to run on CPU).
        result = model.transcribe(source_path, language=language, fp16=False)
    finally:
        # Remove the temporary copy even when transcription fails, so failed
        # requests do not pile up video files on disk. Files this function
        # did not create are never deleted.
        if scratch_copy is not None:
            os.remove(scratch_copy)

    # Each SRT cue is: running index, time range, text, blank separator line.
    srt_lines = []
    for index, segment in enumerate(result["segments"], start=1):
        cue_start = format_timestamp(segment["start"])
        cue_end = format_timestamp(segment["end"])
        srt_lines.append(f"{index}")
        srt_lines.append(f"{cue_start} --> {cue_end}")
        srt_lines.append(segment["text"].strip())
        srt_lines.append("")

    # Write through the temp-file handle itself so it is closed
    # deterministically instead of being left open until garbage collection.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".srt", delete=False, encoding="utf-8"
    ) as srt_file:
        srt_file.write("\n".join(srt_lines))
    return srt_file.name


def _resolve_video_path(video):
    """Return ``(path, scratch_copy)`` for the uploaded ``video``.

    ``path`` is what should be handed to Whisper. ``scratch_copy`` is the
    path of a temporary file created here that the caller must delete, or
    ``None`` when the video already exists on disk.
    """
    if isinstance(video, (str, os.PathLike)):
        return os.fspath(video), None

    # NOTE(review): some Gradio versions appear to pass a handle whose
    # ``name`` points at the uploaded file while the handle itself yields no
    # data, so an on-disk ``name`` is preferred over ``read()`` - confirm
    # against the Gradio version actually deployed.
    named_path = getattr(video, "name", None)
    if isinstance(named_path, str) and os.path.isfile(named_path):
        return named_path, None

    payload = video if isinstance(video, (bytes, bytearray)) else video.read()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as scratch:
        scratch.write(payload)
    return scratch.name, scratch.name
49
+
50
def format_timestamp(seconds: float) -> str:
    """Convert a time offset in seconds to an SRT timestamp (``HH:MM:SS,mmm``).

    The offset is rounded to the nearest whole millisecond before it is split
    into fields. Working on an integer millisecond count avoids the
    floating-point truncation that would otherwise render e.g. ``1.001`` as
    ``00:00:01,000``. Negative offsets are clamped to zero because SRT has no
    representation for them.

    Args:
        seconds: Offset from the start of the media, in seconds.

    Returns:
        The timestamp string; the hour field grows beyond two digits for
        offsets of 100 hours or more.
    """
    total_ms = max(0, round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
54
+
55
def _transcribe_upload(video_bytes, model_name, language):
    """Adapt Gradio's raw-bytes upload to ``transcribe``'s file-like input."""
    import io  # local import: only this adapter needs it

    return transcribe(io.BytesIO(video_bytes), model_name, language)


# Gradio interface
demo = gr.Interface(
    fn=_transcribe_upload,
    inputs=[
        # type="binary" delivers the upload as bytes. The previous
        # type="file" is rejected by Gradio 4 and later, and the Gradio
        # version is not pinned in requirements.txt.
        gr.File(type="binary", label="Upload Video"),
        gr.Dropdown(
            choices=["tiny", "base", "small"],
            value="tiny",
            label="Choose Whisper Model",
        ),
        gr.Dropdown(
            choices=["en", "my", "fr", "es", "de", "ja", "zh"],
            value="en",
            label="Language Code (ISO)",
        ),
    ],
    outputs=gr.File(label="Download SRT File"),
    title="Video → SRT Generator",
    description="Upload a video, select Whisper model and language, then download auto-generated subtitles. Video is deleted after processing.",
)

if __name__ == "__main__":
    demo.launch()
generate.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from moviepy.editor import VideoFileClip
2
+ import whisper
3
+ import torch
4
+
5
import os  # local to this script section: used to delete chunk files

# Path to your video
video_path = "C:/Users/path/Downloads/movie.mp4"
chunk_length = 60  # 1 minute chunks


def srt_time(seconds):
    """Format an offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    Rounds to whole milliseconds first so float noise cannot drop a
    millisecond (e.g. 1.001 s must not become ``00:00:01,000``).
    """
    total_ms = max(0, round(seconds * 1000))
    h, rest = divmod(total_ms, 3_600_000)
    m, rest = divmod(rest, 60_000)
    s, ms = divmod(rest, 1000)
    return f"{h:02}:{m:02}:{s:02},{ms:03}"


clip = VideoFileClip(video_path)
duration = clip.duration

# Load whisper model
model = whisper.load_model("tiny.en")
model = model.to(torch.device("cpu"))  # force CPU

# Store all segments here, with times relative to the full video
all_segments = []

i = 0
start = 0
try:
    # Loop on the real (possibly fractional) duration so the last partial
    # second of the video is still transcribed.
    while start < duration:
        end = min(start + chunk_length, duration)
        print(f"Processing chunk {i+1}: {start}-{end} seconds")

        subclip_path = f"chunk_{i}.mp4"
        try:
            # Create subclip
            subclip = clip.subclip(start, end)
            subclip.write_videofile(subclip_path, audio_codec="aac", verbose=False, logger=None)

            # Transcribe subclip
            result = model.transcribe(subclip_path, language="English")
        finally:
            # The chunk file is only an intermediate; never leave it behind.
            if os.path.exists(subclip_path):
                os.remove(subclip_path)

        # Whisper times are relative to the chunk it was given, so shift them
        # by the chunk's start to place them on the full video's timeline.
        for segment in result["segments"]:
            all_segments.append(
                {
                    **segment,
                    "start": segment["start"] + start,
                    "end": segment["end"] + start,
                }
            )

        i += 1
        start += chunk_length
finally:
    # Release the reader held by the source clip.
    clip.close()

# Save all segments to a single SRT file
with open("output.srt", "w", encoding="utf-8") as f:
    for i, segment in enumerate(all_segments, start=1):
        start = segment["start"]
        end = segment["end"]
        text = segment["text"].strip()

        f.write(f"{i}\n{srt_time(start)} --> {srt_time(end)}\n{text}\n\n")

print("✅ SRT file generated: output.srt")
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ openai-whisper
3
+ torch
worker.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+
3
def transcribe_video(video_path, model_size="tiny", language="English"):
    """Transcribe a video into an SRT file using OpenAI Whisper.

    Args:
        video_path: Path of the video to transcribe, as a string.
        model_size: Whisper model name passed to ``whisper.load_model``
            (for example "tiny", "base", or "small").
        language: Language passed to the model's ``transcribe`` call.
            Defaults to "English", the value that used to be hard-coded.

    Returns:
        Path of the written subtitle file: ``video_path`` with ``.srt``
        appended.
    """
    # Load model (tiny, base, or small)
    model = whisper.load_model(model_size)

    # Run transcription
    result = model.transcribe(video_path, language=language)

    # Save as .srt: one numbered cue per segment, separated by a blank line
    srt_path = video_path + ".srt"
    with open(srt_path, "w", encoding="utf-8") as f:
        for i, segment in enumerate(result["segments"], start=1):
            start = _srt_time(segment["start"])
            end = _srt_time(segment["end"])
            text = segment["text"].strip()
            f.write(f"{i}\n{start} --> {end}\n{text}\n\n")

    return srt_path


def _srt_time(seconds):
    """Format an offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    Rounds to whole milliseconds before splitting into fields so float noise
    cannot drop a millisecond; negative offsets are clamped to zero.
    """
    total_ms = max(0, round(seconds * 1000))
    h, rest = divmod(total_ms, 3_600_000)
    m, rest = divmod(rest, 60_000)
    s, ms = divmod(rest, 1000)
    return f"{h:02}:{m:02}:{s:02},{ms:03}"