umerfarooq29 commited on
Commit
6159702
·
verified ·
1 Parent(s): 7389eb8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +193 -0
app.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Space - Video subtitle editor + translator (Gradio app)
2
+ # Single-file Gradio app. Put this file in a Space (repository) and add requirements.txt
3
+ # Requirements (example):
4
+ # gradio
5
+ # faster-whisper
6
+ # ffmpeg-python
7
+ # googletrans==4.0.0-rc1
8
+ # torch
9
+ # tqdm
10
+ # Note: ffmpeg must be available in the environment (apt-get install ffmpeg on linux or include static ffmpeg binary).
11
+
12
+ import os
13
+ import subprocess
14
+ import tempfile
15
+ from pathlib import Path
16
+ from typing import Optional
17
+
18
+ import gradio as gr
19
+ from faster_whisper import WhisperModel
20
+ from googletrans import Translator
21
+
22
+ # Choose model size you want: tiny, base, small, medium, large-v2. large models need GPU & more RAM.
23
+ MODEL_NAME = os.environ.get("WHISPER_MODEL", "large-v2")
24
+ DEVICE = "cuda" if (os.environ.get("CUDA_VISIBLE_DEVICES") or False) else "cpu"
25
+
26
+ # Create model once (cached by global variable)
27
+ _model = None
28
+
29
def get_model():
    """Lazily build and cache the process-wide WhisperModel instance.

    The model is constructed on first call and reused afterwards. On CUDA we
    use float16; on CPU, int8 quantization keeps memory usage manageable.
    """
    global _model
    if _model is not None:
        return _model
    # Precision tuned per device: int8 helps memory on CPU, float16 on GPU.
    precision = "float16" if DEVICE.startswith("cuda") else "int8"
    _model = WhisperModel(MODEL_NAME, device=DEVICE, compute_type=precision)
    return _model
36
+
37
+
38
def extract_audio(input_video_path: str, output_audio_path: str) -> None:
    """Dump a video's audio track to a 16 kHz mono 16-bit PCM WAV via ffmpeg.

    Raises subprocess.CalledProcessError if ffmpeg exits non-zero.
    """
    command = [
        "ffmpeg", "-y",
        "-i", input_video_path,
        "-vn",                    # drop the video stream
        "-acodec", "pcm_s16le",   # uncompressed 16-bit PCM
        "-ar", "16000",           # 16 kHz sample rate (what Whisper expects)
        "-ac", "1",               # downmix to mono
        output_audio_path,
    ]
    # Capture ffmpeg's chatter; check=True surfaces failures as exceptions.
    subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
55
+
56
+
57
def segments_to_srt(segments):
    """Render whisper-style segments (dicts with start/end/text) as SRT text."""

    def _stamp(seconds):
        # SRT timestamps are HH:MM:SS,mmm -- comma as the decimal separator.
        hours, rem = divmod(seconds, 3600)
        minutes, secs = divmod(rem, 60)
        return f"{int(hours):02d}:{int(minutes):02d}:{secs:06.3f}".replace('.', ',')

    entries = [
        f"{idx}\n{_stamp(seg['start'])} --> {_stamp(seg['end'])}\n{seg['text'].strip()}\n"
        for idx, seg in enumerate(segments, start=1)
    ]
    # Entries already end with "\n"; joining with "\n" yields the blank
    # separator line SRT requires between cues.
    return "\n".join(entries)
72
+
73
+
74
def transcribe_and_translate(video_file: str, target_lang: Optional[str], burn_subs: bool):
    """Transcribe a video's audio, optionally translate it, and emit subtitles.

    Steps:
      1) Extract the audio track with ffmpeg.
      2) Transcribe with faster-whisper (per-segment timestamps).
      3) Optionally translate each segment with googletrans.
      4) Write an .srt file.
      5) If burn_subs, hard-code the subtitles into a new video with ffmpeg.

    Args:
        video_file: path to the input video.
        target_lang: googletrans language code (e.g. "ur"); None/""/"none"
            keeps the original language.
        burn_subs: when True, also render a video with burned-in subtitles.

    Returns:
        (srt_path, processed_video_path); processed_video_path is None unless
        burn_subs was requested.

    Raises:
        subprocess.CalledProcessError: if any ffmpeg invocation fails.
    """
    model = get_model()

    tempdir = Path(tempfile.mkdtemp())
    input_path = Path(video_file)
    audio_path = tempdir / "audio.wav"
    srt_path = tempdir / f"subtitles_{input_path.stem}.srt"
    processed_video_path = None

    # 1) extract audio
    extract_audio(str(input_path), str(audio_path))

    # 2) transcribe with timestamps.
    # FIX: faster-whisper's transcribe() returns a (segments, info) tuple; the
    # original code iterated the tuple itself, which yields the segments
    # generator and the TranscriptionInfo object instead of actual segments.
    # It also passed beam_size twice (explicitly and via **transcribe_options),
    # which is a TypeError at call time.
    segments, _info = model.transcribe(
        str(audio_path), beam_size=5, vad_filter=True, word_timestamps=False
    )
    segments_all = [
        {"start": segment.start, "end": segment.end, "text": segment.text}
        for segment in segments
    ]

    # 3) translate segments if requested ("" / "none" mean "keep original")
    if target_lang and target_lang.lower() not in ["", "none"]:
        translator = Translator()
        translated_segments = []
        for seg in segments_all:
            src_text = seg["text"].strip()
            # Translate to the target language code (e.g. 'ur' for Urdu).
            try:
                res = translator.translate(src_text, dest=target_lang)
                translated_text = res.text
            except Exception:
                # Best-effort: googletrans hits an unofficial endpoint and can
                # be flaky, so fall back to the original text on failure.
                translated_text = src_text
            translated_segments.append(
                {"start": seg["start"], "end": seg["end"], "text": translated_text}
            )
        segments_used = translated_segments
    else:
        segments_used = segments_all

    # 4) write srt
    srt_path.write_text(segments_to_srt(segments_used), encoding="utf-8")

    # 5) optional burn subtitles into video
    if burn_subs:
        out_video = tempdir / f"burned_{input_path.name}"
        # ffmpeg burns subtitles via the `subtitles` filter; note the path is
        # interpolated into the filter string, so exotic paths may need escaping.
        cmd = [
            "ffmpeg",
            "-y",
            "-i",
            str(input_path),
            "-vf",
            f"subtitles={str(srt_path)}:force_style='FontName=Arial,FontSize=24'",
            "-c:a",
            "copy",
            str(out_video),
        ]
        subprocess.run(cmd, check=True)
        processed_video_path = str(out_video)

    return str(srt_path), processed_video_path
145
+
146
+
147
# ------- Gradio UI -------

# (label shown in the dropdown, googletrans language code) pairs.
# "none" is a sentinel meaning "keep the original language".
LANG_OPTIONS = [
    ("No translation (keep original)", "none"),
    ("English", "en"),
    ("Urdu", "ur"),
    ("Hindi", "hi"),
    ("Spanish", "es"),
    ("French", "fr"),
    ("German", "de"),
]

with gr.Blocks() as demo:
    gr.Markdown("# Video subtitle editor + translator (Gradio)\nUpload a video, transcribe, optionally translate and download SRT or burn subtitles into video.")

    with gr.Row():
        video_in = gr.File(label="Upload video (mp4, mov, mkv)")
        # The dropdown shows only the human-readable labels; run_pipeline
        # maps the chosen label back to its language code.
        lang = gr.Dropdown(label="Translate to (choose language)", choices=[opt[0] for opt in LANG_OPTIONS], value=LANG_OPTIONS[0][0])

    burn = gr.Checkbox(label="Burn subtitles into video (hardcoded) - may be slow", value=False)
    out_srt = gr.File(label="Generated SRT")
    out_video = gr.File(label="Processed video (if burned)")
    status = gr.Textbox(label="Status / logs", interactive=False)

    def run_pipeline(uploaded_file, chosen_lang_label, burn_subs_flag):
        # Returns (srt_file, video_file, status_message) matching the three
        # outputs wired up in btn.click below.
        if uploaded_file is None:
            return None, None, "Please upload a video file."

        # map chosen label back to code
        label_to_code = {k: v for k, v in LANG_OPTIONS}
        lang_code = label_to_code.get(chosen_lang_label, "none")

        status_msg = "Starting processing..."
        try:
            # NOTE(review): assumes gr.File yields an object with a .name path
            # (true for older gradio file objects; newer gradio versions may
            # return a plain str filepath) -- confirm against the pinned version.
            srt_path, processed_video = transcribe_and_translate(uploaded_file.name, lang_code, burn_subs_flag)
            status_msg = f"Done. SRT: {srt_path}"
            return srt_path, processed_video, status_msg
        except subprocess.CalledProcessError as e:
            # ffmpeg failures (extraction or burning) surface here.
            return None, None, f"ffmpeg error: {e}"
        except Exception as e:
            # Top-level UI boundary: report rather than crash the app.
            return None, None, f"Error: {e}"

    btn = gr.Button("Run")
    btn.click(run_pipeline, inputs=[video_in, lang, burn], outputs=[out_srt, out_video, status])

if __name__ == "__main__":
    demo.launch()