umerfarooq29 committed on
Commit
3a12b87
·
verified ·
1 Parent(s): 77faa65

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -107
app.py CHANGED
@@ -1,61 +1,36 @@
1
- # Hugging Face Space - Video subtitle editor + translator (Gradio app)
2
- # Single-file Gradio app. Put this file in a Space (repository) and add requirements.txt
3
- # Requirements (example):
4
- # gradio
5
- # faster-whisper
6
- # ffmpeg-python
7
- # googletrans==4.0.0-rc1
8
- # torch
9
- # tqdm
10
- # Note: ffmpeg must be available in the environment (apt-get install ffmpeg on linux or include static ffmpeg binary).
11
-
12
  import os
13
  import subprocess
14
  import tempfile
15
  from pathlib import Path
16
  from typing import Optional
17
 
18
- import gradio as gr
19
  from faster_whisper import WhisperModel
20
  from googletrans import Translator
21
 
22
- # Choose model size you want: tiny, base, small, medium, large-v2. large models need GPU & more RAM.
23
  MODEL_NAME = os.environ.get("WHISPER_MODEL", "large-v2")
24
  DEVICE = "cuda" if (os.environ.get("CUDA_VISIBLE_DEVICES") or False) else "cpu"
25
 
26
- # Create model once (cached by global variable)
27
  _model = None
28
-
29
  def get_model():
30
  global _model
31
  if _model is None:
32
- # compute_type selection can be tuned based on device. On CPU, int8 helps memory.
33
  compute_type = "float16" if DEVICE.startswith("cuda") else "int8"
34
  _model = WhisperModel(MODEL_NAME, device=DEVICE, compute_type=compute_type)
35
  return _model
36
 
37
 
38
- def extract_audio(input_video_path: str, output_audio_path: str) -> None:
39
- """Extract audio to WAV using ffmpeg (stereo, 16k or 16kHz recommended)."""
40
  cmd = [
41
- "ffmpeg",
42
- "-y",
43
- "-i",
44
- input_video_path,
45
- "-vn",
46
- "-acodec",
47
- "pcm_s16le",
48
- "-ar",
49
- "16000",
50
- "-ac",
51
- "1",
52
  output_audio_path,
53
  ]
54
  subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
55
 
56
 
57
  def segments_to_srt(segments):
58
- """Convert whisper segments to SRT text."""
59
  def fmt_time(s):
60
  h = int(s // 3600)
61
  m = int((s % 3600) // 60)
@@ -72,71 +47,44 @@ def segments_to_srt(segments):
72
 
73
 
74
  def transcribe_and_translate(video_file: str, target_lang: Optional[str], burn_subs: bool):
75
- """
76
- 1) Extract audio
77
- 2) Use faster-whisper to transcribe (get timestamps)
78
- 3) Optionally translate each segment to target language using googletrans
79
- 4) Generate .srt file
80
- 5) If burn_subs True, use ffmpeg to burn subtitles into new video
81
- Returns paths to output files: srt_path, processed_video_path (or None)
82
- """
83
  model = get_model()
84
-
85
  tempdir = Path(tempfile.mkdtemp())
86
  input_path = Path(video_file)
87
  audio_path = tempdir / "audio.wav"
88
  srt_path = tempdir / f"subtitles_{input_path.stem}.srt"
89
  processed_video_path = None
90
 
91
- # 1) extract audio
92
  extract_audio(str(input_path), str(audio_path))
93
 
94
- # 2) transcribe with timestamps
95
- # faster-whisper returns segments as dicts with start,end,text
96
- task = "translate" if target_lang and target_lang.lower() == "english" else "transcribe"
97
- # We'll transcribe first (original text) then translate segments if requested to any language.
98
  segments_all = []
99
  transcribe_options = {"beam_size": 5, "word_timestamps": False}
100
  for segment in model.transcribe(str(audio_path), beam_size=5, vad_filter=True, **transcribe_options):
101
- # segment is a dict-like with start, end, text
102
  segments_all.append({"start": segment.start, "end": segment.end, "text": segment.text})
103
 
104
- # 3) translate segments if requested and not English-only special case
105
  if target_lang and target_lang.lower() not in ["", "none"]:
106
  translator = Translator()
107
  translated_segments = []
108
  for seg in segments_all:
109
  src_text = seg["text"].strip()
110
- # Use googletrans to translate to target lang code (like 'ur' for Urdu)
111
  try:
112
  res = translator.translate(src_text, dest=target_lang)
113
  translated_text = res.text
114
  except Exception:
115
- # fallback to original if translator fails
116
  translated_text = src_text
117
  translated_segments.append({"start": seg["start"], "end": seg["end"], "text": translated_text})
118
  segments_used = translated_segments
119
  else:
120
  segments_used = segments_all
121
 
122
- # 4) write srt
123
  srt_text = segments_to_srt(segments_used)
124
  srt_path.write_text(srt_text, encoding="utf-8")
125
 
126
- # 5) optional burn subtitles into video
127
  if burn_subs:
128
  out_video = tempdir / f"burned_{input_path.name}"
129
- # ffmpeg can burn subtitles using subtitles filter, but it needs a proper encoding and path
130
  cmd = [
131
- "ffmpeg",
132
- "-y",
133
- "-i",
134
- str(input_path),
135
- "-vf",
136
- f"subtitles={str(srt_path)}:force_style='FontName=Arial,FontSize=24'",
137
- "-c:a",
138
- "copy",
139
- str(out_video),
140
  ]
141
  subprocess.run(cmd, check=True)
142
  processed_video_path = str(out_video)
@@ -144,50 +92,34 @@ def transcribe_and_translate(video_file: str, target_lang: Optional[str], burn_s
144
  return str(srt_path), processed_video_path
145
 
146
 
147
- # ------- Gradio UI -------
148
-
149
- LANG_OPTIONS = [
150
- ("No translation (keep original)", "none"),
151
- ("English", "en"),
152
- ("Urdu", "ur"),
153
- ("Hindi", "hi"),
154
- ("Spanish", "es"),
155
- ("French", "fr"),
156
- ("German", "de"),
157
- ]
158
-
159
- with gr.Blocks() as demo:
160
- gr.Markdown("# Video subtitle editor + translator (Gradio)\nUpload a video, transcribe, optionally translate and download SRT or burn subtitles into video.")
161
-
162
- with gr.Row():
163
- video_in = gr.File(label="Upload video (mp4, mov, mkv)")
164
- lang = gr.Dropdown(label="Translate to (choose language)", choices=[opt[0] for opt in LANG_OPTIONS], value=LANG_OPTIONS[0][0])
165
-
166
- burn = gr.Checkbox(label="Burn subtitles into video (hardcoded) - may be slow", value=False)
167
- out_srt = gr.File(label="Generated SRT")
168
- out_video = gr.File(label="Processed video (if burned)")
169
- status = gr.Textbox(label="Status / logs", interactive=False)
170
-
171
- def run_pipeline(uploaded_file, chosen_lang_label, burn_subs_flag):
172
- if uploaded_file is None:
173
- return None, None, "Please upload a video file."
174
-
175
- # map chosen label back to code
176
- label_to_code = {k: v for k, v in LANG_OPTIONS}
177
- lang_code = label_to_code.get(chosen_lang_label, "none")
178
-
179
- status_msg = "Starting processing..."
180
- try:
181
- srt_path, processed_video = transcribe_and_translate(uploaded_file.name, lang_code, burn_subs_flag)
182
- status_msg = f"Done. SRT: {srt_path}"
183
- return srt_path, processed_video, status_msg
184
- except subprocess.CalledProcessError as e:
185
- return None, None, f"ffmpeg error: {e}"
186
- except Exception as e:
187
- return None, None, f"Error: {e}"
188
-
189
- btn = gr.Button("Run")
190
- btn.click(run_pipeline, inputs=[video_in, lang, burn], outputs=[out_srt, out_video, status])
191
-
192
- if __name__ == "__main__":
193
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import subprocess
3
  import tempfile
4
  from pathlib import Path
5
  from typing import Optional
6
 
7
+ import streamlit as st
8
  from faster_whisper import WhisperModel
9
  from googletrans import Translator
10
 
# ---------------- CONFIG ----------------
# Whisper checkpoint to load; override with the WHISPER_MODEL environment variable.
MODEL_NAME = os.environ.get("WHISPER_MODEL", "large-v2")

# GPU use is opt-in through CUDA_VISIBLE_DEVICES. An empty value or "-1" is the
# conventional way to hide every GPU, so both must select the CPU rather than
# being treated as "a GPU is available".
_visible_gpus = os.environ.get("CUDA_VISIBLE_DEVICES", "").strip()
DEVICE = "cuda" if _visible_gpus not in ("", "-1") else "cpu"
14
 
 
@st.cache_resource
def get_model():
    """Return the shared faster-whisper model, loading it on first use.

    Streamlit re-executes this script from the top on every widget
    interaction, which resets plain module-level globals. Caching through
    ``st.cache_resource`` keeps the loaded model alive across reruns, so the
    checkpoint is not reloaded each time the user presses a button.
    """
    # float16 is used on GPU; int8 reduces memory use on CPU.
    compute_type = "float16" if DEVICE.startswith("cuda") else "int8"
    return WhisperModel(MODEL_NAME, device=DEVICE, compute_type=compute_type)
22
 
23
 
def extract_audio(input_video_path: str, output_audio_path: str):
    """Write the audio track of a video to a mono, 16 kHz, 16-bit PCM WAV file.

    Args:
        input_video_path: Path of the video to read.
        output_audio_path: Path of the WAV file to create (overwritten if it
            already exists because ffmpeg is run with ``-y``).

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # -vn drops the video stream; the remaining flags select the codec,
    # sample rate and channel count of the output.
    audio_flags = ["-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1"]
    ffmpeg_call = ["ffmpeg", "-y", "-i", input_video_path, *audio_flags, output_audio_path]
    # ffmpeg's output is captured, not printed to the console.
    subprocess.run(ffmpeg_call, check=True, capture_output=True)
31
 
32
 
33
  def segments_to_srt(segments):
 
34
  def fmt_time(s):
35
  h = int(s // 3600)
36
  m = int((s % 3600) // 60)
 
47
 
48
 
def transcribe_and_translate(video_file: str, target_lang: Optional[str], burn_subs: bool):
    """Transcribe a video, optionally translate the subtitles, optionally burn them in.

    Args:
        video_file: Path to the input video.
        target_lang: Destination language code handed to googletrans
            (e.g. ``"ur"``). ``None``, ``""`` or ``"none"`` (any case) keeps
            the transcribed text unchanged.
        burn_subs: When true, also render a copy of the video with the
            subtitles hard-coded by ffmpeg.

    Returns:
        ``(srt_path, processed_video_path)`` as strings; the second item is
        ``None`` when ``burn_subs`` is false. Both files are created in a new
        temporary directory that is not removed here, because the caller still
        needs to read them.

    Raises:
        subprocess.CalledProcessError: If an ffmpeg invocation fails.
    """
    model = get_model()
    tempdir = Path(tempfile.mkdtemp())
    input_path = Path(video_file)
    audio_path = tempdir / "audio.wav"
    srt_path = tempdir / f"subtitles_{input_path.stem}.srt"
    processed_video_path = None

    extract_audio(str(input_path), str(audio_path))

    # faster-whisper returns a (segments, info) pair; only the segments are
    # needed. Each option is passed exactly once: supplying beam_size both
    # directly and through an unpacked dict raises TypeError.
    segments, _info = model.transcribe(
        str(audio_path),
        beam_size=5,
        vad_filter=True,
        word_timestamps=False,
    )
    segments_all = [
        {"start": segment.start, "end": segment.end, "text": segment.text}
        for segment in segments
    ]

    if target_lang and target_lang.lower() not in ["", "none"]:
        translator = Translator()
        segments_used = []
        for seg in segments_all:
            src_text = seg["text"].strip()
            try:
                res = translator.translate(src_text, dest=target_lang)
                translated_text = res.text
            except Exception:
                # Deliberate best effort: a failed translation keeps the
                # original text so the subtitle timing stays complete.
                translated_text = src_text
            segments_used.append(
                {"start": seg["start"], "end": seg["end"], "text": translated_text}
            )
    else:
        segments_used = segments_all

    srt_text = segments_to_srt(segments_used)
    srt_path.write_text(srt_text, encoding="utf-8")

    if burn_subs:
        out_video = tempdir / f"burned_{input_path.name}"
        # The subtitles filter argument is parsed as part of a filtergraph, so
        # quotes, colons, commas or brackets coming from the uploaded file name
        # would corrupt it. Burn from a fixed, safe file name instead and run
        # ffmpeg inside tempdir so that name can be given without any path.
        burn_srt = tempdir / "burn_in.srt"
        burn_srt.write_text(srt_text, encoding="utf-8")
        cmd = [
            "ffmpeg", "-y", "-i", str(input_path.resolve()),
            "-vf", f"subtitles={burn_srt.name}:force_style='FontName=Arial,FontSize=24'",
            "-c:a", "copy", str(out_video),
        ]
        subprocess.run(cmd, check=True, cwd=str(tempdir))
        processed_video_path = str(out_video)

    return str(srt_path), processed_video_path
93
 
94
 
# ---------------- UI (Streamlit) ----------------
st.set_page_config(page_title="Video Subtitle Editor + Translator", layout="wide")
st.title("🎬 Video Subtitle Editor + Translator (Streamlit)")

# Display label -> language code passed to transcribe_and_translate
# ("none" disables translation).
_LANGUAGE_CODES = {
    "None": "none",
    "English (en)": "en",
    "Urdu (ur)": "ur",
    "Hindi (hi)": "hi",
    "Spanish (es)": "es",
    "French (fr)": "fr",
    "German (de)": "de",
}
# session_state key holding (srt_path, processed_video_path) of the last
# successful run.
_RESULTS_KEY = "subtitle_results"

video_file = st.file_uploader("Upload your video (mp4, mov, mkv)", type=["mp4", "mov", "mkv"])
lang_choice = st.selectbox("Translate subtitles to:", list(_LANGUAGE_CODES))
burn_option = st.checkbox("Burn subtitles into video (hardcoded) - slow but permanent", value=False)

if st.button("Run"):
    # Forget earlier results so a failed run never offers stale downloads.
    if _RESULTS_KEY in st.session_state:
        del st.session_state[_RESULTS_KEY]

    if video_file is None:
        st.warning("Please upload a video file first.")
    else:
        with st.spinner("Processing... Please wait ⏳"):
            # Keep only the base name so the upload cannot escape the temp dir.
            temp_input = Path(tempfile.mkdtemp()) / Path(video_file.name).name
            # getvalue() returns the whole upload regardless of how far the
            # underlying stream has already been read.
            temp_input.write_bytes(video_file.getvalue())

            lang_code = _LANGUAGE_CODES[lang_choice]
            try:
                srt_path, processed_video = transcribe_and_translate(str(temp_input), lang_code, burn_option)
            except subprocess.CalledProcessError as e:
                st.error(f"ffmpeg error: {e}")
            except Exception as e:
                st.error(f"Error: {e}")
            else:
                st.session_state[_RESULTS_KEY] = (srt_path, processed_video)
                st.success("✅ Done! Subtitles generated successfully.")

# Rendered outside the "Run" branch: pressing a download button reruns the
# script with st.button("Run") returning False, and buttons created only inside
# that branch would disappear after the first download.
if _RESULTS_KEY in st.session_state:
    srt_path, processed_video = st.session_state[_RESULTS_KEY]
    with open(srt_path, "rb") as srt_file:
        st.download_button("📥 Download SRT", srt_file, file_name=os.path.basename(srt_path))
    if processed_video:
        with open(processed_video, "rb") as video_out:
            st.download_button("📽️ Download Video with Subtitles", video_out, file_name=os.path.basename(processed_video))