Spaces:

umerfarooq29
/

vedio-editor

Build error

App Files Files Community

umerfarooq29 committed on Oct 5, 2025

Commit

6159702

verified ·

1 Parent(s): 7389eb8

Create app.py

Browse files

Files changed (1) hide show

app.py +193 -0

app.py ADDED Viewed

	@@ -0,0 +1,193 @@

+# Hugging Face Space - Video subtitle editor + translator (Gradio app)
+# Single-file Gradio app. Put this file in a Space (repository) and add requirements.txt
+# Requirements (example):
+# gradio
+# faster-whisper
+# ffmpeg-python
+# googletrans==4.0.0-rc1
+# torch
+# tqdm
+# Note: ffmpeg must be available in the environment (apt-get install ffmpeg on Linux, list ffmpeg in packages.txt on a Hugging Face Space, or bundle a static ffmpeg binary).
+import os
+import subprocess
+import tempfile
+from pathlib import Path
+from typing import Optional
+import gradio as gr
+from faster_whisper import WhisperModel
+from googletrans import Translator
+# Choose model size you want: tiny, base, small, medium, large-v2. large models need GPU & more RAM.
+MODEL_NAME = os.environ.get("WHISPER_MODEL", "large-v2")
+DEVICE = "cuda" if (os.environ.get("CUDA_VISIBLE_DEVICES") or False) else "cpu"
+# Create model once (cached by global variable)
+_model = None
+def get_model():
+    global _model
+    if _model is None:
+        # compute_type selection can be tuned based on device. On CPU, int8 helps memory.
+        compute_type = "float16" if DEVICE.startswith("cuda") else "int8"
+        _model = WhisperModel(MODEL_NAME, device=DEVICE, compute_type=compute_type)
+    return _model
+def extract_audio(input_video_path: str, output_audio_path: str) -> None:
+    """Extract audio to WAV using ffmpeg (stereo, 16k or 16kHz recommended)."""
+    cmd = [
+        "ffmpeg",
+        "-y",
+        "-i",
+        input_video_path,
+        "-vn",
+        "-acodec",
+        "pcm_s16le",
+        "-ar",
+        "16000",
+        "-ac",
+        "1",
+        output_audio_path,
+    ]
+    subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+def segments_to_srt(segments):
+    """Convert whisper segments to SRT text."""
+    def fmt_time(s):
+        h = int(s // 3600)
+        m = int((s % 3600) // 60)
+        sec = s % 60
+        return f"{h:02d}:{m:02d}:{sec:06.3f}".replace('.', ',')
+    srt_lines = []
+    for i, seg in enumerate(segments, start=1):
+        start = fmt_time(seg["start"])
+        end = fmt_time(seg["end"])
+        text = seg["text"].strip()
+        srt_lines.append(f"{i}\n{start} --> {end}\n{text}\n")
+    return "\n".join(srt_lines)
+def transcribe_and_translate(video_file: str, target_lang: Optional[str], burn_subs: bool):
+    """
+    1) Extract audio
+    2) Use faster-whisper to transcribe (get timestamps)
+    3) Optionally translate each segment to target language using googletrans
+    4) Generate .srt file
+    5) If burn_subs True, use ffmpeg to burn subtitles into new video
+    Returns paths to output files: srt_path, processed_video_path (or None)
+    """
+    model = get_model()
+    tempdir = Path(tempfile.mkdtemp())
+    input_path = Path(video_file)
+    audio_path = tempdir / "audio.wav"
+    srt_path = tempdir / f"subtitles_{input_path.stem}.srt"
+    processed_video_path = None
+    # 1) extract audio
+    extract_audio(str(input_path), str(audio_path))
+    # 2) transcribe with timestamps
+    # faster-whisper returns segments as dicts with start,end,text
+    task = "translate" if target_lang and target_lang.lower() == "english" else "transcribe"
+    # We'll transcribe first (original text) then translate segments if requested to any language.
+    segments_all = []
+    transcribe_options = {"beam_size": 5, "word_timestamps": False}
+    for segment in model.transcribe(str(audio_path), beam_size=5, vad_filter=True, **transcribe_options):
+        # segment is a dict-like with start, end, text
+        segments_all.append({"start": segment.start, "end": segment.end, "text": segment.text})
+    # 3) translate segments if requested and not English-only special case
+    if target_lang and target_lang.lower() not in ["", "none"]:
+        translator = Translator()
+        translated_segments = []
+        for seg in segments_all:
+            src_text = seg["text"].strip()
+            # Use googletrans to translate to target lang code (like 'ur' for Urdu)
+            try:
+                res = translator.translate(src_text, dest=target_lang)
+                translated_text = res.text
+            except Exception:
+                # fallback to original if translator fails
+                translated_text = src_text
+            translated_segments.append({"start": seg["start"], "end": seg["end"], "text": translated_text})
+        segments_used = translated_segments
+    else:
+        segments_used = segments_all
+    # 4) write srt
+    srt_text = segments_to_srt(segments_used)
+    srt_path.write_text(srt_text, encoding="utf-8")
+    # 5) optional burn subtitles into video
+    if burn_subs:
+        out_video = tempdir / f"burned_{input_path.name}"
+        # ffmpeg can burn subtitles using subtitles filter, but it needs a proper encoding and path
+        cmd = [
+            "ffmpeg",
+            "-y",
+            "-i",
+            str(input_path),
+            "-vf",
+            f"subtitles={str(srt_path)}:force_style='FontName=Arial,FontSize=24'",
+            "-c:a",
+            "copy",
+            str(out_video),
+        ]
+        subprocess.run(cmd, check=True)
+        processed_video_path = str(out_video)
+    return str(srt_path), processed_video_path
+# ------- Gradio UI -------
+# (dropdown label, language code) pairs. The label is what the user sees in the dropdown;
+# the code is what gets passed on as the translation target. "none" disables translation.
+LANG_OPTIONS = [
+    ("No translation (keep original)", "none"),
+    ("English", "en"),
+    ("Urdu", "ur"),
+    ("Hindi", "hi"),
+    ("Spanish", "es"),
+    ("French", "fr"),
+    ("German", "de"),
+]
+with gr.Blocks() as demo:
+    gr.Markdown("# Video subtitle editor + translator (Gradio)\nUpload a video, transcribe, optionally translate and download SRT or burn subtitles into video.")
+    with gr.Row():
+        video_in = gr.File(label="Upload video (mp4, mov, mkv)")
+        lang = gr.Dropdown(label="Translate to (choose language)", choices=[opt[0] for opt in LANG_OPTIONS], value=LANG_OPTIONS[0][0])
+    burn = gr.Checkbox(label="Burn subtitles into video (hardcoded) - may be slow", value=False)
+    out_srt = gr.File(label="Generated SRT")
+    out_video = gr.File(label="Processed video (if burned)")
+    status = gr.Textbox(label="Status / logs", interactive=False)
+    def run_pipeline(uploaded_file, chosen_lang_label, burn_subs_flag):
+        if uploaded_file is None:
+            return None, None, "Please upload a video file."
+        # map chosen label back to code
+        label_to_code = {k: v for k, v in LANG_OPTIONS}
+        lang_code = label_to_code.get(chosen_lang_label, "none")
+        status_msg = "Starting processing..."
+        try:
+            srt_path, processed_video = transcribe_and_translate(uploaded_file.name, lang_code, burn_subs_flag)
+            status_msg = f"Done. SRT: {srt_path}"
+            return srt_path, processed_video, status_msg
+        except subprocess.CalledProcessError as e:
+            return None, None, f"ffmpeg error: {e}"
+        except Exception as e:
+            return None, None, f"Error: {e}"
+    btn = gr.Button("Run")
+    btn.click(run_pipeline, inputs=[video_in, lang, burn], outputs=[out_srt, out_video, status])
+# Start the Gradio app only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    demo.launch()