# app.py
"""Gradio app: generate English .srt subtitles from an uploaded audio/video file.

Pipeline: ffmpeg extracts a mono 16 kHz WAV -> faster-whisper transcribes it ->
segments are rendered as an SRT file the user can download.
"""

import os
import shutil
import tempfile
import uuid
from pathlib import Path

import gradio as gr
import ffmpeg
from faster_whisper import WhisperModel


# -------- Helper functions --------

def _format_timestamp(seconds: float) -> str:
    """Format a duration in seconds as an SRT timestamp: HH:MM:SS,mmm."""
    ms = int(round(seconds * 1000))
    hours, ms = divmod(ms, 3_600_000)
    minutes, ms = divmod(ms, 60_000)
    secs, millis = divmod(ms, 1_000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def segments_to_srt(segments: list) -> str:
    """Render transcription segments as SRT text.

    Each segment is a dict with "start"/"end" (seconds) and "text".
    Segments whose text is empty after cleanup are skipped.

    Bug fix: cue numbers are now kept consecutive. The old code numbered
    cues by input position (enumerate), so a skipped empty segment left a
    gap in the SRT numbering, which some players reject.
    """
    blocks = []
    index = 1  # SRT cue numbers must be consecutive, starting at 1
    for seg in segments:
        text = seg["text"].replace("\n", " ").strip()
        if not text:
            continue
        start_ts = _format_timestamp(seg["start"])
        end_ts = _format_timestamp(seg["end"])
        blocks.append(f"{index}\n{start_ts} --> {end_ts}\n{text}\n")
        index += 1
    return "\n".join(blocks)


# -------- Config --------
MODEL_NAME = "Systran/faster-whisper-small"  # good for HF CPU
DEVICE = "cpu"
OUTPUT_DIR = Path("outputs/subtitles")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

print(f"Loading model {MODEL_NAME} on {DEVICE} ...")
model = WhisperModel(MODEL_NAME, device=DEVICE)
print("Model loaded.")


# -------- Core functions --------

def extract_audio(input_path: str, out_path: str):
    """Extracts mono 16 kHz WAV using ffmpeg.

    Raises:
        RuntimeError: carrying ffmpeg's stderr when the conversion fails.
    """
    try:
        (
            ffmpeg
            .input(input_path)
            .output(out_path, format="wav", acodec="pcm_s16le", ac=1, ar="16000")
            .overwrite_output()
            .run(quiet=True)
        )
    except ffmpeg.Error as e:
        stderr = getattr(e, "stderr", None)
        msg = stderr.decode() if stderr else str(e)
        # Chain the cause so the original ffmpeg.Error is not lost.
        raise RuntimeError(f"ffmpeg error: {msg}") from e


def transcribe_file_to_srt(file_obj, language: str = "en"):
    """Transcribe an uploaded file to SRT; compatible with HF Spaces.

    Args:
        file_obj: Gradio upload object (has ``.name``) or a plain path string.
        language: language code passed to faster-whisper (default "en").

    Returns:
        (srt_path, status_message) tuple.

    Bug fixes vs. the previous version:
      * the per-request temp dir is now always removed (it used to leak);
      * the no-``read_bytes`` fallback used to reopen the very path that was
        just found missing (guaranteed FileNotFoundError) — now it raises a
        clear error instead;
      * output names get a unique suffix so two uploads with the same stem
        no longer overwrite each other (this also uses the imported uuid).
    """
    tmp_dir = Path(tempfile.mkdtemp(prefix="subgen_"))
    try:
        # Handle Hugging Face NamedString / Path / plain string inputs.
        src_path = Path(getattr(file_obj, "name", file_obj))
        if src_path.exists():
            input_path = src_path
        else:
            # Fallback for file-like objects that carry their own bytes.
            input_path = tmp_dir / src_path.name
            if hasattr(file_obj, "read_bytes"):
                input_path.write_bytes(file_obj.read_bytes())
            else:
                raise RuntimeError(f"Uploaded file not found: {src_path}")

        # Extract audio and transcribe.
        audio_path = tmp_dir / "audio.wav"
        extract_audio(str(input_path), str(audio_path))

        segments, _ = model.transcribe(str(audio_path), language=language)
        segs = [{"start": s.start, "end": s.end, "text": s.text} for s in segments]
        srt_text = segments_to_srt(segs)

        # Save .srt with a unique suffix to avoid same-stem collisions.
        output_path = OUTPUT_DIR / f"{src_path.stem}_{uuid.uuid4().hex[:8]}.srt"
        output_path.write_text(srt_text, encoding="utf-8")
        return str(output_path), "✅ Subtitles generated successfully!"
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)


# -------- Gradio UI --------
with gr.Blocks(title="AI Subtitle Generator") as demo:
    theme_state = gr.State("light")

    def toggle_theme(current):
        """Flip the stored theme between 'light' and 'dark'."""
        return "dark" if current == "light" else "light"

    def apply_theme(theme_mode):
        """Return a <style> update for the chosen theme.

        Bug fix: the previous version returned ``gr.update(value=f"")`` — an
        empty string — so toggling the theme had no visible effect. The CSS
        payload is reconstructed from the bg/color values computed here
        (NOTE(review): original markup was lost; confirm styling matches intent).
        """
        if theme_mode == "dark":
            bg = "linear-gradient(135deg, #0f2027, #203a43, #2c5364)"
            color = "#ffffff"
        else:
            bg = "linear-gradient(135deg, #fdfbfb, #ebedee)"
            color = "#000000"
        return gr.update(
            value=f"<style>body, .gradio-container {{background: {bg}; color: {color};}}</style>"
        )

    gr.HTML(
        "<h1>AI Subtitle Generator</h1>"
        "<p>Upload a video or audio file to generate English .srt subtitles.</p>"
    )
    style_box = gr.HTML("")
    theme_btn = gr.Button("🌙 Toggle Light/Dark Mode")

    with gr.Row():
        input_file = gr.File(label="Upload video/audio file")
        output_file = gr.File(label="Download .srt file")
    status_box = gr.Textbox(label="Status", interactive=False)

    def on_click(file):
        """Run transcription for the uploaded file; surface errors in the UI.

        Bug fix: clicking Generate with no upload used to crash with
        AttributeError; exceptions during transcription escaped to the server.
        """
        if file is None:
            return None, "⚠️ Please upload a file first."
        try:
            srt_path, msg = transcribe_file_to_srt(file)
            return srt_path, msg
        except Exception as e:  # boundary handler: report instead of 500
            return None, f"❌ Error: {e}"

    theme_btn.click(
        toggle_theme, inputs=[theme_state], outputs=[theme_state]
    ).then(apply_theme, inputs=[theme_state], outputs=[style_box])

    generate_btn = gr.Button("Generate Subtitles")
    generate_btn.click(on_click, inputs=[input_file], outputs=[output_file, status_box])

    gr.HTML("<p>Powered by Faster-Whisper + Gradio UI</p>")

if __name__ == "__main__":
    demo.launch()