# Copied from a Hugging Face Space file view (Space status: Sleeping; file size: 4,625 bytes).
# app.py
import os
import tempfile
import uuid
from pathlib import Path
import gradio as gr
import ffmpeg
from faster_whisper import WhisperModel
# -------- Helper functions --------
def _format_timestamp(seconds: float) -> str:
ms = int(round(seconds * 1000))
hours = ms // 3600000
ms_rem = ms % 3600000
minutes = ms_rem // 60000
ms_rem = ms_rem % 60000
secs = ms_rem // 1000
millis = ms_rem % 1000
return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
def segments_to_srt(segments: list) -> str:
    """Render transcription segments as the text of an SRT subtitle file.

    Args:
        segments: Sequence of mappings, each with ``"start"`` and ``"end"``
            (seconds) and ``"text"`` keys.

    Returns:
        The SRT document as a single string. Segments whose text is empty
        after stripping are omitted. Cues are separated by a blank line.
    """
    cues = []
    for seg in segments:
        # SRT cue text is line-oriented; fold embedded newlines into spaces.
        text = seg["text"].replace("\n", " ").strip()
        if not text:
            continue
        start_ts = _format_timestamp(seg["start"])
        end_ts = _format_timestamp(seg["end"])
        # Number by emitted cues, not by input position, so that skipped
        # (empty) segments do not leave gaps in the cue sequence.
        cue_number = len(cues) + 1
        cues.append(f"{cue_number}\n{start_ts} --> {end_ts}\n{text}\n")
    return "\n".join(cues)
# -------- Config --------
# Model identifier handed to WhisperModel below; the "small" variant was
# chosen by the author as suitable for a CPU-only Hugging Face Space.
MODEL_NAME = "Systran/faster-whisper-small" # good for HF CPU
# Device string handed to WhisperModel.
DEVICE = "cpu"
# Directory where generated .srt files are written; created at import time
# (relative to the current working directory).
OUTPUT_DIR = Path("outputs/subtitles")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# The model is instantiated once at import time and shared by every request
# through the module-level `model` name.
print(f"Loading model {MODEL_NAME} on {DEVICE} ...")
model = WhisperModel(MODEL_NAME, device=DEVICE)
print("Model loaded.")
# -------- Core functions --------
def extract_audio(input_path: str, out_path: str):
    """Extract the audio track of a media file as mono 16 kHz 16-bit PCM WAV.

    Args:
        input_path: Path of the source video/audio file.
        out_path: Path of the WAV file to write; an existing file is
            overwritten.

    Raises:
        RuntimeError: If ffmpeg fails. The message carries ffmpeg's stderr
            output when available, and the original ``ffmpeg.Error`` is
            attached as the cause.
    """
    # Building the command graph does not invoke ffmpeg, so only the run()
    # call needs to sit inside the try block.
    stream = (
        ffmpeg
        .input(input_path)
        .output(out_path, format="wav", acodec="pcm_s16le", ac=1, ar="16000")
        .overwrite_output()
    )
    try:
        stream.run(quiet=True)
    except ffmpeg.Error as e:
        stderr = getattr(e, "stderr", None)
        # ffmpeg's stderr may contain bytes that are not valid UTF-8 (e.g.
        # file names or metadata); never let decoding mask the real failure.
        msg = stderr.decode("utf-8", errors="replace") if stderr else str(e)
        raise RuntimeError(f"ffmpeg error: {msg}") from e
def transcribe_file_to_srt(file_obj, language: str = "en"):
    """Transcribe an uploaded media file and save the result as an .srt file.

    Args:
        file_obj: The upload as delivered by the UI. Accepted forms: an
            object whose ``.name`` is the file's path, a ``pathlib.Path``
            (read via ``read_bytes`` when its bare name does not resolve),
            a plain ``str``/``os.PathLike`` path, or ``None`` when nothing
            was uploaded.
        language: Language code passed to the transcription model.

    Returns:
        A ``(srt_path, status_message)`` tuple. ``srt_path`` is ``None`` when
        no file was supplied.

    Raises:
        FileNotFoundError: If the upload's path does not exist and the object
            offers no ``read_bytes`` fallback.
        RuntimeError: Propagated from ``extract_audio`` when ffmpeg fails.
    """
    if file_obj is None:
        return None, "Please upload a video or audio file first."

    # Upload wrappers expose the path as `.name`; a plain str / PathLike has
    # no such attribute, so fall back to the object itself.
    source_name = getattr(file_obj, "name", None)
    if source_name is None:
        source_name = os.fspath(file_obj)
    source_path = Path(source_name)

    # All intermediate files live in a scratch directory that is removed when
    # the block exits, even if extraction or transcription fails.
    with tempfile.TemporaryDirectory(prefix="subgen_") as scratch:
        tmp_dir = Path(scratch)

        input_path = source_path
        if not input_path.exists():
            # `.name` did not resolve to a real file (e.g. a Path object,
            # whose `.name` is only the final component): materialize the
            # content in the scratch directory instead.
            if not hasattr(file_obj, "read_bytes"):
                raise FileNotFoundError(f"Uploaded file not found: {source_name}")
            input_path = tmp_dir / source_path.name
            input_path.write_bytes(file_obj.read_bytes())

        # Extract audio and transcribe
        audio_path = tmp_dir / "audio.wav"
        extract_audio(str(input_path), str(audio_path))
        segments, _ = model.transcribe(str(audio_path), language=language)
        # Consume the segments while the scratch directory still exists.
        segs = [{"start": s.start, "end": s.end, "text": s.text} for s in segments]

    srt_text = segments_to_srt(segs)

    # Save .srt file
    # NOTE(review): the output name is derived from the upload's stem only,
    # so two uploads with the same file name overwrite each other.
    output_path = OUTPUT_DIR / f"{source_path.stem}.srt"
    output_path.write_text(srt_text, encoding="utf-8")
    return str(output_path), "✅ Subtitles generated successfully!"
# -------- Gradio UI --------
# Page layout and event wiring. Components render in the order they are
# created inside the Blocks context, so the statement order below is the
# on-page order.
with gr.Blocks(title="AI Subtitle Generator") as demo:
    # Per-session theme flag: "light" or "dark".
    theme_state = gr.State("light")

    def toggle_theme(current):
        """Return the opposite theme name of ``current``."""
        return "dark" if current == "light" else "light"

    def apply_theme(theme_mode):
        """Build the update that injects a <style> tag for ``theme_mode``."""
        if theme_mode == "dark":
            bg = "linear-gradient(135deg, #0f2027, #203a43, #2c5364)"
            color = "#ffffff"
        else:
            bg = "linear-gradient(135deg, #fdfbfb, #ebedee)"
            color = "#000000"
        return gr.update(
            value=f"<style>body {{ background: {bg}; color: {color}; }}</style>"
        )

    gr.HTML("<h1 style='text-align:center;'>🎬 AI Subtitle Generator</h1>")
    gr.HTML(
        "<p style='text-align:center;'>Upload a video or audio file to generate English <b>.srt</b> subtitles.</p>"
    )
    # Initially empty HTML slot that receives the theme's <style> tag.
    style_box = gr.HTML("")
    # NOTE(review): the button's original icon was lost to mis-encoding;
    # the half-moon below is a stand-in — confirm against the deployed app.
    theme_btn = gr.Button("🌗 Toggle Light/Dark Mode")

    with gr.Row():
        input_file = gr.File(label="Upload video/audio file")
        output_file = gr.File(label="Download .srt file")
    status_box = gr.Textbox(label="Status", interactive=False)

    def on_click(file):
        """Run transcription for the uploaded file; return (srt path, status)."""
        # Clicking "Generate" without an upload delivers None; report that in
        # the status box instead of failing inside the transcription code.
        if file is None:
            return None, "Please upload a video or audio file first."
        srt_path, msg = transcribe_file_to_srt(file)
        return srt_path, msg

    # First flip the stored theme, then re-render the style tag from it.
    theme_btn.click(
        toggle_theme, inputs=[theme_state], outputs=[theme_state]
    ).then(apply_theme, inputs=[theme_state], outputs=[style_box])

    generate_btn = gr.Button("Generate Subtitles")
    generate_btn.click(on_click, inputs=[input_file], outputs=[output_file, status_box])

    gr.HTML(
        "<p style='text-align:center;font-size:14px;opacity:0.7;'>Powered by Faster-Whisper + Gradio UI</p>"
    )
# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()