Spaces:
Configuration error
Configuration error
AIGoose
feat: Bright Studio style — Space Grotesk 100px top, captions, trim, music, navy border rule
b10b75c | #!/usr/bin/env python3 | |
| """gen_captions.py - Whisper -> SRT for burned-in captions via HF Inference API""" | |
| import os, sys, base64, requests, subprocess, argparse | |
def transcribe_hf(audio_path, hf_token):
    """Transcribe an audio file with whisper-large-v3 via the HF Inference API.

    The whole file is read, base64-encoded and POSTed as JSON, requesting
    language "id" and word-level timestamps.

    Args:
        audio_path: Path of the audio file to upload.
        hf_token: Hugging Face token sent as a Bearer credential.

    Returns:
        The "chunks" list from the JSON response, or an empty list when the
        response carries no "chunks" key.

    Raises:
        Whatever ``raise_for_status()`` raises for an HTTP error status, plus
        any error from opening the file or performing the request.
    """
    endpoint = "https://router.huggingface.co/hf-inference/models/openai/whisper-large-v3"

    with open(audio_path, "rb") as audio_file:
        encoded_audio = base64.b64encode(audio_file.read()).decode()

    payload = {
        "inputs": encoded_audio,
        "parameters": {"language": "id", "return_timestamps": "word"},
    }
    request_headers = {
        "Authorization": f"Bearer {hf_token}",
        "Content-Type": "application/json",
    }

    print("Transcribing via HF Inference API (whisper-large-v3)...")
    # Generous timeout: the request blocks until the transcription is returned.
    response = requests.post(
        endpoint, headers=request_headers, json=payload, timeout=300
    )
    response.raise_for_status()

    word_chunks = response.json().get("chunks", [])
    print(f"Got {len(word_chunks)} word chunks")
    return word_chunks
def _srt_timestamp(seconds):
    """Format a time offset in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
    # Round to whole milliseconds *before* splitting into fields so that a
    # value such as 59.9996 carries into the minutes field instead of being
    # rendered as the invalid "00:00:60,000".
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def chunks_to_srt(chunks, max_chars=38, gap=0.7):
    """Group word-level chunks into caption lines and render them as SRT text.

    Words are accumulated into one caption until adding the next word would
    make the line longer than ``max_chars`` or the silence between the
    previous word's end and the next word's start exceeds ``gap`` seconds.

    Args:
        chunks: Sequence of dicts with a "text" string and a "timestamp"
            pair ``[start, end]`` in seconds. A missing/``None`` start is
            treated as 0 and a missing/``None`` end as ``start + 0.3``.
        max_chars: Maximum caption length in characters (a single word longer
            than this still gets its own caption).
        gap: Pause, in seconds, that forces a new caption.

    Returns:
        The SRT document as a string; entries are numbered from 1 and
        separated by a blank line. Empty input yields an empty string.
    """
    entries = []
    total = len(chunks)
    i = 0
    while i < total:
        words = []
        line_len = 0  # length of " ".join(words), maintained incrementally
        line_start = None
        line_end = None
        while i < total:
            chunk = chunks[i]
            # Tolerate a "timestamp"/"text" key that is present but None.
            ts = chunk.get("timestamp") or (0, 0)
            start = ts[0] if ts[0] is not None else 0
            end = ts[1] if ts[1] is not None else start + 0.3
            word = (chunk.get("text") or "").strip()
            if not word:
                # Blank tokens would only add double spaces to the caption.
                i += 1
                continue
            if words and (
                line_len + 1 + len(word) > max_chars
                or start - line_end > gap
            ):
                # Leave this word for the next caption line.
                break
            if line_start is None:
                line_start = start
            line_len += len(word) + (1 if words else 0)
            words.append(word)
            line_end = end
            i += 1
        if words:
            entries.append(
                f"{len(entries) + 1}\n"
                f"{_srt_timestamp(line_start)} --> {_srt_timestamp(line_end)}\n"
                f"{' '.join(words)}\n"
            )
    return "\n".join(entries)
def _write_srt(path, content):
    """Write *content* to *path* as UTF-8, creating the parent directory if any."""
    parent = os.path.dirname(path)
    # os.makedirs("") raises FileNotFoundError, so skip it when the output
    # path is a bare filename in the current directory.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)


def main():
    """CLI entry point: extract audio from ``--input`` and write an SRT file.

    The audio track is extracted with ffmpeg to /tmp/caption_audio.wav. When
    the HF_TOKEN environment variable is set, the audio is transcribed via
    ``transcribe_hf`` and converted with ``chunks_to_srt``; if that fails, or
    when no token is set, a fixed two-entry fallback SRT is written instead.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
        OSError: if the SRT file cannot be written.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--input", required=True)
    p.add_argument("--srt-out", default="project/assets/captions.srt")
    args = p.parse_args()

    hf_token = os.environ.get("HF_TOKEN", "")
    audio_path = "/tmp/caption_audio.wav"
    # -vn drops the video stream; -ar 16000 / -ac 1 resample to 16 kHz mono.
    subprocess.run(
        ["ffmpeg", "-y", "-i", args.input, "-vn", "-ar", "16000", "-ac", "1",
         "-b:a", "32k", audio_path],
        capture_output=True,
        check=True,
    )
    print(f"Audio extracted: {os.path.getsize(audio_path)/1024:.0f} KB")

    if hf_token:
        # Only the transcription itself is best-effort; a failure to write the
        # output file is a real error and must not be reported as an API failure.
        try:
            srt = chunks_to_srt(transcribe_hf(audio_path, hf_token))
        except Exception as e:
            print(f"HF transcription failed: {e} — using fallback SRT")
        else:
            _write_srt(args.srt_out, srt)
            print(f"SRT written: {args.srt_out} ({len(srt.splitlines())} lines)")
            return

    fallback = "1\n00:00:00,500 --> 00:00:03,200\nTraining yang langsung bisa dipraktek\n\n2\n00:00:03,500 --> 00:00:06,000\nHasilnya nyata dan bisa dipakai tim\n\n"
    _write_srt(args.srt_out, fallback)
    print(f"Fallback SRT written: {args.srt_out}")


if __name__ == "__main__":
    main()