""" Run Trelis Chorus v1 (GGML) on an audio file. Prints per-speaker transcripts. Usage: python run_chorus.py audio.wav # Q4_K model, both speakers python run_chorus.py audio.wav --model f16 # use f16 model instead python run_chorus.py audio.wav --json out.json # JSON output python run_chorus.py audio.wav --whisper-cli PATH # custom whisper-cli path Prerequisites: - Patched whisper.cpp built with `--speaker` support (see patches/whisper.cpp.patch) - GGML model files alongside this script (ggml-chorus-v1-q4_k.bin etc.) """ from __future__ import annotations import argparse, json, re, subprocess, sys from pathlib import Path HERE = Path(__file__).resolve().parent MODELS = {"q4_k": "ggml-chorus-v1-q4_k.bin", "f16": "ggml-chorus-v1-f16.bin"} SEG_RE = re.compile(r"^\[(\d\d:\d\d:\d\d\.\d+)\s*-->\s*(\d\d:\d\d:\d\d\.\d+)\]\s+(.*)$") def ts_to_sec(ts: str) -> float: h, m, s = ts.split(":") return int(h) * 3600 + int(m) * 60 + float(s) def transcribe(wav: Path, speaker: int, model_path: Path, cli: Path) -> list[dict]: r = subprocess.run( [str(cli), "-m", str(model_path), "-f", str(wav), "-l", "en", "--speaker", str(speaker), "-nfa", "-np"], capture_output=True, text=True, check=True, ) segs = [] for line in r.stdout.splitlines(): m = SEG_RE.match(line.strip()) if m: segs.append({"start": ts_to_sec(m.group(1)), "end": ts_to_sec(m.group(2)), "text": m.group(3).strip()}) return segs def main(): ap = argparse.ArgumentParser() ap.add_argument("audio") ap.add_argument("--model", choices=list(MODELS), default="q4_k") ap.add_argument("--models-dir", default=str(HERE.parent), help="directory containing ggml-chorus-v1-*.bin") ap.add_argument("--whisper-cli", default=str(HERE.parent / "whisper.cpp/build/bin/whisper-cli")) ap.add_argument("--speaker", type=int, choices=(0, 1, 2), default=0, help="0=both (default)") ap.add_argument("--json", help="write JSON result to this path") args = ap.parse_args() model_path = Path(args.models_dir) / MODELS[args.model] if not model_path.exists(): sys.exit(f"Model not found: {model_path}") cli = Path(args.whisper_cli) if not cli.exists(): sys.exit(f"whisper-cli not found: {cli}") wav = Path(args.audio) speakers = [args.speaker] if args.speaker else [1, 2] result = {"audio": str(wav), "model": args.model, "speakers": {}} for sp in speakers: segs = transcribe(wav, sp, model_path, cli) result["speakers"][f"speaker{sp}"] = segs print(f"\n--- speaker {sp} ---") for s in segs: print(f" [{s['start']:6.2f} --> {s['end']:6.2f}] {s['text']}") if args.json: Path(args.json).write_text(json.dumps(result, indent=2)) print(f"\nWrote {args.json}") if __name__ == "__main__": main()