| """ |
| Run Trelis Chorus v1 (GGML) on an audio file. Prints per-speaker transcripts. |
| |
| Usage: |
| python run_chorus.py audio.wav # Q4_K model, both speakers |
| python run_chorus.py audio.wav --model f16 # use f16 model instead |
| python run_chorus.py audio.wav --json out.json # JSON output |
| python run_chorus.py audio.wav --whisper-cli PATH # custom whisper-cli path |
| |
| Prerequisites: |
| - Patched whisper.cpp built with `--speaker` support (see patches/whisper.cpp.patch) |
| - GGML model files in the parent of this script's directory by default (ggml-chorus-v1-q4_k.bin etc.); override with --models-dir |
| """ |
| from __future__ import annotations |
| import argparse, json, re, subprocess, sys |
| from pathlib import Path |
|
|
# Directory that contains this script; the CLI path defaults are derived from it.
HERE = Path(__file__).resolve().parent

# Accepted --model choices mapped to the GGML file name of each variant.
MODELS = {
    "q4_k": "ggml-chorus-v1-q4_k.bin",
    "f16": "ggml-chorus-v1-f16.bin",
}

# One transcript line of the form "[HH:MM:SS.fff --> HH:MM:SS.fff] text".
# Groups: 1 = start timestamp, 2 = end timestamp, 3 = segment text.
SEG_RE = re.compile(r"^\[(\d\d:\d\d:\d\d\.\d+)\s*-->\s*(\d\d:\d\d:\d\d\.\d+)\]\s+(.*)$")
|
|
|
|
def ts_to_sec(ts: str) -> float:
    """Convert an ``HH:MM:SS.fff`` timestamp string into seconds.

    The string must split into exactly three colon-separated fields: integer
    hours, integer minutes, and (possibly fractional) seconds. Any other shape
    raises ``ValueError`` from the unpacking or the numeric conversions.
    """
    hours, minutes, seconds = ts.split(":")
    whole_seconds = int(hours) * 3600 + int(minutes) * 60
    return whole_seconds + float(seconds)
|
|
|
|
def transcribe(wav: Path, speaker: int, model_path: Path, cli: Path) -> list[dict]:
    """Run whisper-cli for one speaker and parse its timestamped segments.

    Args:
        wav: Audio file handed to whisper-cli via ``-f``.
        speaker: Value forwarded to the ``--speaker`` flag.
        model_path: GGML model file handed to whisper-cli via ``-m``.
        cli: Path to the whisper-cli executable.

    Returns:
        One dict per stdout line that matches ``SEG_RE``, holding ``start``
        and ``end`` in seconds (floats) and the stripped segment ``text``.
        Lines that do not match the pattern are ignored.

    Raises:
        subprocess.CalledProcessError: whisper-cli exited with a non-zero
            status (stdout/stderr are captured on the exception).
    """
    command = [
        str(cli),
        "-m", str(model_path),
        "-f", str(wav),
        "-l", "en",
        "--speaker", str(speaker),
        "-nfa",
        "-np",
    ]
    proc = subprocess.run(
        command,
        capture_output=True,
        check=True,
        # Decode with a fixed codec instead of the host locale's default so
        # parsing does not depend on the machine's locale settings.
        # NOTE(review): assumes whisper-cli writes UTF-8 -- confirm.
        encoding="utf-8",
        # A stray undecodable byte should not abort the whole transcript.
        errors="replace",
    )
    matches = (SEG_RE.match(line.strip()) for line in proc.stdout.splitlines())
    return [
        {
            "start": ts_to_sec(m.group(1)),
            "end": ts_to_sec(m.group(2)),
            "text": m.group(3).strip(),
        }
        for m in matches
        if m
    ]
|
|
|
|
def main(argv: list[str] | None = None) -> None:
    """Parse CLI arguments, transcribe the requested speaker(s), and report.

    Prints each speaker's segments to stdout and, when ``--json`` is given,
    also writes the collected result to that path.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.

    Raises:
        SystemExit: with an error message when the model file, whisper-cli
            binary, or audio file is missing, or when whisper-cli cannot be
            run or exits with a non-zero status.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("audio")
    ap.add_argument("--model", choices=list(MODELS), default="q4_k")
    ap.add_argument("--models-dir", default=str(HERE.parent), help="directory containing ggml-chorus-v1-*.bin")
    ap.add_argument("--whisper-cli", default=str(HERE.parent / "whisper.cpp/build/bin/whisper-cli"))
    ap.add_argument("--speaker", type=int, choices=(0, 1, 2), default=0, help="0=both (default)")
    ap.add_argument("--json", help="write JSON result to this path")
    args = ap.parse_args(argv)

    model_path = Path(args.models_dir) / MODELS[args.model]
    if not model_path.exists():
        sys.exit(f"Model not found: {model_path}")
    cli = Path(args.whisper_cli)
    if not cli.exists():
        sys.exit(f"whisper-cli not found: {cli}")
    wav = Path(args.audio)
    # Fail fast with a clear message, in line with the model/CLI checks above.
    # NOTE(review): "-" is let through in case whisper-cli treats it as
    # stdin -- confirm against the CLI's documentation.
    if args.audio != "-" and not wav.exists():
        sys.exit(f"Audio file not found: {wav}")

    # --speaker 0 is falsy and means "both", i.e. run speakers 1 and 2.
    speakers = [args.speaker] if args.speaker else [1, 2]
    result = {"audio": str(wav), "model": args.model, "speakers": {}}
    for sp in speakers:
        try:
            segs = transcribe(wav, sp, model_path, cli)
        except subprocess.CalledProcessError as err:
            # Show the captured stderr; otherwise it is hidden behind a traceback.
            detail = (err.stderr or "").strip()
            message = f"whisper-cli failed for speaker {sp} (exit code {err.returncode})"
            if detail:
                message += f":\n{detail}"
            sys.exit(message)
        except OSError as err:
            sys.exit(f"Could not run whisper-cli ({cli}): {err}")
        result["speakers"][f"speaker{sp}"] = segs
        print(f"\n--- speaker {sp} ---")
        for s in segs:
            print(f" [{s['start']:6.2f} --> {s['end']:6.2f}] {s['text']}")

    if args.json:
        Path(args.json).write_text(json.dumps(result, indent=2), encoding="utf-8")
        print(f"\nWrote {args.json}")


if __name__ == "__main__":
    main()
|
|