Chorus-v1-GGML / scripts /run_chorus.py
RonanMcGovern's picture
Initial upload: f16 + q4_k/q5_k/q8_0 GGML, scripts, whisper.cpp patch
5887db6 verified
"""
Run Trelis Chorus v1 (GGML) on an audio file. Prints per-speaker transcripts.
Usage:
python run_chorus.py audio.wav # Q4_K model, both speakers
python run_chorus.py audio.wav --model f16 # use f16 model instead
python run_chorus.py audio.wav --json out.json # JSON output
python run_chorus.py audio.wav --whisper-cli PATH # custom whisper-cli path
Prerequisites:
- Patched whisper.cpp built with `--speaker` support (see patches/whisper.cpp.patch)
- GGML model files in the parent of this script's directory (ggml-chorus-v1-q4_k.bin etc.), or pass --models-dir
"""
from __future__ import annotations
import argparse, json, re, subprocess, sys
from pathlib import Path
# Directory that contains this script (symlinks resolved).
HERE = Path(__file__).resolve().parent
# Maps each accepted --model choice to the model file name looked up in --models-dir.
MODELS = {"q4_k": "ggml-chorus-v1-q4_k.bin", "f16": "ggml-chorus-v1-f16.bin"}
# Matches one output line of the form "[HH:MM:SS.fff --> HH:MM:SS.fff] text".
# Groups: 1 = start timestamp, 2 = end timestamp, 3 = remaining text.
SEG_RE = re.compile(r"^\[(\d\d:\d\d:\d\d\.\d+)\s*-->\s*(\d\d:\d\d:\d\d\.\d+)\]\s+(.*)$")
def ts_to_sec(ts: str) -> float:
    """Convert a colon-separated timestamp to seconds.

    Accepts ``HH:MM:SS.fff`` as well as the shorter ``MM:SS.fff`` and
    ``SS.fff`` forms. Every field except the last must be an integer; the
    last field may carry a fractional part.

    Args:
        ts: Timestamp string such as ``"00:01:02.500"``.

    Returns:
        The timestamp expressed in seconds, as a float.

    Raises:
        ValueError: If *ts* has more than three fields or a field is not numeric.
    """
    *leading, last = ts.split(":")
    if len(leading) > 2:
        raise ValueError(f"invalid timestamp: {ts!r}")
    # Fold hours/minutes into whole minutes so the integer part stays exact.
    whole_minutes = 0
    for field in leading:
        whole_minutes = whole_minutes * 60 + int(field)
    return whole_minutes * 60 + float(last)
def transcribe(wav: Path, speaker: int, model_path: Path, cli: Path) -> list[dict]:
    """Run whisper-cli on *wav* for one speaker and parse its timestamped lines.

    Args:
        wav: Audio file handed to the CLI via ``-f``.
        speaker: Value forwarded to the CLI's ``--speaker`` option.
        model_path: Model file handed to the CLI via ``-m``.
        cli: Path to the whisper-cli executable.

    Returns:
        One dict per stdout line that matches ``SEG_RE``, in output order,
        with keys ``start`` and ``end`` (seconds) and ``text`` (stripped).
        Lines that do not match are ignored.

    Raises:
        subprocess.CalledProcessError: If the CLI exits with a non-zero status.
    """
    cmd = [
        str(cli), "-m", str(model_path), "-f", str(wav),
        "-l", "en", "--speaker", str(speaker), "-nfa", "-np",
    ]
    proc = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        # Decode explicitly as UTF-8 instead of the locale encoding, and never
        # fail on a stray byte: a bad character should not abort the whole run.
        encoding="utf-8",
        errors="replace",
        check=True,
    )
    matches = (SEG_RE.match(line.strip()) for line in proc.stdout.splitlines())
    return [
        {
            "start": ts_to_sec(m.group(1)),
            "end": ts_to_sec(m.group(2)),
            "text": m.group(3).strip(),
        }
        for m in matches
        if m
    ]
def main(argv: list[str] | None = None) -> None:
    """Parse command-line options, transcribe each requested speaker, print results.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Exits with an error message when the model file, the whisper-cli binary
    or the audio file is missing, or when whisper-cli itself fails. With
    ``--json`` the collected result is also written to the given path.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("audio")
    ap.add_argument("--model", choices=list(MODELS), default="q4_k")
    ap.add_argument("--models-dir", default=str(HERE.parent), help="directory containing ggml-chorus-v1-*.bin")
    ap.add_argument("--whisper-cli", default=str(HERE.parent / "whisper.cpp/build/bin/whisper-cli"))
    ap.add_argument("--speaker", type=int, choices=(0, 1, 2), default=0, help="0=both (default)")
    ap.add_argument("--json", help="write JSON result to this path")
    args = ap.parse_args(argv)

    model_path = Path(args.models_dir) / MODELS[args.model]
    if not model_path.exists():
        sys.exit(f"Model not found: {model_path}")
    cli = Path(args.whisper_cli)
    if not cli.exists():
        sys.exit(f"whisper-cli not found: {cli}")
    wav = Path(args.audio)
    # Fail early like the checks above instead of letting the CLI fail later.
    # "-" is passed through unchanged in case the CLI treats it specially
    # (NOTE(review): confirm whether the patched CLI accepts stdin input).
    if args.audio != "-" and not wav.exists():
        sys.exit(f"Audio file not found: {wav}")

    # --speaker 0 means "both", which is run as two separate passes.
    speakers = [args.speaker] if args.speaker else [1, 2]
    result = {"audio": str(wav), "model": args.model, "speakers": {}}
    for sp in speakers:
        try:
            segs = transcribe(wav, sp, model_path, cli)
        except subprocess.CalledProcessError as exc:
            # Surface the CLI's own diagnostics rather than a bare traceback.
            detail = (exc.stderr or "").strip()
            sys.exit(f"whisper-cli failed for speaker {sp} (exit code {exc.returncode}):\n{detail}")
        result["speakers"][f"speaker{sp}"] = segs
        print(f"\n--- speaker {sp} ---")
        for s in segs:
            print(f" [{s['start']:6.2f} --> {s['end']:6.2f}] {s['text']}")

    if args.json:
        Path(args.json).write_text(json.dumps(result, indent=2), encoding="utf-8")
        print(f"\nWrote {args.json}")
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()