Initial upload: f16 + q4_k/q5_k/q8_0 GGML, scripts, whisper.cpp patch

5887db6 verified about 1 month ago

2.98 kB

	"""
	Run Trelis Chorus v1 (GGML) on an audio file. Prints per-speaker transcripts.

	Usage:
	python run_chorus.py audio.wav # Q4_K model, both speakers
	python run_chorus.py audio.wav --model f16 # use f16 model instead
	python run_chorus.py audio.wav --json out.json # JSON output
	python run_chorus.py audio.wav --whisper-cli PATH # custom whisper-cli path

	Prerequisites:
	- Patched whisper.cpp built with `--speaker` support (see patches/whisper.cpp.patch)
	- GGML model files (ggml-chorus-v1-q4_k.bin etc.) in --models-dir, which defaults to
	  the parent of this script's directory
	"""
	from __future__ import annotations
	import argparse, json, re, subprocess, sys
	from pathlib import Path

	# Absolute directory that contains this script.
	HERE = Path(__file__).resolve().parents[0]

	# Maps each accepted --model choice to the file name of its GGML weights.
	MODELS = {
	    "q4_k": "ggml-chorus-v1-q4_k.bin",
	    "f16": "ggml-chorus-v1-f16.bin",
	}

	# Matches one transcript line of the form "[HH:MM:SS.fff --> HH:MM:SS.fff] text".
	# Groups: (1) start timestamp, (2) end timestamp, (3) the text after the bracket.
	SEG_RE = re.compile(
	    r"^\[(\d\d:\d\d:\d\d\.\d+)\s-->\s(\d\d:\d\d:\d\d\.\d+)\]\s+(.*)$"
	)


	def ts_to_sec(ts: str) -> float:
	    """Convert an ``HH:MM:SS.fff`` timestamp string into seconds.

	    The string is split on ``:`` into exactly three fields. Hours and minutes
	    are parsed as integers, the seconds field as a float. A string with a
	    different number of fields, or non-numeric fields, raises ``ValueError``.
	    """
	    hours, minutes, seconds = ts.split(":")
	    whole_seconds = 3600 * int(hours) + 60 * int(minutes)
	    return whole_seconds + float(seconds)


	def transcribe(wav: Path, speaker: int, model_path: Path, cli: Path) -> list[dict]:
	    """Run whisper-cli on *wav* for one speaker and parse its transcript lines.

	    Args:
	        wav: Audio file handed to whisper-cli via ``-f``.
	        speaker: Value forwarded to the ``--speaker`` flag.
	        model_path: GGML model file handed to whisper-cli via ``-m``.
	        cli: Path of the whisper-cli executable to run.

	    Returns:
	        One dict per stdout line that matches ``SEG_RE``, holding ``start``
	        and ``end`` in seconds plus the stripped ``text``. Lines that do not
	        match are ignored.

	    Raises:
	        subprocess.CalledProcessError: whisper-cli exited with a non-zero
	            status; its captured stdout/stderr are attached to the exception.
	    """
	    cmd = [
	        str(cli), "-m", str(model_path), "-f", str(wav),
	        "-l", "en", "--speaker", str(speaker), "-nfa", "-np",
	    ]
	    # Decode explicitly as UTF-8 instead of the locale's preferred encoding
	    # (which is what text=True alone would use), so non-ASCII transcript text
	    # is neither mangled nor rejected on systems with a non-UTF-8 locale.
	    # Assumes whisper-cli writes UTF-8; undecodable bytes are replaced rather
	    # than aborting the whole transcription.
	    proc = subprocess.run(
	        cmd,
	        capture_output=True,
	        check=True,
	        encoding="utf-8",
	        errors="replace",
	    )
	    segs = []
	    for line in proc.stdout.splitlines():
	        m = SEG_RE.match(line.strip())
	        if m is None:
	            continue
	        start, end, text = m.groups()
	        segs.append({
	            "start": ts_to_sec(start),
	            "end": ts_to_sec(end),
	            "text": text.strip(),
	        })
	    return segs


	def main(argv: list[str] | None = None) -> None:
	    """Parse the command line, transcribe each requested speaker, print results.

	    Args:
	        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
	            (the default) keeps the normal command-line behaviour.

	    Exits via ``sys.exit`` with a message when the model file, the
	    whisper-cli executable or the audio file does not exist, or when
	    whisper-cli returns a non-zero status. With ``--json`` the collected
	    result is also written to the given path.
	    """
	    ap = argparse.ArgumentParser()
	    ap.add_argument("audio")
	    ap.add_argument("--model", choices=list(MODELS), default="q4_k")
	    ap.add_argument("--models-dir", default=str(HERE.parent), help="directory containing ggml-chorus-v1-*.bin")
	    ap.add_argument("--whisper-cli", default=str(HERE.parent / "whisper.cpp/build/bin/whisper-cli"))
	    ap.add_argument("--speaker", type=int, choices=(0, 1, 2), default=0, help="0=both (default)")
	    ap.add_argument("--json", help="write JSON result to this path")
	    args = ap.parse_args(argv)

	    model_path = Path(args.models_dir) / MODELS[args.model]
	    if not model_path.exists():
	        sys.exit(f"Model not found: {model_path}")
	    cli = Path(args.whisper_cli)
	    if not cli.exists():
	        sys.exit(f"whisper-cli not found: {cli}")
	    wav = Path(args.audio)
	    # Validate the audio path up front, like the model and CLI paths, so a
	    # typo is reported here instead of depending on how whisper-cli reacts.
	    if not wav.exists():
	        sys.exit(f"Audio not found: {wav}")

	    # --speaker 0 (the default) means "both": run once for speaker 1 and
	    # once for speaker 2.
	    speakers = [args.speaker] if args.speaker else [1, 2]
	    result = {"audio": str(wav), "model": args.model, "speakers": {}}
	    for sp in speakers:
	        try:
	            segs = transcribe(wav, sp, model_path, cli)
	        except subprocess.CalledProcessError as err:
	            # Show whisper-cli's own diagnostics instead of a bare traceback.
	            detail = (err.stderr or "").strip()
	            msg = f"whisper-cli failed (exit code {err.returncode}) for speaker {sp}"
	            sys.exit(f"{msg}:\n{detail}" if detail else msg)
	        result["speakers"][f"speaker{sp}"] = segs
	        print(f"\n--- speaker {sp} ---")
	        for s in segs:
	            print(f" [{s['start']:6.2f} --> {s['end']:6.2f}] {s['text']}")

	    if args.json:
	        Path(args.json).write_text(json.dumps(result, indent=2))
	        print(f"\nWrote {args.json}")


	# Run the CLI only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	    main()