Chorus-v1-GGML / scripts /serve_chorus.py

Initial upload: f16 + q4_k/q5_k/q8_0 GGML, scripts, whisper.cpp patch

5887db6 verified about 1 month ago

3.01 kB

	"""
	Chorus v1 (GGML) HTTP inference server.

	POST /transcribe multipart form with `file=<audio>` → JSON {speaker1, speaker2}
	GET /healthz → 200 OK

	Run:
	uv run --with fastapi --with uvicorn --with python-multipart \
	python serve_chorus.py --model q4_k --port 8000

	Requires `whisper-cli` on PATH or --whisper-cli, and a ggml-chorus-v1-*.bin file.
	"""
	from __future__ import annotations
	import argparse, io, os, re, subprocess, tempfile
	from pathlib import Path

	from fastapi import FastAPI, UploadFile, File, HTTPException
	import uvicorn

	# Directory containing this script; main() derives its default model and
	# whisper-cli locations from HERE.parent.
	HERE = Path(__file__).resolve().parent

	# Maps each value accepted by --model to the model file name looked up in
	# --models-dir.
	# NOTE(review): only q4_k and f16 are registered; add entries here if other
	# quantizations should be selectable.
	MODELS = dict(
	    q4_k="ggml-chorus-v1-q4_k.bin",
	    f16="ggml-chorus-v1-f16.bin",
	)

	# Matches one transcript line of the form "[HH:MM:SS.fff --> HH:MM:SS.fff]  text";
	# groups are (start timestamp, end timestamp, text).
	SEG_RE = re.compile(r"^\[(\d\d:\d\d:\d\d\.\d+)\s-->\s(\d\d:\d\d:\d\d\.\d+)\]\s+(.*)$")

	# Upload size limit in bytes (50 MiB), enforced by the /transcribe handler.
	MAX_BYTES = 50 * 1024 ** 2


	def ts_to_sec(ts: str) -> float:
	    """Convert an ``HH:MM:SS.fff`` timestamp string into seconds.

	    The string is split on ``:`` and must yield exactly three fields
	    (hours, minutes, seconds). Any other field count, or a field that is
	    not numeric, raises ``ValueError``.
	    """
	    hours, minutes, seconds = ts.split(":")
	    whole_seconds = 3600 * int(hours) + 60 * int(minutes)
	    return whole_seconds + float(seconds)


	def _parse_segments(stdout: str) -> list[dict]:
	    """Extract timestamped segments from whisper-cli's standard output.

	    Each line matching ``SEG_RE`` (``[start --> end] text``) becomes a dict
	    with ``start``/``end`` in seconds and the stripped ``text``; all other
	    lines are ignored.
	    """
	    segments = []
	    for line in stdout.splitlines():
	        match = SEG_RE.match(line.strip())
	        if match is None:
	            continue
	        start, end, text = match.groups()
	        segments.append({"start": ts_to_sec(start),
	                         "end": ts_to_sec(end),
	                         "text": text.strip()})
	    return segments


	def build_app(model_path: Path, cli: Path) -> FastAPI:
	    """Create the FastAPI application that serves one Chorus model.

	    Args:
	        model_path: GGML model file handed to whisper-cli via ``-m``.
	        cli: whisper-cli executable to invoke for every request.

	    Returns:
	        An app exposing ``GET /healthz`` and ``POST /transcribe``.
	    """
	    app = FastAPI(title="Chorus v1 (GGML)")

	    def run(wav: Path, speaker: int):
	        """Transcribe *wav* for one speaker and return the parsed segments.

	        Raises:
	            HTTPException: 504 if whisper-cli exceeds the 120 s timeout,
	                500 if it cannot be started or exits with a non-zero status.
	        """
	        cmd = [str(cli), "-m", str(model_path), "-f", str(wav),
	               "-l", "en", "--speaker", str(speaker), "-nfa", "-np"]
	        try:
	            # Decode explicitly as UTF-8 and replace undecodable bytes so the
	            # result does not depend on the server's locale.
	            r = subprocess.run(
	                cmd, capture_output=True, text=True, encoding="utf-8",
	                errors="replace", check=True, timeout=120,
	            )
	        except subprocess.TimeoutExpired as exc:
	            raise HTTPException(
	                504, f"whisper-cli timed out (speaker {speaker})") from exc
	        except subprocess.CalledProcessError as exc:
	            raise HTTPException(
	                500,
	                f"whisper-cli failed with exit status {exc.returncode} "
	                f"(speaker {speaker})") from exc
	        except OSError as exc:
	            raise HTTPException(500, "could not start whisper-cli") from exc
	        return _parse_segments(r.stdout)

	    @app.get("/healthz")
	    def health():
	        """Liveness probe; also reports the file name of the loaded model."""
	        return {"ok": True, "model": model_path.name}

	    # Deliberately a plain ``def``: FastAPI runs sync handlers in a worker
	    # thread, so the blocking whisper-cli calls (up to 2 x 120 s) no longer
	    # stall the event loop and /healthz stays responsive.
	    @app.post("/transcribe")
	    def transcribe(file: UploadFile = File(...)):
	        """Transcribe an uploaded audio file once per speaker.

	        Returns ``{"speaker1": [...], "speaker2": [...]}`` where each list
	        holds ``{"start", "end", "text"}`` segments.

	        Raises:
	            HTTPException: 413 if the upload exceeds ``MAX_BYTES``, 400 if it
	                is empty, plus the errors raised by ``run``.
	        """
	        # Read one byte past the limit: enough to detect an oversized upload
	        # without loading all of it into memory.
	        data = file.file.read(MAX_BYTES + 1)
	        if len(data) > MAX_BYTES:
	            raise HTTPException(413, "file too large (50MB max)")
	        if not data:
	            raise HTTPException(400, "empty file")
	        # Write into a private temp directory and close the file before
	        # whisper-cli opens it; an open NamedTemporaryFile cannot be reopened
	        # by another process on Windows.
	        with tempfile.TemporaryDirectory() as tmpdir:
	            wav = Path(tmpdir) / "input.wav"
	            wav.write_bytes(data)
	            # NOTE(review): the original comment states whisper-cli accepts
	            # wav/mp3/flac/etc. via ffmpeg and validates internally, so the
	            # ".wav" name is nominal — confirm for the deployed build.
	            s1 = run(wav, 1)
	            s2 = run(wav, 2)
	        return {"speaker1": s1, "speaker2": s2}

	    return app


	def main():
	    """Parse command-line options, locate the model and whisper-cli, and serve.

	    whisper-cli is resolved from ``--whisper-cli`` when given (a path or a
	    command name); otherwise the bundled ``whisper.cpp/build/bin/whisper-cli``
	    next to the models is tried first, then ``whisper-cli`` on PATH.

	    Raises:
	        SystemExit: with a ``missing: ...`` message when the model file or a
	            usable whisper-cli executable cannot be found.
	    """
	    import shutil  # local import: only needed for the executable lookup

	    bundled_cli = HERE.parent / "whisper.cpp/build/bin/whisper-cli"

	    ap = argparse.ArgumentParser()
	    ap.add_argument("--model", choices=list(MODELS), default="q4_k")
	    ap.add_argument("--models-dir", default=str(HERE.parent))
	    ap.add_argument(
	        "--whisper-cli", default=None,
	        help=f"whisper-cli path or command name "
	             f"(default: {bundled_cli}, then `whisper-cli` on PATH)")
	    ap.add_argument("--host", default="0.0.0.0")
	    ap.add_argument("--port", type=int, default=8000)
	    args = ap.parse_args()

	    model_path = Path(args.models_dir) / MODELS[args.model]
	    if not model_path.is_file():
	        raise SystemExit(f"missing: {model_path}")

	    # An explicit option is the only candidate; without it, fall back from the
	    # bundled build to PATH. shutil.which accepts both paths and bare command
	    # names and only returns executables, so failures surface here rather than
	    # on the first request.
	    if args.whisper_cli:
	        candidates = [args.whisper_cli]
	    else:
	        candidates = [str(bundled_cli), "whisper-cli"]
	    found = next(filter(None, map(shutil.which, candidates)), None)
	    if found is None:
	        tried = ", ".join(candidates)
	        raise SystemExit(
	            f"missing: whisper-cli (not found or not executable; tried: {tried})")
	    cli = Path(found)

	    uvicorn.run(build_app(model_path, cli), host=args.host, port=args.port)


	# Start the server only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()