Chorus-v1-GGML / scripts /serve_chorus.py
RonanMcGovern's picture
Initial upload: f16 + q4_k/q5_k/q8_0 GGML, scripts, whisper.cpp patch
5887db6 verified
"""
Chorus v1 (GGML) HTTP inference server.
POST /transcribe multipart form with `file=<audio>` → JSON {speaker1, speaker2}
GET /healthz → 200 OK
Run:
uv run --with fastapi --with uvicorn --with python-multipart \
python serve_chorus.py --model q4_k --port 8000
Requires `whisper-cli` on PATH or --whisper-cli, and a ggml-chorus-v1-*.bin file.
"""
from __future__ import annotations
import argparse, io, os, re, subprocess, tempfile
from pathlib import Path
from fastapi import FastAPI, UploadFile, File, HTTPException
import uvicorn
# Directory that contains this script; default locations are derived from it.
HERE = Path(__file__).resolve().parent

# Accepted values of --model mapped to the GGML weight file each one selects.
MODELS = {
    "q4_k": "ggml-chorus-v1-q4_k.bin",
    "f16": "ggml-chorus-v1-f16.bin",
}

# One transcript line: "[HH:MM:SS.fff --> HH:MM:SS.fff]  text".
# Groups: 1 = start timestamp, 2 = end timestamp, 3 = segment text.
SEG_RE = re.compile(
    r"^\[(\d\d:\d\d:\d\d\.\d+)\s*-->\s*(\d\d:\d\d:\d\d\.\d+)\]\s+(.*)$"
)

# Largest upload accepted by /transcribe, in bytes (50 MiB).
MAX_BYTES = 50 * 1024 * 1024
def ts_to_sec(ts: str) -> float:
    """Convert an ``HH:MM:SS.fff`` timestamp into a number of seconds.

    The string is split on ``:`` into exactly three fields. Hours and
    minutes are parsed as integers, the seconds field as a float.

    Raises:
        ValueError: if *ts* does not have exactly three ``:``-separated
            fields or a field cannot be parsed as a number.
    """
    hours, minutes, seconds = ts.split(":")
    whole_seconds = 3600 * int(hours) + 60 * int(minutes)
    return whole_seconds + float(seconds)
def build_app(model_path: Path, cli: Path) -> FastAPI:
    """Create the FastAPI application that serves Chorus transcriptions.

    Args:
        model_path: GGML model file handed to ``whisper-cli`` via ``-m``.
        cli: Path to the ``whisper-cli`` executable.

    Returns:
        An app exposing ``GET /healthz`` and ``POST /transcribe``.
    """
    app = FastAPI(title="Chorus v1 (GGML)")

    def run(wav: Path, speaker: int) -> list[dict]:
        """Run ``whisper-cli`` on *wav* for one speaker index and parse stdout.

        Every stdout line matching ``SEG_RE`` becomes a dict with ``start``
        and ``end`` (seconds) and ``text``; all other lines are ignored.

        Raises:
            subprocess.CalledProcessError: ``whisper-cli`` exited non-zero.
            subprocess.TimeoutExpired: the run exceeded 120 seconds.
        """
        proc = subprocess.run(
            [str(cli), "-m", str(model_path), "-f", str(wav),
             "-l", "en", "--speaker", str(speaker), "-nfa", "-np"],
            capture_output=True, text=True, check=True, timeout=120,
        )
        matches = (SEG_RE.match(line.strip()) for line in proc.stdout.splitlines())
        return [
            {
                "start": ts_to_sec(m.group(1)),
                "end": ts_to_sec(m.group(2)),
                "text": m.group(3).strip(),
            }
            for m in matches
            if m
        ]

    @app.get("/healthz")
    def health():
        """Report liveness together with the configured model file name."""
        return {"ok": True, "model": model_path.name}

    # Deliberately a plain ``def``: the handler blocks on two subprocess runs
    # (up to 120 s each). FastAPI executes non-async handlers in a worker
    # thread, so the event loop -- and /healthz -- stay responsive meanwhile.
    @app.post("/transcribe")
    def transcribe(file: UploadFile = File(...)):
        """Transcribe an uploaded audio file once per speaker (1 and 2).

        Responds with ``{"speaker1": [...], "speaker2": [...]}``. Rejects
        empty or oversized uploads with 400 and answers 504 when
        ``whisper-cli`` times out.
        """
        # Read at most one byte past the limit: enough to detect an oversized
        # upload without pulling an arbitrarily large body into memory.
        data = file.file.read(MAX_BYTES + 1)
        if len(data) > MAX_BYTES:
            raise HTTPException(400, "file too large (50MB max)")
        if not data:
            raise HTTPException(400, "empty file")

        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as tmp:
            tmp.write(data)
            tmp.flush()  # make the bytes visible to the child process
            wav = Path(tmp.name)
            # whisper-cli accepts wav/mp3/flac/etc. via ffmpeg; it validates internally.
            try:
                s1 = run(wav, 1)
                s2 = run(wav, 2)
            except subprocess.TimeoutExpired as exc:
                raise HTTPException(504, "transcription timed out") from exc
        return {"speaker1": s1, "speaker2": s2}

    return app
def main():
    """Parse command-line options, locate the model and ``whisper-cli``, and serve.

    ``--whisper-cli`` may be a filesystem path or a bare command name that is
    looked up on ``PATH``. When the option is left at its default and the
    bundled build is absent, ``whisper-cli`` from ``PATH`` is used instead.

    Raises:
        SystemExit: if the model file or the ``whisper-cli`` executable
            cannot be found.
    """
    import shutil  # function-scope: only needed for the PATH lookup below

    default_cli = str(HERE.parent / "whisper.cpp/build/bin/whisper-cli")

    ap = argparse.ArgumentParser()
    ap.add_argument("--model", choices=list(MODELS), default="q4_k")
    ap.add_argument("--models-dir", default=str(HERE.parent))
    ap.add_argument("--whisper-cli", default=default_cli)
    ap.add_argument("--host", default="0.0.0.0")
    ap.add_argument("--port", type=int, default=8000)
    args = ap.parse_args()

    model_path = Path(args.models_dir) / MODELS[args.model]

    cli = Path(args.whisper_cli)
    if not cli.exists():
        # Not an existing path: treat the value as a command name on PATH.
        on_path = shutil.which(args.whisper_cli)
        if on_path is None and args.whisper_cli == default_cli:
            # Option left at its default and no bundled build present.
            on_path = shutil.which("whisper-cli")
        if on_path is not None:
            cli = Path(on_path)

    for p in (model_path, cli):
        if not p.exists():
            raise SystemExit(f"missing: {p}")
    uvicorn.run(build_app(model_path, cli), host=args.host, port=args.port)


if __name__ == "__main__":
    main()