ncair-asr-api / app.py
PlotweaverModel's picture
Upload 4 files
fe3947d verified
Raw
History Blame Contribute Delete
3.39 kB
#!/usr/bin/env python3
"""
NCAIR ASR API — multi-language serving app
==========================================
OpenAI-compatible speech-to-text over all four NCAIR / N-ATLaS Whisper models.
Picks the model from the request's `language` field, so one Space serves the
whole voice assistant. Exposes:
POST /v1/audio/transcriptions multipart: file [, model] [, language] -> {"text": ..., "model": ...}
GET /health
Point the voice-ai-demo's per-language ASR base URLs at it, e.g.:
ASR_YORUBA_BASE_URL = https://<this-space>.hf.space/v1 (language hint "yo")
ASR_IGBO_BASE_URL = https://<this-space>.hf.space/v1 (language hint "ig")
Models are GATED: accept each model's terms on HF and set HF_TOKEN.
"""
import functools
import os
import threading

import torch
import uvicorn
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from fastapi.concurrency import run_in_threadpool
from transformers import pipeline
# Hugging Face access token for the gated repos; an unset or empty variable
# becomes None.
HF_TOKEN = os.getenv("HF_TOKEN") or None

# Device index handed to the pipeline: 0 when CUDA is available, otherwise -1.
DEVICE = 0 if torch.cuda.is_available() else -1

# ASR language hint (what the assistant sends as `language`) -> NCAIR model.
# Each repo is listed once, next to every hint that selects it.
LANG_MODELS = {
    hint: repo
    for repo, hints in (
        ("NCAIR1/Yoruba-ASR", ("yo", "yoruba")),
        ("NCAIR1/Igbo-ASR", ("ig", "igbo")),
        ("NCAIR1/Hausa-ASR", ("ha", "hausa")),
        ("NCAIR1/NigerianAccentedEnglish", ("en", "english", "ng", "pcm")),
    )
    for hint in hints
}

# Model used when no hint matches; overridable through DEFAULT_ASR_MODEL.
DEFAULT_MODEL = os.getenv("DEFAULT_ASR_MODEL", "NCAIR1/Yoruba-ASR")
@functools.lru_cache(maxsize=8)
def load_pipe(model_id: str):
    """Build the speech-recognition pipeline for `model_id`, memoised per id.

    The cache keeps at most 8 pipelines. An exception raised while loading is
    not cached by lru_cache, so a later call with the same id tries again.
    """
    # Everything except the task name, gathered in one place.
    pipeline_kwargs = {
        "model": model_id,
        "token": HF_TOKEN,
        "device": DEVICE,
        "chunk_length_s": 30,
    }
    return pipeline("automatic-speech-recognition", **pipeline_kwargs)
def resolve_model(language: str, model_field: str) -> str:
    """Choose the NCAIR model repo for a request.

    Precedence:
      1. `model_field`, when it names a repo under the ``NCAIR1/`` namespace.
      2. The `LANG_MODELS` entry for the language hint (case-insensitive,
         surrounding whitespace ignored).
      3. The `LANG_MODELS` entry for the hint's primary subtag, so that
         region-tagged hints such as "en-NG" or "yo_NG" resolve like "en"
         and "yo" instead of silently falling through to the default.
      4. `DEFAULT_MODEL`.

    Args:
        language: Language hint from the request; may be empty or None.
        model_field: Value of the request's `model` field; may be empty or None.

    Returns:
        The model repo id to load.
    """
    if model_field and model_field.startswith("NCAIR1/"):
        return model_field

    key = (language or "").strip().lower()
    if key in LANG_MODELS:
        return LANG_MODELS[key]

    # Locale-style hints ("en-US", "ha_NG"): retry with the part before the
    # first separator. For a hint without a separator this is the same key,
    # so the lookup simply lands on the default.
    primary = key.replace("_", "-").partition("-")[0]
    return LANG_MODELS.get(primary, DEFAULT_MODEL)
# Application object: the route decorators below register their handlers on
# it, and the __main__ entry point passes it to uvicorn.
app = FastAPI(title="NCAIR ASR API")
@app.get("/health")
def health():
    """Report service status, the compute device, the mapped models and the default model."""
    device_name = "cpu" if DEVICE != 0 else "cuda"
    # Several hints share a repo, so de-duplicate before sorting.
    served_models = sorted({*LANG_MODELS.values()})
    return {
        "status": "ok",
        "device": device_name,
        "models": served_models,
        "default": DEFAULT_MODEL,
    }
# Model loading and inference run in worker threads (see `transcriptions`).
# This lock keeps them one-at-a-time, exactly as they were when they executed
# directly on the event loop, so the pipelines never see concurrent calls.
_ASR_LOCK = threading.Lock()


def _run_serialized(fn, *args):
    """Call ``fn(*args)`` while holding the ASR lock and return its result."""
    with _ASR_LOCK:
        return fn(*args)


@app.post("/v1/audio/transcriptions")
async def transcriptions(
    file: UploadFile = File(...),
    model: str = Form(default=""),
    language: str = Form(default=""),
):
    """Transcribe an uploaded audio file.

    Form fields:
        file: The audio upload (required).
        model: Optional explicit model id, passed to `resolve_model`.
        language: Optional language hint, passed to `resolve_model`.

    Returns:
        ``{"text": <stripped transcript>, "model": <model id used>}``.

    Raises:
        HTTPException(400): the upload is empty.
        HTTPException(500): the model could not be loaded, or inference failed.
    """
    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Empty audio")

    model_id = resolve_model(language, model)

    # Loading a model and running inference are long blocking calls. Executing
    # them inline in this coroutine would freeze the event loop (and with it
    # /health and every other request) until they finish, so both are handed
    # to the threadpool instead.
    try:
        pipe = await run_in_threadpool(_run_serialized, load_pipe, model_id)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Could not load {model_id}: {e}") from e

    try:
        result = await run_in_threadpool(_run_serialized, pipe, audio_bytes)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"ASR failed ({model_id}): {e}") from e

    # A dict result carries the transcript under "text"; anything else is
    # stringified. A missing or empty transcript becomes "".
    text = (result.get("text") if isinstance(result, dict) else str(result)) or ""
    return {"text": text.strip(), "model": model_id}
if __name__ == "__main__":
    # Script entry point: listen on all interfaces; PORT overrides the
    # default port 7860.
    listen_port = int(os.getenv("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port, log_level="info")