#!/usr/bin/env python3
"""
NCAIR ASR API — multi-language serving app
==========================================
OpenAI-compatible speech-to-text over all four NCAIR / N-ATLaS Whisper models.
Picks the model from the request's `language` field, so one Space serves the
whole voice assistant.

Exposes:
  POST /v1/audio/transcriptions   multipart: file [, model] [, language]
                                  -> {"text": ..., "model": ...}
  GET  /health

Wire the voice-ai-demo's per-language ASR at it, e.g.:
  ASR_YORUBA_BASE_URL = https://YOUR-SPACE.hf.space/v1   (language hint "yo")
  ASR_IGBO_BASE_URL   = https://YOUR-SPACE.hf.space/v1   (language hint "ig")

Models are GATED: accept each model's terms on HF and set HF_TOKEN.
"""
import functools
import logging
import os
import threading
from typing import Optional

import torch
import uvicorn
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from fastapi.concurrency import run_in_threadpool
from transformers import pipeline

logger = logging.getLogger(__name__)

# An empty HF_TOKEN is treated the same as an unset one.
HF_TOKEN = os.environ.get("HF_TOKEN") or None
# transformers device convention: 0 = first CUDA device, -1 = CPU.
DEVICE = 0 if torch.cuda.is_available() else -1

# Map an ASR language hint (what the assistant sends as `language`) -> NCAIR model.
LANG_MODELS = {
    "yo": "NCAIR1/Yoruba-ASR",
    "yoruba": "NCAIR1/Yoruba-ASR",
    "ig": "NCAIR1/Igbo-ASR",
    "igbo": "NCAIR1/Igbo-ASR",
    "ha": "NCAIR1/Hausa-ASR",
    "hausa": "NCAIR1/Hausa-ASR",
    "en": "NCAIR1/NigerianAccentedEnglish",
    "english": "NCAIR1/NigerianAccentedEnglish",
    "ng": "NCAIR1/NigerianAccentedEnglish",
    "pcm": "NCAIR1/NigerianAccentedEnglish",
}
DEFAULT_MODEL = os.environ.get("DEFAULT_ASR_MODEL", "NCAIR1/Yoruba-ASR")

# Serializes model loading and inference. The blocking work runs in worker
# threads (so the event loop stays responsive); the lock keeps it one-at-a-time
# and prevents two concurrent first requests from loading the same model twice.
_INFERENCE_LOCK = threading.Lock()


@functools.lru_cache(maxsize=8)
def load_pipe(model_id: str):
    """Load (and cache) an ASR pipeline for ``model_id``.

    lru_cache only stores successes, so a failed load can be retried on the
    next request. At most 8 distinct pipelines are kept; the least recently
    used one is evicted beyond that.
    """
    return pipeline(
        "automatic-speech-recognition",
        model=model_id,
        token=HF_TOKEN,
        device=DEVICE,
        chunk_length_s=30,
    )


def resolve_model(language: Optional[str], model_field: Optional[str]) -> str:
    """Choose the NCAIR model for a request.

    Prefer an explicit NCAIR repo if sent; otherwise map from the language
    hint; else the default.

    Args:
        language: Language hint from the request (e.g. "yo", "Igbo"); matched
            case-insensitively after stripping whitespace. May be empty/None.
        model_field: The request's ``model`` value. Only honoured when it
            starts with "NCAIR1/"; anything else (e.g. "whisper-1") is ignored.

    Returns:
        The Hugging Face repo id of the model to use.
    """
    if model_field and model_field.startswith("NCAIR1/"):
        return model_field
    key = (language or "").strip().lower()
    return LANG_MODELS.get(key, DEFAULT_MODEL)


def _transcribe_blocking(model_id: str, audio_bytes: bytes):
    """Load ``model_id`` (cached) and run it on ``audio_bytes``; blocking.

    Intended to be called from a worker thread. Returns the raw pipeline
    result. Raises HTTPException(500) if the model cannot be loaded or if
    inference fails; the original exception is chained and logged.
    """
    with _INFERENCE_LOCK:
        try:
            pipe = load_pipe(model_id)
        except Exception as e:
            logger.exception("Could not load %s", model_id)
            raise HTTPException(
                status_code=500, detail=f"Could not load {model_id}: {e}"
            ) from e
        try:
            return pipe(audio_bytes)
        except Exception as e:
            logger.exception("ASR failed (%s)", model_id)
            raise HTTPException(
                status_code=500, detail=f"ASR failed ({model_id}): {e}"
            ) from e


app = FastAPI(title="NCAIR ASR API")


@app.get("/health")
async def health():
    """Report service status, compute device, and the models this app maps to.

    Declared ``async`` because it does no blocking work: it is answered
    directly on the event loop and cannot be held up by worker threads that
    are busy with (or queued for) inference.
    """
    return {
        "status": "ok",
        "device": "cuda" if DEVICE == 0 else "cpu",
        "models": sorted(set(LANG_MODELS.values())),
        "default": DEFAULT_MODEL,
    }


@app.post("/v1/audio/transcriptions")
async def transcriptions(
    file: UploadFile = File(...),
    model: str = Form(default=""),
    language: str = Form(default=""),
):
    """Transcribe an uploaded audio file.

    Form fields:
        file: The audio to transcribe (required).
        model: Optional explicit "NCAIR1/..." repo id.
        language: Optional language hint used to pick the model.

    Returns:
        {"text": <stripped transcript>, "model": <repo id that was used>}

    Raises:
        HTTPException 400: the upload is empty.
        HTTPException 500: the model could not be loaded, or inference failed.
    """
    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Empty audio")

    model_id = resolve_model(language, model)

    # Model loading and inference are blocking and can take seconds; running
    # them on the event loop would stall every other request (including
    # /health), so hand them to a worker thread.
    result = await run_in_threadpool(_transcribe_blocking, model_id, audio_bytes)

    text = (result.get("text") if isinstance(result, dict) else str(result)) or ""
    return {"text": text.strip(), "model": model_id}


if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port, log_level="info")