Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| NCAIR ASR API — multi-language serving app | |
| ========================================== | |
| OpenAI-compatible speech-to-text over all four NCAIR / N-ATLaS Whisper models. | |
| Picks the model from the request's `language` field, so one Space serves the | |
| whole voice assistant. Exposes: | |
| POST /v1/audio/transcriptions multipart: file [, model] [, language] -> {"text": ...} | |
| GET /health | |
| Point the voice-ai-demo's per-language ASR base URLs at this Space, e.g.: | |
| ASR_YORUBA_BASE_URL = https://<this-space>.hf.space/v1 (language hint "yo") | |
| ASR_IGBO_BASE_URL = https://<this-space>.hf.space/v1 (language hint "ig") | |
| Models are GATED: accept each model's terms on HF and set HF_TOKEN. | |
| """ | |
| import functools | |
| import os | |
| import torch | |
| import uvicorn | |
| from fastapi import FastAPI, UploadFile, File, Form, HTTPException | |
| from transformers import pipeline | |
# Hugging Face access token used to download the gated NCAIR models; a blank
# value is normalised to None.
HF_TOKEN = os.environ.get("HF_TOKEN") or None

# Device index handed to the transformers pipeline: 0 when CUDA is available,
# -1 otherwise (reported as "cpu" by the health endpoint).
DEVICE = 0 if torch.cuda.is_available() else -1

# Map an ASR language hint (what the assistant sends as `language`) -> NCAIR model.
LANG_MODELS = {
    "yo": "NCAIR1/Yoruba-ASR",
    "yoruba": "NCAIR1/Yoruba-ASR",
    "ig": "NCAIR1/Igbo-ASR",
    "igbo": "NCAIR1/Igbo-ASR",
    "ha": "NCAIR1/Hausa-ASR",
    "hausa": "NCAIR1/Hausa-ASR",
    "en": "NCAIR1/NigerianAccentedEnglish",
    "english": "NCAIR1/NigerianAccentedEnglish",
    "ng": "NCAIR1/NigerianAccentedEnglish",
    "pcm": "NCAIR1/NigerianAccentedEnglish",
}

# Model used when the request carries no recognised language hint. A blank or
# whitespace-only DEFAULT_ASR_MODEL is treated as unset, so the fallback can
# never become an empty model id.
DEFAULT_MODEL = (
    (os.environ.get("DEFAULT_ASR_MODEL") or "").strip() or "NCAIR1/Yoruba-ASR"
)
# Bounded so that arbitrary explicit "NCAIR1/..." ids sent by clients cannot
# grow the cache without limit; the four known models fit comfortably.
@functools.lru_cache(maxsize=8)
def load_pipe(model_id: str):
    """Load (and cache) the speech-recognition pipeline for ``model_id``.

    The pipeline is built with the module-level ``HF_TOKEN`` and ``DEVICE``
    and processes audio in 30-second chunks. ``lru_cache`` only stores
    successful results, so a load that raised is attempted again on the next
    call instead of being cached as a failure.

    Args:
        model_id: Hugging Face repo id of the ASR model to load.

    Returns:
        The ``automatic-speech-recognition`` pipeline for that model.

    Raises:
        Whatever ``transformers.pipeline`` raises when the model cannot be
        loaded.
    """
    return pipeline(
        "automatic-speech-recognition",
        model=model_id,
        token=HF_TOKEN,
        device=DEVICE,
        chunk_length_s=30,
    )
def resolve_model(language: str, model_field: str) -> str:
    """Choose the NCAIR model for a request.

    Resolution order:
      1. an explicit ``NCAIR1/...`` repo id in ``model_field``;
      2. the language hint looked up in ``LANG_MODELS`` (case-insensitive);
      3. the hint's primary subtag, so locale-style values such as
         ``"en-US"`` or ``"yo_NG"`` resolve like ``"en"`` / ``"yo"``;
      4. ``DEFAULT_MODEL``.

    Args:
        language: Language hint sent by the client; may be empty or None.
        model_field: Value of the request's ``model`` field; may be empty
            or None.

    Returns:
        The repo id of the model to use.
    """
    explicit = (model_field or "").strip()
    if explicit.startswith("NCAIR1/"):
        return explicit

    key = (language or "").strip().lower()
    if key in LANG_MODELS:
        return LANG_MODELS[key]

    # Fall back to the part before the first "-" / "_" (e.g. "en-us" -> "en").
    primary = key.replace("_", "-").split("-", 1)[0]
    return LANG_MODELS.get(primary, DEFAULT_MODEL)
app = FastAPI(title="NCAIR ASR API")


@app.get("/health")
def health():
    """Report service status for ``GET /health``.

    Returns:
        A dict with a fixed ``"status": "ok"``, the device in use (``"cuda"``
        when ``DEVICE`` is 0, otherwise ``"cpu"``), the sorted list of distinct
        models reachable through ``LANG_MODELS``, and the default model.
    """
    return {
        "status": "ok",
        "device": "cuda" if DEVICE == 0 else "cpu",
        "models": sorted(set(LANG_MODELS.values())),
        "default": DEFAULT_MODEL,
    }
@app.post("/v1/audio/transcriptions")
async def transcriptions(
    file: UploadFile = File(...),
    model: str = Form(default=""),
    language: str = Form(default=""),
):
    """Transcribe an uploaded audio file (``POST /v1/audio/transcriptions``).

    Args:
        file: The uploaded audio (multipart field ``file``).
        model: Optional explicit model id; honoured by ``resolve_model`` when
            it names an ``NCAIR1/`` repo.
        language: Optional language hint used to pick the model.

    Returns:
        ``{"text": <stripped transcript>, "model": <model id used>}``.

    Raises:
        HTTPException: 400 when the upload is empty; 500 when the model cannot
            be loaded or the pipeline fails on the audio.
    """
    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Empty audio")

    model_id = resolve_model(language, model)

    # Loading and inference are wrapped separately so the error detail tells
    # the client which stage failed; `from e` keeps the original traceback.
    try:
        pipe = load_pipe(model_id)
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Could not load {model_id}: {e}"
        ) from e
    try:
        result = pipe(audio_bytes)
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"ASR failed ({model_id}): {e}"
        ) from e

    # The result is read as a dict with a "text" key; any other shape is
    # stringified, and a missing/None text becomes "".
    text = (result.get("text") if isinstance(result, dict) else str(result)) or ""
    return {"text": text.strip(), "model": model_id}
if __name__ == "__main__":
    # Listen on $PORT, defaulting to 7860; a blank PORT is treated as unset
    # rather than crashing on int("").
    port = int(os.environ.get("PORT") or 7860)
    uvicorn.run(app, host="0.0.0.0", port=port, log_level="info")