Spaces:

PlotweaverModel
/

ncair-asr-api

Running

App Files Files Community

ncair-asr-api / app.py

PlotweaverModel

Upload 4 files

fe3947d verified 1 day ago

Raw

History Blame Contribute Delete

3.39 kB

	#!/usr/bin/env python3
	"""
	NCAIR ASR API — multi-language serving app
	==========================================
	OpenAI-compatible speech-to-text over all four NCAIR / N-ATLaS Whisper models.
	Picks the model from the request's `language` field, so one Space serves the
	whole voice assistant. Exposes:

	POST /v1/audio/transcriptions multipart: file [, model] [, language] -> {"text": ..., "model": ...}
	GET /health

	Point the voice-ai-demo's per-language ASR settings at this Space, e.g.:
	ASR_YORUBA_BASE_URL = https://<this-space>.hf.space/v1 (language hint "yo")
	ASR_IGBO_BASE_URL = https://<this-space>.hf.space/v1 (language hint "ig")

	Models are GATED: accept each model's terms on HF and set HF_TOKEN.
	"""

	import functools
	import os
	import threading

	import torch
	import uvicorn
	from fastapi import FastAPI, UploadFile, File, Form, HTTPException
	from fastapi.concurrency import run_in_threadpool
	from transformers import pipeline

	# Hugging Face access token; an unset or empty variable becomes None.
	HF_TOKEN = os.environ.get("HF_TOKEN") or None
	# Device argument handed to the pipeline: 0 when CUDA is available, else -1.
	DEVICE = 0 if torch.cuda.is_available() else -1

	# Map an ASR language hint (what the assistant sends as `language`) -> NCAIR model.
	# Keys are lower-case; resolve_model lower-cases the hint before the lookup.
	LANG_MODELS = {
	    "yo": "NCAIR1/Yoruba-ASR",
	    "yoruba": "NCAIR1/Yoruba-ASR",
	    "ig": "NCAIR1/Igbo-ASR",
	    "igbo": "NCAIR1/Igbo-ASR",
	    "ha": "NCAIR1/Hausa-ASR",
	    "hausa": "NCAIR1/Hausa-ASR",
	    "en": "NCAIR1/NigerianAccentedEnglish",
	    "english": "NCAIR1/NigerianAccentedEnglish",
	    "ng": "NCAIR1/NigerianAccentedEnglish",
	    "pcm": "NCAIR1/NigerianAccentedEnglish",
	}
	# Model used when no hint matches. A set-but-blank DEFAULT_ASR_MODEL falls back
	# to the built-in default instead of producing an empty model id (same
	# treatment HF_TOKEN gets above).
	DEFAULT_MODEL = os.environ.get("DEFAULT_ASR_MODEL", "").strip() or "NCAIR1/Yoruba-ASR"


	@functools.lru_cache(maxsize=8)
	def load_pipe(model_id: str):
	    """Build the speech-recognition pipeline for ``model_id``, memoized per id.

	    ``functools.lru_cache`` keeps up to 8 pipelines. A call that raises is not
	    cached, so a failed load is attempted again on the next request.
	    """
	    asr_options = {
	        "model": model_id,
	        "token": HF_TOKEN,
	        "device": DEVICE,
	        "chunk_length_s": 30,
	    }
	    return pipeline("automatic-speech-recognition", **asr_options)


	def resolve_model(
	    language: str,
	    model_field: str,
	    *,
	    lang_models: "dict[str, str] | None" = None,
	    default_model: "str | None" = None,
	) -> str:
	    """Choose the NCAIR model repo for a transcription request.

	    Resolution order:

	    1. An explicit NCAIR repo id in ``model_field`` (anything starting with
	       ``"NCAIR1/"`` once surrounding whitespace is removed) is returned.
	    2. Otherwise the ``language`` hint is looked up case-insensitively. When
	       the full hint is not mapped, a locale-style tag such as ``"en-US"`` or
	       ``"yo_NG"`` falls back to its primary subtag (``"en"``, ``"yo"``).
	    3. Otherwise the default model is returned.

	    Args:
	        language: Language hint from the request; may be empty or None.
	        model_field: Value of the request's ``model`` field; may be empty or
	            None. Values that are not NCAIR repo ids are ignored.
	        lang_models: Optional hint -> repo mapping used instead of the
	            module-level ``LANG_MODELS``.
	        default_model: Optional fallback repo used instead of the module-level
	            ``DEFAULT_MODEL``.

	    Returns:
	        The repo id of the model to load.
	    """
	    explicit = (model_field or "").strip()
	    if explicit.startswith("NCAIR1/"):
	        return explicit

	    table = LANG_MODELS if lang_models is None else lang_models
	    fallback = DEFAULT_MODEL if default_model is None else default_model

	    hint = (language or "").strip().lower()
	    # Try the hint exactly as sent first so existing keys keep winning, then
	    # its primary subtag so "en-US" / "yo_NG" do not drop to the default.
	    primary = hint.replace("_", "-").split("-", 1)[0]
	    for candidate in (hint, primary):
	        if candidate in table:
	            return table[candidate]
	    return fallback


	# The FastAPI application; the route handlers below register on it and
	# uvicorn serves it when this file is run as a script.
	app = FastAPI(title="NCAIR ASR API")


	@app.get("/health")
	def health():
	    """Report liveness together with the serving configuration.

	    Returns a dict with the fixed status ``"ok"``, the device in use
	    (``"cuda"`` when DEVICE is 0, otherwise ``"cpu"``), the sorted unique model
	    repos from LANG_MODELS, and the default model.
	    """
	    if DEVICE == 0:
	        device_name = "cuda"
	    else:
	        device_name = "cpu"
	    # Several hints share a repo, so de-duplicate before sorting.
	    unique_models = sorted({*LANG_MODELS.values()})
	    return dict(
	        status="ok",
	        device=device_name,
	        models=unique_models,
	        default=DEFAULT_MODEL,
	    )


	# Allows one model load or inference at a time. The work now runs in worker
	# threads, so this lock keeps the one-at-a-time behaviour the blocking calls
	# used to impose on the event loop, and stops two requests from loading the
	# same model concurrently.
	_MODEL_LOCK = threading.Lock()


	def _run_locked(func, *args):
	    """Call ``func(*args)`` while holding the model lock and return its result."""
	    with _MODEL_LOCK:
	        return func(*args)


	@app.post("/v1/audio/transcriptions")
	async def transcriptions(
	    file: UploadFile = File(...),
	    model: str = Form(default=""),
	    language: str = Form(default=""),
	):
	    """Transcribe an uploaded audio file with the model chosen for the request.

	    Args:
	        file: The uploaded audio (multipart field ``file``).
	        model: Optional explicit model id; see ``resolve_model``.
	        language: Optional language hint used to pick the model.

	    Returns:
	        ``{"text": <stripped transcript>, "model": <repo id used>}``.

	    Raises:
	        HTTPException: 400 when the upload is empty; 500 when the model
	            cannot be loaded or the transcription fails.
	    """
	    audio_bytes = await file.read()
	    if not audio_bytes:
	        raise HTTPException(status_code=400, detail="Empty audio")

	    model_id = resolve_model(language, model)
	    # Loading and inference are blocking calls. Running them in the thread
	    # pool keeps the event loop (and /health) responsive while they work.
	    try:
	        pipe = await run_in_threadpool(_run_locked, load_pipe, model_id)
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"Could not load {model_id}: {e}") from e
	    try:
	        result = await run_in_threadpool(_run_locked, pipe, audio_bytes)
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"ASR failed ({model_id}): {e}") from e

	    # Take the "text" entry when the pipeline returns a dict; stringify any
	    # other result. A missing or empty value becomes "".
	    text = (result.get("text") if isinstance(result, dict) else str(result)) or ""
	    return {"text": text.strip(), "model": model_id}


	if __name__ == "__main__":
	    # Serve on all interfaces; the port comes from PORT, defaulting to 7860.
	    uvicorn.run(
	        app,
	        host="0.0.0.0",
	        port=int(os.environ.get("PORT", 7860)),
	        log_level="info",
	    )