Spaces:
Running
Running
| """ | |
| Sinama Audio Classifier – Hugging Face Spaces API | |
| -------------------------------------------------- | |
| FastAPI app that loads the trained CNN model, accepts audio uploads, | |
| and returns top-5 predicted Cebuano/Sinama words with confidence scores. | |
| Deploy this as a Hugging Face Space (Docker or Gradio SDK). | |
| """ | |
| import json | |
| import os | |
| import tempfile | |
| from contextlib import asynccontextmanager | |
| import librosa | |
| import numpy as np | |
| import tensorflow as tf | |
| from fastapi import FastAPI, File, HTTPException, UploadFile | |
| from fastapi.middleware.cors import CORSMiddleware | |
| # --------------------------------------------------------------------------- | |
| # Config – must match training preprocessing | |
| # --------------------------------------------------------------------------- | |
SAMPLE_RATE: int = 22050   # Hz; audio is resampled to this rate when loaded
DURATION: float = 4.0      # seconds of audio kept per clip
N_MELS: int = 128          # number of Mel bands in the spectrogram
N_FFT: int = 2048          # FFT window size, in samples
HOP_LENGTH: int = 512      # hop between successive frames, in samples
# Fixed clip length in samples; shorter clips are padded, longer ones trimmed.
TARGET_LEN: int = int(DURATION * SAMPLE_RATE)

# Model artefacts (uploaded to the Space repo)
MODEL_PATH: str = "best_model.keras"
LABEL_MAP_PATH: str = "label_map.json"
| # --------------------------------------------------------------------------- | |
| # Global state | |
| # --------------------------------------------------------------------------- | |
# Both stay None until the lifespan handler has loaded them at startup.
model, label_map = None, None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the model and label map on startup; release them on shutdown.

    Passed to ``FastAPI(lifespan=...)``. The code before ``yield`` runs once
    before the application starts serving; the ``finally`` clause runs when
    the application shuts down.

    Args:
        app: The FastAPI application being started (unused here).

    Raises:
        RuntimeError: If no file exists at ``MODEL_PATH``.
    """
    global model, label_map

    if not os.path.exists(MODEL_PATH):
        raise RuntimeError(f"Model file not found: {MODEL_PATH}")
    model = tf.keras.models.load_model(MODEL_PATH)
    print(f"[app] Model loaded from {MODEL_PATH}")

    with open(LABEL_MAP_PATH, "r", encoding="utf-8") as f:
        raw = json.load(f)
    # JSON object keys are always strings; convert them back to the integer
    # class indices used to look up model outputs.
    label_map = {int(k): v for k, v in raw.items()}
    print(f"[app] Loaded {len(label_map)} classes")

    try:
        yield
    finally:
        # Drop the references even if shutdown is triggered by an exception.
        model = None
        label_map = None
app = FastAPI(
    title="Sinama Audio Classifier",
    description="Classify spoken Cebuano/Sinama words from audio clips",
    version="1.0.0",
    lifespan=lifespan,
)

# Allow the Flutter app / any frontend to call this API.
# Credentials are deliberately disabled: a wildcard origin must not be
# combined with allow_credentials=True, and no endpoint in this file reads
# cookies or authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
| # --------------------------------------------------------------------------- | |
| # Audio preprocessing | |
| # --------------------------------------------------------------------------- | |
def preprocess_audio(audio_bytes: bytes) -> np.ndarray:
    """Convert raw audio bytes into a normalised Mel-spectrogram batch.

    The bytes are written to a temporary file so librosa can decode them from
    a path. The audio is loaded as mono at ``SAMPLE_RATE``, zero-padded or
    trimmed to ``TARGET_LEN`` samples, turned into a dB-scaled Mel
    spectrogram, and standardised to zero mean / unit variance.

    Args:
        audio_bytes: Contents of an encoded audio file.

    Returns:
        Array of shape ``(1, freq_bins, time_steps, 1)``.

    Raises:
        Exception: Whatever ``librosa.load`` raises when the bytes cannot be
            decoded; the temporary file is removed in every case.
    """
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    try:
        # Close the handle before decoding so the data is flushed to disk and
        # the file can be reopened by path.
        with tmp:
            tmp.write(audio_bytes)
        waveform, _ = librosa.load(tmp.name, sr=SAMPLE_RATE, mono=True)
    finally:
        # Covers the write as well as the load, so a failed write cannot
        # leave the delete=False file behind.
        os.unlink(tmp.name)

    # Pad or trim to fixed duration
    if len(waveform) < TARGET_LEN:
        waveform = np.pad(waveform, (0, TARGET_LEN - len(waveform)))
    else:
        waveform = waveform[:TARGET_LEN]

    # Mel spectrogram, converted to dB relative to its own peak
    mel = librosa.feature.melspectrogram(
        y=waveform, sr=SAMPLE_RATE,
        n_mels=N_MELS, n_fft=N_FFT, hop_length=HOP_LENGTH,
    )
    mel_db = librosa.power_to_db(mel, ref=np.max)

    # Normalise; the epsilon avoids division by zero on a constant spectrogram
    mean, std = mel_db.mean(), mel_db.std()
    mel_db = (mel_db - mean) / (std + 1e-9)

    # Add batch and channel axes: (1, freq_bins, time_steps, 1)
    return mel_db[np.newaxis, ..., np.newaxis]
| # --------------------------------------------------------------------------- | |
| # Endpoints | |
| # --------------------------------------------------------------------------- | |
async def health():
    """Report service status and how many classes are currently loaded."""
    if label_map:
        class_count = len(label_map)
    else:
        # Label map not loaded (or empty): report zero classes.
        class_count = 0
    return {"status": "ok", "classes": class_count}
async def _class_probabilities(file: UploadFile) -> np.ndarray:
    """Validate the upload, extract features and run the model once.

    Shared by ``predict`` and ``predict_raw`` so both apply identical checks.

    Args:
        file: The uploaded audio file.

    Returns:
        The model output for the single uploaded clip (first row of the
        batch returned by ``model.predict``).

    Raises:
        HTTPException: 503 if the model or label map is not loaded; 400 if
            the upload is empty or preprocessing fails.
    """
    if model is None or label_map is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Empty audio file")

    try:
        features = preprocess_audio(audio_bytes)
    except Exception as e:
        # Request boundary: any decoding/feature failure is reported as a
        # client error, with the original exception kept as the cause.
        raise HTTPException(
            status_code=400, detail=f"Audio processing failed: {e}"
        ) from e

    return model.predict(features, verbose=0)[0]


async def predict(file: UploadFile = File(...)):
    """
    Accept an audio file and return top-5 predictions.

    Returns:
        [{"label": "word", "score": 0.95}, ...]
        ordered from highest to lowest score; scores are rounded to 4 places.

    Raises:
        HTTPException: 503 if the model is not loaded; 400 for an empty or
            unprocessable upload.
    """
    preds = await _class_probabilities(file)
    # argsort is ascending, so reverse it and keep the five best indices.
    top_indices = np.argsort(preds)[::-1][:5]
    return [
        {"label": label_map[int(i)], "score": round(float(preds[i]), 4)}
        for i in top_indices
    ]


async def predict_raw(file: UploadFile = File(...)):
    """
    Same as /predict but returns ALL class probabilities.
    Useful for debugging or custom logic in the app.

    Returns:
        {"predictions": {"word": 0.95, ...}} with one entry per model output,
        scores rounded to 4 places.

    Raises:
        HTTPException: 503 if the model is not loaded; 400 for an empty or
            unprocessable upload.
    """
    preds = await _class_probabilities(file)
    return {
        "predictions": {
            label_map[i]: round(float(p), 4) for i, p in enumerate(preds)
        }
    }