FredyHoundayi commited on
Commit
4845a07
·
0 Parent(s):

Initial FastAPI deployment for MMS speech-to-text on HF Spaces

Browse files
Files changed (5) hide show
  1. .gitignore +16 -0
  2. Dockerfile +18 -0
  3. README.md +42 -0
  4. app.py +56 -0
  5. requirements.txt +10 -0
.gitignore ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.pyo
4
+ .env
5
+ .venv/
6
+ venv/
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+ .DS_Store
11
+ *.ipynb_checkpoints/
12
+ .ipynb_checkpoints/
13
+ *.mp3
14
+ *.wav
15
+ *.flac
16
+ *.ogg
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ RUN apt-get update && apt-get install -y \
4
+ ffmpeg \
5
+ libsndfile1 \
6
+ git \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ WORKDIR /app
10
+
11
+ COPY requirements.txt .
12
+ RUN pip install --no-cache-dir -r requirements.txt
13
+
14
+ COPY app.py .
15
+
16
+ EXPOSE 7860
17
+
18
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: MMS ProxyAPI
3
+ emoji: 🎤
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ # MMS Speech-to-Text API
11
+
12
+ REST API built with FastAPI using the `facebook/mms-1b-all` model — supports 1000+ languages including Twi.
13
+
14
+ ## Endpoints
15
+
16
+ | Method | Path | Description |
17
+ |--------|------|-------------|
18
+ | GET | `/` | API info |
19
+ | GET | `/health` | Health check |
20
+ | POST | `/transcribe` | Transcribe audio file |
21
+
22
+ ## Usage
23
+
24
+ ```bash
25
+ curl -X POST "https://fredyhoundayi-mms-proxyapi.hf.space/transcribe" \
26
+ -F "file=@audio.mp3" \
27
+ -F "language=twi"
28
+ ```
29
+
30
+ ### Response
31
+
32
+ ```json
33
+ {
34
+ "transcription": "awaci ciri duya bi hon",
35
+ "language": "twi"
36
+ }
37
+ ```
38
+
39
+ ## Supported languages
40
+
41
+ Pass any MMS language code via the `language` field (default: `twi`).
42
+ Full list: https://dl.fbaipublicfiles.com/mms/misc/language_coverage_mms.html
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import io

import torch
import librosa
from fastapi import FastAPI, File, Form, UploadFile, HTTPException
from fastapi.responses import JSONResponse
from transformers import Wav2Vec2ForCTC, AutoProcessor
8
+
9
+ app = FastAPI(title="MMS Speech-to-Text API", version="1.0.0")
10
+
11
+ MODEL_ID = "facebook/mms-1b-all"
12
+ processor = None
13
+ model = None
14
+
15
+
16
@app.on_event("startup")
async def load_model():
    """Load the MMS processor and model into the module-level globals.

    Runs once when the application starts. Until it finishes, ``processor``
    and ``model`` remain ``None``.
    """
    global processor, model

    print("Loading MMS model...")
    processor = AutoProcessor.from_pretrained(MODEL_ID)
    # nn.Module.eval() returns the module itself, so the call can be chained.
    model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID).eval()
    print("Model loaded.")
24
+
25
+
26
@app.get("/")
def root():
    """Return a short description of the service and the model it serves."""
    service_info = dict(message="MMS Speech-to-Text API", model=MODEL_ID)
    return service_info
29
+
30
+
31
@app.get("/health")
def health():
    """Report liveness and whether the module-level model has been set."""
    is_model_ready = model is not None
    return {"status": "ok", "model_loaded": is_model_ready}
34
+
35
+
36
# Language whose adapter is currently active on the shared tokenizer/model.
# None means "unknown / not yet synchronised", which forces a switch.
_active_language = None


def _activate_language(language):
    """Point the shared tokenizer and model at the adapter for *language*.

    Raises:
        ValueError: if the tokenizer does not know *language*.
    """
    global _active_language
    if language == _active_language:
        return
    # Mark the state as unknown while switching so that a failure half-way
    # (tokenizer switched, adapter not loaded) triggers a re-sync next time.
    _active_language = None
    processor.tokenizer.set_target_lang(language)
    model.load_adapter(language)
    _active_language = language


@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...), language: str = Form("twi")):
    """Transcribe an uploaded audio file with the MMS model.

    Args:
        file: Audio upload; it is decoded and resampled to 16 kHz mono.
        language: MMS language code selecting the adapter to use
            (form field, defaults to ``"twi"``).

    Returns:
        JSON object with the ``transcription`` text and the ``language`` used.

    Raises:
        HTTPException: 503 if the model is not loaded yet; 400 if the audio
            cannot be decoded, is empty, or the language code is rejected.
    """
    if model is None or processor is None:
        raise HTTPException(status_code=503, detail="Model not loaded yet")

    language = language.strip()
    audio_bytes = await file.read()

    try:
        audio, sampling_rate = librosa.load(io.BytesIO(audio_bytes), sr=16000, mono=True)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Failed to load audio: {e}") from e

    if len(audio) == 0:
        raise HTTPException(status_code=400, detail="Audio contains no samples")

    # No await occurs between switching the adapter and running inference, so
    # requests on the event loop cannot interleave with a different language.
    try:
        _activate_language(language)
    except ValueError as e:
        raise HTTPException(
            status_code=400, detail=f"Unsupported language '{language}': {e}"
        ) from e

    inputs = processor(audio, sampling_rate=sampling_rate, return_tensors="pt")

    with torch.no_grad():
        logits = model(**inputs).logits

    predicted_ids = torch.argmax(logits, dim=-1)[0]
    transcription = processor.decode(predicted_ids)

    return JSONResponse({"transcription": transcription, "language": language})
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.0
2
+ uvicorn[standard]==0.30.6
3
+ python-multipart==0.0.9
4
+ torch==2.3.1
5
+ torchaudio==2.3.1
6
+ transformers==4.44.2
7
+ accelerate==0.33.0
8
+ librosa==0.10.2
9
+ numpy==1.26.4
10
+ soundfile==0.12.1