alesamodio committed on
Commit
403e6d8
·
1 Parent(s): 3cd0c0a

Add STT endpoint (no binaries)

Browse files
Files changed (5) hide show
  1. .gitignore +5 -0
  2. Dockerfile +6 -1
  3. app.py +3 -1
  4. requirements.txt +6 -0
  5. routes_stt.py +48 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # audio test files
2
+ *.m4a
3
+ *.wav
4
+ *.mp3
5
+ *.m4p
Dockerfile CHANGED
@@ -2,6 +2,11 @@ FROM python:3.10-slim
2
 
3
  WORKDIR /app
4
 
 
 
 
 
 
5
  # install dependencies
6
  COPY requirements.txt .
7
  RUN pip install --no-cache-dir -r requirements.txt
@@ -10,4 +15,4 @@ RUN pip install --no-cache-dir -r requirements.txt
10
  COPY . .
11
 
12
  # run your FastAPI app
13
- CMD ["uvicorn", "app:APP", "--host", "0.0.0.0", "--port", "7860"]
 
2
 
3
  WORKDIR /app
4
 
5
+ # 🔹 REQUIRED for Whisper (audio decoding)
6
+ RUN apt-get update && \
7
+ apt-get install -y ffmpeg && \
8
+ rm -rf /var/lib/apt/lists/*
9
+
10
  # install dependencies
11
  COPY requirements.txt .
12
  RUN pip install --no-cache-dir -r requirements.txt
 
15
  COPY . .
16
 
17
  # run your FastAPI app
18
+ CMD ["uvicorn", "app:APP", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -7,6 +7,8 @@ from jose import jwt, JWTError
7
  from supabase_ie import load_history_for_display
8
  from datetime import datetime
9
  from routes_utils import router as utils_router
 
 
10
  from translate_query_response import (
11
  detect_language_code,
12
  translate_to_english,
@@ -18,7 +20,7 @@ from app_nn import run_chat_app # your Socrates logic
18
 
19
  APP = FastAPI(title="Socrates API", version="1.0")
20
  APP.include_router(utils_router)
21
-
22
  APP.add_middleware(
23
  CORSMiddleware,
24
  allow_origins=["*"],
 
7
  from supabase_ie import load_history_for_display
8
  from datetime import datetime
9
  from routes_utils import router as utils_router
10
+ from routes_stt import router as stt_router
11
+
12
  from translate_query_response import (
13
  detect_language_code,
14
  translate_to_english,
 
20
 
21
  APP = FastAPI(title="Socrates API", version="1.0")
22
  APP.include_router(utils_router)
23
+ APP.include_router(stt_router)
24
  APP.add_middleware(
25
  CORSMiddleware,
26
  allow_origins=["*"],
requirements.txt CHANGED
@@ -34,3 +34,9 @@ trafilatura>=1.7
34
  PyJWT
35
  lxml_html_clean
36
 
 
 
 
 
 
 
 
34
  PyJWT
35
  lxml_html_clean
36
 
37
+ #----voice transcription -----
38
+ python-multipart
39
+ openai-whisper
40
+ torch
41
+
42
+
routes_stt.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# routes_stt.py
import os
import tempfile

from fastapi import APIRouter, File, Form, HTTPException, UploadFile
import whisper

router = APIRouter()

# Model size is configurable via env var: tiny/base/small/medium/large.
MODEL_NAME = os.getenv("WHISPER_MODEL", "small")
# Load once at import time so requests don't pay the multi-second load cost.
model = whisper.load_model(MODEL_NAME)


@router.post("/stt")
async def stt(
    audio: UploadFile = File(...),
    language: str | None = Form(None),  # e.g. "en", "it"; None => auto-detect
):
    """Transcribe an uploaded audio file with Whisper.

    Returns ``{"text": ..., "language": ...}``.
    Raises HTTP 422 when the transcription comes back empty and
    HTTP 500 on any other failure.
    """
    tmp_path = None
    # Fix: an empty/omitted multipart field can arrive as "" rather than
    # None; Whisper rejects "" as a language code, so normalize to None
    # (auto-detect) instead of erroring.
    language = language or None
    try:
        # Whisper needs a real file path (it decodes via ffmpeg), so
        # persist the upload to a temp file first. Keep the original
        # extension as a decoding hint; default to .m4a.
        suffix = os.path.splitext(audio.filename or "")[-1] or ".m4a"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = tmp.name
            tmp.write(await audio.read())

        # Transcribe (language=None lets Whisper auto-detect).
        result = model.transcribe(
            tmp_path,
            language=language,
            task="transcribe",
            fp16=False,  # fp16 is unsafe/warns on CPU; force fp32
        )

        text = (result.get("text") or "").strip()
        if not text:
            raise HTTPException(status_code=422, detail="Empty transcription")

        return {"text": text, "language": result.get("language")}

    except HTTPException:
        # Re-raise our own HTTP errors unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"STT failed: {str(e)}")
    finally:
        # Always remove the temp file, even on failure; best-effort only.
        if tmp_path and os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except Exception:
                pass