alesamodio committed on
Commit
403e6d8
·
1 Parent(s): 3cd0c0a

Add STT endpoint (no binaries)

Browse files
Files changed (5) hide show
  1. .gitignore +5 -0
  2. Dockerfile +6 -1
  3. app.py +3 -1
  4. requirements.txt +6 -0
  5. routes_stt.py +48 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # audio test files
2
+ *.m4a
3
+ *.wav
4
+ *.mp3
5
+ *.m4p
Dockerfile CHANGED
@@ -2,6 +2,11 @@ FROM python:3.10-slim
2
 
3
  WORKDIR /app
4
 
 
 
 
 
 
5
  # install dependencies
6
  COPY requirements.txt .
7
  RUN pip install --no-cache-dir -r requirements.txt
@@ -10,4 +15,4 @@ RUN pip install --no-cache-dir -r requirements.txt
10
  COPY . .
11
 
12
  # run your FastAPI app
13
- CMD ["uvicorn", "app:APP", "--host", "0.0.0.0", "--port", "7860"]
 
2
 
3
  WORKDIR /app
4
 
5
+ # 🔹 REQUIRED for Whisper (audio decoding)
6
+ RUN apt-get update && \
7
+ apt-get install -y ffmpeg && \
8
+ rm -rf /var/lib/apt/lists/*
9
+
10
  # install dependencies
11
  COPY requirements.txt .
12
  RUN pip install --no-cache-dir -r requirements.txt
 
15
  COPY . .
16
 
17
  # run your FastAPI app
18
+ CMD ["uvicorn", "app:APP", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -7,6 +7,8 @@ from jose import jwt, JWTError
7
  from supabase_ie import load_history_for_display
8
  from datetime import datetime
9
  from routes_utils import router as utils_router
 
 
10
  from translate_query_response import (
11
  detect_language_code,
12
  translate_to_english,
@@ -18,7 +20,7 @@ from app_nn import run_chat_app # your Socrates logic
18
 
19
  APP = FastAPI(title="Socrates API", version="1.0")
20
  APP.include_router(utils_router)
21
-
22
  APP.add_middleware(
23
  CORSMiddleware,
24
  allow_origins=["*"],
 
7
  from supabase_ie import load_history_for_display
8
  from datetime import datetime
9
  from routes_utils import router as utils_router
10
+ from routes_stt import router as stt_router
11
+
12
  from translate_query_response import (
13
  detect_language_code,
14
  translate_to_english,
 
20
 
21
  APP = FastAPI(title="Socrates API", version="1.0")
22
  APP.include_router(utils_router)
23
+ APP.include_router(stt_router)
24
  APP.add_middleware(
25
  CORSMiddleware,
26
  allow_origins=["*"],
requirements.txt CHANGED
@@ -34,3 +34,9 @@ trafilatura>=1.7
34
  PyJWT
35
  lxml_html_clean
36
 
 
 
 
 
 
 
 
34
  PyJWT
35
  lxml_html_clean
36
 
37
+ #----voice transcription -----
38
+ python-multipart
39
+ openai-whisper
40
+ torch
41
+
42
+
routes_stt.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# routes_stt.py
import os
import tempfile

from fastapi import APIRouter, File, Form, HTTPException, UploadFile
import whisper

router = APIRouter()

# Model size is configurable via env var: tiny/base/small/medium/large.
MODEL_NAME = os.getenv("WHISPER_MODEL", "small")
# Load once at import time so requests don't pay the multi-second load cost.
model = whisper.load_model(MODEL_NAME)


@router.post("/stt")
async def stt(
    audio: UploadFile = File(...),
    language: str | None = Form(None),  # e.g. "en", "it"; None => auto-detect
):
    """Transcribe an uploaded audio file with Whisper.

    Returns ``{"text": ..., "language": ...}``.
    Raises HTTP 422 when the transcription comes back empty and
    HTTP 500 on any other failure.
    """
    tmp_path = None
    # Fix: an empty/omitted multipart field can arrive as "" rather than
    # None; Whisper rejects "" as a language code, so normalize to None
    # (auto-detect) instead of erroring.
    language = language or None
    try:
        # Whisper needs a real file path (it decodes via ffmpeg), so
        # persist the upload to a temp file first. Keep the original
        # extension as a decoding hint; default to .m4a.
        suffix = os.path.splitext(audio.filename or "")[-1] or ".m4a"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = tmp.name
            tmp.write(await audio.read())

        # Transcribe (language=None lets Whisper auto-detect).
        result = model.transcribe(
            tmp_path,
            language=language,
            task="transcribe",
            fp16=False,  # fp16 is unsafe/warns on CPU; force fp32
        )

        text = (result.get("text") or "").strip()
        if not text:
            raise HTTPException(status_code=422, detail="Empty transcription")

        return {"text": text, "language": result.get("language")}

    except HTTPException:
        # Re-raise our own HTTP errors unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"STT failed: {str(e)}")
    finally:
        # Always remove the temp file, even on failure; best-effort only.
        if tmp_path and os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except Exception:
                pass