Spaces:

Aqs-shispare
/

transcript-api

Sleeping

App Files Files Community

Hamzaaly234 committed on 14 days ago

Commit

21b2f8c

0 Parent(s):

feat/setup

Browse files

Files changed (8) hide show

Dockerfile +23 -0
README.md +17 -0
app/agents/schemas.py +6 -0
app/api/voice.py +67 -0
app/config/settings.py +9 -0
app/main.py +14 -0
app/stt/whisper.py +8 -0
requirements.txt +9 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,23 @@

+FROM python:3.10-slim
+ENV PYTHONUNBUFFERED=1
+# Install system dependencies (required for whisper); must run as root, before switching user
+RUN apt-get update && apt-get install -y \
+    ffmpeg \
+    git \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+# Hugging Face Docker Spaces run the container as UID 1000. Create that user with a real,
+# writable home directory so pip's user install and the Whisper model download (performed
+# when app.stt.whisper is imported) have somewhere to write.
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+COPY --chown=user requirements.txt .
+RUN pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir -r requirements.txt
+COPY --chown=user ./app ./app
+EXPOSE 7860
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md ADDED Viewed

	@@ -0,0 +1,17 @@

+# Voice Transcription API
+FastAPI + Whisper STT deployed on Hugging Face Spaces.
+## Endpoint
+POST /voice
+Form Data:
+- file (.wav, .mp3, .m4a; maximum size 10 MB)
+## Response
+{
+  "text": "transcribed text",
+  "language": "en"
+}

app/agents/schemas.py ADDED Viewed

	@@ -0,0 +1,6 @@

+from pydantic import BaseModel
+class TranscriptionOutput(BaseModel):  # response body of POST /voice (declared as its response_model)
+    text: str  # transcript, filled from the "text" entry of the STT result
+    language: str  # language from the STT result; the endpoint falls back to "unknown"

app/api/voice.py ADDED Viewed

	@@ -0,0 +1,67 @@

+"""
+Voice endpoint - handles audio input and transcription
+"""
+from fastapi import APIRouter, File, UploadFile, HTTPException, status
+from app.agents.schemas import TranscriptionOutput
+from app.stt.whisper import get_stt_service
+from app.config.settings import settings
+import os
+import tempfile
+router = APIRouter(prefix="/voice", tags=["voice"])
+@router.post("", response_model=TranscriptionOutput)
+async def process_voice(
+    file: UploadFile = File(...),
+):
+    """
+    Process audio file and return transcription
+    Accepts: .wav, .mp3, .m4a
+    """
+    # Validate extension
+    file_ext = os.path.splitext(file.filename)[1].lower()
+    if file_ext not in settings.ALLOWED_AUDIO_FORMATS:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Unsupported file format. Allowed: {', '.join(settings.ALLOWED_AUDIO_FORMATS)}"
+        )
+    # Read file
+    contents = await file.read()
+    # Validate file size
+    if len(contents) > settings.MAX_FILE_SIZE:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"File too large. Maximum size: {settings.MAX_FILE_SIZE} bytes"
+        )
+    tmp_file = None
+    try:
+        # Save temp file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp:
+            tmp.write(contents)
+            tmp_file = tmp.name
+        # Transcribe
+        stt_service = get_stt_service()
+        result = stt_service.transcribe(tmp_file)
+        return TranscriptionOutput(
+            text=result["text"],
+            language=result.get("language", "unknown")
+        )
+    except Exception as e:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Transcription failed: {str(e)}"
+        )
+    finally:
+        if tmp_file and os.path.exists(tmp_file):
+            os.unlink(tmp_file)

app/config/settings.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from pydantic_settings import BaseSettings
+class Settings(BaseSettings):
+    ALLOWED_AUDIO_FORMATS: list = [".wav", ".mp3", ".m4a"]
+    MAX_FILE_SIZE: int = 10 * 1024 * 1024  # 10MB
+settings = Settings()

app/main.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from fastapi import FastAPI
+from app.api.voice import router as voice_router
+app = FastAPI(  # ASGI application; the Dockerfile starts it with uvicorn as "app.main:app"
+    title="Voice Transcription API",
+    version="1.0.0"
+)
+app.include_router(voice_router)  # adds the POST /voice transcription route
+@app.get("/")  # root path returns a fixed status payload
+def health_check():
+    return {"status": "API is running"}

app/stt/whisper.py ADDED Viewed

	@@ -0,0 +1,8 @@

+import whisper
+# Load the model once, at import time, so every request reuses it (important for performance)
+_model = whisper.load_model("tiny.en")  # use tiny for HF free tier
+def get_stt_service():  # accessor for the shared model; the voice endpoint calls .transcribe(path) on it
+    return _model

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+# The CPU-only PyTorch wheels are published on a separate index. Index options are
+# file-wide in a requirements file and must sit on their own line, not after a package name.
+--extra-index-url https://download.pytorch.org/whl/cpu
+fastapi
+uvicorn[standard]
+python-multipart
+pydantic
+pydantic-settings
+openai-whisper
+torch
+torchaudio
+numpy