Hamzaaly234 commited on
Commit
21b2f8c
·
0 Parent(s):

feat/setup

Browse files
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.10-slim

ENV PYTHONUNBUFFERED=1

# System dependencies: ffmpeg is required by whisper for audio decoding.
# --no-install-recommends keeps the image small; the apt list cleanup is in
# the same layer so the cache is not baked into an intermediate layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    git \
    curl \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy requirements first so the dependency-install layer is cached
# independently of application-code changes.
COPY requirements.txt .

# Upgrade pip and install dependencies in a single layer (one less image layer
# than two separate RUN instructions).
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

COPY ./app ./app

EXPOSE 7860

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Voice Transcription API
2
+
3
+ FastAPI + Whisper STT deployed on Hugging Face Spaces.
4
+
5
+ ## Endpoint
6
+
7
+ POST /voice
8
+
9
+ Form Data:
10
+ - file (.wav, .mp3, .m4a)
11
+
12
+ ## Response
13
+
14
+ {
15
+ "text": "transcribed text",
16
+ "language": "en"
17
+ }
app/agents/schemas.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
from pydantic import BaseModel


class TranscriptionOutput(BaseModel):
    """Response body returned by the /voice transcription endpoint."""

    # The transcribed speech as plain text.
    text: str
    # Language code reported by the STT backend (e.g. "en").
    language: str
app/api/voice.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Voice endpoint - handles audio input and transcription
"""

from fastapi import APIRouter, File, UploadFile, HTTPException, status
from app.agents.schemas import TranscriptionOutput
from app.stt.whisper import get_stt_service
from app.config.settings import settings
import os
import tempfile

router = APIRouter(prefix="/voice", tags=["voice"])


@router.post("", response_model=TranscriptionOutput)
async def process_voice(
    file: UploadFile = File(...),
):
    """
    Process an uploaded audio file and return its transcription.

    Accepts the extensions listed in settings.ALLOWED_AUDIO_FORMATS
    (.wav, .mp3, .m4a).

    Raises:
        HTTPException 400: unsupported extension or file too large.
        HTTPException 500: transcription backend failure.
    """
    # file.filename may be None for some clients; fall back to "" so the
    # request fails with a clean 400 instead of a TypeError from splitext.
    file_ext = os.path.splitext(file.filename or "")[1].lower()
    if file_ext not in settings.ALLOWED_AUDIO_FORMATS:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Unsupported file format. Allowed: {', '.join(settings.ALLOWED_AUDIO_FORMATS)}"
        )

    contents = await file.read()

    # Reject oversized uploads before touching the filesystem.
    if len(contents) > settings.MAX_FILE_SIZE:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"File too large. Maximum size: {settings.MAX_FILE_SIZE} bytes"
        )

    tmp_file = None
    try:
        # The STT backend works on a file path; keep the original extension
        # so the audio decoder can pick the right format.
        with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp:
            tmp.write(contents)
            tmp_file = tmp.name

        stt_service = get_stt_service()
        result = stt_service.transcribe(tmp_file)

        return TranscriptionOutput(
            text=result["text"],
            language=result.get("language", "unknown")
        )
    except Exception as e:
        # Surface any backend failure as a 500 with the underlying message.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Transcription failed: {str(e)}"
        )
    finally:
        # Always clean up the temp file, on success or failure.
        if tmp_file and os.path.exists(tmp_file):
            os.unlink(tmp_file)
app/config/settings.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    """Application settings; fields can be overridden via environment variables."""

    # Audio extensions accepted by the /voice endpoint (lowercase, with dot).
    # Typed as list[str] (not bare `list`) so pydantic validates the items.
    ALLOWED_AUDIO_FORMATS: list[str] = [".wav", ".mp3", ".m4a"]
    # Maximum accepted upload size in bytes.
    MAX_FILE_SIZE: int = 10 * 1024 * 1024  # 10MB


# Shared singleton imported by the rest of the app.
settings = Settings()
app/main.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import FastAPI
from app.api.voice import router as voice_router

# Module-level app instance: this is what the "app.main:app" uvicorn target
# in the Dockerfile CMD points at.
app = FastAPI(title="Voice Transcription API", version="1.0.0")
app.include_router(voice_router)


@app.get("/")
def health_check():
    """Simple liveness probe for the root path."""
    return {"status": "API is running"}
app/stt/whisper.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
"""Lazy-loading accessor for the shared openai-whisper model."""

import whisper

# Cached model instance, created on first use rather than at import time so
# importing this module stays cheap and cannot fail on a model download.
_model = None


def get_stt_service():
    """Return the shared whisper model, loading it on the first call.

    Uses "tiny.en" to fit the HF free tier; the .en variant is
    English-only, so the detected language should always be "en".
    """
    global _model
    if _model is None:
        _model = whisper.load_model("tiny.en")  # tiny model for HF free tier
    return _model
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ python-multipart
4
+ pydantic
5
+ pydantic-settings
6
+ openai-whisper
7
+ --extra-index-url https://download.pytorch.org/whl/cpu
8
+ torch
+ torchaudio
9
+ numpy