Spaces:
Sleeping
Sleeping
File size: 1,915 Bytes
21b2f8c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | """
Voice endpoint - handles audio input and transcription
"""
from fastapi import APIRouter, File, UploadFile, HTTPException, status
from app.agents.schemas import TranscriptionOutput
from app.stt.whisper import get_stt_service
from app.config.settings import settings
import os
import tempfile
# Router for the voice endpoints: routes registered on it are declared with
# the "/voice" path prefix and the "voice" tag.
router = APIRouter(prefix="/voice", tags=["voice"])
@router.post("", response_model=TranscriptionOutput)
async def process_voice(
    file: UploadFile = File(...),
):
    """
    Process audio file and return transcription

    Accepts: .wav, .mp3, .m4a
    \f
    Developer notes (FastAPI omits everything after the form feed from the
    generated OpenAPI description).

    The accepted extensions are actually whatever
    ``settings.ALLOWED_AUDIO_FORMATS`` contains; the list above is the
    documented default and should be kept in sync with the settings.

    Args:
        file: Uploaded audio file taken from the multipart field ``file``.

    Returns:
        A ``TranscriptionOutput`` built from the STT result's ``"text"``
        entry and its optional ``"language"`` entry (``"unknown"`` when the
        key is absent).

    Raises:
        HTTPException: 400 when the file extension is not allowed, when the
            upload is empty, or when it is larger than
            ``settings.MAX_FILE_SIZE`` bytes; 500 when saving the temporary
            file or transcribing it fails.
    """
    # UploadFile.filename may be None when the client does not send one.
    # Treat that as "no extension" so validation answers with a 400 instead
    # of os.path.splitext() raising TypeError.
    file_ext = os.path.splitext(file.filename or "")[1].lower()
    if file_ext not in settings.ALLOWED_AUDIO_FORMATS:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Unsupported file format. Allowed: {', '.join(settings.ALLOWED_AUDIO_FORMATS)}"
        )

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without pulling an arbitrarily large body into memory.
    contents = await file.read(settings.MAX_FILE_SIZE + 1)
    if len(contents) > settings.MAX_FILE_SIZE:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"File too large. Maximum size: {settings.MAX_FILE_SIZE} bytes"
        )

    # An empty upload cannot be transcribed; reject it as a client error
    # rather than letting the STT backend fail with a 500.
    if not contents:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Uploaded file is empty"
        )

    tmp_file = None
    try:
        # delete=False keeps the file on disk after the handle is closed so
        # the STT service can open it by path; it is removed in `finally`.
        with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp:
            # Record the path before writing so a failed write still gets
            # cleaned up below.
            tmp_file = tmp.name
            tmp.write(contents)

        # NOTE(review): transcribe() is called synchronously inside an async
        # handler, so it runs on the event loop. Consider offloading it to a
        # worker thread if the STT service is thread-safe - confirm first.
        stt_service = get_stt_service()
        result = stt_service.transcribe(tmp_file)

        return TranscriptionOutput(
            text=result["text"],
            language=result.get("language", "unknown")
        )
    except HTTPException:
        # Keep deliberate HTTP errors (status and detail) intact instead of
        # rewrapping them as a generic 500.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Transcription failed: {e}"
        ) from e
    finally:
        if tmp_file is not None:
            # EAFP: the file may already be gone; that is not an error.
            try:
                os.unlink(tmp_file)
            except FileNotFoundError:
                pass