# Hamzaaly234's picture
# feat/setup
# 21b2f8c
"""
Voice endpoint - handles audio input and transcription
"""
from fastapi import APIRouter, File, UploadFile, HTTPException, status
from app.agents.schemas import TranscriptionOutput
from app.stt.whisper import get_stt_service
from app.config.settings import settings
import os
import tempfile
# Router for the voice endpoints; every route below is served under "/voice".
router = APIRouter(prefix="/voice", tags=["voice"])
@router.post("", response_model=TranscriptionOutput)
async def process_voice(
    file: UploadFile = File(...),
):
    """
    Process an uploaded audio file and return its transcription.

    The extension of the uploaded filename must be listed in
    ``settings.ALLOWED_AUDIO_FORMATS`` (documented as .wav, .mp3, .m4a) and
    the payload must not be larger than ``settings.MAX_FILE_SIZE`` bytes.

    Args:
        file: The uploaded audio file.

    Returns:
        A ``TranscriptionOutput`` carrying the transcribed ``text`` and the
        ``language`` reported by the STT service ("unknown" when the result
        has no ``language`` key).

    Raises:
        HTTPException: 400 when the extension is not allowed or the file is
            too large; 500 when saving the temp file or transcribing fails.
    """
    # Validate extension. The filename can be missing on an upload; treat
    # that as "no extension" so the request is rejected with a 400 below
    # instead of crashing inside os.path.splitext.
    file_ext = os.path.splitext(file.filename or "")[1].lower()
    if file_ext not in settings.ALLOWED_AUDIO_FORMATS:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Unsupported file format. Allowed: {', '.join(settings.ALLOWED_AUDIO_FORMATS)}"
        )

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without pulling an arbitrarily large payload into memory.
    max_size = int(settings.MAX_FILE_SIZE)
    contents = await file.read(max_size + 1)

    # Validate file size
    if len(contents) > max_size:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"File too large. Maximum size: {settings.MAX_FILE_SIZE} bytes"
        )

    tmp_file = None
    try:
        # Save temp file. The path is recorded before writing so the
        # ``finally`` clause can still remove the file if the write fails
        # (the file is created with delete=False).
        with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp:
            tmp_file = tmp.name
            tmp.write(contents)

        # Transcribe only after the ``with`` block has flushed and closed
        # the file, so the STT service sees the complete audio data.
        stt_service = get_stt_service()
        result = stt_service.transcribe(tmp_file)
        return TranscriptionOutput(
            text=result["text"],
            language=result.get("language", "unknown")
        )
    except Exception as e:
        # Endpoint boundary: report any failure as a 500 while keeping the
        # original exception chained for server-side tracebacks.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Transcription failed: {str(e)}"
        ) from e
    finally:
        # Always remove the temp file; tolerate it already being gone.
        if tmp_file is not None:
            try:
                os.unlink(tmp_file)
            except FileNotFoundError:
                pass