""" Voice endpoint - handles audio input and transcription """ from fastapi import APIRouter, File, UploadFile, HTTPException, status from app.agents.schemas import TranscriptionOutput from app.stt.whisper import get_stt_service from app.config.settings import settings import os import tempfile router = APIRouter(prefix="/voice", tags=["voice"]) @router.post("", response_model=TranscriptionOutput) async def process_voice( file: UploadFile = File(...), ): """ Process audio file and return transcription Accepts: .wav, .mp3, .m4a """ # Validate extension file_ext = os.path.splitext(file.filename)[1].lower() if file_ext not in settings.ALLOWED_AUDIO_FORMATS: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=f"Unsupported file format. Allowed: {', '.join(settings.ALLOWED_AUDIO_FORMATS)}" ) # Read file contents = await file.read() # Validate file size if len(contents) > settings.MAX_FILE_SIZE: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=f"File too large. Maximum size: {settings.MAX_FILE_SIZE} bytes" ) tmp_file = None try: # Save temp file with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp: tmp.write(contents) tmp_file = tmp.name # Transcribe stt_service = get_stt_service() result = stt_service.transcribe(tmp_file) return TranscriptionOutput( text=result["text"], language=result.get("language", "unknown") ) except Exception as e: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Transcription failed: {str(e)}" ) finally: if tmp_file and os.path.exists(tmp_file): os.unlink(tmp_file)