# Source: Hugging Face Space "Aqs-shispare/transcript-api" (status: Sleeping)
# File size: 1,915 bytes
# Commit: 21b2f8c

"""
Voice endpoint - handles audio input and transcription
"""

from fastapi import APIRouter, File, UploadFile, HTTPException, status
from app.agents.schemas import TranscriptionOutput
from app.stt.whisper import get_stt_service
from app.config.settings import settings
import os
import tempfile

# Router for the voice endpoint(s); routes registered on it share the
# "/voice" path prefix and the "voice" tag.
router = APIRouter(
    prefix="/voice",
    tags=["voice"],
)


@router.post("", response_model=TranscriptionOutput)
async def process_voice(
    file: UploadFile = File(...),
):
    """
    Process an uploaded audio file and return its transcription.

    The upload is validated (extension, size, non-empty), written to a
    temporary file, handed to the speech-to-text service, and the temporary
    file is removed afterwards whether or not transcription succeeded.

    Accepted extensions are those listed in ``settings.ALLOWED_AUDIO_FORMATS``
    (originally documented as .wav, .mp3, .m4a).

    Args:
        file: The uploaded audio file.

    Returns:
        TranscriptionOutput: the transcribed ``text`` and the ``language``
        reported by the STT service ("unknown" when it reports none).

    Raises:
        HTTPException: 400 if the extension is not allowed, the upload is
            empty, or it exceeds ``settings.MAX_FILE_SIZE`` bytes; 500 if
            saving or transcribing the audio fails.
    """

    # Validate extension. An upload can arrive without a filename; treat that
    # as "no extension" so it is rejected with a 400 below rather than
    # crashing os.path.splitext with a TypeError.
    file_ext = os.path.splitext(file.filename or "")[1].lower()
    if file_ext not in settings.ALLOWED_AUDIO_FORMATS:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Unsupported file format. Allowed: {', '.join(settings.ALLOWED_AUDIO_FORMATS)}"
        )

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without pulling an arbitrarily large body into memory.
    contents = await file.read(int(settings.MAX_FILE_SIZE) + 1)

    # Validate file size
    if len(contents) > settings.MAX_FILE_SIZE:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"File too large. Maximum size: {settings.MAX_FILE_SIZE} bytes"
        )

    # An empty upload cannot contain audio; reject it as a client error
    # instead of letting the STT service fail with a 500.
    if not contents:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Uploaded file is empty"
        )

    tmp_file = None

    try:
        # Save temp file. The suffix keeps the original extension on the
        # temporary path handed to the STT service.
        with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp:
            # Record the path before writing so the finally block can remove
            # the file even if the write itself fails.
            tmp_file = tmp.name
            tmp.write(contents)

        # Transcribe
        # NOTE(review): transcribe() is called synchronously inside this async
        # handler, so it runs on the event loop for its whole duration.
        # Consider offloading it to a worker thread once the STT service's
        # thread-safety is confirmed.
        stt_service = get_stt_service()
        result = stt_service.transcribe(tmp_file)

        return TranscriptionOutput(
            text=result["text"],
            language=result.get("language", "unknown")
        )

    except HTTPException:
        # Deliberate HTTP errors raised further down keep their own status
        # code instead of being rewritten as a 500.
        raise

    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Transcription failed: {str(e)}"
        ) from e

    finally:
        # Always remove the temporary file; a file that is already gone is
        # not an error.
        if tmp_file is not None:
            try:
                os.unlink(tmp_file)
            except FileNotFoundError:
                pass