"""POST /api/v1/transcribe — convert uploaded audio to text.""" from __future__ import annotations import logging import os import tempfile from typing import Annotated from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile from src.api.dependencies import get_transcriber from src.api.schemas import TranscribeResponse from src.engine.transcriber import Transcriber logger = logging.getLogger(__name__) router = APIRouter() SUPPORTED_LANGUAGES = {"bam", "ful"} SUPPORTED_EXTENSIONS = {".wav", ".mp3", ".ogg", ".m4a", ".flac", ".webm"} MAX_AUDIO_BYTES = 10 * 1024 * 1024 # 10 MB @router.post("/transcribe", response_model=TranscribeResponse) async def transcribe_audio( audio_file: Annotated[UploadFile, File(description="Audio file (wav/mp3/ogg/m4a/flac/webm)")], language: Annotated[str, Form(description="Language code: 'bam' (Bambara) or 'ful' (Fula)")] = "bam", transcriber: Transcriber = Depends(get_transcriber), ) -> TranscribeResponse: # Validate language if language not in SUPPORTED_LANGUAGES: raise HTTPException( status_code=422, detail=f"Unsupported language '{language}'. Supported: {sorted(SUPPORTED_LANGUAGES)}", ) # Validate file extension filename = audio_file.filename or "audio.wav" ext = os.path.splitext(filename)[1].lower() if ext not in SUPPORTED_EXTENSIONS: raise HTTPException( status_code=422, detail=f"Unsupported file type '{ext}'. Supported: {sorted(SUPPORTED_EXTENSIONS)}", ) # Read and size-check audio_bytes = await audio_file.read() if len(audio_bytes) > MAX_AUDIO_BYTES: raise HTTPException( status_code=413, detail=f"File too large ({len(audio_bytes) / 1e6:.1f} MB). Max 10 MB.", ) # Windows-safe temp file: delete=False + manual unlink in finally tmp_path = None try: with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp: tmp.write(audio_bytes) tmp_path = tmp.name result = transcriber.transcribe_file(tmp_path, language) except Exception as e: logger.error("Transcription failed: %s", e, exc_info=True) raise HTTPException(status_code=500, detail=str(e)) finally: if tmp_path and os.path.exists(tmp_path): os.unlink(tmp_path) return TranscribeResponse( text=result.text, language=result.language, duration_s=result.duration_s, processing_time_ms=result.processing_time_ms, confidence=result.confidence, )