| from fastapi import APIRouter, UploadFile, File, HTTPException |
| from fastapi.responses import StreamingResponse |
| import io |
| import logging |
| from config import ALLOWED_AUDIO_TYPES, MAX_AUDIO_SIZE |
| from services.stt_service import speech_to_text, load_stt_model |
| from services.tts_service import generate_tts |
| from services.chatbot_service import get_chatbot_response, load_chatbot_model |
| from models.audio import STTResponse, TTSRequest, ChatbotRequest, ChatbotResponse |
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router for the audio endpoints; every route registered on it is served
# under the "/audio" prefix and tagged "Audio".
router = APIRouter(tags=["Audio"], prefix="/audio")
|
|
@router.on_event("startup")
async def startup_models():
    """Load the speech-to-text and chatbot models on startup.

    Each loader runs in its own ``try`` block so that a failure while loading
    one model does not prevent the other from being loaded. Failures are
    logged (with traceback) and never propagated: startup is best-effort.
    """
    # NOTE(review): FastAPI documents ``on_event`` as deprecated in favour of
    # lifespan handlers; consider migrating at the application level.
    logger.info("🚀 Loading models...")

    loaders = (
        ("STT", load_stt_model),
        ("chatbot", load_chatbot_model),
    )
    all_loaded = True
    for model_label, load_model in loaders:
        try:
            load_model()
        except Exception as e:
            all_loaded = False
            # logger.exception keeps the traceback, which logger.error dropped.
            logger.exception("⚠️ Model loading issues (%s): %s", model_label, e)

    if all_loaded:
        logger.info("✓ All models loaded successfully")
|
|
@router.post("/tts")
async def tts(request: TTSRequest):
    """
    Convert text to speech.
    Returns MP3 audio file.

    Example:
    ```
    POST /audio/tts
    {
        "text": "Hello, welcome to voice chatbot"
    }
    ```
    """
    try:
        logger.info(f"TTS Request: '{request.text}'")
        audio_bytes = await generate_tts(request.text)
        # The audio is served as a downloadable attachment named output.mp3.
        return StreamingResponse(
            io.BytesIO(audio_bytes),
            media_type="audio/mpeg",
            headers={"Content-Disposition": "attachment; filename=output.mp3"},
        )
    except HTTPException:
        # Preserve status codes raised deliberately further down the stack,
        # matching the handling in the other audio endpoints.
        raise
    except Exception as e:
        # Any other failure is reported as a 500 carrying the error message.
        logger.exception("TTS Error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
@router.post("/stt", response_model=STTResponse)
async def stt(file: UploadFile = File(...)):
    """
    Convert audio file to text.
    Supports: WAV, MP3, M4A (accepted content types come from the
    ALLOWED_AUDIO_TYPES setting).

    Example:
    ```
    POST /audio/stt
    File: audio.mp3
    ```
    """
    # Reject unsupported content types before touching the upload body.
    if file.content_type not in ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported format. Allowed: {', '.join(ALLOWED_AUDIO_TYPES)}",
        )

    try:
        logger.info(f"STT Request: {file.filename}")

        # Read at most one byte past the limit: that is enough to detect an
        # oversized upload without loading an arbitrarily large file into
        # memory first.
        audio_bytes = await file.read(int(MAX_AUDIO_SIZE) + 1)

        if len(audio_bytes) > MAX_AUDIO_SIZE:
            raise HTTPException(
                status_code=400,
                detail=f"File too large. Max: {MAX_AUDIO_SIZE / 1024 / 1024}MB",
            )

        text = await speech_to_text(audio_bytes, file.filename)

        # model_name and language are fixed values reported with every result.
        return STTResponse(
            text=text,
            model_name="whisper-base",
            language="en",
        )
    except HTTPException:
        # Validation errors raised above keep their own status code.
        raise
    except Exception as e:
        # Any other failure is reported as a 500 carrying the error message.
        logger.exception("STT Error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
@router.post("/chatbot")
async def chatbot_voice(file: UploadFile = File(...)):
    """
    Full voice chatbot flow: Audio → Text → Response → Audio

    Example:
    ```
    POST /audio/chatbot
    File: user_voice.mp3
    Returns: Response MP3 audio
    ```
    """
    # Reject unsupported content types before touching the upload body.
    if file.content_type not in ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported format. Allowed: {', '.join(ALLOWED_AUDIO_TYPES)}",
        )

    try:
        logger.info(f"Voice Chatbot: Processing {file.filename}")

        # Read at most one byte past the limit: that is enough to detect an
        # oversized upload without loading an arbitrarily large file into
        # memory first.
        audio_bytes = await file.read(int(MAX_AUDIO_SIZE) + 1)
        if len(audio_bytes) > MAX_AUDIO_SIZE:
            raise HTTPException(
                status_code=400,
                detail=f"File too large. Max: {MAX_AUDIO_SIZE / 1024 / 1024}MB",
            )

        logger.info("Step 1/3: Converting speech to text...")
        user_text = await speech_to_text(audio_bytes, file.filename)

        logger.info("Step 2/3: Generating response...")
        response_text = await get_chatbot_response(user_text)

        logger.info("Step 3/3: Converting response to speech...")
        audio_response = await generate_tts(response_text)

        logger.info("✓ Voice chatbot complete")

        # The reply audio is served as an attachment named response.mp3.
        return StreamingResponse(
            io.BytesIO(audio_response),
            media_type="audio/mpeg",
            headers={"Content-Disposition": "attachment; filename=response.mp3"},
        )

    except HTTPException:
        # Validation errors raised above keep their own status code.
        raise
    except Exception as e:
        # Any other failure is reported as a 500 carrying the error message.
        logger.exception("Voice Chatbot Error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
@router.post("/chatbot-text", response_model=ChatbotResponse)
async def chatbot_text(request: ChatbotRequest):
    """
    Text-only chatbot (no audio).

    Example:
    ```
    POST /audio/chatbot-text
    {
        "text": "What is artificial intelligence?"
    }
    ```
    """
    try:
        logger.info(f"Text Chatbot: '{request.text}'")
        bot_response = await get_chatbot_response(request.text)

        # Echo the user's input alongside the reply; model_name is a fixed
        # value reported with every response.
        return ChatbotResponse(
            user_input=request.text,
            bot_response=bot_response,
            model_name="DialoGPT-medium",
        )
    except HTTPException:
        # Preserve status codes raised deliberately further down the stack,
        # matching the handling in the other audio endpoints.
        raise
    except Exception as e:
        # Any other failure is reported as a 500 carrying the error message.
        logger.exception("Text Chatbot Error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e