File size: 5,244 Bytes
9aa985d
 
 
4a13628
918acab
95cb26e
4a13628
95cb26e
4a13628
 
 
c7fc3b6
 
 
95cb26e
 
 
 
 
 
 
 
218c6a3
73d4f3c
4a13628
d4b6133
95cb26e
218c6a3
4a13628
 
 
95cb26e
4a13628
9aa985d
4a13628
 
95cb26e
9aa985d
4a13628
9aa985d
c7fc3b6
 
4a13628
73d4f3c
520a06a
95cb26e
4a13628
 
 
 
95cb26e
520a06a
4a13628
520a06a
 
 
4a13628
520a06a
4a13628
9aa985d
4a13628
d4b6133
918acab
 
 
 
 
 
 
 
218c6a3
4a13628
 
 
95cb26e
4a13628
 
 
9aa985d
4a13628
9aa985d
544d113
 
 
4a13628
d4b6133
95cb26e
4a13628
 
 
 
95cb26e
d4b6133
4a13628
d4b6133
 
 
4a13628
d4b6133
4a13628
544d113
4a13628
 
 
544d113
918acab
 
 
 
 
 
 
 
d4b6133
4a13628
 
 
 
 
 
 
544d113
4a13628
 
95cb26e
4a13628
544d113
4a13628
544d113
 
4a13628
 
 
 
95cb26e
4a13628
 
 
 
 
 
 
 
 
 
 
 
 
95cb26e
4a13628
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
from fastapi import APIRouter, UploadFile, File, HTTPException
from fastapi.responses import StreamingResponse
import io
import logging
from config import ALLOWED_AUDIO_TYPES, MAX_AUDIO_SIZE
from services.stt_service import speech_to_text, load_stt_model
from services.tts_service import generate_tts
from services.chatbot_service import get_chatbot_response, load_chatbot_model
from models.audio import STTResponse, TTSRequest, TTSResponse, ChatbotRequest, ChatbotResponse

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/audio", tags=["Audio"])

# Warm the models up once, when the router's startup event fires
@router.on_event("startup")
async def startup_event():
    """Eagerly load the STT and chatbot models at router startup."""
    logger.info("Loading free STT and Chatbot models...")
    # Same order as before: speech-to-text first, then the chatbot.
    for load_model in (load_stt_model, load_chatbot_model):
        load_model()


@router.post("/tts")
async def tts(request: TTSRequest):
    """
    Turn the request text into speech and stream the audio back.

    Example:
    - POST /audio/tts
    - Body: {"text": "Hello, welcome to our system"}
    - Returns: MP3 audio file

    Any failure while generating or wrapping the audio is logged and
    reported to the client as an HTTP 500 carrying the error message.
    """
    try:
        logger.info(f"TTS request received for text: '{request.text}'")
        synthesized = await generate_tts(request.text)
        # Wrap the raw bytes in a file-like object so they can be streamed.
        audio_stream = io.BytesIO(synthesized)
        return StreamingResponse(audio_stream, media_type="audio/mp3")
    except Exception as e:
        logger.error(f"TTS error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/stt", response_model=STTResponse)
async def stt(file: UploadFile = File(...)):
    """
    Convert an uploaded audio file to text.

    Example:
    - POST /audio/stt
    - File: audio.mp3 (or .wav, .m4a)
    - Returns: {"text": "transcribed text", "model_name": "whisper-small", ...}

    Args:
        file: Uploaded audio. Its content type must be listed in
            ALLOWED_AUDIO_TYPES and its size must not exceed
            MAX_AUDIO_SIZE bytes.

    Returns:
        STTResponse holding the transcript. ``model_name`` and ``language``
        are reported as the fixed values "whisper-small" and "en";
        ``duration_seconds`` is always None.

    Raises:
        HTTPException: 400 for an unsupported content type or an oversized
            upload; 500 when reading the upload or transcribing it fails.
    """
    # Validate file type
    if file.content_type not in ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported format: {file.content_type}. Supported: WAV, MP3, M4A"
        )

    try:
        logger.info(f"STT request received for file: {file.filename}")
        audio_bytes = await file.read()

        # Check file size
        if len(audio_bytes) > MAX_AUDIO_SIZE:
            raise HTTPException(
                status_code=400,
                detail=f"Audio file too large. Max size: {MAX_AUDIO_SIZE / 1024 / 1024}MB"
            )

        text = await speech_to_text(audio_bytes, file.filename)

        return STTResponse(
            text=text,
            model_name="whisper-small",
            language="en",
            duration_seconds=None
        )
    except HTTPException:
        # Deliberate HTTP errors (such as the 400 above) must reach the client
        # unchanged instead of being rewrapped as a 500 by the handler below.
        raise
    except Exception as e:
        logger.error(f"STT error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post("/chatbot")
async def chatbot_voice(file: UploadFile = File(...)):
    """
    Full voice chatbot flow (Audio → Text → Response → Audio).

    Example:
    - POST /audio/chatbot
    - File: user_voice.mp3
    - Returns: Response audio file (MP3)

    Args:
        file: Uploaded audio. Its content type must be listed in
            ALLOWED_AUDIO_TYPES and its size must not exceed
            MAX_AUDIO_SIZE bytes.

    Returns:
        StreamingResponse carrying the synthesized reply with media type
        "audio/mp3".

    Raises:
        HTTPException: 400 for an unsupported content type or an oversized
            upload; 500 when any of the three pipeline steps fails.
    """
    # Validate file type
    if file.content_type not in ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported format: {file.content_type}. Supported: WAV, MP3, M4A"
        )

    try:
        logger.info(f"Voice chatbot request received for file: {file.filename}")

        # Step 1: Convert audio to text
        audio_bytes = await file.read()

        # Check file size
        if len(audio_bytes) > MAX_AUDIO_SIZE:
            raise HTTPException(
                status_code=400,
                detail=f"Audio file too large. Max size: {MAX_AUDIO_SIZE / 1024 / 1024}MB"
            )

        user_text = await speech_to_text(audio_bytes, file.filename)
        logger.info(f"Step 1 - STT: {user_text}")

        # Step 2: Generate chatbot response
        response_text = await get_chatbot_response(user_text)
        logger.info(f"Step 2 - Response: {response_text}")

        # Step 3: Convert response to audio
        audio_response = await generate_tts(response_text)
        logger.info("Step 3 - TTS: Complete")

        return StreamingResponse(io.BytesIO(audio_response), media_type="audio/mp3")

    except HTTPException:
        # Deliberate HTTP errors (such as the 400 above) must reach the client
        # unchanged instead of being rewrapped as a 500 by the handler below.
        raise
    except Exception as e:
        logger.error(f"Voice chatbot error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.post("/chatbot-text", response_model=ChatbotResponse)
async def chatbot_text(request: ChatbotRequest):
    """
    Text-in / text-out chatbot exchange.

    Example:
    - POST /audio/chatbot-text
    - Body: {"text": "What is the capital of France?"}
    - Returns: {"user_input": "What is...", "bot_response": "The capital...", ...}

    The reply echoes the user's text alongside the generated answer and
    reports "DialoGPT-medium" as the model name. Any failure is logged and
    surfaced as an HTTP 500 carrying the error message.
    """
    try:
        logger.info(f"Text chatbot request: {request.text}")
        reply = await get_chatbot_response(request.text)

        # Assemble the response fields first, then build the model in one step.
        fields = {
            "user_input": request.text,
            "bot_response": reply,
            "model_name": "DialoGPT-medium",
        }
        return ChatbotResponse(**fields)
    except Exception as e:
        logger.error(f"Text chatbot error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))