Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Sleeping

FastAPI-Backend-Models/routes/audio.py

malek-messaoudii

Enhance configuration validation and audio processing limits

918acab 5 months ago

5.06 kB

	from fastapi import APIRouter, UploadFile, File, HTTPException
	from fastapi.responses import StreamingResponse
	import io
	import logging
	from config import ALLOWED_AUDIO_TYPES, MAX_AUDIO_SIZE
	from services.stt_service import speech_to_text
	from services.tts_service import generate_tts
	from services.chatbot_service import get_chatbot_response
	from models.audio import STTResponse, TTSRequest, TTSResponse, ChatbotRequest, ChatbotResponse

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)

	# Router for the endpoints defined in this file: every route registered on
	# it is served under the "/audio" prefix and tagged "Audio".
	router = APIRouter(prefix="/audio", tags=["Audio"])


	@router.post("/tts")
	async def tts(request: TTSRequest):
	    """
	    Convert text to speech and return the audio as a streamed response.

	    Example:
	    - POST /audio/tts
	    - Body: {"text": "Hello, welcome to our system"}
	    - Returns: WAV audio file (media type "audio/wav")

	    Raises:
	        HTTPException: status 500 with the error message as detail when
	            synthesis fails. An HTTPException raised further down the call
	            chain is propagated unchanged.
	    """
	    try:
	        logger.info("TTS request received for text: '%s'", request.text)
	        audio_bytes = await generate_tts(request.text)
	        return StreamingResponse(io.BytesIO(audio_bytes), media_type="audio/wav")
	    except HTTPException:
	        # Keep the status code chosen by whoever raised it instead of
	        # flattening every HTTP error into a generic 500.
	        raise
	    except Exception as e:
	        # logger.exception records the traceback, not just the message.
	        logger.exception("TTS error: %s", e)
	        raise HTTPException(status_code=500, detail=str(e)) from e


	@router.post("/stt", response_model=STTResponse)
	async def stt(file: UploadFile = File(...)):
	    """
	    Convert an uploaded audio file to text.

	    Example:
	    - POST /audio/stt
	    - File: audio.mp3 (or .wav, .m4a)
	    - Returns: {"text": "transcribed text", "model_name": "gemini-2.5-flash", ...}

	    The ``model_name`` ("gemini-2.5-flash"), ``language`` ("en") and
	    ``duration_seconds`` (None) fields of the response are fixed values set
	    here; they are not reported by the transcription call.

	    Raises:
	        HTTPException: status 400 when the upload's content type is not in
	            ALLOWED_AUDIO_TYPES or the payload exceeds MAX_AUDIO_SIZE bytes;
	            status 500 with the error message as detail for any other
	            failure.
	    """
	    # Validate file type
	    if file.content_type not in ALLOWED_AUDIO_TYPES:
	        raise HTTPException(
	            status_code=400,
	            detail=f"Unsupported format: {file.content_type}. Supported: WAV, MP3, M4A"
	        )

	    try:
	        logger.info("STT request received for file: %s", file.filename)
	        audio_bytes = await file.read()

	        # Check file size (only known after the upload has been read)
	        if len(audio_bytes) > MAX_AUDIO_SIZE:
	            raise HTTPException(
	                status_code=400,
	                detail=f"Audio file too large. Max size: {MAX_AUDIO_SIZE / 1024 / 1024}MB"
	            )

	        text = await speech_to_text(audio_bytes, file.filename)

	        return STTResponse(
	            text=text,
	            model_name="gemini-2.5-flash",
	            language="en",
	            duration_seconds=None
	        )
	    except HTTPException:
	        # The size-limit 400 above is raised inside this try block; without
	        # this clause the generic handler below would turn it into a 500.
	        raise
	    except Exception as e:
	        # logger.exception records the traceback, not just the message.
	        logger.exception("STT error: %s", e)
	        raise HTTPException(status_code=500, detail=str(e)) from e


	@router.post("/chatbot")
	async def chatbot_voice(file: UploadFile = File(...)):
	    """
	    Full voice chatbot flow (Audio → Text → Response → Audio).

	    Example:
	    - POST /audio/chatbot
	    - File: user_voice.mp3
	    - Returns: Response audio file (WAV, media type "audio/wav")

	    Process:
	    1. Converts user's audio to text (STT)
	    2. Generates chatbot response to user's text
	    3. Converts response back to audio (TTS)

	    Raises:
	        HTTPException: status 400 when the upload's content type is not in
	            ALLOWED_AUDIO_TYPES or the payload exceeds MAX_AUDIO_SIZE bytes;
	            status 500 with the error message as detail when any of the
	            three steps fails for another reason.
	    """
	    # Validate file type
	    if file.content_type not in ALLOWED_AUDIO_TYPES:
	        raise HTTPException(
	            status_code=400,
	            detail=f"Unsupported format: {file.content_type}. Supported: WAV, MP3, M4A"
	        )

	    try:
	        logger.info("Voice chatbot request received for file: %s", file.filename)

	        # Step 1: Convert audio to text
	        audio_bytes = await file.read()

	        # Check file size (only known after the upload has been read)
	        if len(audio_bytes) > MAX_AUDIO_SIZE:
	            raise HTTPException(
	                status_code=400,
	                detail=f"Audio file too large. Max size: {MAX_AUDIO_SIZE / 1024 / 1024}MB"
	            )

	        user_text = await speech_to_text(audio_bytes, file.filename)
	        logger.info("Step 1 - STT: %s", user_text)

	        # Step 2: Generate chatbot response
	        response_text = await get_chatbot_response(user_text)
	        logger.info("Step 2 - Response: %s", response_text)

	        # Step 3: Convert response to audio
	        audio_response = await generate_tts(response_text)
	        logger.info("Step 3 - TTS: Complete")

	        return StreamingResponse(io.BytesIO(audio_response), media_type="audio/wav")

	    except HTTPException:
	        # The size-limit 400 above is raised inside this try block; without
	        # this clause the generic handler below would turn it into a 500.
	        raise
	    except Exception as e:
	        # logger.exception records the traceback, not just the message.
	        logger.exception("Voice chatbot error: %s", e)
	        raise HTTPException(status_code=500, detail=str(e)) from e


	@router.post("/chatbot-text", response_model=ChatbotResponse)
	async def chatbot_text(request: ChatbotRequest):
	    """
	    Chatbot interaction with text input/output (no audio).

	    Example:
	    - POST /audio/chatbot-text
	    - Body: {"text": "What is the capital of France?"}
	    - Returns: {"user_input": "What is...", "bot_response": "The capital...", ...}

	    The ``model_name`` field of the response is the fixed value
	    "gemini-2.5-flash" set here; it is not reported by the chatbot call.

	    Raises:
	        HTTPException: status 500 with the error message as detail when
	            generating the reply fails. An HTTPException raised further down
	            the call chain is propagated unchanged.
	    """
	    try:
	        logger.info("Text chatbot request: %s", request.text)
	        response_text = await get_chatbot_response(request.text)

	        return ChatbotResponse(
	            user_input=request.text,
	            bot_response=response_text,
	            model_name="gemini-2.5-flash"
	        )
	    except HTTPException:
	        # Keep the status code chosen by whoever raised it instead of
	        # flattening every HTTP error into a generic 500.
	        raise
	    except Exception as e:
	        # logger.exception records the traceback, not just the message.
	        logger.exception("Text chatbot error: %s", e)
	        raise HTTPException(status_code=500, detail=str(e)) from e