# Extracted from commit 9aa985d (malek-messaoudii):
# "Refactor audio models and services for improved error handling and response streaming"
from pydantic import BaseModel, Field, ConfigDict
from typing import Optional
# ==============================
# SPEECH TO TEXT RESPONSE
# ==============================
class STTResponse(BaseModel):
    """Response payload returned by the speech-to-text endpoint."""

    # Required transcription result and the model that produced it.
    text: str = Field(..., description="Transcribed text from the input audio")
    model_name: str = Field(..., description="STT model used for inference")
    # Optional metadata; absent when the backend does not report them.
    language: Optional[str] = Field(default=None, description="Detected language")
    duration_seconds: Optional[float] = Field(
        default=None,
        description="Approximate audio duration in seconds",
    )

    # Example shown in the generated OpenAPI schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "text": "hello how are you",
                "model_name": "openai/whisper-large-v3",
                "language": "en",
                "duration_seconds": 3.2,
            }
        }
    )
# ==============================
# TEXT TO SPEECH REQUEST / RESPONSE
# ==============================
class TTSRequest(BaseModel):
    """Request payload for the text-to-speech endpoint."""

    # Bounded length guards the TTS backend against empty or oversized inputs.
    text: str = Field(
        ...,
        min_length=1,
        max_length=500,
        description="Text to convert to speech",
    )

    # Example shown in the generated OpenAPI schema.
    model_config = ConfigDict(
        json_schema_extra={"example": {"text": "Hello, welcome to our AI system."}}
    )
class TTSResponse(BaseModel):
    """Response metadata describing a completed text-to-speech generation."""

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "message": "Audio generated successfully",
                "audio_format": "wav",
                "length_seconds": 2.5,
                "model_name": "suno/bark"
            }
        }
    )
    # Fields use Field(...) with descriptions for consistency with the
    # sibling STTResponse/TTSRequest models and a self-documenting schema.
    message: str = Field(..., description="Human-readable status message")
    audio_format: str = Field(..., description="Format of the generated audio (e.g. 'wav')")
    length_seconds: Optional[float] = Field(
        None,
        description="Approximate duration of the generated audio in seconds"
    )
    model_name: str = Field(..., description="TTS model used for inference")