Spaces:

DroolingPanda
/

teachingAssistant

Sleeping

teachingAssistant / src /domain /models /speech_synthesis_request.py

Michael Hu

Migrate existing TTS providers to infrastructure layer

1f9c751 5 months ago

4 kB

	"""SpeechSynthesisRequest value object for TTS synthesis requests."""

	from dataclasses import dataclass
	from typing import Optional
	from .text_content import TextContent
	from .voice_settings import VoiceSettings


	@dataclass(frozen=True)
	class SpeechSynthesisRequest:
	    """Immutable value object describing a single TTS synthesis request.

	    The request is validated in ``__post_init__``, so an instance that exists
	    is always valid; the ``with_*`` helpers build a new instance through the
	    constructor and are therefore re-validated as well.

	    Attributes:
	        text_content: The text to synthesize.
	        voice_settings: Voice configuration. Its ``language`` must equal
	            ``text_content.language``.
	        output_format: Target audio container; one of ``wav``, ``mp3``,
	            ``flac`` or ``ogg``. Defaults to ``wav``.
	        sample_rate: Optional sample rate in Hz (8000 to 192000 inclusive).
	            ``None`` means "not specified"; see ``effective_sample_rate``.
	    """

	    # Unannotated class attributes are not dataclass fields, so these stay
	    # out of __init__, __eq__ and __repr__.
	    _SUPPORTED_FORMATS = ('wav', 'mp3', 'flac', 'ogg')
	    _BASE_WORDS_PER_MINUTE = 175
	    _LONG_TEXT_CHARACTER_THRESHOLD = 5000
	    _DEFAULT_SAMPLE_RATE = 22050

	    text_content: TextContent
	    voice_settings: VoiceSettings
	    output_format: str = 'wav'
	    sample_rate: Optional[int] = None

	    def __post_init__(self):
	        """Validate the request right after dataclass initialization."""
	        self._validate()

	    def _validate(self):
	        """Check field types, allowed values and language compatibility.

	        Raises:
	            TypeError: If ``text_content`` is not a ``TextContent``,
	                ``voice_settings`` is not a ``VoiceSettings``,
	                ``output_format`` is not a ``str``, or ``sample_rate`` is
	                neither ``None`` nor an ``int``.
	            ValueError: If ``output_format`` is unsupported, ``sample_rate``
	                is non-positive or outside 8000-192000 Hz, or the text and
	                voice languages differ.
	        """
	        if not isinstance(self.text_content, TextContent):
	            raise TypeError("Text must be a TextContent instance")

	        if not isinstance(self.voice_settings, VoiceSettings):
	            raise TypeError("Voice settings must be a VoiceSettings instance")

	        if not isinstance(self.output_format, str):
	            raise TypeError("Output format must be a string")

	        if self.output_format not in self._SUPPORTED_FORMATS:
	            supported = ', '.join(self._SUPPORTED_FORMATS)
	            raise ValueError(
	                f"Unsupported output format: {self.output_format}. "
	                f"Supported formats: {supported}"
	            )

	        if self.sample_rate is not None:
	            if not isinstance(self.sample_rate, int):
	                raise TypeError("Sample rate must be an integer")

	            # Kept separate from the range check below so that zero and
	            # negative rates keep their own, more specific message.
	            if self.sample_rate <= 0:
	                raise ValueError("Sample rate must be positive")

	            if self.sample_rate < 8000 or self.sample_rate > 192000:
	                raise ValueError("Sample rate must be between 8000 and 192000 Hz")

	        # The text and the voice must be in the same language.
	        if self.text_content.language != self.voice_settings.language:
	            raise ValueError(
	                f"Text language ({self.text_content.language}) must match "
	                f"voice language ({self.voice_settings.language})"
	            )

	    @property
	    def estimated_duration_seconds(self) -> float:
	        """Estimate the duration of the synthesized speech in seconds.

	        Starts from a baseline of 175 words per minute (ordinary speech is
	        roughly 150-200 wpm) and scales it by the voice speed, so a faster
	        voice yields a shorter estimate.
	        """
	        # A faster voice speaks MORE words per minute, so the baseline is
	        # multiplied by the speed; dividing would make fast voices look slower.
	        # NOTE(review): assumes voice_settings.speed is a positive rate
	        # multiplier (1.0 = normal) - confirm against VoiceSettings.
	        words_per_minute = self._BASE_WORDS_PER_MINUTE * self.voice_settings.speed
	        return (self.text_content.word_count / words_per_minute) * 60

	    @property
	    def is_long_text(self) -> bool:
	        """Return True when the text has more than 5000 characters."""
	        return self.text_content.character_count > self._LONG_TEXT_CHARACTER_THRESHOLD

	    @property
	    def effective_sample_rate(self) -> int:
	        """Return ``sample_rate``, or 22050 Hz when none was specified."""
	        if self.sample_rate is not None:
	            return self.sample_rate
	        return self._DEFAULT_SAMPLE_RATE

	    def with_output_format(self, output_format: str) -> 'SpeechSynthesisRequest':
	        """Return a new, re-validated request with a different output format."""
	        return SpeechSynthesisRequest(
	            text_content=self.text_content,
	            voice_settings=self.voice_settings,
	            output_format=output_format,
	            sample_rate=self.sample_rate,
	        )

	    def with_sample_rate(self, sample_rate: Optional[int]) -> 'SpeechSynthesisRequest':
	        """Return a new, re-validated request with a different sample rate.

	        Passing ``None`` clears the explicit sample rate.
	        """
	        return SpeechSynthesisRequest(
	            text_content=self.text_content,
	            voice_settings=self.voice_settings,
	            output_format=self.output_format,
	            sample_rate=sample_rate,
	        )

	    def with_voice_settings(self, voice_settings: VoiceSettings) -> 'SpeechSynthesisRequest':
	        """Return a new, re-validated request with different voice settings."""
	        return SpeechSynthesisRequest(
	            text_content=self.text_content,
	            voice_settings=voice_settings,
	            output_format=self.output_format,
	            sample_rate=self.sample_rate,
	        )