Spaces:
Sleeping
Sleeping
| """SpeechSynthesisRequest value object for TTS synthesis requests.""" | |
| from dataclasses import dataclass | |
| from typing import Optional | |
| from .text_content import TextContent | |
| from .voice_settings import VoiceSettings | |
@dataclass(frozen=True)
class SpeechSynthesisRequest:
    """Value object representing a speech synthesis request.

    Instances are immutable; use the ``with_*`` methods to derive a
    modified copy. Every instance is validated on construction.

    Attributes:
        text_content: The text to synthesize.
        voice_settings: The voice configuration to synthesize with.
        output_format: One of ``wav``, ``mp3``, ``flac`` or ``ogg``.
        sample_rate: Sample rate in Hz (8000-192000), or ``None`` to use
            the default reported by ``effective_sample_rate``.
    """

    # NOTE(review): decorator lines appear to have been dropped from the
    # listing this class was recovered from. ``@dataclass`` is required by
    # the fields, ``__post_init__`` and the keyword construction below;
    # confirm whether the accessor methods were originally ``@property``.

    # Un-annotated on purpose so the dataclass machinery does not treat
    # these constants as fields.
    _SUPPORTED_FORMATS = ('wav', 'mp3', 'flac', 'ogg')
    _MIN_SAMPLE_RATE = 8000
    _MAX_SAMPLE_RATE = 192000
    _DEFAULT_SAMPLE_RATE = 22050
    _BASE_WORDS_PER_MINUTE = 175
    _LONG_TEXT_CHARACTERS = 5000

    text_content: 'TextContent'
    voice_settings: 'VoiceSettings'
    output_format: str = 'wav'
    sample_rate: Optional[int] = None

    def __post_init__(self):
        """Validate speech synthesis request after initialization."""
        self._validate()

    def _validate(self):
        """Validate speech synthesis request properties.

        Raises:
            TypeError: If a field has the wrong type.
            ValueError: If the output format is unsupported, the sample
                rate is out of range, or the text and voice languages
                differ.
        """
        if not isinstance(self.text_content, TextContent):
            raise TypeError("Text must be a TextContent instance")

        if not isinstance(self.voice_settings, VoiceSettings):
            raise TypeError("Voice settings must be a VoiceSettings instance")

        if not isinstance(self.output_format, str):
            raise TypeError("Output format must be a string")

        if self.output_format not in self._SUPPORTED_FORMATS:
            supported = ', '.join(self._SUPPORTED_FORMATS)
            raise ValueError(
                f"Unsupported output format: {self.output_format}. "
                f"Supported formats: {supported}"
            )

        if self.sample_rate is not None:
            if not isinstance(self.sample_rate, int):
                raise TypeError("Sample rate must be an integer")

            # Checked separately from the range so non-positive values
            # get their own, more specific message.
            if self.sample_rate <= 0:
                raise ValueError("Sample rate must be positive")

            if not (self._MIN_SAMPLE_RATE
                    <= self.sample_rate
                    <= self._MAX_SAMPLE_RATE):
                raise ValueError("Sample rate must be between 8000 and 192000 Hz")

        # Validate that text and voice settings have compatible languages
        if self.text_content.language != self.voice_settings.language:
            raise ValueError(
                f"Text language ({self.text_content.language}) must match "
                f"voice language ({self.voice_settings.language})"
            )

    def estimated_duration_seconds(self) -> float:
        """Estimate the duration of synthesized speech in seconds.

        Uses a base rate of 175 words per minute divided by the voice
        speed setting.
        """
        # NOTE(review): dividing by ``speed`` makes a larger speed value
        # produce a LONGER estimate. Confirm this matches the meaning of
        # ``VoiceSettings.speed``; if speed is a rate multiplier this
        # should presumably multiply instead.
        words_per_minute = self._BASE_WORDS_PER_MINUTE / self.voice_settings.speed
        return (self.text_content.word_count / words_per_minute) * 60

    def is_long_text(self) -> bool:
        """Return True if the text exceeds 5000 characters."""
        return self.text_content.character_count > self._LONG_TEXT_CHARACTERS

    def effective_sample_rate(self) -> int:
        """Get the effective sample rate (default 22050 if not specified)."""
        if self.sample_rate is None:
            return self._DEFAULT_SAMPLE_RATE
        return self.sample_rate

    def with_output_format(self, output_format: str) -> 'SpeechSynthesisRequest':
        """Create a new SpeechSynthesisRequest with different output format."""
        return SpeechSynthesisRequest(
            text_content=self.text_content,
            voice_settings=self.voice_settings,
            output_format=output_format,
            sample_rate=self.sample_rate,
        )

    def with_sample_rate(self, sample_rate: Optional[int]) -> 'SpeechSynthesisRequest':
        """Create a new SpeechSynthesisRequest with different sample rate."""
        return SpeechSynthesisRequest(
            text_content=self.text_content,
            voice_settings=self.voice_settings,
            output_format=self.output_format,
            sample_rate=sample_rate,
        )

    def with_voice_settings(self, voice_settings: 'VoiceSettings') -> 'SpeechSynthesisRequest':
        """Create a new SpeechSynthesisRequest with different voice settings."""
        return SpeechSynthesisRequest(
            text_content=self.text_content,
            voice_settings=voice_settings,
            output_format=self.output_format,
            sample_rate=self.sample_rate,
        )