from ..llama_configs import LLAMA_CONFIGS


class T3Config:
    """Settings object for a T3 TTS model.

    All values except ``text_tokens_dict_size`` are fixed at construction
    time. Use :meth:`english_only` or :meth:`multilingual` to build one of the
    two predefined variants, which differ only in the text vocabulary size.
    """

    # Text vocabulary sizes of the two predefined variants. ``is_multilingual``
    # and ``multilingual()`` must agree on the same number, so each size is
    # defined exactly once here.
    _ENGLISH_TEXT_VOCAB_SIZE = 704
    _MULTILINGUAL_TEXT_VOCAB_SIZE = 2454

    def __init__(self, text_tokens_dict_size: int = _ENGLISH_TEXT_VOCAB_SIZE) -> None:
        """Populate the configuration.

        Args:
            text_tokens_dict_size: Size of the text token dictionary. Defaults
                to 704, the value used by :meth:`english_only`.
        """
        # Text token settings (presumably ids of the start/stop markers and
        # a cap on text sequence length -- confirm against the model code).
        self.start_text_token = 255
        self.stop_text_token = 0
        self.text_tokens_dict_size = text_tokens_dict_size
        self.max_text_tokens = 2048

        # Speech token settings, mirroring the text group above.
        self.start_speech_token = 6561
        self.stop_speech_token = 6562
        self.speech_tokens_dict_size = 8194
        self.max_speech_tokens = 4096

        # Key into LLAMA_CONFIGS; see ``n_channels``.
        self.llama_config_name = "Llama_520M"
        self.input_pos_emb = "learned"
        self.speech_cond_prompt_len = 150

        # Remaining options look like speaker/conditioning settings
        # (NOTE(review): inferred from the names only -- confirm).
        self.encoder_type = "voice_encoder"
        self.speaker_embed_size = 256
        self.use_perceiver_resampler = True
        self.emotion_adv = True

    @property
    def n_channels(self):
        """Return ``hidden_size`` of the LLAMA_CONFIGS entry named by ``llama_config_name``."""
        return LLAMA_CONFIGS[self.llama_config_name]["hidden_size"]

    @property
    def is_multilingual(self) -> bool:
        """Return True when the text vocabulary size equals the multilingual size (2454)."""
        return self.text_tokens_dict_size == self._MULTILINGUAL_TEXT_VOCAB_SIZE

    @classmethod
    def english_only(cls) -> "T3Config":
        """Create configuration for English-only TTS model."""
        return cls(text_tokens_dict_size=cls._ENGLISH_TEXT_VOCAB_SIZE)

    @classmethod
    def multilingual(cls) -> "T3Config":
        """Create configuration for multilingual TTS model."""
        return cls(text_tokens_dict_size=cls._MULTILINGUAL_TEXT_VOCAB_SIZE)