Instructions to use Praha-Labs/PrahaTTS-ML with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Chatterbox
How to use Praha-Labs/PrahaTTS-ML with Chatterbox:
# pip install chatterbox-tts import torchaudio as ta from chatterbox.tts import ChatterboxTTS model = ChatterboxTTS.from_pretrained(device="cuda") text = "Ezreal and Jinx teamed up with Ahri, Yasuo, and Teemo to take down the enemy's Nexus in an epic late-game pentakill." wav = model.generate(text) ta.save("test-1.wav", wav, model.sr) # If you want to synthesize with a different voice, specify the audio prompt AUDIO_PROMPT_PATH="YOUR_FILE.wav" wav = model.generate(text, audio_prompt_path=AUDIO_PROMPT_PATH) ta.save("test-2.wav", wav, model.sr) - Notebooks
- Google Colab
- Kaggle
| from dataclasses import dataclass, field | |
| from typing import List, Optional | |
| class IndicTrainConfig: | |
| # Base Chatterbox model files downloaded by setup.py. | |
| model_dir: str = "./pretrained_models" | |
| # Dataset layout. Mixed-language metadata should include a language column: | |
| # filename|raw_text|normalized_text|language_id | |
| csv_path: str = "./IndicFinetuning/datasets/MalayalamDataset/metadata.csv" | |
| metadata_path: str = "./IndicFinetuning/datasets/metadata.json" | |
| wav_dir: str = "./IndicFinetuning/datasets/MalayalamDataset/wavs" | |
| preprocessed_dir: str = "./IndicFinetuning/datasets/MalayalamDataset/preprocess" | |
| output_dir: str = "./IndicFinetuning/outputs" | |
| tokenizer_path: str = "./IndicFinetuning/tokenizer/tokenizer_indic.json" | |
| # Model selection. | |
| is_turbo: bool = False | |
| is_lora: bool = True | |
| # Toggle languages here. For single-language Malayalam training, keep ["ml"]. | |
| target_languages: List[str] = field(default_factory=lambda: ["ml"]) | |
| default_language: str = "ml" | |
| metadata_language_column: Optional[int] = 3 | |
| add_language_tag: bool = True | |
| normalize_unicode: str = "NFC" | |
| # Dataset format. | |
| ljspeech: bool = True | |
| json_format: bool = False | |
| preprocess: bool = True | |
| # Inference smoke test. | |
| is_inference: bool = False | |
| inference_language: str = "ml" | |
| inference_prompt_path: str = "/workspace/Indic-ChatterBox/IndicFinetuning/outputs/reference_trimmed.wav" | |
| inference_test_text: str = "പ്രണവേ എനിക്ക് നിന്നെ കാണാൻ really തോന്നുന്നു ഇന്ന് whole day mind full of thoughts ആയിരുന്നു നീ എവിടെയാ, എന്താ doing എന്ന് constantly ഓർമ്മ വരുന്നു just come back once, എനിക്ക് സംസാരിക്കണം നിന്നോട്" | |
| # Vocabulary. Update after building the Indic tokenizer. | |
| new_vocab_size: int = 2573 | |
| # LoRA. | |
| lora_r: int = 128 | |
| lora_alpha: int = 256 | |
| lora_target_modules: List[str] = field(default_factory=lambda: ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj", "spkr_enc"]) | |
| turbo_lora_target_modules: List[str] = field(default_factory=lambda: ["c_attn", "c_proj", "c_fc", "spkr_enc"]) | |
| lora_modules_to_save: List[str] = field(default_factory=lambda: ["text_emb", "text_head"]) | |
| # Training. | |
| batch_size: int = 16 | |
| grad_accum: int = 1 | |
| learning_rate: float = 1e-4 | |
| num_epochs: int = 10 | |
| save_steps: int = 500 | |
| save_total_limit: int = 5 | |
| dataloader_num_workers: int = 8 | |
| # Sequence constraints. | |
| start_text_token: int = 255 | |
| stop_text_token: int = 0 | |
| max_text_len: int = 256 | |
| max_speech_len: int = 850 | |
| prompt_duration: float = 3.0 | |