{
  "tokenizer_id": "zuhri025/TTS-Tokenizer-T",
  "model_config_id": "ekwek/Soprano-1.1-80M",
  "save_dir": "tts_weights_T2",
  "repo_name": "tts_weights_T2",
  "active_datasets": [
    "globe",
    "long_audio",
    "emilia",
    "gemini",
    "soda"
  ],
  "dataset_limits": {
    "gemini": 10000,
    "globe": 250000,
    "soda": 250000,
    "emilia": 80000,
    "long_audio": 70000
  },
  "max_samples": 100000,
  "device": "cuda",
  "num_workers": 2,
  "pin_memory": true,
  "compile_model": true,
  "use_mixed_precision": true,
  "seed": 1337,
  "max_steps": 20000,
  "batch_size": 2,
  "grad_accum_steps": 4,
  "seq_len": 1024,
  "max_lr": 0.001,
  "min_lr": 5e-05,
  "warmup_ratio": 0.1,
  "cooldown_ratio": 0.1,
  "betas": [
    0.9,
    0.95
  ],
  "weight_decay": 0.1,
  "grad_clip_norm": 1.0,
  "text_loss_weight": 0.5,
  "text_mode_ratio": 1.0,
  "phoneme_mode_ratio": 0.0,
  "mix_mode_ratio": 0.0,
  "unconditional_prob": 0.05,
  "voice_dropout_prob": 0.1,
  "word_mix_ratio": 0.5,
  "val_freq": 1500,
  "save_freq": 1500,
  "log_freq": 10,
  "val_steps": 20,
  "test_split_size": 0.01,
  "upload_tensorboard": true,
  "tensorboard_dir": "tensorboard_logs"
}