Shore-TTS-0.1 / config.json
PoTaTo721's picture
Upload folder using huggingface_hub
8a9575d verified
Raw
History Blame Contribute Delete
2.07 kB
{
"seed": 42,
"data": {
"data_path": "/root/Audio-Data/",
"mdct_config": "shore_tts/configs/mdct.json",
"num_workers": 16,
"min_length": 480,
"max_length": 9600,
"batch_size": 64,
"epoch_shuffle": true
},
"text": {
"tokenizer_path": "checkpoints/vocab.json",
"polyphone": true
},
"model": {
"dit": {
"dim": 768,
"depth": 22,
"heads": 12,
"dim_head": 64,
"dropout": 0.1,
"ff_mult": 2,
"text_dim": 512,
"text_mask_padding": true,
"text_embedding_average_upsampling": false,
"qk_norm": null,
"conv_layers": 4,
"pe_attn_head": 1,
"attn_backend": "flash_attn",
"attn_mask_enabled": true,
"long_skip_connection": false,
"checkpoint_activations": true
},
"cfm": {
"sigma": 0.0,
"audio_drop_prob": 0.3,
"cond_drop_prob": 0.2,
"frac_lengths_mask": [
0.7,
1.0
]
}
},
"optim": {
"optimizer_type": "muon_adamw",
"lr": 0.0001,
"weight_decay": 0.05,
"grad_clip": 1.0,
"muon_args": {
"momentum": 0.95,
"nesterov": true,
"ns_steps": 5
},
"adamw_args": {
"betas": [
0.9,
0.95
]
}
},
"scheduler": {
"warmup_steps": 20000,
"warmup_start_factor": 1e-08,
"final_lr_scale": 1e-08
},
"train": {
"epochs": 1000,
"max_steps": 1000000,
"grad_accumulation_steps": 1,
"log_every_steps": 10,
"timing_every_steps": 100,
"save_every_steps": 20000,
"last_per_updates": 1000,
"keep_last_n_checkpoints": 10,
"ema_decay": 0.9999,
"precision": "bf16",
"allow_tf32": false,
"log_samples": {
"enabled": true,
"sample_index": 0,
"sample_steps": 16,
"cfg_strength": 1.0,
"duration_factor": 2.0
},
"save_dir": "checkpoints/pretrain-200M",
"resume_from": "checkpoints/pretrain-200M/model_last.pt",
"tensorboard": {
"enabled": true,
"log_dir": "checkpoints/pretrain-200M/tensorboard"
}
}
}