# Spaces: Running on Zero
# Pretraining configuration.
# NOTE(review): this file was recovered from a flattened listing in which
# indentation was lost; the nesting below is reconstructed from key order.
# Confirm the placement of keys flagged with NOTE(review) against the code
# that loads this config.
model_name: pretrain

model:
  dim: 1024
  depth: 24
  heads: 16
  ff_mult: 4
  text_dim: 512
  # disable convnext in text embedding
  conv_layers: 0
  # phoneme vocab size
  text_num_embeds: 200
  # same value as mel.n_mel_channels below
  mel_dim: 100
  t5_dim: 1024
  clap_dim: 512
  # disable it on a100
  use_checkpoint: false
  qk_norm: true
  skip: true

mel:
  # same value as data.trainset.sr / data.valset.sr below
  target_sample_rate: 24000
  n_mel_channels: 100
  hop_length: 256

opt:
  learning_rate: 2.0e-04
  beta1: 0.9
  beta2: 0.999
  weight_decay: 0.01
  adam_epsilon: 1.0e-08
  grad_clip: 1.0
  # NOTE(review): batch_size through drop_text are assumed to belong to
  # `opt` because they directly follow its keys - confirm.
  batch_size: 64
  accumulation_steps: 1
  # mask_range: [0.7, 1.0]
  drop_spk: 0.1
  drop_text: 0.5
  # NOTE(review): `lr_scheduler` is assumed to be nested under `opt`; it may
  # instead be a top-level section - confirm against the training script.
  lr_scheduler:
    warmup_steps: 5000
    decay_steps: 150000
    end_factor: 1.0e-02

data:
  trainset:
    dataset_dir: ""  # your processed path
    clap_emb_dir: "./data/clap_embs/"
    t5_folder_name: "t5"
    phn_folder_name: "g2p"
    manifest_name: "manifest"
    json_name: "jsons"
    dynamic_batching: true
    text_pad_token: -1
    audio_pad_token: 0.0
    split: "train_PT"
    sr: 24000
    norm_audio: false
  # valset uses the same keys as trainset; only `split` differs.
  valset:
    dataset_dir: ""  # your processed path
    clap_emb_dir: "./data/clap_embs/"
    t5_folder_name: "t5"
    phn_folder_name: "g2p"
    manifest_name: "manifest"
    json_name: "jsons"
    dynamic_batching: true
    text_pad_token: -1
    audio_pad_token: 0.0
    split: "validation_PT"
    sr: 24000
    norm_audio: false