{
  "base_config": "config/base.json",
  "model_type": "NaturalSpeech2",
  "dataset": ["libritts"],
  "preprocess": {
    "use_mel": false,
    "use_code": true,
    "use_spkid": true,
    "use_pitch": true,
    "use_duration": true,
    "use_phone": true,
    "use_len": true,
    "use_cross_reference": true,
    "train_file": "train.json",
    "melspec_dir": "mel",
    "code_dir": "code",
    "pitch_dir": "pitch",
    "duration_dir": "duration",
    "clip_mode": "start"
  },
  "model": {
    "latent_dim": 128,
    "prior_encoder": {
      "vocab_size": 100,
      "pitch_min": 50,
      "pitch_max": 1100,
      "pitch_bins_num": 512,
      "encoder": {
        "encoder_layer": 6,
        "encoder_hidden": 512,
        "encoder_head": 8,
        "conv_filter_size": 2048,
        "conv_kernel_size": 9,
        "encoder_dropout": 0.2,
        "use_cln": true
      },
      "duration_predictor": {
        "input_size": 512,
        "filter_size": 512,
        "kernel_size": 3,
        "conv_layers": 30,
        "cross_attn_per_layer": 3,
        "attn_head": 8,
        "drop_out": 0.5
      },
      "pitch_predictor": {
        "input_size": 512,
        "filter_size": 512,
        "kernel_size": 5,
        "conv_layers": 30,
        "cross_attn_per_layer": 3,
        "attn_head": 8,
        "drop_out": 0.5
      }
    },
    "diffusion": {
      "wavenet": {
        "input_size": 128,
        "hidden_size": 512,
        "out_size": 128,
        "num_layers": 40,
        "cross_attn_per_layer": 3,
        "dilation_cycle": 2,
        "attn_head": 8,
        "drop_out": 0.2
      },
      "beta_min": 0.05,
      "beta_max": 20,
      "sigma": 1.0,
      "noise_factor": 1.0,
      "ode_solver": "euler"
    },
    "prompt_encoder": {
      "encoder_layer": 6,
      "encoder_hidden": 512,
      "encoder_head": 8,
      "conv_filter_size": 2048,
      "conv_kernel_size": 9,
      "encoder_dropout": 0.2,
      "use_cln": false
    },
    "query_emb": {
      "query_token_num": 32,
      "hidden_size": 512,
      "head_num": 8
    }
  }
}