| { | |
| "train": { | |
| "log_interval": 50, | |
| "eval_interval": 1000, | |
| "seed": 1234, | |
| "port": 8001, | |
| "epochs": 10000, | |
| "learning_rate": 0.0002, | |
| "betas": [ | |
| 0.8, | |
| 0.99 | |
| ], | |
| "eps": 1e-09, | |
| "batch_size": 6, | |
| "accumulation_steps": 1, | |
| "fp16_run": false, | |
| "lr_decay": 0.998, | |
| "segment_size": 10240, | |
| "init_lr_ratio": 1, | |
| "warmup_epochs": 0, | |
| "c_mel": 45, | |
| "keep_ckpts": 4 | |
| }, | |
| "data": { | |
| "data_dir": "dataset", | |
| "dataset_type": "SingDataset", | |
| "collate_type": "SingCollate", | |
| "training_filelist": "filelists/train.txt", | |
| "validation_filelist": "filelists/val.txt", | |
| "max_wav_value": 32768.0, | |
| "sampling_rate": 44100, | |
| "n_fft": 2048, | |
| "fmin": 0, | |
| "fmax": 22050, | |
| "hop_length": 512, | |
| "win_size": 2048, | |
| "acoustic_dim": 80, | |
| "c_dim": 256, | |
| "min_level_db": -115, | |
| "ref_level_db": 20, | |
| "min_db": -115, | |
| "max_abs_value": 4.0, | |
| "n_speakers": 200 | |
| }, | |
| "model": { | |
| "hidden_channels": 192, | |
| "spk_channels": 192, | |
| "filter_channels": 768, | |
| "n_heads": 2, | |
| "n_layers": 4, | |
| "kernel_size": 3, | |
| "p_dropout": 0.1, | |
| "prior_hidden_channels": 192, | |
| "prior_filter_channels": 768, | |
| "prior_n_heads": 2, | |
| "prior_n_layers": 4, | |
| "prior_kernel_size": 3, | |
| "prior_p_dropout": 0.1, | |
| "resblock": "1", | |
| "use_spectral_norm": false, | |
| "resblock_kernel_sizes": [ | |
| 3, | |
| 7, | |
| 11 | |
| ], | |
| "resblock_dilation_sizes": [ | |
| [ | |
| 1, | |
| 3, | |
| 5 | |
| ], | |
| [ | |
| 1, | |
| 3, | |
| 5 | |
| ], | |
| [ | |
| 1, | |
| 3, | |
| 5 | |
| ] | |
| ], | |
| "upsample_rates": [ | |
| 8, | |
| 8, | |
| 4, | |
| 2 | |
| ], | |
| "upsample_initial_channel": 256, | |
| "upsample_kernel_sizes": [ | |
| 16, | |
| 16, | |
| 8, | |
| 4 | |
| ], | |
| "n_harmonic": 64, | |
| "n_bands": 65 | |
| }, | |
| "spk": { | |
| "timdillon": 0 | |
| } | |
| } |