seed_everything: 3407
|
|
data:
  class_path: unicodec.decoder.dataset.VocosDataModule
  init_args:
    train_params:
      filelist_path: ./data/train/finetune_data
      sampling_rate: 24000
      num_samples: 240000
      batch_size: 10
      num_workers: 8

    val_params:
      filelist_path: ./data/infer/large_data_domain
      sampling_rate: 24000
      num_samples: 240000
      batch_size: 5
      num_workers: 8
|
|
model:
  class_path: unicodec.decoder.experiment.VocosEncodecExp
  init_args:
    sample_rate: 24000
    initial_learning_rate: 5e-5
    mel_loss_coeff: 450
    mrd_loss_coeff: 1.0

    num_warmup_steps: 5000
    pretrain_mel_steps: 0
    use_ema: false

    evaluate_utmos: true
    evaluate_pesq: true
    # NOTE(review): key is spelled "periodicty" (not "periodicity") — presumably
    # this matches the attribute name in VocosEncodecExp; verify before renaming.
    evaluate_periodicty: true
|
|
    resume: true
    resume_config:  # TODO: path to the config to resume from (empty despite resume: true — confirm)
    resume_model:   # TODO: path to the checkpoint to resume from (empty despite resume: true — confirm)

    feature_extractor:
      class_path: unicodec.decoder.feature_extractors.EncodecFeatures
      init_args:
        encodec_model: encodec_24khz
        bandwidths: [6.6, 6.6, 6.6, 6.6]
        train_codebooks: true
        num_quantizers: 1
        # NOTE(review): key is spelled "dowmsamples" — presumably matches the
        # EncodecFeatures parameter name; verify before renaming.
        dowmsamples: [8, 5, 4, 2]
        vq_bins: 16384
        vq_kmeans: 200
        use_transformer: true
        mask: false
|
|
    backbone:
      class_path: unicodec.decoder.models.VocosBackbone
      init_args:
        input_channels: 512
        dim: 768
        intermediate_dim: 2304
        num_layers: 12
        adanorm_num_embeddings: 4
|
|
    head:
      class_path: unicodec.decoder.heads.ISTFTHead
      init_args:
        dim: 768
        n_fft: 1280
        hop_length: 320
        padding: same
|
|
trainer:
  logger:
    class_path: pytorch_lightning.loggers.TensorBoardLogger
    init_args:
      save_dir: /debug/
  callbacks:
    - class_path: pytorch_lightning.callbacks.LearningRateMonitor
    - class_path: pytorch_lightning.callbacks.ModelSummary
      init_args:
        max_depth: 2
    - class_path: pytorch_lightning.callbacks.ModelCheckpoint
      init_args:
        monitor: val_loss
        filename: vocos_checkpoint_{epoch}_{step}_{val_loss:.4f}
        save_top_k: 100
        save_last: true
    - class_path: unicodec.decoder.helpers.GradNormCallback

  max_steps: 20000000
  limit_val_batches: 100
  accelerator: gpu
  strategy: ddp
  devices: [0,1,2,3,4,5,6,7]
  num_nodes: 4
  log_every_n_steps: 200
| |
|
|