| | |
| |
|
| | defaults: |
| | - ../default |
| | - override /dset: audio/default |
| | - _self_ |
| |
|
| | solver: compression |
| | sample_rate: ??? |
| | channels: ??? |
| |
|
| | # loss balancing: per-loss coefficients (losses) and balancer settings |
| | losses: |
| | adv: 4. |
| | feat: 4. |
| | l1: 0.1 |
| | mel: 0. |
| | msspec: 2. |
| | sisnr: 0. |
| | balancer: |
| | balance_grads: true |
| | ema_decay: 0.999 |
| | per_batch_item: true |
| | total_norm: 1. |
| |
|
| | adversarial: |
| | every: 1 |
| | adversaries: [msstftd] |
| | adv_loss: hinge |
| | feat_loss: l1 |
| |
|
| | # per-loss hyperparameters (l1, l2, mrstft, mel, sisnr, msspec) |
| | l1: {} |
| | l2: {} |
| | mrstft: |
| | factor_sc: .5 |
| | factor_mag: .5 |
| | normalized: false |
| | mel: |
| | sample_rate: ${sample_rate} |
| | n_fft: 1024 |
| | hop_length: 256 |
| | win_length: 1024 |
| | n_mels: 64 |
| | f_min: 64 |
| | f_max: null |
| | normalized: false |
| | floor_level: 1e-5 |
| | sisnr: |
| | sample_rate: ${sample_rate} |
| | segment: 5. |
| | msspec: |
| | sample_rate: ${sample_rate} |
| | range_start: 6 |
| | range_end: 11 |
| | n_mels: 64 |
| | f_min: 64 |
| | f_max: null |
| | normalized: true |
| | alphas: false |
| | floor_level: 1e-5 |
| |
|
| | # metrics configuration |
| | metrics: |
| | visqol: |
| | mode: audio |
| | bin: null |
| | model: tcdaudio14_aacvopus_coresv_svrnsim_n.68_g.01_c1.model |
| |
|
| | # adversary (discriminator) hyperparameters: msstftd, msd, mpd |
| | msstftd: |
| | in_channels: 1 |
| | out_channels: 1 |
| | filters: 32 |
| | norm: weight_norm |
| | n_ffts: [1024, 2048, 512, 256, 128] |
| | hop_lengths: [256, 512, 128, 64, 32] |
| | win_lengths: [1024, 2048, 512, 256, 128] |
| | activation: LeakyReLU |
| | activation_params: {negative_slope: 0.3} |
| | msd: |
| | in_channels: 1 |
| | out_channels: 1 |
| | scale_norms: [spectral_norm, weight_norm, weight_norm] |
| | kernel_sizes: [5, 3] |
| | filters: 16 |
| | max_filters: 1024 |
| | downsample_scales: [4, 4, 4, 4] |
| | inner_kernel_sizes: null |
| | groups: [4, 4, 4, 4] |
| | strides: null |
| | paddings: null |
| | activation: LeakyReLU |
| | activation_params: {negative_slope: 0.3} |
| | mpd: |
| | in_channels: 1 |
| | out_channels: 1 |
| | periods: [2, 3, 5, 7, 11] |
| | n_layers: 5 |
| | kernel_size: 5 |
| | stride: 3 |
| | filters: 8 |
| | filter_scales: 4 |
| | max_filters: 1024 |
| | activation: LeakyReLU |
| | activation_params: {negative_slope: 0.3} |
| | norm: weight_norm |
| |
|
| | # dataset and data-loading settings |
| | dataset: |
| | batch_size: 64 |
| | num_workers: 10 |
| | segment_duration: 1 |
| | train: |
| | num_samples: 500000 |
| | valid: |
| | num_samples: 10000 |
| | evaluate: |
| | batch_size: 32 |
| | num_samples: 10000 |
| | generate: |
| | batch_size: 32 |
| | num_samples: 50 |
| | segment_duration: 10 |
| |
|
| | # evaluation and generation stage settings |
| | evaluate: |
| | every: 25 |
| | num_workers: 5 |
| | metrics: |
| | visqol: false |
| | sisnr: true |
| | generate: |
| | every: 25 |
| | num_workers: 5 |
| | audio: |
| | sample_rate: ${sample_rate} |
| |
|
| | # checkpointing settings |
| | checkpoint: |
| | save_last: true |
| | save_every: 25 |
| | keep_last: 10 |
| | keep_every_states: null |
| |
|
| | # optimization settings (optimizer, learning rate, EMA) |
| | optim: |
| | epochs: 200 |
| | updates_per_epoch: 2000 |
| | lr: 3e-4 |
| | max_norm: 0. |
| | optimizer: adam |
| | adam: |
| | betas: [0.5, 0.9] |
| | weight_decay: 0. |
| | ema: |
| | use: true |
| | updates: 1 |
| | device: ${device} |
| | decay: 0.99 |
| |
|