# Page header kept from extraction: file size 3,167 Bytes; d1b63e9 (presumably the revision id).
# Training configuration: one generator, three discriminator definitions,
# per-network optimizers and LR schedulers, loss settings, and run options.
# Each component is described by a `name` plus a `config` mapping.

# Generator network.
generator:
  name: ScalarModel
  config:
    num_bands: 1
    sample_rate: 24000
    causal: true
    num_samples: 2
    # Product of the factors: 2 * 3 * 4 * 4 * 5 = 480.
    downsample_factors:
      - 2
      - 3
      - 4
      - 4
      - 5
    # Each kernel size is twice the factor at the same index.
    downsample_kernel_sizes:
      - 4
      - 6
      - 8
      - 8
      - 10
    # Same values as downsample_factors, in reverse order.
    upsample_factors:
      - 5
      - 4
      - 4
      - 3
      - 2
    # Each kernel size is twice the factor at the same index.
    upsample_kernel_sizes:
      - 10
      - 8
      - 8
      - 6
      - 4
    latent_hidden_dim: 136
    default_kernel_size: 7
    delay_kernel_size: 5
    init_channel: 48
    res_kernel_size: 7

# Only `mfd` is listed here; `mpd` and `msd` are defined below but not listed.
# NOTE(review): presumably this selects which discriminators are built;
# confirm against the training script.
d_list:
  - mfd

mfd:
  name: MultiFrequencyDiscriminator
  config:
    # hop_lengths and hidden_channels both have six entries.
    hop_lengths:
      - 32
      - 64
      - 128
      - 256
      - 512
      - 1024
    hidden_channels:
      - 64
      - 128
      - 256
      - 512
      - 512
      - 512
    domain: double
    mel_scale: true
    sample_rate: 24000

mpd:
  name: MultiPeriodDiscriminator
  config:
    period_sizes:
      - 2
      - 3
      - 5
      - 7
      - 11
    period_kernel_size: 5

msd:
  name: MultiScaleDiscriminator
  config:
    num_scales: 3
    pool_kernel_size: 4
    pool_stride: 2

# `g` and `d` carry identical optimizer settings.
# NOTE(review): presumably g = generator, d = discriminator; confirm.
optimizer:
  g:
    name: AdamW
    config:
      lr: 0.0002
      betas:
        - 0.8
        - 0.99
      eps: 1.0e-06
  d:
    name: AdamW
    config:
      lr: 0.0002
      betas:
        - 0.8
        - 0.99
      eps: 1.0e-06

# `g` and `d` carry identical scheduler settings.
lr_scheduler:
  g:
    name: ExponentialLR
    config:
      gamma: 0.999
  d:
    name: ExponentialLR
    config:
      gamma: 0.999

criterion:
  g_criterion:
    name: losses.generator_loss.GeneratorSTFTLoss
    config:
      # NOTE(review): use_mel_loss is false while mel_loss_weight is set;
      # the weight is presumably ignored in that case. Confirm.
      use_mel_loss: false
      adv_criterion: MSEGLoss
      mel_loss_weight: 45
      use_feature_match: true
      feat_match_loss_weight: 20
      use_full_stft_loss: true
      use_sub_stft_loss: true
      full_stft_loss_weight: 1
      sub_stft_loss_weight: 1
      mel_scale_loss:
        sampling_rate: 24000
        n_fft: 1024
        num_mels: 80
        hop_size: 160
        win_size: 800
        fmin: 0
      # Three resolutions; the three lists are index-aligned.
      full_multi_scale_stft_loss:
        fft_sizes:
          - 512
          - 1024
          - 2048
        win_sizes:
          - 480
          - 960
          - 1200
        hop_sizes:
          - 120
          - 240
          - 300
      # Three resolutions; the three lists are index-aligned.
      sub_multi_scale_stft_loss:
        num_bands: 6
        fft_sizes:
          - 128
          - 256
          - 256
        win_sizes:
          - 80
          - 120
          - 200
        hop_sizes:
          - 20
          - 40
          - 50
  d_criterion:
    name: losses.discriminator_loss.MSEDiscriminatorLoss
    # Explicit null: no config mapping is supplied for this criterion.
    config: null

commit_loss_weight: 1.0

# NOTE(review): absolute, machine-specific paths; adjust per environment.
training_file: /home/ydc/code2/ScalartTokenizer16k_m36/data/train_v2.scp
validation_file: /home/ydc/code2/ScalartTokenizer16k_m36/data/val.scp

seed: 2333
cudnn_deterministic: false
tensorboard: true
checkpoint_interval: 5000
summary_interval: 100
validation_interval: 5000
num_epoches: 50
print_freq: 10
discriminator_iter_start: 0
num_ckpt_keep: 10
segment_size: 48000
audio_norm_scale: 0.95
batch_size: 16
num_workers: 4
num_plots: 8
local_rank: -1
# NOTE(review): the referenced file name says "64dim" while
# generator.config.latent_hidden_dim is 136; confirm this is intended.
basic_model_config: config/scalar24k_64dim.yaml
exp_model_config: null
log_dir: /data9/ydc/exp/s_codec_24k_136dim_scale9_25hz
hop_length: 2000
ngpus_per_node: 4
# Same value as generator.config.sample_rate and mfd.config.sample_rate.
sample_rate: 24000
model_ckpt_dir: /data9/ydc/exp/s_codec_24k_136dim_scale9_25hz/model_ckpts