wyh2000 committed on
Commit
fb2f299
·
1 Parent(s): 2f51bab

add libritts_dac_24k

Browse files
Files changed (28) hide show
  1. exp/codec_train_dac_raw_fs24000/81epoch.pth +3 -0
  2. exp/codec_train_dac_raw_fs24000/config.yaml +292 -0
  3. exp/codec_train_dac_raw_fs24000/images/adv_loss.png +0 -0
  4. exp/codec_train_dac_raw_fs24000/images/codec_commit_loss.png +0 -0
  5. exp/codec_train_dac_raw_fs24000/images/codec_loss.png +0 -0
  6. exp/codec_train_dac_raw_fs24000/images/codec_quantization_loss.png +0 -0
  7. exp/codec_train_dac_raw_fs24000/images/discriminator_backward_time.png +0 -0
  8. exp/codec_train_dac_raw_fs24000/images/discriminator_forward_time.png +0 -0
  9. exp/codec_train_dac_raw_fs24000/images/discriminator_loss.png +0 -0
  10. exp/codec_train_dac_raw_fs24000/images/discriminator_optim_step_time.png +0 -0
  11. exp/codec_train_dac_raw_fs24000/images/discriminator_train_time.png +0 -0
  12. exp/codec_train_dac_raw_fs24000/images/fake_loss.png +0 -0
  13. exp/codec_train_dac_raw_fs24000/images/feat_match_loss.png +0 -0
  14. exp/codec_train_dac_raw_fs24000/images/generator_backward_time.png +0 -0
  15. exp/codec_train_dac_raw_fs24000/images/generator_forward_time.png +0 -0
  16. exp/codec_train_dac_raw_fs24000/images/generator_optim_step_time.png +0 -0
  17. exp/codec_train_dac_raw_fs24000/images/generator_train_time.png +0 -0
  18. exp/codec_train_dac_raw_fs24000/images/gpu_max_cached_mem_GB.png +0 -0
  19. exp/codec_train_dac_raw_fs24000/images/iter_time.png +0 -0
  20. exp/codec_train_dac_raw_fs24000/images/loss.png +0 -0
  21. exp/codec_train_dac_raw_fs24000/images/mel_loss.png +0 -0
  22. exp/codec_train_dac_raw_fs24000/images/mel_loss_real.png +0 -0
  23. exp/codec_train_dac_raw_fs24000/images/optim0_lr0.png +0 -0
  24. exp/codec_train_dac_raw_fs24000/images/optim1_lr0.png +0 -0
  25. exp/codec_train_dac_raw_fs24000/images/real_loss.png +0 -0
  26. exp/codec_train_dac_raw_fs24000/images/reconstruct_loss.png +0 -0
  27. exp/codec_train_dac_raw_fs24000/images/train_time.png +0 -0
  28. meta.yaml +9 -0
exp/codec_train_dac_raw_fs24000/81epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50fc2e132ab24afd2ae767d9bc9a5c50c5f8617b643ee0469054faa83f84af46
3
+ size 383901476
exp/codec_train_dac_raw_fs24000/config.yaml ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_dac.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: chunk
7
+ valid_iterator_type: null
8
+ output_dir: exp/codec_train_dac_raw_fs24000
9
+ ngpu: 1
10
+ seed: 777
11
+ num_workers: 1
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ cudnn_enabled: true
25
+ cudnn_benchmark: false
26
+ cudnn_deterministic: false
27
+ use_tf32: false
28
+ collect_stats: false
29
+ write_collected_feats: false
30
+ max_epoch: 120
31
+ patience: null
32
+ val_scheduler_criterion:
33
+ - valid
34
+ - loss
35
+ early_stopping_criterion:
36
+ - valid
37
+ - loss
38
+ - min
39
+ best_model_criterion:
40
+ - - valid
41
+ - mel_loss
42
+ - min
43
+ - - train
44
+ - mel_loss
45
+ - min
46
+ - - train
47
+ - total_count
48
+ - max
49
+ keep_nbest_models: 5
50
+ nbest_averaging_interval: 0
51
+ grad_clip: -1
52
+ grad_clip_type: 2.0
53
+ grad_noise: false
54
+ accum_grad: 1
55
+ no_forward_run: false
56
+ resume: true
57
+ train_dtype: float32
58
+ use_amp: false
59
+ log_interval: 50
60
+ use_matplotlib: true
61
+ use_tensorboard: true
62
+ create_graph_in_tensorboard: false
63
+ use_wandb: false
64
+ wandb_project: null
65
+ wandb_id: null
66
+ wandb_entity: null
67
+ wandb_name: null
68
+ wandb_model_log_interval: -1
69
+ detect_anomaly: false
70
+ use_adapter: false
71
+ adapter: lora
72
+ save_strategy: all
73
+ adapter_conf: {}
74
+ pretrain_path: null
75
+ init_param: []
76
+ ignore_init_mismatch: false
77
+ freeze_param: []
78
+ num_iters_per_epoch: 5000
79
+ batch_size: 8
80
+ valid_batch_size: null
81
+ batch_bins: 1000000
82
+ valid_batch_bins: null
83
+ train_shape_file:
84
+ - exp/codec_stats_raw_24000/train/audio_shape
85
+ valid_shape_file:
86
+ - exp/codec_stats_raw_24000/valid/audio_shape
87
+ batch_type: unsorted
88
+ valid_batch_type: null
89
+ fold_length:
90
+ - 256000
91
+ sort_in_batch: descending
92
+ shuffle_within_batch: false
93
+ sort_batch: descending
94
+ multiple_iterator: false
95
+ chunk_length: 24000
96
+ chunk_shift_ratio: 0.5
97
+ num_cache_chunks: 64
98
+ chunk_excluded_key_prefixes: []
99
+ chunk_default_fs: null
100
+ train_data_path_and_name_and_type:
101
+ - - dump/raw/train-clean-460/wav.scp
102
+ - audio
103
+ - sound
104
+ valid_data_path_and_name_and_type:
105
+ - - dump/raw/dev-clean/wav.scp
106
+ - audio
107
+ - sound
108
+ multi_task_dataset: false
109
+ allow_variable_data_keys: false
110
+ max_cache_size: 0.0
111
+ max_cache_fd: 32
112
+ allow_multi_rates: false
113
+ valid_max_cache_size: null
114
+ exclude_weight_decay: false
115
+ exclude_weight_decay_conf: {}
116
+ optim: adam
117
+ optim_conf:
118
+ lr: 0.0002
119
+ betas:
120
+ - 0.5
121
+ - 0.9
122
+ eps: 1.0e-09
123
+ weight_decay: 0.0
124
+ scheduler: exponentiallr
125
+ scheduler_conf:
126
+ gamma: 0.999875
127
+ optim2: adam
128
+ optim2_conf:
129
+ lr: 0.0002
130
+ betas:
131
+ - 0.5
132
+ - 0.9
133
+ eps: 1.0e-09
134
+ weight_decay: 0.0
135
+ scheduler2: exponentiallr
136
+ scheduler2_conf:
137
+ gamma: 0.999875
138
+ generator_first: true
139
+ skip_discriminator_prob: 0.0
140
+ model_conf: {}
141
+ use_preprocessor: true
142
+ codec: dac
143
+ codec_conf:
144
+ sampling_rate: 24000
145
+ generator_params:
146
+ hidden_dim: 512
147
+ codebook_dim: 512
148
+ encdec_channels: 1
149
+ encdec_n_filters: 32
150
+ encdec_n_residual_layers: 3
151
+ encdec_ratios:
152
+ - 8
153
+ - 5
154
+ - 4
155
+ - 2
156
+ encdec_activation: Snake
157
+ encdec_norm: weight_norm
158
+ encdec_kernel_size: 7
159
+ encdec_residual_kernel_size: 7
160
+ encdec_last_kernel_size: 7
161
+ encdec_dilation_base: 2
162
+ encdec_causal: false
163
+ encdec_pad_mode: reflect
164
+ encdec_true_skip: false
165
+ encdec_compress: 2
166
+ encdec_lstm: 2
167
+ decoder_trim_right_ratio: 1.0
168
+ decoder_final_activation: null
169
+ decoder_final_activation_params: null
170
+ quantizer_n_q: 32
171
+ quantizer_bins: 1024
172
+ quantizer_decay: 0.99
173
+ quantizer_kmeans_init: true
174
+ quantizer_kmeans_iters: 50
175
+ quantizer_threshold_ema_dead_code: 2
176
+ quantizer_target_bandwidth:
177
+ - 2
178
+ - 4
179
+ - 8
180
+ - 16
181
+ - 32
182
+ quantizer_dropout: true
183
+ sample_rate: 24000
184
+ discriminator_params:
185
+ scales: 3
186
+ scale_downsample_pooling: AvgPool1d
187
+ scale_downsample_pooling_params:
188
+ kernel_size: 4
189
+ stride: 2
190
+ padding: 2
191
+ scale_discriminator_params:
192
+ in_channels: 1
193
+ out_channels: 1
194
+ kernel_sizes:
195
+ - 15
196
+ - 41
197
+ - 5
198
+ - 3
199
+ channels: 128
200
+ max_downsample_channels: 1024
201
+ max_groups: 16
202
+ bias: true
203
+ downsample_scales:
204
+ - 2
205
+ - 2
206
+ - 4
207
+ - 4
208
+ - 1
209
+ nonlinear_activation: LeakyReLU
210
+ nonlinear_activation_params:
211
+ negative_slope: 0.1
212
+ scale_follow_official_norm: false
213
+ msmpmb_discriminator_params:
214
+ rates: []
215
+ sample_rate: 24000
216
+ fft_sizes:
217
+ - 2048
218
+ - 1024
219
+ - 512
220
+ periods:
221
+ - 2
222
+ - 3
223
+ - 5
224
+ - 7
225
+ - 11
226
+ period_discriminator_params:
227
+ in_channels: 1
228
+ out_channels: 1
229
+ kernel_sizes:
230
+ - 5
231
+ - 3
232
+ channels: 32
233
+ downsample_scales:
234
+ - 3
235
+ - 3
236
+ - 3
237
+ - 3
238
+ - 1
239
+ max_downsample_channels: 1024
240
+ bias: true
241
+ nonlinear_activation: LeakyReLU
242
+ nonlinear_activation_params:
243
+ negative_slope: 0.1
244
+ use_weight_norm: true
245
+ use_spectral_norm: false
246
+ band_discriminator_params:
247
+ hop_factor: 0.25
248
+ sample_rate: 24000
249
+ bands:
250
+ - - 0.0
251
+ - 0.1
252
+ - - 0.1
253
+ - 0.25
254
+ - - 0.25
255
+ - 0.5
256
+ - - 0.5
257
+ - 0.75
258
+ - - 0.75
259
+ - 1.0
260
+ channel: 32
261
+ generator_adv_loss_params:
262
+ average_by_discriminators: false
263
+ loss_type: mse
264
+ discriminator_adv_loss_params:
265
+ average_by_discriminators: false
266
+ loss_type: mse
267
+ use_feat_match_loss: true
268
+ feat_match_loss_params:
269
+ average_by_discriminators: false
270
+ average_by_layers: false
271
+ include_final_outputs: true
272
+ use_mel_loss: true
273
+ mel_loss_params:
274
+ range_start: 6
275
+ range_end: 11
276
+ window: hann
277
+ n_mels: 80
278
+ fmin: 0
279
+ fmax: null
280
+ log_base: null
281
+ fs: 24000
282
+ lambda_quantization: 0.25
283
+ lambda_commit: 1.0
284
+ lambda_reconstruct: 1.0
285
+ lambda_adv: 1.0
286
+ lambda_mel: 45.0
287
+ lambda_feat_match: 2.0
288
+ cache_generator_outputs: true
289
+ required:
290
+ - output_dir
291
+ version: '202402'
292
+ distributed: false
exp/codec_train_dac_raw_fs24000/images/adv_loss.png ADDED
exp/codec_train_dac_raw_fs24000/images/codec_commit_loss.png ADDED
exp/codec_train_dac_raw_fs24000/images/codec_loss.png ADDED
exp/codec_train_dac_raw_fs24000/images/codec_quantization_loss.png ADDED
exp/codec_train_dac_raw_fs24000/images/discriminator_backward_time.png ADDED
exp/codec_train_dac_raw_fs24000/images/discriminator_forward_time.png ADDED
exp/codec_train_dac_raw_fs24000/images/discriminator_loss.png ADDED
exp/codec_train_dac_raw_fs24000/images/discriminator_optim_step_time.png ADDED
exp/codec_train_dac_raw_fs24000/images/discriminator_train_time.png ADDED
exp/codec_train_dac_raw_fs24000/images/fake_loss.png ADDED
exp/codec_train_dac_raw_fs24000/images/feat_match_loss.png ADDED
exp/codec_train_dac_raw_fs24000/images/generator_backward_time.png ADDED
exp/codec_train_dac_raw_fs24000/images/generator_forward_time.png ADDED
exp/codec_train_dac_raw_fs24000/images/generator_optim_step_time.png ADDED
exp/codec_train_dac_raw_fs24000/images/generator_train_time.png ADDED
exp/codec_train_dac_raw_fs24000/images/gpu_max_cached_mem_GB.png ADDED
exp/codec_train_dac_raw_fs24000/images/iter_time.png ADDED
exp/codec_train_dac_raw_fs24000/images/loss.png ADDED
exp/codec_train_dac_raw_fs24000/images/mel_loss.png ADDED
exp/codec_train_dac_raw_fs24000/images/mel_loss_real.png ADDED
exp/codec_train_dac_raw_fs24000/images/optim0_lr0.png ADDED
exp/codec_train_dac_raw_fs24000/images/optim1_lr0.png ADDED
exp/codec_train_dac_raw_fs24000/images/real_loss.png ADDED
exp/codec_train_dac_raw_fs24000/images/reconstruct_loss.png ADDED
exp/codec_train_dac_raw_fs24000/images/train_time.png ADDED
meta.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ espnet: '202402'
2
+ files:
3
+ model_file: exp/codec_train_dac_raw_fs24000/81epoch.pth
4
+ python: "3.8.19 | packaged by conda-forge | (default, Mar 20 2024, 12:47:35) \n[GCC\
5
+ \ 12.3.0]"
6
+ timestamp: 1718929830.991067
7
+ torch: 2.3.0+cu121
8
+ yaml_files:
9
+ train_config: exp/codec_train_dac_raw_fs24000/config.yaml