fox7005 committed on
Commit
db2ac35
·
1 Parent(s): 62e11a4

Upload 2 files

Browse files
Files changed (2) hide show
  1. config.yaml +343 -0
  2. model_ckpt_steps_322000.ckpt +3 -0
config.yaml ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ K_step: 1000
2
+ accumulate_grad_batches: 1
3
+ audio_num_mel_bins: 80
4
+ audio_sample_rate: 24000
5
+ base_config:
6
+ - configs/popcs_ds_beta6.yaml
7
+ - configs/midi/cascade/opencs/opencpop_statis.yaml
8
+ binarization_args:
9
+ shuffle: false
10
+ with_align: true
11
+ with_f0: true
12
+ with_f0cwt: true
13
+ with_spk_embed: false
14
+ with_txt: true
15
+ with_wav: true
16
+ binarizer_cls: data_gen.opencpop.OpencpopBinarizer
17
+ binary_data_dir: data/binary/nyaru-midi-dp
18
+ check_val_every_n_epoch: 10
19
+ clip_grad_norm: 1
20
+ content_cond_steps: []
21
+ cwt_add_f0_loss: false
22
+ cwt_hidden_size: 128
23
+ cwt_layers: 2
24
+ cwt_loss: l1
25
+ cwt_std_scale: 0.8
26
+ datasets:
27
+ - opencpop
28
+ debug: false
29
+ dec_ffn_kernel_size: 9
30
+ dec_layers: 4
31
+ decay_steps: 125000
32
+ decoder_type: fft
33
+ dict_dir: ''
34
+ diff_decoder_type: wavenet
35
+ diff_loss_type: l1
36
+ dilation_cycle_length: 4
37
+ dropout: 0.1
38
+ ds_workers: 4
39
+ dur_enc_hidden_stride_kernel:
40
+ - 0,2,3
41
+ - 0,2,3
42
+ - 0,1,3
43
+ dur_loss: mse
44
+ dur_predictor_kernel: 3
45
+ dur_predictor_layers: 5
46
+ enc_ffn_kernel_size: 9
47
+ enc_layers: 4
48
+ encoder_K: 8
49
+ encoder_type: fft
50
+ endless_ds: true
51
+ ffn_act: gelu
52
+ ffn_padding: SAME
53
+ fft_size: 512
54
+ fmax: 12000
55
+ fmin: 30
56
+ fs2_ckpt: ''
57
+ gaussian_start: true
58
+ gen_dir_name: ''
59
+ gen_tgt_spk_id: -1
60
+ hidden_size: 256
61
+ hop_size: 128
62
+ infer: false
63
+ keep_bins: 80
64
+ lambda_commit: 0.25
65
+ lambda_energy: 0.0
66
+ lambda_f0: 1.0
67
+ lambda_ph_dur: 0.3
68
+ lambda_sent_dur: 3.0
69
+ lambda_uv: 0.0
70
+ lambda_word_dur: 1.0
71
+ load_ckpt: ''
72
+ log_interval: 100
73
+ loud_norm: false
74
+ lr: 0.001
75
+ max_beta: 0.02
76
+ max_epochs: 1000
77
+ max_eval_sentences: 1
78
+ max_eval_tokens: 60000
79
+ max_frames: 8000
80
+ max_input_tokens: 1550
81
+ max_sentences: 66
82
+ max_tokens: 40000
83
+ max_updates: 640000
84
+ mel_loss: ssim:0.5|l1:0.5
85
+ mel_vmax: 1.5
86
+ mel_vmin: -6.0
87
+ min_level_db: -120
88
+ norm_type: gn
89
+ num_ckpt_keep: 3
90
+ num_heads: 2
91
+ num_sanity_val_steps: 1
92
+ num_spk: 1
93
+ num_test_samples: 0
94
+ num_valid_plots: 10
95
+ optimizer_adam_beta1: 0.9
96
+ optimizer_adam_beta2: 0.98
97
+ out_wav_norm: false
98
+ pitch_ar: false
99
+ pitch_enc_hidden_stride_kernel:
100
+ - 0,2,5
101
+ - 0,2,5
102
+ - 0,2,5
103
+ pitch_extractor: parselmouth
104
+ pitch_loss: l1
105
+ pitch_norm: log
106
+ pitch_type: frame
107
+ pndm_speedup: 20
108
+ pre_align_args:
109
+ allow_no_txt: false
110
+ denoise: false
111
+ forced_align: mfa
112
+ txt_processor: zh_g2pM
113
+ use_sox: true
114
+ use_tone: false
115
+ pre_align_cls: data_gen.pre_align.SingingPreAlign
116
+ predictor_dropout: 0.5
117
+ predictor_grad: 0.1
118
+ predictor_hidden: -1
119
+ predictor_kernel: 5
120
+ predictor_layers: 5
121
+ prenet_dropout: 0.5
122
+ prenet_hidden_size: 256
123
+ pretrain_fs_ckpt: ''
124
+ processed_data_dir: xxx
125
+ profile_infer: false
126
+ raw_data_dir: data/raw/nyaru/segments
127
+ ref_norm_layer: bn
128
+ rel_pos: true
129
+ reset_phone_dict: true
130
+ residual_channels: 256
131
+ residual_layers: 20
132
+ save_best: false
133
+ save_ckpt: true
134
+ save_codes:
135
+ - configs
136
+ - modules
137
+ - src
138
+ - utils
139
+ save_f0: true
140
+ save_gt: false
141
+ schedule_type: linear
142
+ seed: 1234
143
+ sort_by_len: true
144
+ spec_max:
145
+ - -0.79453
146
+ - -0.81116
147
+ - -0.61631
148
+ - -0.30679
149
+ - -0.13863
150
+ - -0.050652
151
+ - -0.11563
152
+ - -0.10679
153
+ - -0.091068
154
+ - -0.062174
155
+ - -0.075302
156
+ - -0.072217
157
+ - -0.063815
158
+ - -0.073299
159
+ - 0.007361
160
+ - -0.072508
161
+ - -0.050234
162
+ - -0.16534
163
+ - -0.26928
164
+ - -0.20782
165
+ - -0.20823
166
+ - -0.11702
167
+ - -0.070128
168
+ - -0.065868
169
+ - -0.012675
170
+ - 0.0015121
171
+ - -0.089902
172
+ - -0.21392
173
+ - -0.23789
174
+ - -0.28922
175
+ - -0.30405
176
+ - -0.23029
177
+ - -0.22088
178
+ - -0.21542
179
+ - -0.29367
180
+ - -0.30137
181
+ - -0.38281
182
+ - -0.4359
183
+ - -0.28681
184
+ - -0.46855
185
+ - -0.57485
186
+ - -0.47022
187
+ - -0.54266
188
+ - -0.44848
189
+ - -0.6412
190
+ - -0.687
191
+ - -0.6486
192
+ - -0.76436
193
+ - -0.49971
194
+ - -0.71068
195
+ - -0.69724
196
+ - -0.61487
197
+ - -0.55843
198
+ - -0.69773
199
+ - -0.57502
200
+ - -0.70919
201
+ - -0.82431
202
+ - -0.84213
203
+ - -0.90431
204
+ - -0.8284
205
+ - -0.77945
206
+ - -0.82758
207
+ - -0.87699
208
+ - -1.0532
209
+ - -1.0766
210
+ - -1.1198
211
+ - -1.0185
212
+ - -0.98983
213
+ - -1.0001
214
+ - -1.0756
215
+ - -1.0024
216
+ - -1.0304
217
+ - -1.0579
218
+ - -1.0188
219
+ - -1.05
220
+ - -1.0842
221
+ - -1.0923
222
+ - -1.1223
223
+ - -1.2381
224
+ - -1.6467
225
+ spec_min:
226
+ - -6.0
227
+ - -6.0
228
+ - -6.0
229
+ - -6.0
230
+ - -6.0
231
+ - -6.0
232
+ - -6.0
233
+ - -6.0
234
+ - -6.0
235
+ - -6.0
236
+ - -6.0
237
+ - -6.0
238
+ - -6.0
239
+ - -6.0
240
+ - -6.0
241
+ - -6.0
242
+ - -6.0
243
+ - -6.0
244
+ - -6.0
245
+ - -6.0
246
+ - -6.0
247
+ - -6.0
248
+ - -6.0
249
+ - -6.0
250
+ - -6.0
251
+ - -6.0
252
+ - -6.0
253
+ - -6.0
254
+ - -6.0
255
+ - -6.0
256
+ - -6.0
257
+ - -6.0
258
+ - -6.0
259
+ - -6.0
260
+ - -6.0
261
+ - -6.0
262
+ - -6.0
263
+ - -6.0
264
+ - -6.0
265
+ - -6.0
266
+ - -6.0
267
+ - -6.0
268
+ - -6.0
269
+ - -6.0
270
+ - -6.0
271
+ - -6.0
272
+ - -6.0
273
+ - -6.0
274
+ - -6.0
275
+ - -6.0
276
+ - -6.0
277
+ - -6.0
278
+ - -6.0
279
+ - -6.0
280
+ - -6.0
281
+ - -6.0
282
+ - -6.0
283
+ - -6.0
284
+ - -6.0
285
+ - -6.0
286
+ - -6.0
287
+ - -6.0
288
+ - -6.0
289
+ - -6.0
290
+ - -6.0
291
+ - -6.0
292
+ - -6.0
293
+ - -6.0
294
+ - -6.0
295
+ - -6.0
296
+ - -6.0
297
+ - -6.0
298
+ - -6.0
299
+ - -6.0
300
+ - -6.0
301
+ - -6.0
302
+ - -6.0
303
+ - -6.0
304
+ - -6.0
305
+ - -6.0
306
+ spk_cond_steps: []
307
+ stop_token_weight: 5.0
308
+ task_cls: src.diffsinger_task.DiffSingerMIDITask
309
+ test_ids: []
310
+ test_input_dir: ''
311
+ test_num: 0
312
+ test_prefixes:
313
+ - '2044'
314
+ - '2086'
315
+ - '2092'
316
+ - '2093'
317
+ - '2100'
318
+ test_set_name: test
319
+ timesteps: 1000
320
+ train_set_name: train
321
+ use_denoise: false
322
+ use_energy_embed: false
323
+ use_gt_dur: false
324
+ use_gt_f0: false
325
+ use_midi: true
326
+ use_nsf: true
327
+ use_pitch_embed: true
328
+ use_pos_embed: true
329
+ use_spk_embed: false
330
+ use_spk_id: false
331
+ use_split_spk_id: false
332
+ use_uv: false
333
+ use_var_enc: false
334
+ val_check_interval: 2000
335
+ valid_num: 0
336
+ valid_set_name: valid
337
+ vocoder: src.vocoders.hifigan.HifiGAN
338
+ vocoder_ckpt: checkpoints/0109_hifigan_bigpopcs_hop128
339
+ warmup_updates: 2000
340
+ wav2spec_eps: 1e-6
341
+ weight_decay: 0
342
+ win_size: 512
343
+ work_dir: checkpoints/nyaru
model_ckpt_steps_322000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c982b14013de7bfd8465b74aa99898a81ce4d480caee126a5e54e54918f3df29
3
+ size 399827072