GaboChoropan committed on
Commit
6d022ba
·
1 Parent(s): 46eaae9

Upload 2 files

Browse files
Files changed (2) hide show
  1. config.yaml +348 -0
  2. model_ckpt_steps_256000.ckpt +3 -0
config.yaml ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ K_step: 1000
2
+ accumulate_grad_batches: 1
3
+ audio_num_mel_bins: 80
4
+ audio_sample_rate: 24000
5
+ binarization_args:
6
+ shuffle: false
7
+ with_align: true
8
+ with_f0: true
9
+ with_hubert: true
10
+ with_spk_embed: false
11
+ with_wav: false
12
+ binarizer_cls: preprocessing.SVCpre.SVCBinarizer
13
+ binary_data_dir: data/binary/bonzi
14
+ check_val_every_n_epoch: 10
15
+ choose_test_manually: false
16
+ clip_grad_norm: 1
17
+ config_path: training/config.yaml
18
+ content_cond_steps: []
19
+ cwt_add_f0_loss: false
20
+ cwt_hidden_size: 128
21
+ cwt_layers: 2
22
+ cwt_loss: l1
23
+ cwt_std_scale: 0.8
24
+ datasets:
25
+ - opencpop
26
+ debug: false
27
+ dec_ffn_kernel_size: 9
28
+ dec_layers: 4
29
+ decay_steps: 300000
30
+ decoder_type: fft
31
+ dict_dir: ''
32
+ diff_decoder_type: wavenet
33
+ diff_loss_type: l2
34
+ dilation_cycle_length: 4
35
+ dropout: 0.1
36
+ ds_workers: 4
37
+ dur_enc_hidden_stride_kernel:
38
+ - 0,2,3
39
+ - 0,2,3
40
+ - 0,1,3
41
+ dur_loss: mse
42
+ dur_predictor_kernel: 3
43
+ dur_predictor_layers: 5
44
+ enc_ffn_kernel_size: 9
45
+ enc_layers: 4
46
+ encoder_K: 8
47
+ encoder_type: fft
48
+ endless_ds: true
49
+ f0_bin: 256
50
+ f0_max: 1100.0
51
+ f0_min: 50.0
52
+ ffn_act: gelu
53
+ ffn_padding: SAME
54
+ fft_size: 512
55
+ fmax: 12000
56
+ fmin: 30
57
+ fs2_ckpt: ''
58
+ gaussian_start: true
59
+ gen_dir_name: ''
60
+ gen_tgt_spk_id: -1
61
+ hidden_size: 256
62
+ hop_size: 128
63
+ hubert_gpu: true
64
+ hubert_path: checkpoints/hubert/hubert_soft.pt
65
+ infer: false
66
+ keep_bins: 80
67
+ lambda_commit: 0.25
68
+ lambda_energy: 0.0
69
+ lambda_f0: 1.0
70
+ lambda_ph_dur: 0.3
71
+ lambda_sent_dur: 1.0
72
+ lambda_uv: 1.0
73
+ lambda_word_dur: 1.0
74
+ load_ckpt: ''
75
+ log_interval: 100
76
+ loud_norm: false
77
+ lr: 0.0008
78
+ max_beta: 0.02
79
+ max_epochs: 3000
80
+ max_eval_sentences: 1
81
+ max_eval_tokens: 60000
82
+ max_frames: 42000
83
+ max_input_tokens: 60000
84
+ max_sentences: 18
85
+ max_tokens: 128000
86
+ max_updates: 1000000
87
+ mel_loss: ssim:0.5|l1:0.5
88
+ mel_vmax: 1.5
89
+ mel_vmin: -6.0
90
+ min_level_db: -120
91
+ norm_type: gn
92
+ num_ckpt_keep: 10
93
+ num_heads: 2
94
+ num_sanity_val_steps: 1
95
+ num_spk: 1
96
+ num_test_samples: 0
97
+ num_valid_plots: 10
98
+ optimizer_adam_beta1: 0.9
99
+ optimizer_adam_beta2: 0.98
100
+ out_wav_norm: false
101
+ pe_ckpt: checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt
102
+ pe_enable: false
103
+ perform_enhance: true
104
+ pitch_ar: false
105
+ pitch_enc_hidden_stride_kernel:
106
+ - 0,2,5
107
+ - 0,2,5
108
+ - 0,2,5
109
+ pitch_extractor: parselmouth
110
+ pitch_loss: l2
111
+ pitch_norm: log
112
+ pitch_type: frame
113
+ pndm_speedup: 10
114
+ pre_align_args:
115
+ allow_no_txt: false
116
+ denoise: false
117
+ forced_align: mfa
118
+ txt_processor: zh_g2pM
119
+ use_sox: true
120
+ use_tone: false
121
+ pre_align_cls: data_gen.singing.pre_align.SingingPreAlign
122
+ predictor_dropout: 0.5
123
+ predictor_grad: 0.1
124
+ predictor_hidden: -1
125
+ predictor_kernel: 5
126
+ predictor_layers: 5
127
+ prenet_dropout: 0.5
128
+ prenet_hidden_size: 256
129
+ pretrain_fs_ckpt: pretrain/nyaru/model_ckpt_steps_60000.ckpt
130
+ processed_data_dir: xxx
131
+ profile_infer: false
132
+ raw_data_dir: data/raw/bonzi
133
+ ref_norm_layer: bn
134
+ rel_pos: true
135
+ reset_phone_dict: true
136
+ residual_channels: 256
137
+ residual_layers: 20
138
+ save_best: false
139
+ save_ckpt: true
140
+ save_codes:
141
+ - configs
142
+ - modules
143
+ - src
144
+ - utils
145
+ save_f0: true
146
+ save_gt: false
147
+ schedule_type: linear
148
+ seed: 1234
149
+ sort_by_len: true
150
+ speaker_id: bonzi
151
+ spec_max:
152
+ - 0.2987259328365326
153
+ - 0.29721200466156006
154
+ - 0.23978209495544434
155
+ - 0.208412766456604
156
+ - 0.25777050852775574
157
+ - 0.2514476478099823
158
+ - 0.1129382848739624
159
+ - 0.03415697440505028
160
+ - 0.09860049188137054
161
+ - 0.10637332499027252
162
+ - 0.13287633657455444
163
+ - 0.19744250178337097
164
+ - 0.10040587931871414
165
+ - 0.13735432922840118
166
+ - 0.15107455849647522
167
+ - 0.17196381092071533
168
+ - 0.08298977464437485
169
+ - 0.0632769986987114
170
+ - 0.02723858878016472
171
+ - -0.001819317927584052
172
+ - -0.029565516859292984
173
+ - -0.023574354127049446
174
+ - -0.01633293740451336
175
+ - 0.07143621146678925
176
+ - 0.021580500528216362
177
+ - 0.07257916033267975
178
+ - -0.024349519982933998
179
+ - -0.06165708228945732
180
+ - -0.10486568510532379
181
+ - -0.1363687664270401
182
+ - -0.13333871960639954
183
+ - -0.13955898582935333
184
+ - -0.16613495349884033
185
+ - -0.17636367678642273
186
+ - -0.2786925733089447
187
+ - -0.22967253625392914
188
+ - -0.31897130608558655
189
+ - -0.18007366359233856
190
+ - -0.29366692900657654
191
+ - -0.2871025800704956
192
+ - -0.36748355627059937
193
+ - -0.46071451902389526
194
+ - -0.5464922189712524
195
+ - -0.5719417333602905
196
+ - -0.6020897626876831
197
+ - -0.6239874958992004
198
+ - -0.5653440952301025
199
+ - -0.6508013606071472
200
+ - -0.628247857093811
201
+ - -0.6809687614440918
202
+ - -0.569259762763977
203
+ - -0.5423558354377747
204
+ - -0.5811785459518433
205
+ - -0.5359002351760864
206
+ - -0.6565515398979187
207
+ - -0.7143737077713013
208
+ - -0.8502675890922546
209
+ - -0.7979224920272827
210
+ - -0.7110578417778015
211
+ - -0.763409435749054
212
+ - -0.7984790802001953
213
+ - -0.6927220821380615
214
+ - -0.658117413520813
215
+ - -0.7486468553543091
216
+ - -0.5949879884719849
217
+ - -0.7494576573371887
218
+ - -0.7400822639465332
219
+ - -0.6822793483734131
220
+ - -0.7773582339286804
221
+ - -0.661201536655426
222
+ - -0.791329026222229
223
+ - -0.8982341885566711
224
+ - -0.8736728429794312
225
+ - -0.7701027393341064
226
+ - -0.8490535616874695
227
+ - -0.7479292154312134
228
+ - -0.9320166110992432
229
+ - -1.2862414121627808
230
+ - -2.8936190605163574
231
+ - -2.924229860305786
232
+ spec_min:
233
+ - -6.0
234
+ - -6.0
235
+ - -6.0
236
+ - -6.0
237
+ - -6.0
238
+ - -6.0
239
+ - -6.0
240
+ - -6.0
241
+ - -6.0
242
+ - -6.0
243
+ - -6.0
244
+ - -6.0
245
+ - -6.0
246
+ - -6.0
247
+ - -6.0
248
+ - -6.0
249
+ - -6.0
250
+ - -6.0
251
+ - -6.0
252
+ - -6.0
253
+ - -6.0
254
+ - -6.0
255
+ - -6.0
256
+ - -6.0
257
+ - -6.0
258
+ - -6.0
259
+ - -6.0
260
+ - -6.0
261
+ - -6.0
262
+ - -6.0
263
+ - -6.0
264
+ - -6.0
265
+ - -6.0
266
+ - -6.0
267
+ - -6.0
268
+ - -6.0
269
+ - -6.0
270
+ - -6.0
271
+ - -6.0
272
+ - -6.0
273
+ - -6.0
274
+ - -6.0
275
+ - -6.0
276
+ - -6.0
277
+ - -6.0
278
+ - -6.0
279
+ - -6.0
280
+ - -6.0
281
+ - -6.0
282
+ - -6.0
283
+ - -6.0
284
+ - -6.0
285
+ - -6.0
286
+ - -6.0
287
+ - -6.0
288
+ - -6.0
289
+ - -6.0
290
+ - -6.0
291
+ - -6.0
292
+ - -6.0
293
+ - -6.0
294
+ - -6.0
295
+ - -6.0
296
+ - -6.0
297
+ - -6.0
298
+ - -6.0
299
+ - -6.0
300
+ - -5.999454021453857
301
+ - -5.8822431564331055
302
+ - -5.892064571380615
303
+ - -5.882402420043945
304
+ - -5.786972522735596
305
+ - -5.746835231781006
306
+ - -5.8594512939453125
307
+ - -5.7389445304870605
308
+ - -5.718059539794922
309
+ - -5.779720306396484
310
+ - -5.801984786987305
311
+ - -6.0
312
+ - -6.0
313
+ spk_cond_steps: []
314
+ stop_token_weight: 5.0
315
+ task_cls: training.task.SVC_task.SVCTask
316
+ test_ids: []
317
+ test_input_dir: ''
318
+ test_num: 0
319
+ test_prefixes:
320
+ - test
321
+ test_set_name: test
322
+ timesteps: 1000
323
+ train_set_name: train
324
+ use_crepe: true
325
+ use_denoise: false
326
+ use_energy_embed: false
327
+ use_gt_dur: false
328
+ use_gt_f0: false
329
+ use_midi: false
330
+ use_nsf: true
331
+ use_pitch_embed: true
332
+ use_pos_embed: true
333
+ use_spk_embed: false
334
+ use_spk_id: false
335
+ use_split_spk_id: false
336
+ use_uv: false
337
+ use_var_enc: false
338
+ use_vec: false
339
+ val_check_interval: 1000
340
+ valid_num: 0
341
+ valid_set_name: valid
342
+ vocoder: network.vocoders.hifigan.HifiGAN
343
+ vocoder_ckpt: checkpoints/0109_hifigan_bigpopcs_hop128
344
+ warmup_updates: 2000
345
+ wav2spec_eps: 1e-6
346
+ weight_decay: 0
347
+ win_size: 512
348
+ work_dir: /content/drive/Shareddrives/susdrive/haru_diff-svc/bonzi
model_ckpt_steps_256000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9437ee09ffc6f1a50ee466caf474c1d220d680768b84799f83fd90d9d96bc193
3
+ size 373662129