vocodexelysium committed on
Commit
9b9e950
·
1 Parent(s): d647987

Re-add model files after reset

Browse files
Files changed (38) hide show
  1. pupucodec/args.json +270 -0
  2. pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/ckpts.json +3 -0
  3. pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/model.safetensors +3 -0
  4. pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/model_1.safetensors +3 -0
  5. pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/model_2.safetensors +3 -0
  6. pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/model_3.safetensors +3 -0
  7. pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/model_4.safetensors +3 -0
  8. pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/optimizer.bin +3 -0
  9. pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/optimizer_1.bin +3 -0
  10. pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/random_states_0.pkl +3 -0
  11. pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/scheduler.bin +3 -0
  12. pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/scheduler_1.bin +3 -0
  13. pupucodec_large/args.json +270 -0
  14. pupucodec_large/checkpoint/epoch-0053_step-2349317_loss-57.300222 +3 -0
  15. pupuvocoder/args.json +289 -0
  16. pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/ckpts.json +3 -0
  17. pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/model.safetensors +3 -0
  18. pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/model_1.safetensors +3 -0
  19. pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/model_2.safetensors +3 -0
  20. pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/model_3.safetensors +3 -0
  21. pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/model_4.safetensors +3 -0
  22. pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/optimizer.bin +3 -0
  23. pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/optimizer_1.bin +3 -0
  24. pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/random_states_0.pkl +3 -0
  25. pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/scheduler.bin +3 -0
  26. pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/scheduler_1.bin +3 -0
  27. pupuvocoder_large/args.json +286 -0
  28. pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/ckpts.json +3 -0
  29. pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/model.safetensors +3 -0
  30. pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/model_1.safetensors +3 -0
  31. pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/model_2.safetensors +3 -0
  32. pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/model_3.safetensors +3 -0
  33. pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/model_4.safetensors +3 -0
  34. pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/optimizer.bin +3 -0
  35. pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/optimizer_1.bin +3 -0
  36. pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/random_states_0.pkl +3 -0
  37. pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/scheduler.bin +3 -0
  38. pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/scheduler_1.bin +3 -0
pupucodec/args.json ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_config": "egs/afgen/exp_config_afcodec_base.json",
3
+ "dataset": [
4
+ "afgen_all_data",
5
+ ],
6
+ "exp_name": "afcodec",
7
+ "inference": {
8
+ "batch_size": 1,
9
+ },
10
+ "log_dir": "/home/vocod/experiments/afgen",
11
+ "model": {
12
+ "afcodec": {
13
+ "codebook_dim": 8,
14
+ "codebook_size": 1024,
15
+ "decoder_dim": 512,
16
+ "decoder_rates": [
17
+ 8,
18
+ 8,
19
+ 2,
20
+ 2,
21
+ 2,
22
+ ],
23
+ "encoder_dim": 32,
24
+ "encoder_rates": [
25
+ 2,
26
+ 2,
27
+ 2,
28
+ 8,
29
+ 8,
30
+ ],
31
+ "n_codebooks": 9,
32
+ "quantizer_dropout": 0.5,
33
+ },
34
+ "discriminators": [
35
+ "msd",
36
+ "mpd",
37
+ "mssbstftd",
38
+ "mssbcqtd",
39
+ ],
40
+ "generator": "afcodec",
41
+ "mpd": {
42
+ "discriminator_channel_mult_factor": 1,
43
+ "mpd_reshapes": [
44
+ 2,
45
+ 3,
46
+ 5,
47
+ 7,
48
+ 11,
49
+ 17,
50
+ 23,
51
+ 37,
52
+ ],
53
+ "use_spectral_norm": false,
54
+ },
55
+ "mssbcqtd": {
56
+ "bins_per_octaves": [
57
+ 24,
58
+ 36,
59
+ 48,
60
+ ],
61
+ "dilations": [
62
+ 1,
63
+ 2,
64
+ 4,
65
+ ],
66
+ "filters": 32,
67
+ "filters_scale": 1,
68
+ "hop_lengths": [
69
+ 1024,
70
+ 512,
71
+ 512,
72
+ ],
73
+ "in_channels": 1,
74
+ "max_filters": 1024,
75
+ "n_octaves": [
76
+ 10,
77
+ 10,
78
+ 10,
79
+ ],
80
+ "out_channels": 1,
81
+ },
82
+ "msstftd": {
83
+ "filters": 32,
84
+ },
85
+ },
86
+ "model_type": "AFGenCodec",
87
+ "preprocess": {
88
+ "align_mel_duration": false,
89
+ "audio_dir": "audios",
90
+ "bits": 8,
91
+ "contentvec_dir": "contentvec",
92
+ "cut_mel_frame": 128,
93
+ "data_augment": false,
94
+ "dur_dir": "durs",
95
+ "duration_dir": "duration",
96
+ "emo2id": "emo2id.json",
97
+ "energy_dir": "energys",
98
+ "energy_extract_mode": "from_mel",
99
+ "energy_norm": false,
100
+ "energy_remove_outlier": false,
101
+ "extract_acoustic_token": false,
102
+ "extract_amplitude_phase": false,
103
+ "extract_audio": true,
104
+ "extract_contentvec_feature": false,
105
+ "extract_duration": false,
106
+ "extract_energy": false,
107
+ "extract_label": false,
108
+ "extract_linear_spec": false,
109
+ "extract_mcep": false,
110
+ "extract_mel": false,
111
+ "extract_mert_feature": false,
112
+ "extract_one_hot": false,
113
+ "extract_phone": false,
114
+ "extract_pitch": true,
115
+ "extract_uv": true,
116
+ "extract_wenet_feature": false,
117
+ "extract_whisper_feature": false,
118
+ "f0_max": 1975.5,
119
+ "f0_min": 32.7,
120
+ "file_lst": "file.lst",
121
+ "fmax": 22050,
122
+ "fmin": 0,
123
+ "hop_size": 512,
124
+ "imaginary_dir": "imaginarys",
125
+ "is_mu_law": false,
126
+ "lab_dir": "labs",
127
+ "label_dir": "labels",
128
+ "lexicon_path": "./text/lexicon/librispeech-lexicon.txt",
129
+ "linear_dir": "linears",
130
+ "log_amplitude_dir": "log_amplitudes",
131
+ "mcep_dir": "mcep",
132
+ "mel_dir": "mels",
133
+ "mel_extract_mode": "",
134
+ "mel_min_max_norm": false,
135
+ "min_level_db": -115,
136
+ "n_fft": 2048,
137
+ "n_mel": 128,
138
+ "num_silent_frames": 8,
139
+ "phase_dir": "phases",
140
+ "phone_dir": "phones",
141
+ "phone_energy_dir": "phone_energys",
142
+ "phone_extractor": "espeak",
143
+ "phone_pitch_dir": "phone_pitches",
144
+ "phone_seq_file": "phone_seq_file",
145
+ "pitch_bin": 256,
146
+ "pitch_dir": "pitches",
147
+ "pitch_extractor": "rmvpe",
148
+ "pitch_max": 1100.0,
149
+ "pitch_min": 50.0,
150
+ "pitch_norm": false,
151
+ "pitch_remove_outlier": false,
152
+ "processed_dir": "data/",
153
+ "raw_data": "raw_data",
154
+ "real_dir": "reals",
155
+ "ref_level_db": 20,
156
+ "sample_rate": 44100,
157
+ "spk2id": "singers.json",
158
+ "symbols_dict": "symbols.dict",
159
+ "train_file": "train.json",
160
+ "trim_fft_size": 512,
161
+ "trim_hop_size": 128,
162
+ "trim_silence": false,
163
+ "trim_top_db": 30,
164
+ "trimmed_wav_dir": "trimmed_wavs",
165
+ "use_amplitude_phase": false,
166
+ "use_audio": true,
167
+ "use_dur": false,
168
+ "use_emoid": false,
169
+ "use_frame_duration": false,
170
+ "use_frame_energy": false,
171
+ "use_frame_pitch": true,
172
+ "use_lab": false,
173
+ "use_label": false,
174
+ "use_linear": false,
175
+ "use_log_scale_energy": false,
176
+ "use_log_scale_pitch": false,
177
+ "use_mel": false,
178
+ "use_min_max_norm_mel": false,
179
+ "use_one_hot": false,
180
+ "use_phn_seq": false,
181
+ "use_phone": false,
182
+ "use_phone_duration": false,
183
+ "use_phone_energy": false,
184
+ "use_phone_pitch": false,
185
+ "use_spkid": false,
186
+ "use_text": false,
187
+ "use_uv": true,
188
+ "use_wav": false,
189
+ "use_wenet": false,
190
+ "utt2emo": "utt2emo",
191
+ "utt2spk": "utt2spk",
192
+ "uv_dir": "uvs",
193
+ "valid_file": "valid.json",
194
+ "wav_dir": "wavs",
195
+ "wenet_dir": "wenet",
196
+ "win_size": 2048,
197
+ },
198
+ "supported_model_type": [
199
+ "GANVocoder",
200
+ "Fastspeech2",
201
+ "DiffSVC",
202
+ "Transformer",
203
+ "EDM",
204
+ "Autotune",
205
+ "CD",
206
+ ],
207
+ "task_type": "afgen",
208
+ "train": {
209
+ "adamw": {
210
+ "adam_b1": 0.8,
211
+ "adam_b2": 0.99,
212
+ "lr": 0.0001,
213
+ },
214
+ "batch_size": 16,
215
+ "criterions": [
216
+ "feature",
217
+ "discriminator",
218
+ "generator",
219
+ "multimel",
220
+ "codebook",
221
+ "commitment",
222
+ ],
223
+ "dataloader": {
224
+ "num_worker": 32,
225
+ "persistent_workers": true,
226
+ "pin_memory": true,
227
+ "prefetch_factor": 4,
228
+ },
229
+ "ddp": true,
230
+ "exponential_lr": {
231
+ "lr_decay": 0.9999996,
232
+ },
233
+ "freeze_step": 0,
234
+ "gradient_accumulation_step": 1,
235
+ "keep_checkpoint_max": 5,
236
+ "keep_last": [
237
+ 3,
238
+ -1,
239
+ ],
240
+ "max_epoch": 1000000,
241
+ "max_steps": 1000000,
242
+ "multi_speaker_training": false,
243
+ "optimizer": "AdamW",
244
+ "random_seed": 114514,
245
+ "reducelronplateau": {
246
+ "factor": 0.8,
247
+ "min_lr": 0.0001,
248
+ "patience": 10,
249
+ },
250
+ "run_eval": [
251
+ true,
252
+ ],
253
+ "sampler": {
254
+ "drop_last": true,
255
+ "holistic_shuffle": true,
256
+ },
257
+ "save_checkpoint_stride": [
258
+ 1,
259
+ ],
260
+ "save_checkpoints_steps": 10000,
261
+ "save_summary_steps": 500,
262
+ "scheduler": "ReduceLROnPlateau",
263
+ "total_training_steps": 50000,
264
+ "tracker": [
265
+ "tensorboard",
266
+ ],
267
+ "valid_interval": 10000,
268
+ },
269
+ "use_custom_dataset": [],
270
+ }
pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/ckpts.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [
2
+ []
3
+ ]
pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e99d196175c36eb76bb484dedf0735d9d2109d1fe8418a2cf4518c6d7ce4d794
3
+ size 127943572
pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/model_1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45e01c794b9cce470553daa86860516a17629cc01195ad92fbabab7c34d9f539
3
+ size 118557612
pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/model_2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b66c8863bec06bf82ae0bf671b1aa3f09cd97f2863d4043704a8de94cf65f3d
3
+ size 263092072
pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/model_3.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71abf18c28864cf67012f78e93c05944a9fde5cbbcd2a98b02443d25c804b6a2
3
+ size 5695720
pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/model_4.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18aba7e99c90166928bac230681d34d0e9e67a45e49bc92adfa154d1c02f27bf
3
+ size 1560888
pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d69d069333d18162104bf55711403dc4ad1e2e1d3da39ca8f7ccb40c68cb7043
3
+ size 256144660
pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/optimizer_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e93ff06259cd8bdd2da1e756f16abc2589cf45c36d946727c15f01ae812e0008
3
+ size 777186848
pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0ad89a22587db88134843f6fe38faaeb909b4c43ba62d597ed1ac9ab1a15a5e
3
+ size 16513
pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf58b70e9b7d08140cc0ae8cbb43aabd0417766651455c7ce5d46c8ff150fe8b
3
+ size 1401
pupucodec/checkpoint/epoch-0102_step-3038789_loss-76.647869/scheduler_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66df678ca2b6e4ebcb98c1ad1ab38dc119808c8bee4265a823817957dfaf868b
3
+ size 1477
pupucodec_large/args.json ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_config": "egs/afgen/exp_config_afcodec_base.json",
3
+ "dataset": [
4
+ "afgen_all_data",
5
+ ],
6
+ "exp_name": "afcodec_large",
7
+ "inference": {
8
+ "batch_size": 1,
9
+ },
10
+ "log_dir": "/home/vocod/experiments/afgen",
11
+ "model": {
12
+ "afcodec": {
13
+ "codebook_dim": 8,
14
+ "codebook_size": 1024,
15
+ "decoder_dim": 1536,
16
+ "decoder_rates": [
17
+ 8,
18
+ 8,
19
+ 2,
20
+ 2,
21
+ 2,
22
+ ],
23
+ "encoder_dim": 48,
24
+ "encoder_rates": [
25
+ 2,
26
+ 2,
27
+ 2,
28
+ 8,
29
+ 8,
30
+ ],
31
+ "n_codebooks": 9,
32
+ "quantizer_dropout": 0.5,
33
+ },
34
+ "discriminators": [
35
+ "msd",
36
+ "mpd",
37
+ "mssbstftd",
38
+ "mssbcqtd",
39
+ ],
40
+ "generator": "afcodec",
41
+ "mpd": {
42
+ "discriminator_channel_mult_factor": 1,
43
+ "mpd_reshapes": [
44
+ 2,
45
+ 3,
46
+ 5,
47
+ 7,
48
+ 11,
49
+ 17,
50
+ 23,
51
+ 37,
52
+ ],
53
+ "use_spectral_norm": false,
54
+ },
55
+ "mssbcqtd": {
56
+ "bins_per_octaves": [
57
+ 24,
58
+ 36,
59
+ 48,
60
+ ],
61
+ "dilations": [
62
+ 1,
63
+ 2,
64
+ 4,
65
+ ],
66
+ "filters": 32,
67
+ "filters_scale": 1,
68
+ "hop_lengths": [
69
+ 1024,
70
+ 512,
71
+ 512,
72
+ ],
73
+ "in_channels": 1,
74
+ "max_filters": 1024,
75
+ "n_octaves": [
76
+ 10,
77
+ 10,
78
+ 10,
79
+ ],
80
+ "out_channels": 1,
81
+ },
82
+ "msstftd": {
83
+ "filters": 32,
84
+ },
85
+ },
86
+ "model_type": "AFGenCodec",
87
+ "preprocess": {
88
+ "align_mel_duration": false,
89
+ "audio_dir": "audios",
90
+ "bits": 8,
91
+ "contentvec_dir": "contentvec",
92
+ "cut_mel_frame": 128,
93
+ "data_augment": false,
94
+ "dur_dir": "durs",
95
+ "duration_dir": "duration",
96
+ "emo2id": "emo2id.json",
97
+ "energy_dir": "energys",
98
+ "energy_extract_mode": "from_mel",
99
+ "energy_norm": false,
100
+ "energy_remove_outlier": false,
101
+ "extract_acoustic_token": false,
102
+ "extract_amplitude_phase": false,
103
+ "extract_audio": true,
104
+ "extract_contentvec_feature": false,
105
+ "extract_duration": false,
106
+ "extract_energy": false,
107
+ "extract_label": false,
108
+ "extract_linear_spec": false,
109
+ "extract_mcep": false,
110
+ "extract_mel": false,
111
+ "extract_mert_feature": false,
112
+ "extract_one_hot": false,
113
+ "extract_phone": false,
114
+ "extract_pitch": true,
115
+ "extract_uv": true,
116
+ "extract_wenet_feature": false,
117
+ "extract_whisper_feature": false,
118
+ "f0_max": 1975.5,
119
+ "f0_min": 32.7,
120
+ "file_lst": "file.lst",
121
+ "fmax": 22050,
122
+ "fmin": 0,
123
+ "hop_size": 512,
124
+ "imaginary_dir": "imaginarys",
125
+ "is_mu_law": false,
126
+ "lab_dir": "labs",
127
+ "label_dir": "labels",
128
+ "lexicon_path": "./text/lexicon/librispeech-lexicon.txt",
129
+ "linear_dir": "linears",
130
+ "log_amplitude_dir": "log_amplitudes",
131
+ "mcep_dir": "mcep",
132
+ "mel_dir": "mels",
133
+ "mel_extract_mode": "",
134
+ "mel_min_max_norm": false,
135
+ "min_level_db": -115,
136
+ "n_fft": 2048,
137
+ "n_mel": 128,
138
+ "num_silent_frames": 8,
139
+ "phase_dir": "phases",
140
+ "phone_dir": "phones",
141
+ "phone_energy_dir": "phone_energys",
142
+ "phone_extractor": "espeak",
143
+ "phone_pitch_dir": "phone_pitches",
144
+ "phone_seq_file": "phone_seq_file",
145
+ "pitch_bin": 256,
146
+ "pitch_dir": "pitches",
147
+ "pitch_extractor": "rmvpe",
148
+ "pitch_max": 1100.0,
149
+ "pitch_min": 50.0,
150
+ "pitch_norm": false,
151
+ "pitch_remove_outlier": false,
152
+ "processed_dir": "data/",
153
+ "raw_data": "raw_data",
154
+ "real_dir": "reals",
155
+ "ref_level_db": 20,
156
+ "sample_rate": 44100,
157
+ "spk2id": "singers.json",
158
+ "symbols_dict": "symbols.dict",
159
+ "train_file": "train.json",
160
+ "trim_fft_size": 512,
161
+ "trim_hop_size": 128,
162
+ "trim_silence": false,
163
+ "trim_top_db": 30,
164
+ "trimmed_wav_dir": "trimmed_wavs",
165
+ "use_amplitude_phase": false,
166
+ "use_audio": true,
167
+ "use_dur": false,
168
+ "use_emoid": false,
169
+ "use_frame_duration": false,
170
+ "use_frame_energy": false,
171
+ "use_frame_pitch": true,
172
+ "use_lab": false,
173
+ "use_label": false,
174
+ "use_linear": false,
175
+ "use_log_scale_energy": false,
176
+ "use_log_scale_pitch": false,
177
+ "use_mel": false,
178
+ "use_min_max_norm_mel": false,
179
+ "use_one_hot": false,
180
+ "use_phn_seq": false,
181
+ "use_phone": false,
182
+ "use_phone_duration": false,
183
+ "use_phone_energy": false,
184
+ "use_phone_pitch": false,
185
+ "use_spkid": false,
186
+ "use_text": false,
187
+ "use_uv": true,
188
+ "use_wav": false,
189
+ "use_wenet": false,
190
+ "utt2emo": "utt2emo",
191
+ "utt2spk": "utt2spk",
192
+ "uv_dir": "uvs",
193
+ "valid_file": "valid.json",
194
+ "wav_dir": "wavs",
195
+ "wenet_dir": "wenet",
196
+ "win_size": 2048,
197
+ },
198
+ "supported_model_type": [
199
+ "GANVocoder",
200
+ "Fastspeech2",
201
+ "DiffSVC",
202
+ "Transformer",
203
+ "EDM",
204
+ "Autotune",
205
+ "CD",
206
+ ],
207
+ "task_type": "afgen",
208
+ "train": {
209
+ "adamw": {
210
+ "adam_b1": 0.8,
211
+ "adam_b2": 0.99,
212
+ "lr": 0.0001,
213
+ },
214
+ "batch_size": 8,
215
+ "criterions": [
216
+ "feature",
217
+ "discriminator",
218
+ "generator",
219
+ "multimel",
220
+ "codebook",
221
+ "commitment",
222
+ ],
223
+ "dataloader": {
224
+ "num_worker": 32,
225
+ "persistent_workers": true,
226
+ "pin_memory": true,
227
+ "prefetch_factor": 4,
228
+ },
229
+ "ddp": true,
230
+ "exponential_lr": {
231
+ "lr_decay": 0.9999996,
232
+ },
233
+ "freeze_step": 0,
234
+ "gradient_accumulation_step": 1,
235
+ "keep_checkpoint_max": 5,
236
+ "keep_last": [
237
+ 3,
238
+ -1,
239
+ ],
240
+ "max_epoch": 1000000,
241
+ "max_steps": 1000000,
242
+ "multi_speaker_training": false,
243
+ "optimizer": "AdamW",
244
+ "random_seed": 114514,
245
+ "reducelronplateau": {
246
+ "factor": 0.8,
247
+ "min_lr": 0.0001,
248
+ "patience": 10,
249
+ },
250
+ "run_eval": [
251
+ true,
252
+ ],
253
+ "sampler": {
254
+ "drop_last": true,
255
+ "holistic_shuffle": true,
256
+ },
257
+ "save_checkpoint_stride": [
258
+ 1,
259
+ ],
260
+ "save_checkpoints_steps": 10000,
261
+ "save_summary_steps": 500,
262
+ "scheduler": "ReduceLROnPlateau",
263
+ "total_training_steps": 50000,
264
+ "tracker": [
265
+ "tensorboard",
266
+ ],
267
+ "valid_interval": 10000,
268
+ },
269
+ "use_custom_dataset": [],
270
+ }
pupucodec_large/checkpoint/epoch-0053_step-2349317_loss-57.300222 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:760d80a7c5b866dafb5a18eaedee6abcb53c66e949b5cae7cb1f579d4eb988c0
3
+ size 2598313053
pupuvocoder/args.json ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_config": "egs/afgen/exp_config_afvocoder_base.json",
3
+ "dataset": [
4
+ "afgen_all_data",
5
+ ],
6
+ "exp_name": "afvocoder",
7
+ "inference": {
8
+ "batch_size": 1,
9
+ },
10
+ "log_dir": "/home/vocod/experiments/afgen",
11
+ "model": {
12
+ "afvocoder": {
13
+ "activation": "adaasnakebeta",
14
+ "oversampling": true,
15
+ "resblock": "1",
16
+ "resblock_dilation_sizes": [
17
+ [
18
+ 1,
19
+ 3,
20
+ 5,
21
+ ],
22
+ [
23
+ 1,
24
+ 3,
25
+ 5,
26
+ ],
27
+ [
28
+ 1,
29
+ 3,
30
+ 5,
31
+ ],
32
+ ],
33
+ "resblock_kernel_sizes": [
34
+ 3,
35
+ 7,
36
+ 11,
37
+ ],
38
+ "upsample_initial_channel": 512,
39
+ "upsample_kernel_sizes": [
40
+ 16,
41
+ 16,
42
+ 4,
43
+ 4,
44
+ 4,
45
+ ],
46
+ "upsample_rates": [
47
+ 8,
48
+ 8,
49
+ 2,
50
+ 2,
51
+ 2,
52
+ ],
53
+ "upsample_type": "resample",
54
+ },
55
+ "discriminators": [
56
+ "msd",
57
+ "mpd",
58
+ "mssbstftd",
59
+ "mssbcqtd",
60
+ ],
61
+ "generator": "afvocoder",
62
+ "mpd": {
63
+ "discriminator_channel_mult_factor": 1,
64
+ "mpd_reshapes": [
65
+ 2,
66
+ 3,
67
+ 5,
68
+ 7,
69
+ 11,
70
+ 17,
71
+ 23,
72
+ 37,
73
+ ],
74
+ "use_spectral_norm": false,
75
+ },
76
+ "mssbcqtd": {
77
+ "bins_per_octaves": [
78
+ 24,
79
+ 36,
80
+ 48,
81
+ ],
82
+ "dilations": [
83
+ 1,
84
+ 2,
85
+ 4,
86
+ ],
87
+ "filters": 32,
88
+ "filters_scale": 1,
89
+ "hop_lengths": [
90
+ 1024,
91
+ 512,
92
+ 512,
93
+ ],
94
+ "in_channels": 1,
95
+ "max_filters": 1024,
96
+ "n_octaves": [
97
+ 10,
98
+ 10,
99
+ 10,
100
+ ],
101
+ "out_channels": 1,
102
+ },
103
+ "msstftd": {
104
+ "filters": 32,
105
+ },
106
+ },
107
+ "model_type": "AFGenVocoder",
108
+ "preprocess": {
109
+ "align_mel_duration": false,
110
+ "audio_dir": "audios",
111
+ "bits": 8,
112
+ "contentvec_dir": "contentvec",
113
+ "cut_mel_frame": 128,
114
+ "data_augment": false,
115
+ "dur_dir": "durs",
116
+ "duration_dir": "duration",
117
+ "emo2id": "emo2id.json",
118
+ "energy_dir": "energys",
119
+ "energy_extract_mode": "from_mel",
120
+ "energy_norm": false,
121
+ "energy_remove_outlier": false,
122
+ "extract_acoustic_token": false,
123
+ "extract_amplitude_phase": false,
124
+ "extract_audio": true,
125
+ "extract_contentvec_feature": false,
126
+ "extract_duration": false,
127
+ "extract_energy": false,
128
+ "extract_label": false,
129
+ "extract_linear_spec": false,
130
+ "extract_mcep": false,
131
+ "extract_mel": false,
132
+ "extract_mert_feature": false,
133
+ "extract_one_hot": false,
134
+ "extract_phone": false,
135
+ "extract_pitch": true,
136
+ "extract_uv": true,
137
+ "extract_wenet_feature": false,
138
+ "extract_whisper_feature": false,
139
+ "f0_max": 1975.5,
140
+ "f0_min": 32.7,
141
+ "file_lst": "file.lst",
142
+ "fmax": 22050,
143
+ "fmin": 0,
144
+ "hop_size": 512,
145
+ "imaginary_dir": "imaginarys",
146
+ "is_mu_law": false,
147
+ "lab_dir": "labs",
148
+ "label_dir": "labels",
149
+ "lexicon_path": "./text/lexicon/librispeech-lexicon.txt",
150
+ "linear_dir": "linears",
151
+ "log_amplitude_dir": "log_amplitudes",
152
+ "mcep_dir": "mcep",
153
+ "mel_dir": "mels",
154
+ "mel_extract_mode": "",
155
+ "mel_min_max_norm": false,
156
+ "min_level_db": -115,
157
+ "n_fft": 2048,
158
+ "n_mel": 128,
159
+ "num_silent_frames": 8,
160
+ "phase_dir": "phases",
161
+ "phone_dir": "phones",
162
+ "phone_energy_dir": "phone_energys",
163
+ "phone_extractor": "espeak",
164
+ "phone_pitch_dir": "phone_pitches",
165
+ "phone_seq_file": "phone_seq_file",
166
+ "pitch_bin": 256,
167
+ "pitch_dir": "pitches",
168
+ "pitch_extractor": "rmvpe",
169
+ "pitch_max": 1100.0,
170
+ "pitch_min": 50.0,
171
+ "pitch_norm": false,
172
+ "pitch_remove_outlier": false,
173
+ "processed_dir": "data/",
174
+ "raw_data": "raw_data",
175
+ "real_dir": "reals",
176
+ "ref_level_db": 20,
177
+ "sample_rate": 44100,
178
+ "spk2id": "singers.json",
179
+ "symbols_dict": "symbols.dict",
180
+ "train_file": "train.json",
181
+ "trim_fft_size": 512,
182
+ "trim_hop_size": 128,
183
+ "trim_silence": false,
184
+ "trim_top_db": 30,
185
+ "trimmed_wav_dir": "trimmed_wavs",
186
+ "use_amplitude_phase": false,
187
+ "use_audio": true,
188
+ "use_dur": false,
189
+ "use_emoid": false,
190
+ "use_frame_duration": false,
191
+ "use_frame_energy": false,
192
+ "use_frame_pitch": true,
193
+ "use_lab": false,
194
+ "use_label": false,
195
+ "use_linear": false,
196
+ "use_log_scale_energy": false,
197
+ "use_log_scale_pitch": false,
198
+ "use_mel": false,
199
+ "use_min_max_norm_mel": false,
200
+ "use_one_hot": false,
201
+ "use_phn_seq": false,
202
+ "use_phone": false,
203
+ "use_phone_duration": false,
204
+ "use_phone_energy": false,
205
+ "use_phone_pitch": false,
206
+ "use_spkid": false,
207
+ "use_text": false,
208
+ "use_uv": true,
209
+ "use_wav": false,
210
+ "use_wenet": false,
211
+ "utt2emo": "utt2emo",
212
+ "utt2spk": "utt2spk",
213
+ "uv_dir": "uvs",
214
+ "valid_file": "valid.json",
215
+ "wav_dir": "wavs",
216
+ "wenet_dir": "wenet",
217
+ "win_size": 2048,
218
+ },
219
+ "supported_model_type": [
220
+ "GANVocoder",
221
+ "Fastspeech2",
222
+ "DiffSVC",
223
+ "Transformer",
224
+ "EDM",
225
+ "Autotune",
226
+ "CD",
227
+ ],
228
+ "task_type": "afgen",
229
+ "train": {
230
+ "adamw": {
231
+ "adam_b1": 0.8,
232
+ "adam_b2": 0.99,
233
+ "lr": 0.0001,
234
+ },
235
+ "batch_size": 12,
236
+ "criterions": [
237
+ "feature",
238
+ "discriminator",
239
+ "generator",
240
+ "multimel",
241
+ ],
242
+ "dataloader": {
243
+ "num_worker": 32,
244
+ "persistent_workers": true,
245
+ "pin_memory": true,
246
+ "prefetch_factor": 4,
247
+ },
248
+ "ddp": true,
249
+ "exponential_lr": {
250
+ "lr_decay": 0.9999996,
251
+ },
252
+ "freeze_step": 0,
253
+ "gradient_accumulation_step": 1,
254
+ "keep_checkpoint_max": 5,
255
+ "keep_last": [
256
+ 3,
257
+ -1,
258
+ ],
259
+ "max_epoch": 1000000,
260
+ "max_steps": 1000000,
261
+ "multi_speaker_training": false,
262
+ "optimizer": "AdamW",
263
+ "random_seed": 114514,
264
+ "reducelronplateau": {
265
+ "factor": 0.8,
266
+ "min_lr": 0.0001,
267
+ "patience": 10,
268
+ },
269
+ "run_eval": [
270
+ true,
271
+ ],
272
+ "sampler": {
273
+ "drop_last": true,
274
+ "holistic_shuffle": true,
275
+ },
276
+ "save_checkpoint_stride": [
277
+ 1,
278
+ ],
279
+ "save_checkpoints_steps": 10000,
280
+ "save_summary_steps": 500,
281
+ "scheduler": "ReduceLROnPlateau",
282
+ "total_training_steps": 50000,
283
+ "tracker": [
284
+ "tensorboard",
285
+ ],
286
+ "valid_interval": 10000,
287
+ },
288
+ "use_custom_dataset": [],
289
+ }
pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/ckpts.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [
2
+ []
3
+ ]
pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62320716a66f7312584a41b48e5e87bf7969cc88a44cdafed46eb2c6129f9dfe
3
+ size 60989884
pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/model_1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a632cbd40f28de473e27eeabe28588e2bf31f9a35b04117e5167cb6df1a47842
3
+ size 118557612
pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/model_2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a84d21c317403e74097235e6fe65788f325025a68be5e33ff6976f26a9a29c6
3
+ size 263092072
pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/model_3.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6782f5789c6c7e5970784f3953099d1a2d4e7bee1d12b998d42c9df1f2fb5dba
3
+ size 5695720
pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/model_4.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16de04741e454da73f0632ac4538c0fe67580fe6145006e46c1941a87dbf49f4
3
+ size 1560888
pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f839cdb624e9d0f9aceda33851a1ee890883568ff1d44ca348f4e44e62f8bce
3
+ size 122237016
pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/optimizer_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db95ba2fe046e4f26d11ec46ec4dbd7f363e0a8ff0926a042f827982916439ba
3
+ size 777186848
pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bf5a3c748fcda1d56dd26339af118162fc3504e101d72688001523829fc8f12
3
+ size 15537
pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:934394bfacc36e22bcce372e9032346f4b98c69e1d2edda63e443a90665cfd1d
3
+ size 1401
pupuvocoder/checkpoint/epoch-0051_step-2553605_loss-62.135194/scheduler_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02f72e0f40c53d323e413693fe69c4f5f2f1930a766f5a6608ae4eaf06666a7c
3
+ size 1477
pupuvocoder_large/args.json ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_config": "egs/afgen/exp_config_afvocoder_base.json",
3
+ "dataset": [
4
+ "afgen_all_data",
5
+ ],
6
+ "exp_name": "afvocoder_large",
7
+ "inference": {
8
+ "batch_size": 1,
9
+ },
10
+ "log_dir": "/home/vocod/experiments/afgen",
11
+ "model": {
12
+ "afvocoder": {
13
+ "resblock": "1",
14
+ "resblock_dilation_sizes": [
15
+ [
16
+ 1,
17
+ 3,
18
+ 5,
19
+ ],
20
+ [
21
+ 1,
22
+ 3,
23
+ 5,
24
+ ],
25
+ [
26
+ 1,
27
+ 3,
28
+ 5,
29
+ ],
30
+ ],
31
+ "resblock_kernel_sizes": [
32
+ 3,
33
+ 7,
34
+ 11,
35
+ ],
36
+ "upsample_initial_channel": 1536,
37
+ "upsample_kernel_sizes": [
38
+ 16,
39
+ 16,
40
+ 4,
41
+ 4,
42
+ 4,
43
+ ],
44
+ "upsample_rates": [
45
+ 8,
46
+ 8,
47
+ 2,
48
+ 2,
49
+ 2,
50
+ ],
51
+ },
52
+ "discriminators": [
53
+ "msd",
54
+ "mpd",
55
+ "mssbstftd",
56
+ "mssbcqtd",
57
+ ],
58
+ "generator": "afvocoder",
59
+ "mpd": {
60
+ "discriminator_channel_mult_factor": 1,
61
+ "mpd_reshapes": [
62
+ 2,
63
+ 3,
64
+ 5,
65
+ 7,
66
+ 11,
67
+ 17,
68
+ 23,
69
+ 37,
70
+ ],
71
+ "use_spectral_norm": false,
72
+ },
73
+ "mssbcqtd": {
74
+ "bins_per_octaves": [
75
+ 24,
76
+ 36,
77
+ 48,
78
+ ],
79
+ "dilations": [
80
+ 1,
81
+ 2,
82
+ 4,
83
+ ],
84
+ "filters": 32,
85
+ "filters_scale": 1,
86
+ "hop_lengths": [
87
+ 1024,
88
+ 512,
89
+ 512,
90
+ ],
91
+ "in_channels": 1,
92
+ "max_filters": 1024,
93
+ "n_octaves": [
94
+ 10,
95
+ 10,
96
+ 10,
97
+ ],
98
+ "out_channels": 1,
99
+ },
100
+ "msstftd": {
101
+ "filters": 32,
102
+ },
103
+ },
104
+ "model_type": "AFGenVocoder",
105
+ "preprocess": {
106
+ "align_mel_duration": false,
107
+ "audio_dir": "audios",
108
+ "bits": 8,
109
+ "contentvec_dir": "contentvec",
110
+ "cut_mel_frame": 128,
111
+ "data_augment": false,
112
+ "dur_dir": "durs",
113
+ "duration_dir": "duration",
114
+ "emo2id": "emo2id.json",
115
+ "energy_dir": "energys",
116
+ "energy_extract_mode": "from_mel",
117
+ "energy_norm": false,
118
+ "energy_remove_outlier": false,
119
+ "extract_acoustic_token": false,
120
+ "extract_amplitude_phase": false,
121
+ "extract_audio": true,
122
+ "extract_contentvec_feature": false,
123
+ "extract_duration": false,
124
+ "extract_energy": false,
125
+ "extract_label": false,
126
+ "extract_linear_spec": false,
127
+ "extract_mcep": false,
128
+ "extract_mel": false,
129
+ "extract_mert_feature": false,
130
+ "extract_one_hot": false,
131
+ "extract_phone": false,
132
+ "extract_pitch": true,
133
+ "extract_uv": true,
134
+ "extract_wenet_feature": false,
135
+ "extract_whisper_feature": false,
136
+ "f0_max": 1975.5,
137
+ "f0_min": 32.7,
138
+ "file_lst": "file.lst",
139
+ "fmax": 22050,
140
+ "fmin": 0,
141
+ "hop_size": 512,
142
+ "imaginary_dir": "imaginarys",
143
+ "is_mu_law": false,
144
+ "lab_dir": "labs",
145
+ "label_dir": "labels",
146
+ "lexicon_path": "./text/lexicon/librispeech-lexicon.txt",
147
+ "linear_dir": "linears",
148
+ "log_amplitude_dir": "log_amplitudes",
149
+ "mcep_dir": "mcep",
150
+ "mel_dir": "mels",
151
+ "mel_extract_mode": "",
152
+ "mel_min_max_norm": false,
153
+ "min_level_db": -115,
154
+ "n_fft": 2048,
155
+ "n_mel": 128,
156
+ "num_silent_frames": 8,
157
+ "phase_dir": "phases",
158
+ "phone_dir": "phones",
159
+ "phone_energy_dir": "phone_energys",
160
+ "phone_extractor": "espeak",
161
+ "phone_pitch_dir": "phone_pitches",
162
+ "phone_seq_file": "phone_seq_file",
163
+ "pitch_bin": 256,
164
+ "pitch_dir": "pitches",
165
+ "pitch_extractor": "rmvpe",
166
+ "pitch_max": 1100.0,
167
+ "pitch_min": 50.0,
168
+ "pitch_norm": false,
169
+ "pitch_remove_outlier": false,
170
+ "processed_dir": "data/",
171
+ "raw_data": "raw_data",
172
+ "real_dir": "reals",
173
+ "ref_level_db": 20,
174
+ "sample_rate": 44100,
175
+ "spk2id": "singers.json",
176
+ "symbols_dict": "symbols.dict",
177
+ "train_file": "train.json",
178
+ "trim_fft_size": 512,
179
+ "trim_hop_size": 128,
180
+ "trim_silence": false,
181
+ "trim_top_db": 30,
182
+ "trimmed_wav_dir": "trimmed_wavs",
183
+ "use_amplitude_phase": false,
184
+ "use_audio": true,
185
+ "use_dur": false,
186
+ "use_emoid": false,
187
+ "use_frame_duration": false,
188
+ "use_frame_energy": false,
189
+ "use_frame_pitch": true,
190
+ "use_lab": false,
191
+ "use_label": false,
192
+ "use_linear": false,
193
+ "use_log_scale_energy": false,
194
+ "use_log_scale_pitch": false,
195
+ "use_mel": false,
196
+ "use_min_max_norm_mel": false,
197
+ "use_one_hot": false,
198
+ "use_phn_seq": false,
199
+ "use_phone": false,
200
+ "use_phone_duration": false,
201
+ "use_phone_energy": false,
202
+ "use_phone_pitch": false,
203
+ "use_spkid": false,
204
+ "use_text": false,
205
+ "use_uv": true,
206
+ "use_wav": false,
207
+ "use_wenet": false,
208
+ "utt2emo": "utt2emo",
209
+ "utt2spk": "utt2spk",
210
+ "uv_dir": "uvs",
211
+ "valid_file": "valid.json",
212
+ "wav_dir": "wavs",
213
+ "wenet_dir": "wenet",
214
+ "win_size": 2048,
215
+ },
216
+ "supported_model_type": [
217
+ "GANVocoder",
218
+ "Fastspeech2",
219
+ "DiffSVC",
220
+ "Transformer",
221
+ "EDM",
222
+ "Autotune",
223
+ "CD",
224
+ ],
225
+ "task_type": "afgen",
226
+ "train": {
227
+ "adamw": {
228
+ "adam_b1": 0.8,
229
+ "adam_b2": 0.99,
230
+ "lr": 0.0001,
231
+ },
232
+ "batch_size": 4,
233
+ "criterions": [
234
+ "feature",
235
+ "discriminator",
236
+ "generator",
237
+ "multimel",
238
+ ],
239
+ "dataloader": {
240
+ "num_worker": 32,
241
+ "persistent_workers": true,
242
+ "pin_memory": true,
243
+ "prefetch_factor": 4,
244
+ },
245
+ "ddp": true,
246
+ "exponential_lr": {
247
+ "lr_decay": 0.9999996,
248
+ },
249
+ "freeze_step": 0,
250
+ "gradient_accumulation_step": 1,
251
+ "keep_checkpoint_max": 5,
252
+ "keep_last": [
253
+ 3,
254
+ -1,
255
+ ],
256
+ "max_epoch": 1000000,
257
+ "max_steps": 1000000,
258
+ "multi_speaker_training": false,
259
+ "optimizer": "AdamW",
260
+ "random_seed": 114514,
261
+ "reducelronplateau": {
262
+ "factor": 0.8,
263
+ "min_lr": 0.0001,
264
+ "patience": 10,
265
+ },
266
+ "run_eval": [
267
+ true,
268
+ ],
269
+ "sampler": {
270
+ "drop_last": true,
271
+ "holistic_shuffle": true,
272
+ },
273
+ "save_checkpoint_stride": [
274
+ 1,
275
+ ],
276
+ "save_checkpoints_steps": 10000,
277
+ "save_summary_steps": 500,
278
+ "scheduler": "ReduceLROnPlateau",
279
+ "total_training_steps": 50000,
280
+ "tracker": [
281
+ "tensorboard",
282
+ ],
283
+ "valid_interval": 10000,
284
+ },
285
+ "use_custom_dataset": [],
286
+ }
pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/ckpts.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [
2
+ []
3
+ ]
pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf9a439dd1c17719a3a5f2c44ac209a08391b234a8d1e56fb8dcae0fc8904b43
3
+ size 536315052
pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/model_1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d54df842a9676f5b9bde274fb62d4629f751aa87aafcd673c66c99a119772d4
3
+ size 118557612
pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/model_2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c8395b3011862e165e3020ec40cb9959de475fc35b1b447436e563d0b94f74d
3
+ size 263092072
pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/model_3.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24f761b5c32671a1a3beac5574934ac5adc5769d1c6d89d2249cd40e1762a84f
3
+ size 5695720
pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/model_4.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3292abaf7d17007de38fb6fa765e16707148dff97814eb2f8cd83354f12e532d
3
+ size 1560888
pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fd47dffafc21204ac4ddb1e19ab0db318453b335715440e883f8e608a2ce7e6
3
+ size 1072885144
pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/optimizer_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56bbac969b78adfb0cd9e8e6ecf899f15cb70df11a0b4fe53e3a6bb99b5b2af1
3
+ size 777186848
pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07f64d00ee76e090a6c362222c254f35f622a8d399a79380772ddfaaed44f236
3
+ size 16449
pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9a058e0336350ebbda962a13b2283b707db64849ded3671653eac8424de7579
3
+ size 1401
pupuvocoder_large/checkpoint/epoch-0026_step-2315282_loss-46.095750/scheduler_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faed8f23fe895621c9fa0a495358083fdd927036b6f7429166a2ab86f5d08ad7
3
+ size 1477