huseinzol05 committed on
Commit
46308f1
·
verified ·
1 Parent(s): 75ff23d

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +99 -0
config.yaml ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed_everything: 3407
2
+
3
+ data:
4
+ class_path: unicodec.decoder.dataset.VocosDataModule
5
+ init_args:
6
+ train_params:
7
+ filelist_path: ./data/train/finetune_data
8
+ sampling_rate: 24000
9
+ num_samples: 240000
10
+ batch_size: 10 #18
11
+ num_workers: 8
12
+
13
+ val_params:
14
+ filelist_path: ./data/infer/large_data_domain
15
+ sampling_rate: 24000
16
+ num_samples: 240000
17
+ batch_size: 5 # 10
18
+ num_workers: 8
19
+
20
+ model:
21
+ class_path: unicodec.decoder.experiment.VocosEncodecExp
22
+ init_args:
23
+ sample_rate: 24000
24
+ initial_learning_rate: 5e-5
25
+ mel_loss_coeff: 450
26
+ mrd_loss_coeff: 1.0
27
+ # ctr_loss_coeff: 0.001
28
+ num_warmup_steps: 5000 # Optimizers warmup steps
29
+ pretrain_mel_steps: 0 # 0 means GAN objective from the first iteration
30
+ use_ema: false
31
+
32
+ # automatic evaluation
33
+ evaluate_utmos: true
34
+ evaluate_pesq: true
35
+ evaluate_periodicty: true
36
+
37
+ resume: true
38
+ resume_config:
39
+ resume_model:
40
+ feature_extractor:
41
+ class_path: unicodec.decoder.feature_extractors.EncodecFeatures
42
+ init_args:
43
+ encodec_model: encodec_24khz
44
+ bandwidths: [6.6, 6.6, 6.6, 6.6]
45
+ train_codebooks: true
46
+ num_quantizers: 1
47
+ dowmsamples: [8, 5, 4, 2]
48
+ vq_bins: 16384
49
+ vq_kmeans: 200
50
+ use_transformer: true
51
+ mask: false
52
+
53
+ backbone:
54
+ class_path: unicodec.decoder.models.VocosBackbone
55
+ init_args:
56
+ input_channels: 512
57
+ dim: 768
58
+ intermediate_dim: 2304
59
+ num_layers: 12
60
+ adanorm_num_embeddings: 4 # len(bandwidths)
61
+
62
+ head:
63
+ class_path: unicodec.decoder.heads.ISTFTHead
64
+ init_args:
65
+ dim: 768
66
+ n_fft: 1280 #4*hop_length
67
+ hop_length: 320 # 8*5*4*2
68
+ padding: same
69
+
70
+ trainer:
71
+ logger:
72
+ class_path: pytorch_lightning.loggers.TensorBoardLogger
73
+ init_args:
74
+ save_dir: /debug/
75
+ callbacks:
76
+ - class_path: pytorch_lightning.callbacks.LearningRateMonitor
77
+ - class_path: pytorch_lightning.callbacks.ModelSummary
78
+ init_args:
79
+ max_depth: 2
80
+ - class_path: pytorch_lightning.callbacks.ModelCheckpoint
81
+ init_args:
82
+ monitor: val_loss
83
+ filename: vocos_checkpoint_{epoch}_{step}_{val_loss:.4f}
84
+ save_top_k: 100
85
+ save_last: true
86
+ # every_n_train_steps: 5000
87
+ - class_path: unicodec.decoder.helpers.GradNormCallback
88
+
89
+ # Lightning calculates max_steps across all optimizer steps (rather than number of batches)
90
+ # This equals to 1M steps per generator and 1M per discriminator
91
+ max_steps: 20000000
92
+ # You might want to limit val batches when evaluating all the metrics, as they are time-consuming
93
+ limit_val_batches: 100
94
+ accelerator: gpu
95
+ strategy: ddp
96
+ devices: [0,1,2,3,4,5,6,7]
97
+ num_nodes: 4
98
+ log_every_n_steps: 200
99
+ # val_check_interval: 5000