Tino3141 committed on
Commit
15aa140
·
verified ·
1 Parent(s): 07717c5

Upload modelMusicSep.yaml

Browse files
Files changed (1) hide show
  1. modelMusicSep.yaml +154 -0
modelMusicSep.yaml ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project: "[Project] SepReformer" ### Don't change
2
+ notes: "SepReformer final version" ### Insert changes (please write details!)
3
+ # ------------------------------------------------------------------------------------------------------------------------------ #
4
+ config:
5
+ # ------------------------------------------------------------ #
6
+ dataset:
7
+ max_len: 96000
8
+ sampling_rate: 24000
9
+ type: "ja_capella_power"
10
+ train: "train"
11
+ val: "test"
12
+ test: "test"
13
+ # ------------------------------------------------------------ #
14
+ dataloader:
15
+ batch_size: 2
16
+ pin_memory: false
17
+ num_workers: 4
18
+ drop_last: false
19
+ shuffle: true
20
+ prefetch_factor: 10
21
+ # ------------------------------------------------------------ #
22
+ model:
23
+ num_stages: &var_model_num_stages 3 # R
24
+ num_spks: &var_model_num_spks 7
25
+ module_audio_enc:
26
+ in_channels: 1
27
+ out_channels: &var_model_audio_enc_out_channels 256
28
+ kernel_size: &var_model_audio_enc_kernel_size 32 # L
29
+ stride: &var_model_audio_enc_stride 8 # S
30
+ groups: 1
31
+ bias: false
32
+ module_feature_projector:
33
+ num_channels: *var_model_audio_enc_out_channels
34
+ in_channels: *var_model_audio_enc_out_channels
35
+ out_channels: &feature_projector_out_channels 128 # F
36
+ kernel_size: 1
37
+ bias: false
38
+ module_separator:
39
+ num_stages: *var_model_num_stages
40
+ relative_positional_encoding:
41
+ in_channels: *feature_projector_out_channels
42
+ num_heads: 8
43
+ maxlen: 2000
44
+ embed_v: false
45
+ enc_stage:
46
+ num_patterns: 2
47
+ global_blocks:
48
+ in_channels: *feature_projector_out_channels
49
+ num_mha_heads: 8
50
+ dropout_rate: 0.05
51
+ local_blocks:
52
+ in_channels: *feature_projector_out_channels
53
+ kernel_size: 65
54
+ dropout_rate: 0.05
55
+ down_conv_layer:
56
+ in_channels: *feature_projector_out_channels
57
+ samp_kernel_size: &var_model_samp_kernel_size 5
58
+ spk_split_stage:
59
+ in_channels: *feature_projector_out_channels
60
+ num_spks: *var_model_num_spks
61
+ simple_fusion:
62
+ out_channels: *feature_projector_out_channels
63
+ dec_stage:
64
+ num_spks: *var_model_num_spks
65
+ num_patterns: 3
66
+ global_blocks:
67
+ in_channels: *feature_projector_out_channels
68
+ num_mha_heads: 8
69
+ dropout_rate: 0.05
70
+ local_blocks:
71
+ in_channels: *feature_projector_out_channels
72
+ kernel_size: 65
73
+ dropout_rate: 0.05
74
+ spk_attention:
75
+ in_channels: *feature_projector_out_channels
76
+ num_mha_heads: 8
77
+ dropout_rate: 0.05
78
+ module_output_layer:
79
+ in_channels: *var_model_audio_enc_out_channels
80
+ out_channels: *feature_projector_out_channels
81
+ num_spks: *var_model_num_spks
82
+
83
+ module_audio_dec:
84
+ in_channels: *var_model_audio_enc_out_channels
85
+ out_channels: 1
86
+ kernel_size: *var_model_audio_enc_kernel_size
87
+ stride: *var_model_audio_enc_stride
88
+ bias: false
89
+ losses:
90
+ PIT_SPECTRAL:
91
+ lambda: 0.3
92
+ weights: [1, 1, 1]
93
+ window_lengths: [256, 512, 1024]
94
+ hop_lengths: [64, 128, 256]
95
+ PIT_MEL:
96
+ # overall loss weight
97
+ lambda: 0.7
98
+
99
+ # equal weight for each of the 7 scales
100
+ weights: [1, 1, 1, 1, 1, 1, 1]
101
+
102
+ # number of mel bins at each scale (very coarse → very fine)
103
+ mels: [5, 10, 20, 40, 80, 160, 320]
104
+
105
+ # analysis window lengths (in samples)
106
+ window_lengths: [32, 64, 128, 256, 512, 1024, 2048]
107
+
108
+ # hop lengths (each hop is 25% of its window length, i.e. 75% overlap)
109
+ hop_lengths: [8, 16, 32, 64, 128, 256, 512]
110
+
111
+ # all start from 0 Hz
112
+ mel_fmin: [0, 0, 0, 0, 0, 0, 0]
113
+
114
+ # all go up to 8 kHz (Nyquist for 16 kHz audio; note dataset.sampling_rate is 24000, whose Nyquist is 12 kHz)
115
+ mel_fmax: [8000, 8000, 8000, 8000, 8000, 8000, 8000]
116
+ PIT_L1:
117
+ lambda: 1
118
+ loss_g:
119
+ lambda: 1
120
+ loss_f:
121
+ lambda: 1
122
+ # ------------------------------------------------------------ #
123
+ discriminator:
124
+ rates: [2, 3, 5, 7, 11]
125
+ periods: [2, 3, 5, 7, 11]
126
+ fft_sizes: [2048, 1024, 512]
127
+ # ------------------------------------------------------------ #
128
+ engine:
129
+ ckpt_path_model: ""
130
+ max_epochs: 200
131
+ accum_steps: 10
132
+ gpuid: "0" ### "0"(single-gpu) or "0, 1" (multi-gpu)
133
+ gpu_ids: [0,1,2,3]
134
+ mvn: false
135
+ clip_norm: 5
136
+ start_scheduling: 50
137
+ test_epochs: [100, 120, 150, 170]
138
+ learning_rate: 5e-4
139
+ learning_rate_disc: 2e-4
140
+ weight_decay: 1.0e-2
141
+ log_interval: 20
142
+ ckpt_interval: 1000
143
+ seed: 42
144
+ losses:
145
+ PIT_MEL: 0.7
146
+ PIT_STFT: 0.3
147
+ PIT_L1: 1
148
+ loss_g: 1
149
+ loss_f: 1
150
+ PIT_SDR: 1
151
+ evaluation_metrics:
152
+ PESQ:
153
+ mode: "nb"
154
+ sr: 8000