Dahee96 committed on
Commit
240cb14
·
1 Parent(s): 522dd4a

Add TF-GridNet model trained on MS-SNSD

Browse files
Files changed (2) hide show
  1. config.yaml +235 -0
  2. valid.loss.best.pth +3 -0
config.yaml ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: ./conf/tuning/train_enh_tfgridnet.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: chunk
7
+ valid_iterator_type: null
8
+ output_dir: exp/enh_train_enh_tfgridnet_raw
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 8
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: 2
16
+ dist_rank: 0
17
+ local_rank: 0
18
+ dist_master_addr: localhost
19
+ dist_master_port: 53661
20
+ dist_launcher: null
21
+ multiprocessing_distributed: true
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ gradient_as_bucket_view: true
27
+ ddp_comm_hook: null
28
+ cudnn_enabled: true
29
+ cudnn_benchmark: false
30
+ cudnn_deterministic: true
31
+ use_tf32: false
32
+ collect_stats: false
33
+ write_collected_feats: false
34
+ max_epoch: 35
35
+ patience: 5
36
+ val_scheduler_criterion:
37
+ - valid
38
+ - loss
39
+ early_stopping_criterion:
40
+ - valid
41
+ - loss
42
+ - min
43
+ best_model_criterion:
44
+ - - valid
45
+ - si_snr
46
+ - max
47
+ - - valid
48
+ - loss
49
+ - min
50
+ keep_nbest_models: 5
51
+ nbest_averaging_interval: 0
52
+ grad_clip: 5.0
53
+ grad_clip_type: 2.0
54
+ grad_noise: false
55
+ accum_grad: 1
56
+ no_forward_run: false
57
+ resume: true
58
+ train_dtype: float32
59
+ use_amp: false
60
+ log_interval: null
61
+ use_matplotlib: true
62
+ use_tensorboard: true
63
+ create_graph_in_tensorboard: false
64
+ use_wandb: false
65
+ wandb_project: null
66
+ wandb_id: null
67
+ wandb_entity: null
68
+ wandb_name: null
69
+ wandb_model_log_interval: -1
70
+ detect_anomaly: false
71
+ use_adapter: false
72
+ adapter: lora
73
+ save_strategy: all
74
+ adapter_conf: {}
75
+ pretrain_path: null
76
+ init_param: []
77
+ ignore_init_mismatch: false
78
+ freeze_param: []
79
+ num_iters_per_epoch: 5000
80
+ batch_size: 4
81
+ valid_batch_size: null
82
+ batch_bins: 1000000
83
+ valid_batch_bins: null
84
+ category_sample_size: 10
85
+ upsampling_factor: 0.5
86
+ category_upsampling_factor: 0.5
87
+ dataset_upsampling_factor: 0.5
88
+ dataset_scaling_factor: 1.2
89
+ max_batch_size: null
90
+ min_batch_size: 1
91
+ train_shape_file:
92
+ - exp/enh_stats_16k/train/speech_mix_shape
93
+ - exp/enh_stats_16k/train/speech_ref1_shape
94
+ - exp/enh_stats_16k/train/noise_ref1_shape
95
+ valid_shape_file:
96
+ - exp/enh_stats_16k/valid/speech_mix_shape
97
+ - exp/enh_stats_16k/valid/speech_ref1_shape
98
+ - exp/enh_stats_16k/valid/noise_ref1_shape
99
+ batch_type: folded
100
+ valid_batch_type: null
101
+ fold_length:
102
+ - 80000
103
+ - 80000
104
+ - 80000
105
+ sort_in_batch: descending
106
+ shuffle_within_batch: false
107
+ sort_batch: descending
108
+ multiple_iterator: false
109
+ chunk_length: 48000
110
+ chunk_shift_ratio: 0.5
111
+ num_cache_chunks: 1024
112
+ chunk_excluded_key_prefixes: []
113
+ chunk_default_fs: null
114
+ chunk_max_abs_length: null
115
+ chunk_discard_short_samples: true
116
+ train_data_path_and_name_and_type:
117
+ - - dump/raw/tr_ms_snsd/wav.scp
118
+ - speech_mix
119
+ - sound
120
+ - - dump/raw/tr_ms_snsd/spk1.scp
121
+ - speech_ref1
122
+ - sound
123
+ - - dump/raw/tr_ms_snsd/noise1.scp
124
+ - noise_ref1
125
+ - sound
126
+ valid_data_path_and_name_and_type:
127
+ - - dump/raw/cv_ms_snsd/wav.scp
128
+ - speech_mix
129
+ - sound
130
+ - - dump/raw/cv_ms_snsd/spk1.scp
131
+ - speech_ref1
132
+ - sound
133
+ - - dump/raw/cv_ms_snsd/noise1.scp
134
+ - noise_ref1
135
+ - sound
136
+ multi_task_dataset: false
137
+ allow_variable_data_keys: false
138
+ max_cache_size: 0.0
139
+ max_cache_fd: 32
140
+ allow_multi_rates: false
141
+ valid_max_cache_size: null
142
+ exclude_weight_decay: false
143
+ exclude_weight_decay_conf: {}
144
+ optim: adam
145
+ optim_conf:
146
+ lr: 0.001
147
+ eps: 1.0e-08
148
+ weight_decay: 0
149
+ scheduler: reducelronplateau
150
+ scheduler_conf:
151
+ mode: min
152
+ factor: 0.7
153
+ patience: 1
154
+ init: xavier_uniform
155
+ model_conf:
156
+ stft_consistency: false
157
+ loss_type: mask_mse
158
+ mask_type: null
159
+ flexible_numspk: false
160
+ extract_feats_in_collect_stats: false
161
+ normalize_variance: false
162
+ normalize_variance_per_ch: false
163
+ categories: []
164
+ category_weights: []
165
+ always_forward_in_48k: false
166
+ criterions:
167
+ - name: mr_l1_tfd
168
+ conf:
169
+ window_sz:
170
+ - 256
171
+ - 512
172
+ - 768
173
+ - 1024
174
+ hop_sz: null
175
+ eps: 1.0e-08
176
+ time_domain_weight: 0.5
177
+ wrapper: fixed_order
178
+ wrapper_conf:
179
+ weight: 1.0
180
+ - name: si_snr
181
+ conf:
182
+ eps: 1.0e-07
183
+ wrapper: fixed_order
184
+ wrapper_conf:
185
+ weight: 0.0
186
+ speech_volume_normalize: null
187
+ rir_scp: null
188
+ rir_apply_prob: 1.0
189
+ noise_scp: null
190
+ noise_apply_prob: 1.0
191
+ noise_db_range: '13_15'
192
+ short_noise_thres: 0.5
193
+ use_reverberant_ref: false
194
+ num_spk: 1
195
+ num_noise_type: 1
196
+ sample_rate: 8000
197
+ force_single_channel: false
198
+ channel_reordering: false
199
+ categories: []
200
+ speech_segment: null
201
+ avoid_allzero_segment: true
202
+ flexible_numspk: false
203
+ dynamic_mixing: false
204
+ utt2spk: null
205
+ dynamic_mixing_gain_db: 0.0
206
+ encoder: same
207
+ encoder_conf: {}
208
+ separator: tfgridnet
209
+ separator_conf:
210
+ n_srcs: 1
211
+ n_fft: 512
212
+ stride: 256
213
+ window: hann
214
+ n_imics: 1
215
+ n_layers: 4
216
+ lstm_hidden_units: 128
217
+ attn_n_head: 4
218
+ attn_approx_qk_dim: 512
219
+ emb_dim: 32
220
+ emb_ks: 4
221
+ emb_hs: 4
222
+ activation: prelu
223
+ eps: 1.0e-05
224
+ decoder: same
225
+ decoder_conf: {}
226
+ mask_module: multi_mask
227
+ mask_module_conf: {}
228
+ preprocessor: null
229
+ preprocessor_conf: {}
230
+ diffusion_model: null
231
+ diffusion_model_conf: {}
232
+ required:
233
+ - output_dir
234
+ version: '202511'
235
+ distributed: true
valid.loss.best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a680263b1339d86ddc5d168a5f985fbd0b3e658d7711971f3c03fe2d23af7bac
3
+ size 10332955