niobures commited on
Commit
3514ce4
·
verified ·
1 Parent(s): bc4180d

DPRNN (code, models, paper)

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. Dual-Path Transformer Network. Direct Context-Aware Modeling for End-to-End Monaural Speech Separation.pdf +3 -0
  3. code/DPTNet [Anyuan96] +1 Dual-Path-Transformer-Network-PyTorch.zip +3 -0
  4. code/Dual-Path-Transformer-Network-PyTorch [ramincre] +4.zip +3 -0
  5. code/Dual-Path-Transformer-Network-PyTorch.zip +3 -0
  6. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/.gitattributes +28 -0
  7. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/README.md +251 -0
  8. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_stats_8k/train/feats_stats.npz +3 -0
  9. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/96epoch.pth +3 -0
  10. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/RESULTS.md +20 -0
  11. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/config.yaml +147 -0
  12. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/backward_time.png +0 -0
  13. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/forward_time.png +0 -0
  14. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/gpu_max_cached_mem_GB.png +0 -0
  15. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/iter_time.png +0 -0
  16. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/loss.png +0 -0
  17. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/optim0_lr0.png +0 -0
  18. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/optim_step_time.png +0 -0
  19. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/si_snr.png +0 -0
  20. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/train_time.png +0 -0
  21. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/meta.yaml +8 -0
  22. models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/source.txt +1 -0
  23. models/DPRNNTasNet-ks16_WHAM_sepclean/.gitattributes +8 -0
  24. models/DPRNNTasNet-ks16_WHAM_sepclean/README.md +107 -0
  25. models/DPRNNTasNet-ks16_WHAM_sepclean/pytorch_model.bin +3 -0
  26. models/DPRNNTasNet-ks16_WHAM_sepclean/source.txt +1 -0
  27. models/DPRNNTasNet-ks2_Libri1Mix_enhsingle_16k/.gitattributes +8 -0
  28. models/DPRNNTasNet-ks2_Libri1Mix_enhsingle_16k/README.md +82 -0
  29. models/DPRNNTasNet-ks2_Libri1Mix_enhsingle_16k/pytorch_model.bin +3 -0
  30. models/DPRNNTasNet-ks2_Libri1Mix_enhsingle_16k/source.txt +1 -0
  31. models/DPRNNTasNet-ks2_WHAM_sepclean/.gitattributes +8 -0
  32. models/DPRNNTasNet-ks2_WHAM_sepclean/README.md +84 -0
  33. models/DPRNNTasNet-ks2_WHAM_sepclean/pytorch_model.bin +3 -0
  34. models/DPRNNTasNet-ks2_WHAM_sepclean/source.txt +1 -0
  35. models/DPRNNTasNet_LibriMix_sepclean/.gitattributes +35 -0
  36. models/DPRNNTasNet_LibriMix_sepclean/epoch=9-step=1000.ckpt +3 -0
  37. models/DPRNNTasNet_LibriMix_sepclean/source.txt +1 -0
  38. models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/.gitattributes +27 -0
  39. models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/README.md +251 -0
  40. models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_stats_16k/train/feats_stats.npz +3 -0
  41. models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/299epoch.pth +3 -0
  42. models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/RESULTS.md +20 -0
  43. models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/config.yaml +149 -0
  44. models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/images/backward_time.png +0 -0
  45. models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/images/forward_time.png +0 -0
  46. models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/images/gpu_max_cached_mem_GB.png +0 -0
  47. models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/images/iter_time.png +0 -0
  48. models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/images/loss.png +0 -0
  49. models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/images/optim0_lr0.png +0 -0
  50. models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/images/optim_step_time.png +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Dual-Path[[:space:]]Transformer[[:space:]]Network.[[:space:]]Direct[[:space:]]Context-Aware[[:space:]]Modeling[[:space:]]for[[:space:]]End-to-End[[:space:]]Monaural[[:space:]]Speech[[:space:]]Separation.pdf filter=lfs diff=lfs merge=lfs -text
Dual-Path Transformer Network. Direct Context-Aware Modeling for End-to-End Monaural Speech Separation.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff7d3877cae709c58afa97a92fa7acc5b8529b68e6aebdc171625c3021008044
3
+ size 478343
code/DPTNet [Anyuan96] +1 Dual-Path-Transformer-Network-PyTorch.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce7da2cfb5c0e372222d04289ab7433c9ffbc1359e6615862141ea6b82704d78
3
+ size 4735940
code/Dual-Path-Transformer-Network-PyTorch [ramincre] +4.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b173491858a88fcdb3e10c971e060005f543ba74ee5c75ec77b4908489e5c0f
3
+ size 5964111
code/Dual-Path-Transformer-Network-PyTorch.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:721fae34b2ba68815ab05c4bd3dc3c7fa9c882e9f092f43748b15d22cfe7178d
3
+ size 4735712
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/.gitattributes ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.wasm filter=lfs diff=lfs merge=lfs -text
25
+ *.xz filter=lfs diff=lfs merge=lfs -text
26
+ *.zip filter=lfs diff=lfs merge=lfs -text
27
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
28
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/README.md ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - audio-to-audio
6
+ language: en
7
+ datasets:
8
+ - wsj0_2mix
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 ENH model
13
+
14
+ ### `lichenda/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet`
15
+
16
+ This model was trained by LiChenda using wsj0_2mix recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ Imported from [zenodo](https://zenodo.org/record/4688000).
19
+
20
+ ### Demo: How to use in ESPnet2
21
+
22
+ ```bash
23
+ cd espnet
24
+ git checkout 54919e2529d6f58f4550d4a72960f57b83f66dc9
25
+ pip install -e .
26
+ cd egs2/wsj0_2mix/enh1
27
+ ./run.sh --skip_data_prep false --skip_train true --download_model lichenda/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet
28
+ ```
29
+
30
+ <!-- Generated by ./scripts/utils/show_enh_score.sh -->
31
+ # RESULTS
32
+ ## Environments
33
+ - date: `Thu Apr 15 00:03:19 CST 2021`
34
+ - python version: `3.7.10 (default, Feb 26 2021, 18:47:35) [GCC 7.3.0]`
35
+ - espnet version: `espnet 0.9.8`
36
+ - pytorch version: `pytorch 1.5.0`
37
+ - Git hash: `2aa2f151b5929dc9ffa4df39a8d8c26ca4dbdb85`
38
+ - Commit date: `Tue Mar 30 09:08:27 2021 +0900`
39
+
40
+
41
+ ## enh_train_enh_dprnn_tasnet_raw
42
+
43
+ config: conf/tuning/train_enh_dprnn_tasnet.yaml
44
+
45
+ |dataset|STOI|SAR|SDR|SIR|
46
+ |---|---|---|---|---|
47
+ |enhanced_cv_min_8k|0.960037|19.0476|18.5438|29.1591|
48
+ |enhanced_tt_min_8k|0.968376|18.8209|18.2925|28.929|
49
+
50
+ ## ENH config
51
+
52
+ <details><summary>expand</summary>
53
+
54
+ ```
55
+ config: conf/tuning/train_enh_dprnn_tasnet.yaml
56
+ print_config: false
57
+ log_level: INFO
58
+ dry_run: false
59
+ iterator_type: chunk
60
+ output_dir: exp/enh_train_enh_dprnn_tasnet_raw
61
+ ngpu: 1
62
+ seed: 0
63
+ num_workers: 4
64
+ num_att_plot: 3
65
+ dist_backend: nccl
66
+ dist_init_method: env://
67
+ dist_world_size: 4
68
+ dist_rank: 0
69
+ local_rank: 0
70
+ dist_master_addr: localhost
71
+ dist_master_port: 45126
72
+ dist_launcher: null
73
+ multiprocessing_distributed: true
74
+ unused_parameters: false
75
+ sharded_ddp: false
76
+ cudnn_enabled: true
77
+ cudnn_benchmark: false
78
+ cudnn_deterministic: true
79
+ collect_stats: false
80
+ write_collected_feats: false
81
+ max_epoch: 150
82
+ patience: 4
83
+ val_scheduler_criterion:
84
+ - valid
85
+ - loss
86
+ early_stopping_criterion:
87
+ - valid
88
+ - loss
89
+ - min
90
+ best_model_criterion:
91
+ - - valid
92
+ - si_snr
93
+ - max
94
+ - - valid
95
+ - loss
96
+ - min
97
+ keep_nbest_models: 1
98
+ grad_clip: 5.0
99
+ grad_clip_type: 2.0
100
+ grad_noise: false
101
+ accum_grad: 1
102
+ no_forward_run: false
103
+ resume: true
104
+ train_dtype: float32
105
+ use_amp: false
106
+ log_interval: null
107
+ use_tensorboard: true
108
+ use_wandb: false
109
+ wandb_project: null
110
+ wandb_id: null
111
+ detect_anomaly: false
112
+ pretrain_path: null
113
+ init_param: []
114
+ freeze_param: []
115
+ num_iters_per_epoch: null
116
+ batch_size: 4
117
+ valid_batch_size: null
118
+ batch_bins: 1000000
119
+ valid_batch_bins: null
120
+ train_shape_file:
121
+ - exp/enh_stats_8k/train/speech_mix_shape
122
+ - exp/enh_stats_8k/train/speech_ref1_shape
123
+ - exp/enh_stats_8k/train/speech_ref2_shape
124
+ valid_shape_file:
125
+ - exp/enh_stats_8k/valid/speech_mix_shape
126
+ - exp/enh_stats_8k/valid/speech_ref1_shape
127
+ - exp/enh_stats_8k/valid/speech_ref2_shape
128
+ batch_type: folded
129
+ valid_batch_type: null
130
+ fold_length:
131
+ - 80000
132
+ - 80000
133
+ - 80000
134
+ sort_in_batch: descending
135
+ sort_batch: descending
136
+ multiple_iterator: false
137
+ chunk_length: 32000
138
+ chunk_shift_ratio: 0.5
139
+ num_cache_chunks: 1024
140
+ train_data_path_and_name_and_type:
141
+ - - dump/raw/tr_min_8k/wav.scp
142
+ - speech_mix
143
+ - sound
144
+ - - dump/raw/tr_min_8k/spk1.scp
145
+ - speech_ref1
146
+ - sound
147
+ - - dump/raw/tr_min_8k/spk2.scp
148
+ - speech_ref2
149
+ - sound
150
+ valid_data_path_and_name_and_type:
151
+ - - dump/raw/cv_min_8k/wav.scp
152
+ - speech_mix
153
+ - sound
154
+ - - dump/raw/cv_min_8k/spk1.scp
155
+ - speech_ref1
156
+ - sound
157
+ - - dump/raw/cv_min_8k/spk2.scp
158
+ - speech_ref2
159
+ - sound
160
+ allow_variable_data_keys: false
161
+ max_cache_size: 0.0
162
+ max_cache_fd: 32
163
+ valid_max_cache_size: null
164
+ optim: adam
165
+ optim_conf:
166
+ lr: 0.001
167
+ eps: 1.0e-08
168
+ weight_decay: 0
169
+ scheduler: reducelronplateau
170
+ scheduler_conf:
171
+ mode: min
172
+ factor: 0.7
173
+ patience: 1
174
+ init: xavier_uniform
175
+ model_conf:
176
+ loss_type: si_snr
177
+ use_preprocessor: false
178
+ encoder: conv
179
+ encoder_conf:
180
+ channel: 64
181
+ kernel_size: 2
182
+ stride: 1
183
+ separator: dprnn
184
+ separator_conf:
185
+ num_spk: 2
186
+ layer: 6
187
+ rnn_type: lstm
188
+ bidirectional: true
189
+ nonlinear: relu
190
+ unit: 128
191
+ segment_size: 250
192
+ dropout: 0.1
193
+ decoder: conv
194
+ decoder_conf:
195
+ channel: 64
196
+ kernel_size: 2
197
+ stride: 1
198
+ required:
199
+ - output_dir
200
+ version: 0.9.8
201
+ distributed: true
202
+ ```
203
+
204
+ </details>
205
+
206
+
207
+
208
+ ### Citing ESPnet
209
+
210
+ ```BibTex
211
+ @inproceedings{watanabe2018espnet,
212
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
213
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
214
+ year={2018},
215
+ booktitle={Proceedings of Interspeech},
216
+ pages={2207--2211},
217
+ doi={10.21437/Interspeech.2018-1456},
218
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
219
+ }
220
+
221
+
222
+ @inproceedings{ESPnet-SE,
223
+ author = {Chenda Li and Jing Shi and Wangyou Zhang and Aswin Shanmugam Subramanian and Xuankai Chang and
224
+ Naoyuki Kamo and Moto Hira and Tomoki Hayashi and Christoph B{"{o}}ddeker and Zhuo Chen and Shinji Watanabe},
225
+ title = {ESPnet-SE: End-To-End Speech Enhancement and Separation Toolkit Designed for {ASR} Integration},
226
+ booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
227
+ pages = {785--792},
228
+ publisher = {{IEEE}},
229
+ year = {2021},
230
+ url = {https://doi.org/10.1109/SLT48900.2021.9383615},
231
+ doi = {10.1109/SLT48900.2021.9383615},
232
+ timestamp = {Mon, 12 Apr 2021 17:08:59 +0200},
233
+ biburl = {https://dblp.org/rec/conf/slt/Li0ZSCKHHBC021.bib},
234
+ bibsource = {dblp computer science bibliography, https://dblp.org}
235
+ }
236
+
237
+
238
+ ```
239
+
240
+ or arXiv:
241
+
242
+ ```bibtex
243
+ @misc{watanabe2018espnet,
244
+ title={ESPnet: End-to-End Speech Processing Toolkit},
245
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
246
+ year={2018},
247
+ eprint={1804.00015},
248
+ archivePrefix={arXiv},
249
+ primaryClass={cs.CL}
250
+ }
251
+ ```
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_stats_8k/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d890c44023968991b362b31f39fcecc453f0d619071befb36205d610e8aabb8b
3
+ size 778
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/96epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:338bc12bf9db30b178247f8b0b3ecbc24b1eff7739c4771f01aaaf1d456c5212
3
+ size 10393743
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/RESULTS.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by ./scripts/utils/show_enh_score.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Thu Apr 15 00:03:19 CST 2021`
5
+ - python version: `3.7.10 (default, Feb 26 2021, 18:47:35) [GCC 7.3.0]`
6
+ - espnet version: `espnet 0.9.8`
7
+ - pytorch version: `pytorch 1.5.0`
8
+ - Git hash: `2aa2f151b5929dc9ffa4df39a8d8c26ca4dbdb85`
9
+ - Commit date: `Tue Mar 30 09:08:27 2021 +0900`
10
+
11
+
12
+ ## enh_train_enh_dprnn_tasnet_raw
13
+
14
+ config: conf/tuning/train_enh_dprnn_tasnet.yaml
15
+
16
+ |dataset|STOI|SAR|SDR|SIR|
17
+ |---|---|---|---|---|
18
+ |enhanced_cv_min_8k|0.960037|19.0476|18.5438|29.1591|
19
+ |enhanced_tt_min_8k|0.968376|18.8209|18.2925|28.929|
20
+
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/config.yaml ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_enh_dprnn_tasnet.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: chunk
6
+ output_dir: exp/enh_train_enh_dprnn_tasnet_raw
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 4
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 4
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 45126
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 150
28
+ patience: 4
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - si_snr
39
+ - max
40
+ - - valid
41
+ - loss
42
+ - min
43
+ keep_nbest_models: 1
44
+ grad_clip: 5.0
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 1
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: false
52
+ log_interval: null
53
+ use_tensorboard: true
54
+ use_wandb: false
55
+ wandb_project: null
56
+ wandb_id: null
57
+ detect_anomaly: false
58
+ pretrain_path: null
59
+ init_param: []
60
+ freeze_param: []
61
+ num_iters_per_epoch: null
62
+ batch_size: 4
63
+ valid_batch_size: null
64
+ batch_bins: 1000000
65
+ valid_batch_bins: null
66
+ train_shape_file:
67
+ - exp/enh_stats_8k/train/speech_mix_shape
68
+ - exp/enh_stats_8k/train/speech_ref1_shape
69
+ - exp/enh_stats_8k/train/speech_ref2_shape
70
+ valid_shape_file:
71
+ - exp/enh_stats_8k/valid/speech_mix_shape
72
+ - exp/enh_stats_8k/valid/speech_ref1_shape
73
+ - exp/enh_stats_8k/valid/speech_ref2_shape
74
+ batch_type: folded
75
+ valid_batch_type: null
76
+ fold_length:
77
+ - 80000
78
+ - 80000
79
+ - 80000
80
+ sort_in_batch: descending
81
+ sort_batch: descending
82
+ multiple_iterator: false
83
+ chunk_length: 32000
84
+ chunk_shift_ratio: 0.5
85
+ num_cache_chunks: 1024
86
+ train_data_path_and_name_and_type:
87
+ - - dump/raw/tr_min_8k/wav.scp
88
+ - speech_mix
89
+ - sound
90
+ - - dump/raw/tr_min_8k/spk1.scp
91
+ - speech_ref1
92
+ - sound
93
+ - - dump/raw/tr_min_8k/spk2.scp
94
+ - speech_ref2
95
+ - sound
96
+ valid_data_path_and_name_and_type:
97
+ - - dump/raw/cv_min_8k/wav.scp
98
+ - speech_mix
99
+ - sound
100
+ - - dump/raw/cv_min_8k/spk1.scp
101
+ - speech_ref1
102
+ - sound
103
+ - - dump/raw/cv_min_8k/spk2.scp
104
+ - speech_ref2
105
+ - sound
106
+ allow_variable_data_keys: false
107
+ max_cache_size: 0.0
108
+ max_cache_fd: 32
109
+ valid_max_cache_size: null
110
+ optim: adam
111
+ optim_conf:
112
+ lr: 0.001
113
+ eps: 1.0e-08
114
+ weight_decay: 0
115
+ scheduler: reducelronplateau
116
+ scheduler_conf:
117
+ mode: min
118
+ factor: 0.7
119
+ patience: 1
120
+ init: xavier_uniform
121
+ model_conf:
122
+ loss_type: si_snr
123
+ use_preprocessor: false
124
+ encoder: conv
125
+ encoder_conf:
126
+ channel: 64
127
+ kernel_size: 2
128
+ stride: 1
129
+ separator: dprnn
130
+ separator_conf:
131
+ num_spk: 2
132
+ layer: 6
133
+ rnn_type: lstm
134
+ bidirectional: true
135
+ nonlinear: relu
136
+ unit: 128
137
+ segment_size: 250
138
+ dropout: 0.1
139
+ decoder: conv
140
+ decoder_conf:
141
+ channel: 64
142
+ kernel_size: 2
143
+ stride: 1
144
+ required:
145
+ - output_dir
146
+ version: 0.9.8
147
+ distributed: true
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/backward_time.png ADDED
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/forward_time.png ADDED
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/gpu_max_cached_mem_GB.png ADDED
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/iter_time.png ADDED
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/loss.png ADDED
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/optim0_lr0.png ADDED
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/optim_step_time.png ADDED
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/si_snr.png ADDED
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/exp/enh_train_enh_dprnn_tasnet_raw/images/train_time.png ADDED
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: 0.10.7a1
2
+ files:
3
+ model_file: exp/enh_train_enh_dprnn_tasnet_raw/96epoch.pth
4
+ python: "3.7.11 (default, Jul 27 2021, 14:32:16) \n[GCC 7.5.0]"
5
+ timestamp: 1649682775.265407
6
+ torch: 1.8.1
7
+ yaml_files:
8
+ train_config: exp/enh_train_enh_dprnn_tasnet_raw/config.yaml
models/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/lichenda/Chenda_Li_wsj0_2mix_enh_dprnn_tasnet
models/DPRNNTasNet-ks16_WHAM_sepclean/.gitattributes ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
2
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.h5 filter=lfs diff=lfs merge=lfs -text
5
+ *.tflite filter=lfs diff=lfs merge=lfs -text
6
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.ot filter=lfs diff=lfs merge=lfs -text
8
+ *.onnx filter=lfs diff=lfs merge=lfs -text
models/DPRNNTasNet-ks16_WHAM_sepclean/README.md ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - audio-to-audio
4
+ - asteroid
5
+ - audio
6
+ - audio-source-separation
7
+ datasets:
8
+ - wham
9
+ - sep_clean
10
+ license: cc-by-sa-4.0
11
+ ---
12
+
13
+ ## Asteroid model `mpariente/DPRNNTasNet(ks=16)_WHAM!_sepclean`
14
+
15
+ ♻️ Imported from https://zenodo.org/record/3903795#.X8pMBRNKjUI
16
+
17
+ This model was trained by Manuel Pariente using the wham/DPRNN recipe in [Asteroid](https://github.com/asteroid-team/asteroid). It was trained on the sep_clean task of the WHAM! dataset.
18
+
19
+
20
+ ### Demo: How to use in Asteroid
21
+
22
+ ```python
23
+ # coming soon
24
+ ```
25
+
26
+
27
+ ### Training config
28
+
29
+ - data:
30
+ - mode: min
31
+ - nondefault_nsrc: None
32
+ - sample_rate: 8000
33
+ - segment: 2.0
34
+ - task: sep_clean
35
+ - train_dir: data/wav8k/min/tr
36
+ - valid_dir: data/wav8k/min/cv
37
+ - filterbank:
38
+ - kernel_size: 16
39
+ - n_filters: 64
40
+ - stride: 8
41
+ - main_args:
42
+ - exp_dir: exp/train_dprnn_ks16/
43
+ - help: None
44
+ - masknet:
45
+ - bidirectional: True
46
+ - bn_chan: 128
47
+ - chunk_size: 100
48
+ - dropout: 0
49
+ - hid_size: 128
50
+ - hop_size: 50
51
+ - in_chan: 64
52
+ - mask_act: sigmoid
53
+ - n_repeats: 6
54
+ - n_src: 2
55
+ - out_chan: 64
56
+ - optim:
57
+ - lr: 0.001
58
+ - optimizer: adam
59
+ - weight_decay: 1e-05
60
+ - positional arguments:
61
+ - training:
62
+ - batch_size: 6
63
+ - early_stop: True
64
+ - epochs: 200
65
+ - gradient_clipping: 5
66
+ - half_lr: True
67
+ - num_workers: 6
68
+
69
+ #### Results
70
+
71
+ - `si_sdr`: 18.227683982688003
72
+ - `si_sdr_imp`: 18.22883576588251
73
+ - `sdr`: 18.617789605060587
74
+ - `sdr_imp`: 18.466745426438173
75
+ - `sir`: 29.22773720052717
76
+ - `sir_imp`: 29.07669302190474
77
+ - `sar`: 19.116352171914485
78
+ - `sar_imp`: -130.06009796503054
79
+ - `stoi`: 0.9722025377865715
80
+ - `stoi_imp`: 0.23415680987800583
81
+
82
+ ### Citing Asteroid
83
+
84
+ ```BibTex
85
+ @inproceedings{Pariente2020Asteroid,
86
+ title={Asteroid: the {PyTorch}-based audio source separation toolkit for researchers},
87
+ author={Manuel Pariente and Samuele Cornell and Joris Cosentino and Sunit Sivasankaran and
88
+ Efthymios Tzinis and Jens Heitkaemper and Michel Olvera and Fabian-Robert Stöter and
89
+ Mathieu Hu and Juan M. Martín-Doñas and David Ditter and Ariel Frank and Antoine Deleforge
90
+ and Emmanuel Vincent},
91
+ year={2020},
92
+ booktitle={Proc. Interspeech},
93
+ }
94
+ ```
95
+
96
+ Or on arXiv:
97
+
98
+ ```bibtex
99
+ @misc{pariente2020asteroid,
100
+ title={Asteroid: the PyTorch-based audio source separation toolkit for researchers},
101
+ author={Manuel Pariente and Samuele Cornell and Joris Cosentino and Sunit Sivasankaran and Efthymios Tzinis and Jens Heitkaemper and Michel Olvera and Fabian-Robert Stöter and Mathieu Hu and Juan M. Martín-Doñas and David Ditter and Ariel Frank and Antoine Deleforge and Emmanuel Vincent},
102
+ year={2020},
103
+ eprint={2005.04132},
104
+ archivePrefix={arXiv},
105
+ primaryClass={eess.AS}
106
+ }
107
+ ```
models/DPRNNTasNet-ks16_WHAM_sepclean/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aca83e3a61eb6414f78e87350631cfcd77e04737c2c9bf7844dcde6ac0c576d8
3
+ size 14671835
models/DPRNNTasNet-ks16_WHAM_sepclean/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/julien-c/DPRNNTasNet-ks16_WHAM_sepclean
models/DPRNNTasNet-ks2_Libri1Mix_enhsingle_16k/.gitattributes ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
2
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.h5 filter=lfs diff=lfs merge=lfs -text
5
+ *.tflite filter=lfs diff=lfs merge=lfs -text
6
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.ot filter=lfs diff=lfs merge=lfs -text
8
+ *.onnx filter=lfs diff=lfs merge=lfs -text
models/DPRNNTasNet-ks2_Libri1Mix_enhsingle_16k/README.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - asteroid
4
+ - audio
5
+ - DPRNNTasNet
6
+ - audio-to-audio
7
+ datasets:
8
+ - Libri1Mix
9
+ - enh_single
10
+ license: cc-by-sa-4.0
11
+ ---
12
+
13
+ ## Asteroid model `JorisCos/DPRNNTasNet_Libri1Mix_enhsignle_16k`
14
+
15
+ Description:
16
+
17
+ This model was trained by Joris Cosentino using the librimix recipe in [Asteroid](https://github.com/asteroid-team/asteroid).
18
+ It was trained on the `enh_single` task of the Libri1Mix dataset.
19
+
20
+ Training config:
21
+
22
+ ```yml
23
+ data:
24
+ n_src: 1
25
+ sample_rate: 16000
26
+ segment: 1
27
+ task: enh_single
28
+ train_dir: data/wav16k/min/train-360
29
+ valid_dir: data/wav16k/min/dev
30
+ filterbank:
31
+ kernel_size: 2
32
+ n_filters: 64
33
+ stride: 1
34
+ masknet:
35
+ bidirectional: true
36
+ bn_chan: 128
37
+ chunk_size: 250
38
+ dropout: 0
39
+ hid_size: 128
40
+ hop_size: 125
41
+ in_chan: 64
42
+ mask_act: sigmoid
43
+ n_repeats: 6
44
+ n_src: 1
45
+ out_chan: 64
46
+ optim:
47
+ lr: 0.001
48
+ optimizer: adam
49
+ weight_decay: 1.0e-05
50
+ training:
51
+ batch_size: 2
52
+ early_stop: true
53
+ epochs: 200
54
+ gradient_clipping: 5
55
+ half_lr: true
56
+ num_workers: 4
57
+ ```
58
+
59
+
60
+ Results:
61
+
62
+ On Libri1Mix min test set :
63
+ ```yml
64
+ si_sdr: 14.7228101708889
65
+ si_sdr_imp: 11.2730288650292
66
+ sdr: 15.35661405197161
67
+ sdr_imp: 11.853951252758595
68
+ sir: Infinity
69
+ sir_imp: NaN
70
+ sar: 15.35661405197161
71
+ sar_imp: 11.853951252758595
72
+ stoi: 0.9300461826351578
73
+ stoi_imp: 0.13412635909461715
74
+ ```
75
+
76
+
77
+ License notice:
78
+
79
+ This work "DPRNNTasNet_Libri1Mix_enhsignle_16k" is a derivative of [LibriSpeech ASR corpus](http://www.openslr.org/12) by Vassil Panayotov,
80
+ used under [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/); of The WSJ0 Hipster Ambient Mixtures
81
+ dataset by [Whisper.ai](http://wham.whisper.ai/), used under [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/) (Research only).
82
+ "DPRNNTasNet_Libri1Mix_enhsignle_16k" is licensed under [Attribution-ShareAlike 3.0 Unported](https://creativecommons.org/licenses/by-sa/3.0/) by Joris Cosentino
models/DPRNNTasNet-ks2_Libri1Mix_enhsingle_16k/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b510c07fae3a3db18473b5749316cb9df8dc4f78164c3cdfbb50d3783ee779d
3
+ size 14595773
models/DPRNNTasNet-ks2_Libri1Mix_enhsingle_16k/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/JorisCos/DPRNNTasNet-ks2_Libri1Mix_enhsingle_16k
models/DPRNNTasNet-ks2_WHAM_sepclean/.gitattributes ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
2
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.h5 filter=lfs diff=lfs merge=lfs -text
5
+ *.tflite filter=lfs diff=lfs merge=lfs -text
6
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.ot filter=lfs diff=lfs merge=lfs -text
8
+ *.onnx filter=lfs diff=lfs merge=lfs -text
models/DPRNNTasNet-ks2_WHAM_sepclean/README.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - asteroid
4
+ - audio
5
+ - DPRNNTasNet
6
+ - audio-to-audio
7
+ datasets:
8
+ - wham
9
+ - sep_clean
10
+ license: cc-by-sa-4.0
11
+ ---
12
+
13
+ ## Asteroid model `mpariente/DPRNNTasNet-ks2_WHAM_sepclean`
14
+ Imported from [Zenodo](https://zenodo.org/record/3862942)
15
+
16
+ ### Description:
17
+ This model was trained by Manuel Pariente
18
+ using the wham/DPRNN recipe in [Asteroid](https://github.com/asteroid-team/asteroid).
19
+ It was trained on the `sep_clean` task of the WHAM! dataset.
20
+
21
+ ### Training config:
22
+ ```yaml
23
+ data:
24
+ mode: min
25
+ nondefault_nsrc: None
26
+ sample_rate: 8000
27
+ segment: 2.0
28
+ task: sep_clean
29
+ train_dir: data/wav8k/min/tr
30
+ valid_dir: data/wav8k/min/cv
31
+ filterbank:
32
+ kernel_size: 2
33
+ n_filters: 64
34
+ stride: 1
35
+ main_args:
36
+ exp_dir: exp/train_dprnn_new/
37
+ gpus: -1
38
+ help: None
39
+ masknet:
40
+ bidirectional: True
41
+ bn_chan: 128
42
+ chunk_size: 250
43
+ dropout: 0
44
+ hid_size: 128
45
+ hop_size: 125
46
+ in_chan: 64
47
+ mask_act: sigmoid
48
+ n_repeats: 6
49
+ n_src: 2
50
+ out_chan: 64
51
+ optim:
52
+ lr: 0.001
53
+ optimizer: adam
54
+ weight_decay: 1e-05
55
+ positional arguments:
56
+ training:
57
+ batch_size: 3
58
+ early_stop: True
59
+ epochs: 200
60
+ gradient_clipping: 5
61
+ half_lr: True
62
+ num_workers: 8
63
+ ```
64
+
65
+ ### Results:
66
+ ```yaml
67
+ si_sdr: 19.316743490695334
68
+ si_sdr_imp: 19.317895273889842
69
+ sdr: 19.68085347190952
70
+ sdr_imp: 19.5298092932871
71
+ sir: 30.362213998701232
72
+ sir_imp: 30.21116982007881
73
+ sar: 20.15553251343315
74
+ sar_imp: -129.02091762351188
75
+ stoi: 0.97772664309074
76
+ stoi_imp: 0.23968091518217424
77
+ ```
78
+
79
+ ### License notice:
80
+ This work "DPRNNTasNet-ks2_WHAM_sepclean" is a derivative of [CSR-I (WSJ0) Complete](https://catalog.ldc.upenn.edu/LDC93S6A)
81
+ by [LDC](https://www.ldc.upenn.edu/), used under [LDC User Agreement for
82
+ Non-Members](https://catalog.ldc.upenn.edu/license/ldc-non-members-agreement.pdf) (Research only).
83
+ "DPRNNTasNet-ks2_WHAM_sepclean" is licensed under [Attribution-ShareAlike 3.0 Unported](https://creativecommons.org/licenses/by-sa/3.0/)
84
+ by Manuel Pariente.
models/DPRNNTasNet-ks2_WHAM_sepclean/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc1f1fecf24ea3e486521029dc0e1444686bd4b6fdf9715e7757936cbd9ffdf6
3
+ size 14664381
models/DPRNNTasNet-ks2_WHAM_sepclean/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/mpariente/DPRNNTasNet-ks2_WHAM_sepclean
models/DPRNNTasNet_LibriMix_sepclean/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models/DPRNNTasNet_LibriMix_sepclean/epoch=9-step=1000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e95100fd3792c3a80ed7f9655e55b85c05eadaf944e31549e05af2910b2fba2d
3
+ size 44009512
models/DPRNNTasNet_LibriMix_sepclean/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/Ehsanshr/DPRNNTasNet_LibriMix_sepclean
models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.onnx filter=lfs diff=lfs merge=lfs -text
13
+ *.ot filter=lfs diff=lfs merge=lfs -text
14
+ *.parquet filter=lfs diff=lfs merge=lfs -text
15
+ *.pb filter=lfs diff=lfs merge=lfs -text
16
+ *.pt filter=lfs diff=lfs merge=lfs -text
17
+ *.pth filter=lfs diff=lfs merge=lfs -text
18
+ *.rar filter=lfs diff=lfs merge=lfs -text
19
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
21
+ *.tflite filter=lfs diff=lfs merge=lfs -text
22
+ *.tgz filter=lfs diff=lfs merge=lfs -text
23
+ *.wasm filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/README.md ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - audio-to-audio
6
+ language: noinfo
7
+ datasets:
8
+ - l3das22
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 ENH model
13
+
14
+ ### `espnet/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave`
15
+
16
+ This model was trained by neillu23 using l3das22 recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ ```bash
21
+ cd espnet
22
+ git checkout da2266fea920e22bb74471565e1a41a89f4cf62c
23
+ pip install -e .
24
+ cd egs2/l3das22/enh1
25
+ ./run.sh --skip_data_prep false --skip_train true --download_model espnet/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave
26
+ ```
27
+
28
+ <!-- Generated by ./scripts/utils/show_enh_score.sh -->
29
+ # RESULTS
30
+ ## Environments
31
+ - date: `Thu Jun 16 09:52:57 UTC 2022`
32
+ - python version: `3.8.13 (default, Mar 28 2022, 11:38:47) [GCC 7.5.0]`
33
+ - espnet version: `espnet 202204`
34
+ - pytorch version: `pytorch 1.8.1`
35
+ - Git hash: `da2266fea920e22bb74471565e1a41a89f4cf62c`
36
+ - Commit date: `Wed Jun 15 11:46:35 2022 +0000`
37
+
38
+
39
+ ## enh_train_enh_dprnntac_fasnet_raw
40
+
41
+ config: conf/tuning/train_enh_dprnntac_fasnet.yaml
42
+
43
+ |dataset|STOI|SAR|SDR|SIR|SI_SNR|
44
+ |---|---|---|---|---|---|
45
+ |enhanced_dev_multich|73.58|3.52|3.52|0.00|-3.47|
46
+ |enhanced_test_multich|73.93|2.83|2.83|0.00|-4.79|
47
+
48
+ ## ENH config
49
+
50
+ <details><summary>expand</summary>
51
+
52
+ ```
53
+ config: conf/tuning/train_enh_dprnntac_fasnet.yaml
54
+ print_config: false
55
+ log_level: INFO
56
+ dry_run: false
57
+ iterator_type: chunk
58
+ output_dir: exp/enh_train_enh_dprnntac_fasnet_raw
59
+ ngpu: 1
60
+ seed: 0
61
+ num_workers: 4
62
+ num_att_plot: 3
63
+ dist_backend: nccl
64
+ dist_init_method: env://
65
+ dist_world_size: 2
66
+ dist_rank: 0
67
+ local_rank: 0
68
+ dist_master_addr: localhost
69
+ dist_master_port: 51533
70
+ dist_launcher: null
71
+ multiprocessing_distributed: true
72
+ unused_parameters: false
73
+ sharded_ddp: false
74
+ cudnn_enabled: true
75
+ cudnn_benchmark: false
76
+ cudnn_deterministic: true
77
+ collect_stats: false
78
+ write_collected_feats: false
79
+ max_epoch: 300
80
+ patience: 10
81
+ val_scheduler_criterion:
82
+ - valid
83
+ - loss
84
+ early_stopping_criterion:
85
+ - valid
86
+ - loss
87
+ - min
88
+ best_model_criterion:
89
+ - - valid
90
+ - si_snr
91
+ - max
92
+ - - valid
93
+ - loss
94
+ - min
95
+ keep_nbest_models: 1
96
+ nbest_averaging_interval: 0
97
+ grad_clip: 5.0
98
+ grad_clip_type: 2.0
99
+ grad_noise: false
100
+ accum_grad: 1
101
+ no_forward_run: false
102
+ resume: true
103
+ train_dtype: float32
104
+ use_amp: false
105
+ log_interval: null
106
+ use_matplotlib: true
107
+ use_tensorboard: true
108
+ use_wandb: false
109
+ wandb_project: null
110
+ wandb_id: null
111
+ wandb_entity: null
112
+ wandb_name: null
113
+ wandb_model_log_interval: -1
114
+ detect_anomaly: false
115
+ pretrain_path: null
116
+ init_param: []
117
+ ignore_init_mismatch: false
118
+ freeze_param: []
119
+ num_iters_per_epoch: null
120
+ batch_size: 24
121
+ valid_batch_size: null
122
+ batch_bins: 1000000
123
+ valid_batch_bins: null
124
+ train_shape_file:
125
+ - exp/enh_stats_16k/train/speech_mix_shape
126
+ - exp/enh_stats_16k/train/speech_ref1_shape
127
+ valid_shape_file:
128
+ - exp/enh_stats_16k/valid/speech_mix_shape
129
+ - exp/enh_stats_16k/valid/speech_ref1_shape
130
+ batch_type: folded
131
+ valid_batch_type: null
132
+ fold_length:
133
+ - 80000
134
+ - 80000
135
+ sort_in_batch: descending
136
+ sort_batch: descending
137
+ multiple_iterator: false
138
+ chunk_length: 32000
139
+ chunk_shift_ratio: 0.5
140
+ num_cache_chunks: 1024
141
+ train_data_path_and_name_and_type:
142
+ - - dump/raw/train_multich/wav.scp
143
+ - speech_mix
144
+ - sound
145
+ - - dump/raw/train_multich/spk1.scp
146
+ - speech_ref1
147
+ - sound
148
+ valid_data_path_and_name_and_type:
149
+ - - dump/raw/dev_multich/wav.scp
150
+ - speech_mix
151
+ - sound
152
+ - - dump/raw/dev_multich/spk1.scp
153
+ - speech_ref1
154
+ - sound
155
+ allow_variable_data_keys: false
156
+ max_cache_size: 0.0
157
+ max_cache_fd: 32
158
+ valid_max_cache_size: null
159
+ optim: adam
160
+ optim_conf:
161
+ lr: 0.001
162
+ eps: 1.0e-08
163
+ weight_decay: 0
164
+ scheduler: steplr
165
+ scheduler_conf:
166
+ step_size: 2
167
+ gamma: 0.98
168
+ init: xavier_uniform
169
+ model_conf:
170
+ stft_consistency: false
171
+ loss_type: mask_mse
172
+ mask_type: null
173
+ criterions:
174
+ - name: si_snr
175
+ conf:
176
+ eps: 1.0e-07
177
+ wrapper: fixed_order
178
+ wrapper_conf:
179
+ weight: 1.0
180
+ use_preprocessor: false
181
+ encoder: same
182
+ encoder_conf: {}
183
+ separator: fasnet
184
+ separator_conf:
185
+ enc_dim: 64
186
+ feature_dim: 64
187
+ hidden_dim: 128
188
+ layer: 6
189
+ segment_size: 24
190
+ num_spk: 1
191
+ win_len: 16
192
+ context_len: 16
193
+ sr: 16000
194
+ fasnet_type: fasnet
195
+ dropout: 0.2
196
+ decoder: same
197
+ decoder_conf: {}
198
+ required:
199
+ - output_dir
200
+ version: '202204'
201
+ distributed: true
202
+ ```
203
+
204
+ </details>
205
+
206
+
207
+
208
+ ### Citing ESPnet
209
+
210
+ ```BibTex
211
+ @inproceedings{watanabe2018espnet,
212
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
213
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
214
+ year={2018},
215
+ booktitle={Proceedings of Interspeech},
216
+ pages={2207--2211},
217
+ doi={10.21437/Interspeech.2018-1456},
218
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
219
+ }
220
+
221
+
222
+ @inproceedings{ESPnet-SE,
223
+ author = {Chenda Li and Jing Shi and Wangyou Zhang and Aswin Shanmugam Subramanian and Xuankai Chang and
224
+ Naoyuki Kamo and Moto Hira and Tomoki Hayashi and Christoph B{\"{o}}ddeker and Zhuo Chen and Shinji Watanabe},
225
+ title = {ESPnet-SE: End-To-End Speech Enhancement and Separation Toolkit Designed for {ASR} Integration},
226
+ booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
227
+ pages = {785--792},
228
+ publisher = {{IEEE}},
229
+ year = {2021},
230
+ url = {https://doi.org/10.1109/SLT48900.2021.9383615},
231
+ doi = {10.1109/SLT48900.2021.9383615},
232
+ timestamp = {Mon, 12 Apr 2021 17:08:59 +0200},
233
+ biburl = {https://dblp.org/rec/conf/slt/Li0ZSCKHHBC021.bib},
234
+ bibsource = {dblp computer science bibliography, https://dblp.org}
235
+ }
236
+
237
+
238
+ ```
239
+
240
+ or arXiv:
241
+
242
+ ```bibtex
243
+ @misc{watanabe2018espnet,
244
+ title={ESPnet: End-to-End Speech Processing Toolkit},
245
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
246
+ year={2018},
247
+ eprint={1804.00015},
248
+ archivePrefix={arXiv},
249
+ primaryClass={cs.CL}
250
+ }
251
+ ```
models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_stats_16k/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f92b851bbc9340886cd3cb2a322006ca939d3221047336847ff2093861f7db9
3
+ size 826
models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/299epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ef3ddd09d01e8fa6903e5a879c0635b7dcf3c9b4847233cc2a43efc022ee12b
3
+ size 16366144
models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/RESULTS.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by ./scripts/utils/show_enh_score.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Thu Jun 16 09:52:57 UTC 2022`
5
+ - python version: `3.8.13 (default, Mar 28 2022, 11:38:47) [GCC 7.5.0]`
6
+ - espnet version: `espnet 202204`
7
+ - pytorch version: `pytorch 1.8.1`
8
+ - Git hash: `da2266fea920e22bb74471565e1a41a89f4cf62c`
9
+ - Commit date: `Wed Jun 15 11:46:35 2022 +0000`
10
+
11
+
12
+ ## enh_train_enh_dprnntac_fasnet_raw
13
+
14
+ config: conf/tuning/train_enh_dprnntac_fasnet.yaml
15
+
16
+ |dataset|STOI|SAR|SDR|SIR|SI_SNR|
17
+ |---|---|---|---|---|---|
18
+ |enhanced_dev_multich|73.58|3.52|3.52|0.00|-3.47|
19
+ |enhanced_test_multich|73.93|2.83|2.83|0.00|-4.79|
20
+
models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/config.yaml ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_enh_dprnntac_fasnet.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: chunk
6
+ output_dir: exp/enh_train_enh_dprnntac_fasnet_raw
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 4
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 2
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 51533
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 300
28
+ patience: 10
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - si_snr
39
+ - max
40
+ - - valid
41
+ - loss
42
+ - min
43
+ keep_nbest_models: 1
44
+ nbest_averaging_interval: 0
45
+ grad_clip: 5.0
46
+ grad_clip_type: 2.0
47
+ grad_noise: false
48
+ accum_grad: 1
49
+ no_forward_run: false
50
+ resume: true
51
+ train_dtype: float32
52
+ use_amp: false
53
+ log_interval: null
54
+ use_matplotlib: true
55
+ use_tensorboard: true
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ pretrain_path: null
64
+ init_param: []
65
+ ignore_init_mismatch: false
66
+ freeze_param: []
67
+ num_iters_per_epoch: null
68
+ batch_size: 24
69
+ valid_batch_size: null
70
+ batch_bins: 1000000
71
+ valid_batch_bins: null
72
+ train_shape_file:
73
+ - exp/enh_stats_16k/train/speech_mix_shape
74
+ - exp/enh_stats_16k/train/speech_ref1_shape
75
+ valid_shape_file:
76
+ - exp/enh_stats_16k/valid/speech_mix_shape
77
+ - exp/enh_stats_16k/valid/speech_ref1_shape
78
+ batch_type: folded
79
+ valid_batch_type: null
80
+ fold_length:
81
+ - 80000
82
+ - 80000
83
+ sort_in_batch: descending
84
+ sort_batch: descending
85
+ multiple_iterator: false
86
+ chunk_length: 32000
87
+ chunk_shift_ratio: 0.5
88
+ num_cache_chunks: 1024
89
+ train_data_path_and_name_and_type:
90
+ - - dump/raw/train_multich/wav.scp
91
+ - speech_mix
92
+ - sound
93
+ - - dump/raw/train_multich/spk1.scp
94
+ - speech_ref1
95
+ - sound
96
+ valid_data_path_and_name_and_type:
97
+ - - dump/raw/dev_multich/wav.scp
98
+ - speech_mix
99
+ - sound
100
+ - - dump/raw/dev_multich/spk1.scp
101
+ - speech_ref1
102
+ - sound
103
+ allow_variable_data_keys: false
104
+ max_cache_size: 0.0
105
+ max_cache_fd: 32
106
+ valid_max_cache_size: null
107
+ optim: adam
108
+ optim_conf:
109
+ lr: 0.001
110
+ eps: 1.0e-08
111
+ weight_decay: 0
112
+ scheduler: steplr
113
+ scheduler_conf:
114
+ step_size: 2
115
+ gamma: 0.98
116
+ init: xavier_uniform
117
+ model_conf:
118
+ stft_consistency: false
119
+ loss_type: mask_mse
120
+ mask_type: null
121
+ criterions:
122
+ - name: si_snr
123
+ conf:
124
+ eps: 1.0e-07
125
+ wrapper: fixed_order
126
+ wrapper_conf:
127
+ weight: 1.0
128
+ use_preprocessor: false
129
+ encoder: same
130
+ encoder_conf: {}
131
+ separator: fasnet
132
+ separator_conf:
133
+ enc_dim: 64
134
+ feature_dim: 64
135
+ hidden_dim: 128
136
+ layer: 6
137
+ segment_size: 24
138
+ num_spk: 1
139
+ win_len: 16
140
+ context_len: 16
141
+ sr: 16000
142
+ fasnet_type: fasnet
143
+ dropout: 0.2
144
+ decoder: same
145
+ decoder_conf: {}
146
+ required:
147
+ - output_dir
148
+ version: '202204'
149
+ distributed: true
models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/images/backward_time.png ADDED
models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/images/forward_time.png ADDED
models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/images/gpu_max_cached_mem_GB.png ADDED
models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/images/iter_time.png ADDED
models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/images/loss.png ADDED
models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/images/optim0_lr0.png ADDED
models/Yen-Ju_Lu_l3das22_enh_train_dprnntac_fasnet_valid.loss.ave/exp/enh_train_enh_dprnntac_fasnet_raw/images/optim_step_time.png ADDED