DCCRN, Distil-DCCRN
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +6 -0
- DCCRN/A Convolutional Recurrent Neural Network for Real-Time Speech Enhancement.pdf +3 -0
- DCCRN/AIAP Final Project.pdf +3 -0
- DCCRN/DCCRN. Deep Complex Convolution Recurrent Network for Phase-Aware Speech Enhancement.pdf +3 -0
- DCCRN/Performance comparison evaluation of speech enhancement using various loss functions.pdf +3 -0
- DCCRN/code/DCCRN [Context-Aware-Character-TTS-System].zip +3 -0
- DCCRN/code/DCCRN [Wang-Jingrun].zip +3 -0
- DCCRN/code/DCCRN [maggie0830].zip +3 -0
- DCCRN/code/DCCRN [mahshid1378].zip +3 -0
- DCCRN/code/DCCRN [shaoyuanyu].zip +3 -0
- DCCRN/code/DCCRN [wangtianrui].zip +3 -0
- DCCRN/code/DCCRN [wanliangdaxia].zip +3 -0
- DCCRN/code/DCCRN-2.zip +3 -0
- DCCRN/code/DCCRN-Trial.zip +3 -0
- DCCRN/code/DCCRN-for-DNS5.zip +3 -0
- DCCRN/code/DCCRN-small.zip +3 -0
- DCCRN/code/DCCRN-with-various-loss-functions.zip +3 -0
- DCCRN/code/DCCRN-wncg.zip +3 -0
- DCCRN/code/DCCRN_Keras.zip +3 -0
- DCCRN/code/DCCRN_Pytorch.zip +3 -0
- DCCRN/code/DNN-based-Speech-Enhancement-in-the-frequency-domain.zip +3 -0
- DCCRN/code/DeepComplexCRN (original).zip +3 -0
- DCCRN/code/I-DCCRN-VAE.zip +3 -0
- DCCRN/code/KD_DCCRN.zip +3 -0
- DCCRN/code/Knowledge_distillation.zip +3 -0
- DCCRN/code/S-DCCRN.zip +3 -0
- DCCRN/code/SE-DCCRN.zip +3 -0
- DCCRN/code/Spatial-DCCRN.zip +3 -0
- DCCRN/code/Speech_Enhancement-DCCRN.zip +3 -0
- DCCRN/code/dccrn-plus.zip +3 -0
- DCCRN/code/denosising_model [mcaramba563].zip +3 -0
- DCCRN/code/hf-dccrn.zip +3 -0
- DCCRN/dataset/Speech Enhancement for a Noise-Robust Text-to-Speech Synthesis System Using Deep Recurrent Neural Networks.pdf +3 -0
- DCCRN/models/DCCRN (Ada312)/.gitattributes +35 -0
- DCCRN/models/DCCRN (Ada312)/epoch=44-step=113895.ckpt +3 -0
- DCCRN/models/DCCRN (Ada312)/source.txt +1 -0
- DCCRN/models/DCCRN (chenxie95)/.gitattributes +35 -0
- DCCRN/models/DCCRN (chenxie95)/epoch=44-step=113895.ckpt +3 -0
- DCCRN/models/DCCRN (chenxie95)/source.txt +2 -0
- DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/.gitattributes +8 -0
- DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/README.md +74 -0
- DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/pytorch_model.bin +3 -0
- DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/source.txt +1 -0
- DCCRN/models/SE-DCCRN/base_model.pth +3 -0
- DCCRN/models/SE-DCCRN/lite_v1_model.pth +3 -0
- DCCRN/models/SE-DCCRN/lite_v1d_model.pth +3 -0
- DCCRN/models/SE-DCCRN/source.txt +1 -0
- DCCRN/models/SE-DCCRN/summary.md +9 -0
- DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/.gitattributes +27 -0
- DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/README.md +257 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
DCCRN/A[[:space:]]Convolutional[[:space:]]Recurrent[[:space:]]Neural[[:space:]]Network[[:space:]]for[[:space:]]Real-Time[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
DCCRN/AIAP[[:space:]]Final[[:space:]]Project.pdf filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
DCCRN/dataset/Speech[[:space:]]Enhancement[[:space:]]for[[:space:]]a[[:space:]]Noise-Robust[[:space:]]Text-to-Speech[[:space:]]Synthesis[[:space:]]System[[:space:]]Using[[:space:]]Deep[[:space:]]Recurrent[[:space:]]Neural[[:space:]]Networks.pdf filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
DCCRN/DCCRN.[[:space:]]Deep[[:space:]]Complex[[:space:]]Convolution[[:space:]]Recurrent[[:space:]]Network[[:space:]]for[[:space:]]Phase-Aware[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
DCCRN/Performance[[:space:]]comparison[[:space:]]evaluation[[:space:]]of[[:space:]]speech[[:space:]]enhancement[[:space:]]using[[:space:]]various[[:space:]]loss[[:space:]]functions.pdf filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
Distil-DCCRN/Distil-DCCRN.[[:space:]]A[[:space:]]Small-footprint[[:space:]]DCCRN[[:space:]]Leveraging[[:space:]]Feature-based[[:space:]]Knowledge[[:space:]]Distillation[[:space:]]in[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
|
DCCRN/A Convolutional Recurrent Neural Network for Real-Time Speech Enhancement.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b9a1d013c558e4177c0d68b5e5c39648b30626c430aa7b57fb9c9c018123538e
|
| 3 |
+
size 646378
|
DCCRN/AIAP Final Project.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ead31849e4c1e86925f5abb2cd3c57fb57eaea280c763c43fa58696fa08afd81
|
| 3 |
+
size 1246642
|
DCCRN/DCCRN. Deep Complex Convolution Recurrent Network for Phase-Aware Speech Enhancement.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b381ebaadb2b0b94c8d0c498c76878b0a6ae12ba6f686caee5066b4b5cd1cea
|
| 3 |
+
size 844298
|
DCCRN/Performance comparison evaluation of speech enhancement using various loss functions.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85df2fe0ebbbec1acb4e06d404ddb465228f8f5063facbea4c6101aee305c227
|
| 3 |
+
size 800355
|
DCCRN/code/DCCRN [Context-Aware-Character-TTS-System].zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7abc519d46cb25068ec23af18367742f18c2bfb3efa4543e2f47943dea69e406
|
| 3 |
+
size 81376722
|
DCCRN/code/DCCRN [Wang-Jingrun].zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e18415b943fc9035b78952a40d09985827b3acc3a80018c7a9033570a9a4f90
|
| 3 |
+
size 116505131
|
DCCRN/code/DCCRN [maggie0830].zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c32593933532b2a5af921bf85f74ef784e9129c139944be4093175950ef49053
|
| 3 |
+
size 573872
|
DCCRN/code/DCCRN [mahshid1378].zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d4a1cacb0e5ec654048683df723cdb9575571c18693e271cdc42d75a455a0aa
|
| 3 |
+
size 8481588
|
DCCRN/code/DCCRN [shaoyuanyu].zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1ee118fd02ebb77714e2d7328d7f54eae49555ed4208884c5406283e9fbdc8b
|
| 3 |
+
size 2205508
|
DCCRN/code/DCCRN [wangtianrui].zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cfb8e8ed35d100a1224a3ceb22b46f95252c263c4ed9b61efff7523b80b3e07
|
| 3 |
+
size 2029618
|
DCCRN/code/DCCRN [wanliangdaxia].zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3622ced5e8275edf628c2783d6b4c619ab016d0e64ba41dc0b599d1d91a51bd9
|
| 3 |
+
size 896792
|
DCCRN/code/DCCRN-2.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4d8de3e4ec5a0d3036680fa1cf0a0cf94f2e2e6d3b6d71d4dc54ef5e47b2cbb
|
| 3 |
+
size 2208993
|
DCCRN/code/DCCRN-Trial.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a141df4aaea06fefbab0e154f49ba82216b21d6acb4176dbb8585119883fb764
|
| 3 |
+
size 2262879
|
DCCRN/code/DCCRN-for-DNS5.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd423940ef0309ce8679e50327955d3a035fdea2d16c615c3b4a3e53014a3838
|
| 3 |
+
size 507473469
|
DCCRN/code/DCCRN-small.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63c1f0007edebee9f9eea1e3051d0fbf2563cd8a41936180f6c2d2e8dfac3d37
|
| 3 |
+
size 2547481
|
DCCRN/code/DCCRN-with-various-loss-functions.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95f467976fa3768ad3800ef9cdaa1af8a939a3340c2d65d6ac4b896eb3371e35
|
| 3 |
+
size 33941947
|
DCCRN/code/DCCRN-wncg.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb50b592c67a578442a1b6cac8e6d232c7502b1c977e96a3ff49e041c96ea80c
|
| 3 |
+
size 232163
|
DCCRN/code/DCCRN_Keras.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bbde2340354d31e7fe3d9bd2e554ebda81c0501ed28c61a339dbe47b6453e1f
|
| 3 |
+
size 37331
|
DCCRN/code/DCCRN_Pytorch.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e685912a3f00fcb8e33fec9b2d99e085fe7aea12e79facfbba0ad3e612d669da
|
| 3 |
+
size 110453
|
DCCRN/code/DNN-based-Speech-Enhancement-in-the-frequency-domain.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a77d84b30770f6bff0861f7fe1c1cd2578ee8b5a617c5e69f9dffb58cd3c37af
|
| 3 |
+
size 410561
|
DCCRN/code/DeepComplexCRN (original).zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a4a36dcb653dc234f146b8dbc11595d8f53e43a7a971524a3c505996989dc21
|
| 3 |
+
size 114573010
|
DCCRN/code/I-DCCRN-VAE.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f15ee047bc618fc54b925a29fc452e2979fec1081f68949f949e031fc5ddf7b9
|
| 3 |
+
size 8232516
|
DCCRN/code/KD_DCCRN.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04a654dcbb5b3117d00afd9546f44bd9ee70fc6bdf4b019a2658b2852a722cb9
|
| 3 |
+
size 41431052
|
DCCRN/code/Knowledge_distillation.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72e5754b08efa7347fb7a4c37ab06363e055936f5105f11eac900c3cc96d86c2
|
| 3 |
+
size 41170858
|
DCCRN/code/S-DCCRN.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:040de712eb711e07ca8a038d6d1806420122f8acac89e720c2f7c0d666134ce8
|
| 3 |
+
size 119122675
|
DCCRN/code/SE-DCCRN.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e57b621edb6ebd89d5e795ed896d6b099b9171b4fafda1da875358cfea26e42
|
| 3 |
+
size 41207805
|
DCCRN/code/Spatial-DCCRN.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0d4b65b8b43f4a8c2f2123eb73d8fdb42bb5f6321a9333145a21e53e37dc92e
|
| 3 |
+
size 74811261
|
DCCRN/code/Speech_Enhancement-DCCRN.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2a1b2d9f3d3e04466661cdb740eeacaab40fe037497e29abae31f01913fbf13
|
| 3 |
+
size 83347937
|
DCCRN/code/dccrn-plus.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e45c2a9acf8e01cdb8de6d6865c69c3106a172bd14f29990503abdbf9233439
|
| 3 |
+
size 82632252
|
DCCRN/code/denosising_model [mcaramba563].zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eef9ece1e48341e311feb153dcdf2497ddc151a2e93a0a74d92cb45b68f857d7
|
| 3 |
+
size 516491216
|
DCCRN/code/hf-dccrn.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68a254da33c961777e5be98ef023f6d149aaaaffb43bf33afbce8b932a7ab619
|
| 3 |
+
size 49593
|
DCCRN/dataset/Speech Enhancement for a Noise-Robust Text-to-Speech Synthesis System Using Deep Recurrent Neural Networks.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30806abb77998dfd47fe9d66d9b069f0974921d1f8734c403f1d765df3280989
|
| 3 |
+
size 10158959
|
DCCRN/models/DCCRN (Ada312)/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
DCCRN/models/DCCRN (Ada312)/epoch=44-step=113895.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63f1a20e51b1194aed4c18c0305a706d386a9e7d8c8bd425e95805ecee94164b
|
| 3 |
+
size 46489036
|
DCCRN/models/DCCRN (Ada312)/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://huggingface.co/Ada312/DCCRN
|
DCCRN/models/DCCRN (chenxie95)/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
DCCRN/models/DCCRN (chenxie95)/epoch=44-step=113895.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63f1a20e51b1194aed4c18c0305a706d386a9e7d8c8bd425e95805ecee94164b
|
| 3 |
+
size 46489036
|
DCCRN/models/DCCRN (chenxie95)/source.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
https://huggingface.co/chenxie95/DCCRN
|
DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/.gitattributes
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.tar.gz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/README.md
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- asteroid
|
| 4 |
+
- audio
|
| 5 |
+
- DCCRNet
|
| 6 |
+
- audio-to-audio
|
| 7 |
+
- speech-enhancement
|
| 8 |
+
datasets:
|
| 9 |
+
- Libri1Mix
|
| 10 |
+
- enh_single
|
| 11 |
+
license: cc-by-sa-4.0
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
## Asteroid model `JorisCos/DCCRNet_Libri1Mix_enhsingle_16k`
|
| 15 |
+
|
| 16 |
+
Description:
|
| 17 |
+
|
| 18 |
+
This model was trained by Joris Cosentino using the librimix recipe in [Asteroid](https://github.com/asteroid-team/asteroid).
|
| 19 |
+
It was trained on the `enh_single` task of the Libri1Mix dataset.
|
| 20 |
+
|
| 21 |
+
Training config:
|
| 22 |
+
|
| 23 |
+
```yml
|
| 24 |
+
data:
|
| 25 |
+
n_src: 1
|
| 26 |
+
sample_rate: 16000
|
| 27 |
+
segment: 3
|
| 28 |
+
task: enh_single
|
| 29 |
+
train_dir: data/wav16k/min/train-360
|
| 30 |
+
valid_dir: data/wav16k/min/dev
|
| 31 |
+
filterbank:
|
| 32 |
+
stft_kernel_size: 400
|
| 33 |
+
stft_n_filters: 512
|
| 34 |
+
stft_stride: 100
|
| 35 |
+
masknet:
|
| 36 |
+
architecture: DCCRN-CL
|
| 37 |
+
n_src: 1
|
| 38 |
+
optim:
|
| 39 |
+
lr: 0.001
|
| 40 |
+
optimizer: adam
|
| 41 |
+
weight_decay: 1.0e-05
|
| 42 |
+
training:
|
| 43 |
+
batch_size: 12
|
| 44 |
+
early_stop: true
|
| 45 |
+
epochs: 200
|
| 46 |
+
gradient_clipping: 5
|
| 47 |
+
half_lr: true
|
| 48 |
+
num_workers: 4
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
Results:
|
| 53 |
+
|
| 54 |
+
On the Libri1Mix min test set:
|
| 55 |
+
```yml
|
| 56 |
+
si_sdr: 13.329767398333798
|
| 57 |
+
si_sdr_imp: 9.879986092474098
|
| 58 |
+
sdr: 13.87279932997016
|
| 59 |
+
sdr_imp: 10.370136530757103
|
| 60 |
+
sir: Infinity
|
| 61 |
+
sir_imp: NaN
|
| 62 |
+
sar: 13.87279932997016
|
| 63 |
+
sar_imp: 10.370136530757103
|
| 64 |
+
stoi: 0.9140907015623948
|
| 65 |
+
stoi_imp: 0.11817087802185405
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
License notice:
|
| 70 |
+
|
| 71 |
+
This work "DCCRNet_Libri1Mix_enhsingle_16k" is a derivative of [LibriSpeech ASR corpus](http://www.openslr.org/12) by Vassil Panayotov,
|
| 72 |
+
used under [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/); of The WSJ0 Hipster Ambient Mixtures
|
| 73 |
+
dataset by [Whisper.ai](http://wham.whisper.ai/), used under [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/) (Research only).
|
| 74 |
+
"DCCRNet_Libri1Mix_enhsingle_16k" is licensed under [Attribution-ShareAlike 3.0 Unported](https://creativecommons.org/licenses/by-sa/3.0/) by Joris Cosentino
|
DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1f519207604649cd90586e85adf2e93c4f4b26b216f0a71635013a90e64d939
|
| 3 |
+
size 16409109
|
DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://huggingface.co/JorisCos/DCCRNet_Libri1Mix_enhsingle_16k
|
DCCRN/models/SE-DCCRN/base_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd710915fc1b4897f5ba59de66ee1b185449569a0375dc3175c427632c761d97
|
| 3 |
+
size 14740557
|
DCCRN/models/SE-DCCRN/lite_v1_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b9edf5a6f0c28074f93ef0416dabbe126b2e0fdd480aabdecd14f38bb4f1d69
|
| 3 |
+
size 3731533
|
DCCRN/models/SE-DCCRN/lite_v1d_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a04fc383e680a6fa4df001d2242eec003f8a442e1f15f1292c931b0cf3cbdf7f
|
| 3 |
+
size 3731533
|
DCCRN/models/SE-DCCRN/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://github.com/sc0ttms/SE-DCCRN
|
DCCRN/models/SE-DCCRN/summary.md
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# summary
|
| 2 |
+
|
| 3 |
+
| dataset/model | Params(M) | Flops(G) | SI-SDR | STOI | WB_PESQ | NB_PESQ |
|
| 4 |
+
| :-----------: | :-------: | :------: | :--------------: | :---------------: | :--------------: | :--------------: |
|
| 5 |
+
| dataset_train | - | - | 9.18826423150635 | 0.821936784950485 | 1.73518149405717 | 2.42768483359366 |
|
| 6 |
+
| dataset_valid | - | - | 9.15858130001896 | 0.820208461517329 | 1.73266624887353 | 2.41471458952858 |
|
| 7 |
+
| dataset_test | - | - | 9.33677852833749 | 0.821225098997534 | 1.73727285225124 | 2.42875709109791 |
|
| 8 |
+
| base_model | 3.671 | 35.253 | 18.2092751119167 | 0.895062729329303 | 3.02301043262067 | 3.44576535798074 |
|
| 9 |
+
| lite_v1_model | 0.921 | 8.872 | 17.1342839493116 | 0.882091315376406 | 2.81674490229713 | 3.29441381774482 |
|
DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/.gitattributes
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/README.md
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- espnet
|
| 4 |
+
- audio
|
| 5 |
+
- audio-to-audio
|
| 6 |
+
language: en
|
| 7 |
+
datasets:
|
| 8 |
+
- dns_ins20
|
| 9 |
+
license: cc-by-4.0
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
## ESPnet2 ENH model
|
| 13 |
+
|
| 14 |
+
### `Johnson-Lsx/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw`
|
| 15 |
+
|
| 16 |
+
This model was trained by Shaoxiong Lin using the dns_ins20 recipe in [espnet](https://github.com/espnet/espnet/).
|
| 17 |
+
|
| 18 |
+
### Demo: How to use in ESPnet2
|
| 19 |
+
|
| 20 |
+
```bash
|
| 21 |
+
cd espnet
|
| 22 |
+
git checkout 4538462eb7dc6a6b858adcbd3a526fb8173d6f73
|
| 23 |
+
pip install -e .
|
| 24 |
+
cd egs2/dns_ins20/enh1
|
| 25 |
+
./run.sh --skip_data_prep false --skip_train true --download_model Johnson-Lsx/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
<!-- Generated by ./scripts/utils/show_enh_score.sh -->
|
| 29 |
+
# RESULTS
|
| 30 |
+
## Environments
|
| 31 |
+
- date: `Thu Feb 10 23:11:40 CST 2022`
|
| 32 |
+
- python version: `3.8.12 (default, Oct 12 2021, 13:49:34) [GCC 7.5.0]`
|
| 33 |
+
- espnet version: `espnet 0.10.5a1`
|
| 34 |
+
- pytorch version: `pytorch 1.9.1`
|
| 35 |
+
- Git hash: `6f66283b9eed7b0d5e5643feb18d8f60118a4afc`
|
| 36 |
+
- Commit date: `Mon Dec 13 15:30:29 2021 +0800`
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
## enh_train_enh_dccrn_batch_size_raw
|
| 40 |
+
|
| 41 |
+
config: ./conf/tuning/train_enh_dccrn_batch_size.yaml
|
| 42 |
+
|
| 43 |
+
|dataset|STOI|SAR|SDR|SIR|
|
| 44 |
+
|---|---|---|---|---|
|
| 45 |
+
|enhanced_cv_synthetic|0.98|24.69|24.69|0.00|
|
| 46 |
+
|enhanced_tt_synthetic_no_reverb|0.96|17.69|17.69|0.00|
|
| 47 |
+
|enhanced_tt_synthetic_with_reverb|0.81|10.45|10.45|0.00|
|
| 48 |
+
|
| 49 |
+
## ENH config
|
| 50 |
+
|
| 51 |
+
<details><summary>expand</summary>
|
| 52 |
+
|
| 53 |
+
```
|
| 54 |
+
config: ./conf/tuning/train_enh_dccrn_batch_size.yaml
|
| 55 |
+
print_config: false
|
| 56 |
+
log_level: INFO
|
| 57 |
+
dry_run: false
|
| 58 |
+
iterator_type: chunk
|
| 59 |
+
output_dir: exp/enh_train_enh_dccrn_batch_size_raw
|
| 60 |
+
ngpu: 1
|
| 61 |
+
seed: 0
|
| 62 |
+
num_workers: 4
|
| 63 |
+
num_att_plot: 3
|
| 64 |
+
dist_backend: nccl
|
| 65 |
+
dist_init_method: env://
|
| 66 |
+
dist_world_size: 4
|
| 67 |
+
dist_rank: 0
|
| 68 |
+
local_rank: 0
|
| 69 |
+
dist_master_addr: localhost
|
| 70 |
+
dist_master_port: 46366
|
| 71 |
+
dist_launcher: null
|
| 72 |
+
multiprocessing_distributed: true
|
| 73 |
+
unused_parameters: false
|
| 74 |
+
sharded_ddp: false
|
| 75 |
+
cudnn_enabled: true
|
| 76 |
+
cudnn_benchmark: false
|
| 77 |
+
cudnn_deterministic: true
|
| 78 |
+
collect_stats: false
|
| 79 |
+
write_collected_feats: false
|
| 80 |
+
max_epoch: 100
|
| 81 |
+
patience: 10
|
| 82 |
+
val_scheduler_criterion:
|
| 83 |
+
- valid
|
| 84 |
+
- loss
|
| 85 |
+
early_stopping_criterion:
|
| 86 |
+
- valid
|
| 87 |
+
- loss
|
| 88 |
+
- min
|
| 89 |
+
best_model_criterion:
|
| 90 |
+
- - valid
|
| 91 |
+
- si_snr
|
| 92 |
+
- max
|
| 93 |
+
- - valid
|
| 94 |
+
- loss
|
| 95 |
+
- min
|
| 96 |
+
keep_nbest_models: 1
|
| 97 |
+
nbest_averaging_interval: 0
|
| 98 |
+
grad_clip: 5.0
|
| 99 |
+
grad_clip_type: 2.0
|
| 100 |
+
grad_noise: false
|
| 101 |
+
accum_grad: 1
|
| 102 |
+
no_forward_run: false
|
| 103 |
+
resume: true
|
| 104 |
+
train_dtype: float32
|
| 105 |
+
use_amp: false
|
| 106 |
+
log_interval: null
|
| 107 |
+
use_tensorboard: true
|
| 108 |
+
use_wandb: false
|
| 109 |
+
wandb_project: null
|
| 110 |
+
wandb_id: null
|
| 111 |
+
wandb_entity: null
|
| 112 |
+
wandb_name: null
|
| 113 |
+
wandb_model_log_interval: -1
|
| 114 |
+
detect_anomaly: false
|
| 115 |
+
pretrain_path: null
|
| 116 |
+
init_param: []
|
| 117 |
+
ignore_init_mismatch: false
|
| 118 |
+
freeze_param: []
|
| 119 |
+
num_iters_per_epoch: null
|
| 120 |
+
batch_size: 32
|
| 121 |
+
valid_batch_size: null
|
| 122 |
+
batch_bins: 1000000
|
| 123 |
+
valid_batch_bins: null
|
| 124 |
+
train_shape_file:
|
| 125 |
+
- exp/enh_stats_16k/train/speech_mix_shape
|
| 126 |
+
- exp/enh_stats_16k/train/speech_ref1_shape
|
| 127 |
+
- exp/enh_stats_16k/train/noise_ref1_shape
|
| 128 |
+
valid_shape_file:
|
| 129 |
+
- exp/enh_stats_16k/valid/speech_mix_shape
|
| 130 |
+
- exp/enh_stats_16k/valid/speech_ref1_shape
|
| 131 |
+
- exp/enh_stats_16k/valid/noise_ref1_shape
|
| 132 |
+
batch_type: folded
|
| 133 |
+
valid_batch_type: null
|
| 134 |
+
fold_length:
|
| 135 |
+
- 80000
|
| 136 |
+
- 80000
|
| 137 |
+
- 80000
|
| 138 |
+
sort_in_batch: descending
|
| 139 |
+
sort_batch: descending
|
| 140 |
+
multiple_iterator: false
|
| 141 |
+
chunk_length: 64000
|
| 142 |
+
chunk_shift_ratio: 0.5
|
| 143 |
+
num_cache_chunks: 1024
|
| 144 |
+
train_data_path_and_name_and_type:
|
| 145 |
+
- - dump/raw/tr_synthetic/wav.scp
|
| 146 |
+
- speech_mix
|
| 147 |
+
- sound
|
| 148 |
+
- - dump/raw/tr_synthetic/spk1.scp
|
| 149 |
+
- speech_ref1
|
| 150 |
+
- sound
|
| 151 |
+
- - dump/raw/tr_synthetic/noise1.scp
|
| 152 |
+
- noise_ref1
|
| 153 |
+
- sound
|
| 154 |
+
valid_data_path_and_name_and_type:
|
| 155 |
+
- - dump/raw/cv_synthetic/wav.scp
|
| 156 |
+
- speech_mix
|
| 157 |
+
- sound
|
| 158 |
+
- - dump/raw/cv_synthetic/spk1.scp
|
| 159 |
+
- speech_ref1
|
| 160 |
+
- sound
|
| 161 |
+
- - dump/raw/cv_synthetic/noise1.scp
|
| 162 |
+
- noise_ref1
|
| 163 |
+
- sound
|
| 164 |
+
allow_variable_data_keys: false
|
| 165 |
+
max_cache_size: 0.0
|
| 166 |
+
max_cache_fd: 32
|
| 167 |
+
valid_max_cache_size: null
|
| 168 |
+
optim: adam
|
| 169 |
+
optim_conf:
|
| 170 |
+
lr: 0.001
|
| 171 |
+
eps: 1.0e-08
|
| 172 |
+
weight_decay: 1.0e-07
|
| 173 |
+
scheduler: reducelronplateau
|
| 174 |
+
scheduler_conf:
|
| 175 |
+
mode: min
|
| 176 |
+
factor: 0.7
|
| 177 |
+
patience: 1
|
| 178 |
+
init: null
|
| 179 |
+
model_conf:
|
| 180 |
+
loss_type: si_snr
|
| 181 |
+
criterions:
|
| 182 |
+
# The first criterion
|
| 183 |
+
- name: si_snr
|
| 184 |
+
conf:
|
| 185 |
+
eps: 1.0e-7
|
| 186 |
+
# the wrapper for the current criterion
|
| 187 |
+
# for the single-talker case, we simply use the fixed_order wrapper
|
| 188 |
+
wrapper: fixed_order
|
| 189 |
+
wrapper_conf:
|
| 190 |
+
weight: 1.0
|
| 191 |
+
use_preprocessor: false
|
| 192 |
+
encoder: stft
|
| 193 |
+
encoder_conf:
|
| 194 |
+
n_fft: 512
|
| 195 |
+
win_length: 400
|
| 196 |
+
hop_length: 100
|
| 197 |
+
separator: dccrn
|
| 198 |
+
separator_conf: {}
|
| 199 |
+
decoder: stft
|
| 200 |
+
decoder_conf:
|
| 201 |
+
n_fft: 512
|
| 202 |
+
win_length: 400
|
| 203 |
+
hop_length: 100
|
| 204 |
+
required:
|
| 205 |
+
- output_dir
|
| 206 |
+
version: 0.10.5a1
|
| 207 |
+
distributed: true
|
| 208 |
+
```
|
| 209 |
+
|
| 210 |
+
</details>
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
### Citing ESPnet
|
| 215 |
+
|
| 216 |
+
```BibTex
|
| 217 |
+
@inproceedings{watanabe2018espnet,
|
| 218 |
+
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
|
| 219 |
+
title={{ESPnet}: End-to-End Speech Processing Toolkit},
|
| 220 |
+
year={2018},
|
| 221 |
+
booktitle={Proceedings of Interspeech},
|
| 222 |
+
pages={2207--2211},
|
| 223 |
+
doi={10.21437/Interspeech.2018-1456},
|
| 224 |
+
url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
@inproceedings{ESPnet-SE,
|
| 229 |
+
author = {Chenda Li and Jing Shi and Wangyou Zhang and Aswin Shanmugam Subramanian and Xuankai Chang and
|
| 230 |
+
Naoyuki Kamo and Moto Hira and Tomoki Hayashi and Christoph B{\"{o}}ddeker and Zhuo Chen and Shinji Watanabe},
|
| 231 |
+
title = {ESPnet-SE: End-To-End Speech Enhancement and Separation Toolkit Designed for {ASR} Integration},
|
| 232 |
+
booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
|
| 233 |
+
pages = {785--792},
|
| 234 |
+
publisher = {{IEEE}},
|
| 235 |
+
year = {2021},
|
| 236 |
+
url = {https://doi.org/10.1109/SLT48900.2021.9383615},
|
| 237 |
+
doi = {10.1109/SLT48900.2021.9383615},
|
| 238 |
+
timestamp = {Mon, 12 Apr 2021 17:08:59 +0200},
|
| 239 |
+
biburl = {https://dblp.org/rec/conf/slt/Li0ZSCKHHBC021.bib},
|
| 240 |
+
bibsource = {dblp computer science bibliography, https://dblp.org}
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
```
|
| 245 |
+
|
| 246 |
+
or arXiv:
|
| 247 |
+
|
| 248 |
+
```bibtex
|
| 249 |
+
@misc{watanabe2018espnet,
|
| 250 |
+
title={ESPnet: End-to-End Speech Processing Toolkit},
|
| 251 |
+
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
|
| 252 |
+
year={2018},
|
| 253 |
+
eprint={1804.00015},
|
| 254 |
+
archivePrefix={arXiv},
|
| 255 |
+
primaryClass={cs.CL}
|
| 256 |
+
}
|
| 257 |
+
```
|