diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..1e9fc80657746d93ba43a085b734438f44fd6ffc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +DCCRN/A[[:space:]]Convolutional[[:space:]]Recurrent[[:space:]]Neural[[:space:]]Network[[:space:]]for[[:space:]]Real-Time[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text +DCCRN/AIAP[[:space:]]Final[[:space:]]Project.pdf filter=lfs diff=lfs merge=lfs -text +DCCRN/dataset/Speech[[:space:]]Enhancement[[:space:]]for[[:space:]]a[[:space:]]Noise-Robust[[:space:]]Text-to-Speech[[:space:]]Synthesis[[:space:]]System[[:space:]]Using[[:space:]]Deep[[:space:]]Recurrent[[:space:]]Neural[[:space:]]Networks.pdf filter=lfs diff=lfs merge=lfs -text +DCCRN/DCCRN.[[:space:]]Deep[[:space:]]Complex[[:space:]]Convolution[[:space:]]Recurrent[[:space:]]Network[[:space:]]for[[:space:]]Phase-Aware[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text +DCCRN/Performance[[:space:]]comparison[[:space:]]evaluation[[:space:]]of[[:space:]]speech[[:space:]]enhancement[[:space:]]using[[:space:]]various[[:space:]]loss[[:space:]]functions.pdf filter=lfs diff=lfs merge=lfs -text +Distil-DCCRN/Distil-DCCRN.[[:space:]]A[[:space:]]Small-footprint[[:space:]]DCCRN[[:space:]]Leveraging[[:space:]]Feature-based[[:space:]]Knowledge[[:space:]]Distillation[[:space:]]in[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text diff --git a/DCCRN/A Convolutional Recurrent Neural Network for Real-Time Speech Enhancement.pdf b/DCCRN/A Convolutional Recurrent Neural Network for Real-Time Speech Enhancement.pdf new file mode 100644 index 0000000000000000000000000000000000000000..80157a989c1e4c68350787773e14de97d6ac8d4f --- /dev/null +++ b/DCCRN/A Convolutional 
Recurrent Neural Network for Real-Time Speech Enhancement.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9a1d013c558e4177c0d68b5e5c39648b30626c430aa7b57fb9c9c018123538e +size 646378 diff --git a/DCCRN/AIAP Final Project.pdf b/DCCRN/AIAP Final Project.pdf new file mode 100644 index 0000000000000000000000000000000000000000..1fbb655eb257734daa02299b55f6c57fe361e690 --- /dev/null +++ b/DCCRN/AIAP Final Project.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ead31849e4c1e86925f5abb2cd3c57fb57eaea280c763c43fa58696fa08afd81 +size 1246642 diff --git a/DCCRN/DCCRN. Deep Complex Convolution Recurrent Network for Phase-Aware Speech Enhancement.pdf b/DCCRN/DCCRN. Deep Complex Convolution Recurrent Network for Phase-Aware Speech Enhancement.pdf new file mode 100644 index 0000000000000000000000000000000000000000..533ff37066b6186d7833fc93c5b94898a959be09 --- /dev/null +++ b/DCCRN/DCCRN. Deep Complex Convolution Recurrent Network for Phase-Aware Speech Enhancement.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b381ebaadb2b0b94c8d0c498c76878b0a6ae12ba6f686caee5066b4b5cd1cea +size 844298 diff --git a/DCCRN/Performance comparison evaluation of speech enhancement using various loss functions.pdf b/DCCRN/Performance comparison evaluation of speech enhancement using various loss functions.pdf new file mode 100644 index 0000000000000000000000000000000000000000..71bbc08fe9869e174098d2e6983312b641ad3361 --- /dev/null +++ b/DCCRN/Performance comparison evaluation of speech enhancement using various loss functions.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85df2fe0ebbbec1acb4e06d404ddb465228f8f5063facbea4c6101aee305c227 +size 800355 diff --git a/DCCRN/code/DCCRN [Context-Aware-Character-TTS-System].zip b/DCCRN/code/DCCRN [Context-Aware-Character-TTS-System].zip new file mode 100644 index 0000000000000000000000000000000000000000..4d3f428290a5ec4781212967528d7beb1db8a209 --- 
/dev/null +++ b/DCCRN/code/DCCRN [Context-Aware-Character-TTS-System].zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7abc519d46cb25068ec23af18367742f18c2bfb3efa4543e2f47943dea69e406 +size 81376722 diff --git a/DCCRN/code/DCCRN [Wang-Jingrun].zip b/DCCRN/code/DCCRN [Wang-Jingrun].zip new file mode 100644 index 0000000000000000000000000000000000000000..501700e9a5e35cc7cd7a4312db818d4c1551352f --- /dev/null +++ b/DCCRN/code/DCCRN [Wang-Jingrun].zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e18415b943fc9035b78952a40d09985827b3acc3a80018c7a9033570a9a4f90 +size 116505131 diff --git a/DCCRN/code/DCCRN [maggie0830].zip b/DCCRN/code/DCCRN [maggie0830].zip new file mode 100644 index 0000000000000000000000000000000000000000..19236a0deda11ba794cdc5106a6bdfe7e9e953fd --- /dev/null +++ b/DCCRN/code/DCCRN [maggie0830].zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c32593933532b2a5af921bf85f74ef784e9129c139944be4093175950ef49053 +size 573872 diff --git a/DCCRN/code/DCCRN [mahshid1378].zip b/DCCRN/code/DCCRN [mahshid1378].zip new file mode 100644 index 0000000000000000000000000000000000000000..64abbac696d626bc32b3b0f096569cc19bed6a52 --- /dev/null +++ b/DCCRN/code/DCCRN [mahshid1378].zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d4a1cacb0e5ec654048683df723cdb9575571c18693e271cdc42d75a455a0aa +size 8481588 diff --git a/DCCRN/code/DCCRN [shaoyuanyu].zip b/DCCRN/code/DCCRN [shaoyuanyu].zip new file mode 100644 index 0000000000000000000000000000000000000000..4f250aaf0864dfc46e7197d76a1d9f2329398994 --- /dev/null +++ b/DCCRN/code/DCCRN [shaoyuanyu].zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ee118fd02ebb77714e2d7328d7f54eae49555ed4208884c5406283e9fbdc8b +size 2205508 diff --git a/DCCRN/code/DCCRN [wangtianrui].zip b/DCCRN/code/DCCRN [wangtianrui].zip new file mode 100644 index 
0000000000000000000000000000000000000000..b88a404f50e69dc8abb579ef9761d7111df5e30d --- /dev/null +++ b/DCCRN/code/DCCRN [wangtianrui].zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cfb8e8ed35d100a1224a3ceb22b46f95252c263c4ed9b61efff7523b80b3e07 +size 2029618 diff --git a/DCCRN/code/DCCRN [wanliangdaxia].zip b/DCCRN/code/DCCRN [wanliangdaxia].zip new file mode 100644 index 0000000000000000000000000000000000000000..34729f07109f015729cce2ee60bd504727654fc4 --- /dev/null +++ b/DCCRN/code/DCCRN [wanliangdaxia].zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3622ced5e8275edf628c2783d6b4c619ab016d0e64ba41dc0b599d1d91a51bd9 +size 896792 diff --git a/DCCRN/code/DCCRN-2.zip b/DCCRN/code/DCCRN-2.zip new file mode 100644 index 0000000000000000000000000000000000000000..c0115bf02183446f6c07bda79cd6208fcd58838a --- /dev/null +++ b/DCCRN/code/DCCRN-2.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4d8de3e4ec5a0d3036680fa1cf0a0cf94f2e2e6d3b6d71d4dc54ef5e47b2cbb +size 2208993 diff --git a/DCCRN/code/DCCRN-Trial.zip b/DCCRN/code/DCCRN-Trial.zip new file mode 100644 index 0000000000000000000000000000000000000000..7e55005d051de6e822c4f5fa617b31f7941b0092 --- /dev/null +++ b/DCCRN/code/DCCRN-Trial.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a141df4aaea06fefbab0e154f49ba82216b21d6acb4176dbb8585119883fb764 +size 2262879 diff --git a/DCCRN/code/DCCRN-for-DNS5.zip b/DCCRN/code/DCCRN-for-DNS5.zip new file mode 100644 index 0000000000000000000000000000000000000000..3f3ca94ac8acc0603e120d36bfbdb448d94529f6 --- /dev/null +++ b/DCCRN/code/DCCRN-for-DNS5.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd423940ef0309ce8679e50327955d3a035fdea2d16c615c3b4a3e53014a3838 +size 507473469 diff --git a/DCCRN/code/DCCRN-small.zip b/DCCRN/code/DCCRN-small.zip new file mode 100644 index 
0000000000000000000000000000000000000000..3b5dead1cec70e45eb26edcccdf24f435ea1069d --- /dev/null +++ b/DCCRN/code/DCCRN-small.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63c1f0007edebee9f9eea1e3051d0fbf2563cd8a41936180f6c2d2e8dfac3d37 +size 2547481 diff --git a/DCCRN/code/DCCRN-with-various-loss-functions.zip b/DCCRN/code/DCCRN-with-various-loss-functions.zip new file mode 100644 index 0000000000000000000000000000000000000000..153f533593b34a75f7f0f53b10eb877d4ad0c6ff --- /dev/null +++ b/DCCRN/code/DCCRN-with-various-loss-functions.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95f467976fa3768ad3800ef9cdaa1af8a939a3340c2d65d6ac4b896eb3371e35 +size 33941947 diff --git a/DCCRN/code/DCCRN-wncg.zip b/DCCRN/code/DCCRN-wncg.zip new file mode 100644 index 0000000000000000000000000000000000000000..e77262d16f1700812cec22bde300526cc5c63999 --- /dev/null +++ b/DCCRN/code/DCCRN-wncg.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb50b592c67a578442a1b6cac8e6d232c7502b1c977e96a3ff49e041c96ea80c +size 232163 diff --git a/DCCRN/code/DCCRN_Keras.zip b/DCCRN/code/DCCRN_Keras.zip new file mode 100644 index 0000000000000000000000000000000000000000..21bcb8eef44fe10232a1fe902bd82730d4b4c9c8 --- /dev/null +++ b/DCCRN/code/DCCRN_Keras.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bbde2340354d31e7fe3d9bd2e554ebda81c0501ed28c61a339dbe47b6453e1f +size 37331 diff --git a/DCCRN/code/DCCRN_Pytorch.zip b/DCCRN/code/DCCRN_Pytorch.zip new file mode 100644 index 0000000000000000000000000000000000000000..f2c1462efe023983b4485d653f41f82228eb1b75 --- /dev/null +++ b/DCCRN/code/DCCRN_Pytorch.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e685912a3f00fcb8e33fec9b2d99e085fe7aea12e79facfbba0ad3e612d669da +size 110453 diff --git a/DCCRN/code/DNN-based-Speech-Enhancement-in-the-frequency-domain.zip 
b/DCCRN/code/DNN-based-Speech-Enhancement-in-the-frequency-domain.zip new file mode 100644 index 0000000000000000000000000000000000000000..abf13f955cf83b41ca42b48f91e58d79fbaac704 --- /dev/null +++ b/DCCRN/code/DNN-based-Speech-Enhancement-in-the-frequency-domain.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a77d84b30770f6bff0861f7fe1c1cd2578ee8b5a617c5e69f9dffb58cd3c37af +size 410561 diff --git a/DCCRN/code/DeepComplexCRN (original).zip b/DCCRN/code/DeepComplexCRN (original).zip new file mode 100644 index 0000000000000000000000000000000000000000..1f808b7f9d83e76e11686e8029287a54bd208984 --- /dev/null +++ b/DCCRN/code/DeepComplexCRN (original).zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a4a36dcb653dc234f146b8dbc11595d8f53e43a7a971524a3c505996989dc21 +size 114573010 diff --git a/DCCRN/code/I-DCCRN-VAE.zip b/DCCRN/code/I-DCCRN-VAE.zip new file mode 100644 index 0000000000000000000000000000000000000000..67dc6617d46669a912ad9b39f6e995ade80db8cf --- /dev/null +++ b/DCCRN/code/I-DCCRN-VAE.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f15ee047bc618fc54b925a29fc452e2979fec1081f68949f949e031fc5ddf7b9 +size 8232516 diff --git a/DCCRN/code/KD_DCCRN.zip b/DCCRN/code/KD_DCCRN.zip new file mode 100644 index 0000000000000000000000000000000000000000..456c58ad353e8ba7e4a4ffa2a208bdd7bb076122 --- /dev/null +++ b/DCCRN/code/KD_DCCRN.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04a654dcbb5b3117d00afd9546f44bd9ee70fc6bdf4b019a2658b2852a722cb9 +size 41431052 diff --git a/DCCRN/code/Knowledge_distillation.zip b/DCCRN/code/Knowledge_distillation.zip new file mode 100644 index 0000000000000000000000000000000000000000..4e6f17f53ba3922143ceba256654f3056d8b412c --- /dev/null +++ b/DCCRN/code/Knowledge_distillation.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72e5754b08efa7347fb7a4c37ab06363e055936f5105f11eac900c3cc96d86c2 +size 41170858 
diff --git a/DCCRN/code/S-DCCRN.zip b/DCCRN/code/S-DCCRN.zip new file mode 100644 index 0000000000000000000000000000000000000000..a749b6fbd79f0cc79404ee17f2c9ee77904872d3 --- /dev/null +++ b/DCCRN/code/S-DCCRN.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:040de712eb711e07ca8a038d6d1806420122f8acac89e720c2f7c0d666134ce8 +size 119122675 diff --git a/DCCRN/code/SE-DCCRN.zip b/DCCRN/code/SE-DCCRN.zip new file mode 100644 index 0000000000000000000000000000000000000000..3fec4320e5cc34e422238496d224474b83b8505f --- /dev/null +++ b/DCCRN/code/SE-DCCRN.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e57b621edb6ebd89d5e795ed896d6b099b9171b4fafda1da875358cfea26e42 +size 41207805 diff --git a/DCCRN/code/Spatial-DCCRN.zip b/DCCRN/code/Spatial-DCCRN.zip new file mode 100644 index 0000000000000000000000000000000000000000..a399ac4304a820d3c55d4c47da7ce28f0eed8b6e --- /dev/null +++ b/DCCRN/code/Spatial-DCCRN.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0d4b65b8b43f4a8c2f2123eb73d8fdb42bb5f6321a9333145a21e53e37dc92e +size 74811261 diff --git a/DCCRN/code/Speech_Enhancement-DCCRN.zip b/DCCRN/code/Speech_Enhancement-DCCRN.zip new file mode 100644 index 0000000000000000000000000000000000000000..786fd496f16d43407ebefc0ae0c7645f571e90df --- /dev/null +++ b/DCCRN/code/Speech_Enhancement-DCCRN.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2a1b2d9f3d3e04466661cdb740eeacaab40fe037497e29abae31f01913fbf13 +size 83347937 diff --git a/DCCRN/code/dccrn-plus.zip b/DCCRN/code/dccrn-plus.zip new file mode 100644 index 0000000000000000000000000000000000000000..2bb380aa3358a06c53ef9ba9a9163abb0f042552 --- /dev/null +++ b/DCCRN/code/dccrn-plus.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e45c2a9acf8e01cdb8de6d6865c69c3106a172bd14f29990503abdbf9233439 +size 82632252 diff --git a/DCCRN/code/denosising_model [mcaramba563].zip 
b/DCCRN/code/denosising_model [mcaramba563].zip new file mode 100644 index 0000000000000000000000000000000000000000..6eb6bdca37a873dbb055a0433420444f5d809f96 --- /dev/null +++ b/DCCRN/code/denosising_model [mcaramba563].zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eef9ece1e48341e311feb153dcdf2497ddc151a2e93a0a74d92cb45b68f857d7 +size 516491216 diff --git a/DCCRN/code/hf-dccrn.zip b/DCCRN/code/hf-dccrn.zip new file mode 100644 index 0000000000000000000000000000000000000000..3f78b5f5764a12ccbd2c423f3e3c972b15517fa6 --- /dev/null +++ b/DCCRN/code/hf-dccrn.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68a254da33c961777e5be98ef023f6d149aaaaffb43bf33afbce8b932a7ab619 +size 49593 diff --git a/DCCRN/dataset/Speech Enhancement for a Noise-Robust Text-to-Speech Synthesis System Using Deep Recurrent Neural Networks.pdf b/DCCRN/dataset/Speech Enhancement for a Noise-Robust Text-to-Speech Synthesis System Using Deep Recurrent Neural Networks.pdf new file mode 100644 index 0000000000000000000000000000000000000000..52e9400a3396a5c807457064490ed4b36b00dfcc --- /dev/null +++ b/DCCRN/dataset/Speech Enhancement for a Noise-Robust Text-to-Speech Synthesis System Using Deep Recurrent Neural Networks.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30806abb77998dfd47fe9d66d9b069f0974921d1f8734c403f1d765df3280989 +size 10158959 diff --git a/DCCRN/models/DCCRN (Ada312)/.gitattributes b/DCCRN/models/DCCRN (Ada312)/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/DCCRN/models/DCCRN (Ada312)/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs 
merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/DCCRN/models/DCCRN (Ada312)/epoch=44-step=113895.ckpt b/DCCRN/models/DCCRN (Ada312)/epoch=44-step=113895.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..6e84d3cb7601400f93a2f24c3818fbd1778abef8 --- /dev/null +++ b/DCCRN/models/DCCRN (Ada312)/epoch=44-step=113895.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63f1a20e51b1194aed4c18c0305a706d386a9e7d8c8bd425e95805ecee94164b +size 46489036 diff --git a/DCCRN/models/DCCRN (Ada312)/source.txt b/DCCRN/models/DCCRN (Ada312)/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..efc1870d8706d31b241a111216ef5a91bef26ccd --- /dev/null +++ b/DCCRN/models/DCCRN (Ada312)/source.txt @@ -0,0 +1 
@@ +https://huggingface.co/Ada312/DCCRN \ No newline at end of file diff --git a/DCCRN/models/DCCRN (chenxie95)/.gitattributes b/DCCRN/models/DCCRN (chenxie95)/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/DCCRN/models/DCCRN (chenxie95)/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/DCCRN/models/DCCRN (chenxie95)/epoch=44-step=113895.ckpt 
b/DCCRN/models/DCCRN (chenxie95)/epoch=44-step=113895.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..6e84d3cb7601400f93a2f24c3818fbd1778abef8 --- /dev/null +++ b/DCCRN/models/DCCRN (chenxie95)/epoch=44-step=113895.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63f1a20e51b1194aed4c18c0305a706d386a9e7d8c8bd425e95805ecee94164b +size 46489036 diff --git a/DCCRN/models/DCCRN (chenxie95)/source.txt b/DCCRN/models/DCCRN (chenxie95)/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..3088d35762b0811b6a6b295f45315f5d15ce3549 --- /dev/null +++ b/DCCRN/models/DCCRN (chenxie95)/source.txt @@ -0,0 +1,2 @@ + +https://huggingface.co/chenxie95/DCCRN \ No newline at end of file diff --git a/DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/.gitattributes b/DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..dc08351d4dc0732d9c8af04070ced089b201ce2f --- /dev/null +++ b/DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/.gitattributes @@ -0,0 +1,8 @@ +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tar.gz filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text diff --git a/DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/README.md b/DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c55f77cee2b0bbadf769dcf9e05e23a08426aa61 --- /dev/null +++ b/DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/README.md @@ -0,0 +1,74 @@ +--- +tags: +- asteroid +- audio +- DCCRNet +- audio-to-audio +- speech-enhancement +datasets: +- Libri1Mix +- enh_single +license: cc-by-sa-4.0 +--- + +## Asteroid model `JorisCos/DCCRNet_Libri1Mix_enhsignle_16k` + 
+Description: + +This model was trained by Joris Cosentino using the librimix recipe in [Asteroid](https://github.com/asteroid-team/asteroid). +It was trained on the `enh_single` task of the Libri1Mix dataset. + +Training config: + +```yml +data: + n_src: 1 + sample_rate: 16000 + segment: 3 + task: enh_single + train_dir: data/wav16k/min/train-360 + valid_dir: data/wav16k/min/dev +filterbank: + stft_kernel_size: 400 + stft_n_filters: 512 + stft_stride: 100 +masknet: + architecture: DCCRN-CL + n_src: 1 +optim: + lr: 0.001 + optimizer: adam + weight_decay: 1.0e-05 +training: + batch_size: 12 + early_stop: true + epochs: 200 + gradient_clipping: 5 + half_lr: true + num_workers: 4 +``` + + +Results: + +On Libri1Mix min test set : +```yml +si_sdr: 13.329767398333798 +si_sdr_imp: 9.879986092474098 +sdr: 13.87279932997016 +sdr_imp: 10.370136530757103 +sir: Infinity +sir_imp: NaN +sar: 13.87279932997016 +sar_imp: 10.370136530757103 +stoi: 0.9140907015623948 +stoi_imp: 0.11817087802185405 +``` + + +License notice: + +This work "DCCRNet_Libri1Mix_enhsignle_16k" is a derivative of [LibriSpeech ASR corpus](http://www.openslr.org/12) by Vassil Panayotov, +used under [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/); of The WSJ0 Hipster Ambient Mixtures +dataset by [Whisper.ai](http://wham.whisper.ai/), used under [CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/) (Research only). 
+"DCCRNet_Libri1Mix_enhsignle_16k" is licensed under [Attribution-ShareAlike 3.0 Unported](https://creativecommons.org/licenses/by-sa/3.0/) by Joris Cosentino \ No newline at end of file diff --git a/DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/pytorch_model.bin b/DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f4797ae4178159b4fd42fcd3b4444a5c6eeca9d --- /dev/null +++ b/DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1f519207604649cd90586e85adf2e93c4f4b26b216f0a71635013a90e64d939 +size 16409109 diff --git a/DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/source.txt b/DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..b5353b932b00900266c618ea2642b7ba629ba36b --- /dev/null +++ b/DCCRN/models/DCCRNet_Libri1Mix_enhsingle_16k/source.txt @@ -0,0 +1 @@ +https://huggingface.co/JorisCos/DCCRNet_Libri1Mix_enhsingle_16k \ No newline at end of file diff --git a/DCCRN/models/SE-DCCRN/base_model.pth b/DCCRN/models/SE-DCCRN/base_model.pth new file mode 100644 index 0000000000000000000000000000000000000000..17da52a647443b8122dab37575e416f6357af1e0 --- /dev/null +++ b/DCCRN/models/SE-DCCRN/base_model.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd710915fc1b4897f5ba59de66ee1b185449569a0375dc3175c427632c761d97 +size 14740557 diff --git a/DCCRN/models/SE-DCCRN/lite_v1_model.pth b/DCCRN/models/SE-DCCRN/lite_v1_model.pth new file mode 100644 index 0000000000000000000000000000000000000000..ebf92d29c72e6dd349323b262635f4e76e720351 --- /dev/null +++ b/DCCRN/models/SE-DCCRN/lite_v1_model.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b9edf5a6f0c28074f93ef0416dabbe126b2e0fdd480aabdecd14f38bb4f1d69 +size 3731533 diff --git a/DCCRN/models/SE-DCCRN/lite_v1d_model.pth 
b/DCCRN/models/SE-DCCRN/lite_v1d_model.pth new file mode 100644 index 0000000000000000000000000000000000000000..22a06170b45770fbe59c85322ce3c00a96a9452c --- /dev/null +++ b/DCCRN/models/SE-DCCRN/lite_v1d_model.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a04fc383e680a6fa4df001d2242eec003f8a442e1f15f1292c931b0cf3cbdf7f +size 3731533 diff --git a/DCCRN/models/SE-DCCRN/source.txt b/DCCRN/models/SE-DCCRN/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..fae22028b8540c3d5eacb54d9d172a5f4b616272 --- /dev/null +++ b/DCCRN/models/SE-DCCRN/source.txt @@ -0,0 +1 @@ +https://github.com/sc0ttms/SE-DCCRN \ No newline at end of file diff --git a/DCCRN/models/SE-DCCRN/summary.md b/DCCRN/models/SE-DCCRN/summary.md new file mode 100644 index 0000000000000000000000000000000000000000..75657d4731e520b6d69eb7301639d256e345d836 --- /dev/null +++ b/DCCRN/models/SE-DCCRN/summary.md @@ -0,0 +1,9 @@ +# summary + +| dataset/model | Params(M) | Flops(G) | SI-SDR | STOI | WB_PESQ | NB_PESQ | +| :-----------: | :-------: | :------: | :--------------: | :---------------: | :--------------: | :--------------: | +| dataset_train | - | - | 9.18826423150635 | 0.821936784950485 | 1.73518149405717 | 2.42768483359366 | +| dataset_valid | - | - | 9.15858130001896 | 0.820208461517329 | 1.73266624887353 | 2.41471458952858 | +| dataset_test | - | - | 9.33677852833749 | 0.821225098997534 | 1.73727285225124 | 2.42875709109791 | +| base_model | 3.671 | 35.253 | 18.2092751119167 | 0.895062729329303 | 3.02301043262067 | 3.44576535798074 | +| lite_v1_model | 0.921 | 8.872 | 17.1342839493116 | 0.882091315376406 | 2.81674490229713 | 3.29441381774482 | diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/.gitattributes b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..bb2672d9c697d5d4d5963c1879083d8e3ea23814 --- /dev/null +++ 
b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/.gitattributes @@ -0,0 +1,27 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/README.md b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ff71d3f3913ed543a6acfd80dbafd5f8e0eb9f38 --- /dev/null +++ b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/README.md @@ -0,0 +1,257 @@ +--- +tags: +- espnet +- audio +- audio-to-audio +language: en +datasets: +- dns_ins20 +license: cc-by-4.0 +--- + +## ESPnet2 ENH model + +### `Johnson-Lsx/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw` + +This model was trained by Shaoxiong Lin using dns_ins20 recipe in 
[espnet](https://github.com/espnet/espnet/). + +### Demo: How to use in ESPnet2 + +```bash +cd espnet +git checkout 4538462eb7dc6a6b858adcbd3a526fb8173d6f73 +pip install -e . +cd egs2/dns_ins20/enh1 +./run.sh --skip_data_prep false --skip_train true --download_model Johnson-Lsx/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw +``` + + +# RESULTS +## Environments +- date: `Thu Feb 10 23:11:40 CST 2022` +- python version: `3.8.12 (default, Oct 12 2021, 13:49:34) [GCC 7.5.0]` +- espnet version: `espnet 0.10.5a1` +- pytorch version: `pytorch 1.9.1` +- Git hash: `6f66283b9eed7b0d5e5643feb18d8f60118a4afc` + - Commit date: `Mon Dec 13 15:30:29 2021 +0800` + + +## enh_train_enh_dccrn_batch_size_raw + +config: ./conf/tuning/train_enh_dccrn_batch_size.yaml + +|dataset|STOI|SAR|SDR|SIR| +|---|---|---|---|---| +|enhanced_cv_synthetic|0.98|24.69|24.69|0.00| +|enhanced_tt_synthetic_no_reverb|0.96|17.69|17.69|0.00| +|enhanced_tt_synthetic_with_reverb|0.81|10.45|10.45|0.00| + +## ENH config + +
expand + +``` +config: ./conf/tuning/train_enh_dccrn_batch_size.yaml +print_config: false +log_level: INFO +dry_run: false +iterator_type: chunk +output_dir: exp/enh_train_enh_dccrn_batch_size_raw +ngpu: 1 +seed: 0 +num_workers: 4 +num_att_plot: 3 +dist_backend: nccl +dist_init_method: env:// +dist_world_size: 4 +dist_rank: 0 +local_rank: 0 +dist_master_addr: localhost +dist_master_port: 46366 +dist_launcher: null +multiprocessing_distributed: true +unused_parameters: false +sharded_ddp: false +cudnn_enabled: true +cudnn_benchmark: false +cudnn_deterministic: true +collect_stats: false +write_collected_feats: false +max_epoch: 100 +patience: 10 +val_scheduler_criterion: +- valid +- loss +early_stopping_criterion: +- valid +- loss +- min +best_model_criterion: +- - valid + - si_snr + - max +- - valid + - loss + - min +keep_nbest_models: 1 +nbest_averaging_interval: 0 +grad_clip: 5.0 +grad_clip_type: 2.0 +grad_noise: false +accum_grad: 1 +no_forward_run: false +resume: true +train_dtype: float32 +use_amp: false +log_interval: null +use_tensorboard: true +use_wandb: false +wandb_project: null +wandb_id: null +wandb_entity: null +wandb_name: null +wandb_model_log_interval: -1 +detect_anomaly: false +pretrain_path: null +init_param: [] +ignore_init_mismatch: false +freeze_param: [] +num_iters_per_epoch: null +batch_size: 32 +valid_batch_size: null +batch_bins: 1000000 +valid_batch_bins: null +train_shape_file: +- exp/enh_stats_16k/train/speech_mix_shape +- exp/enh_stats_16k/train/speech_ref1_shape +- exp/enh_stats_16k/train/noise_ref1_shape +valid_shape_file: +- exp/enh_stats_16k/valid/speech_mix_shape +- exp/enh_stats_16k/valid/speech_ref1_shape +- exp/enh_stats_16k/valid/noise_ref1_shape +batch_type: folded +valid_batch_type: null +fold_length: +- 80000 +- 80000 +- 80000 +sort_in_batch: descending +sort_batch: descending +multiple_iterator: false +chunk_length: 64000 +chunk_shift_ratio: 0.5 +num_cache_chunks: 1024 +train_data_path_and_name_and_type: +- - 
dump/raw/tr_synthetic/wav.scp + - speech_mix + - sound +- - dump/raw/tr_synthetic/spk1.scp + - speech_ref1 + - sound +- - dump/raw/tr_synthetic/noise1.scp + - noise_ref1 + - sound +valid_data_path_and_name_and_type: +- - dump/raw/cv_synthetic/wav.scp + - speech_mix + - sound +- - dump/raw/cv_synthetic/spk1.scp + - speech_ref1 + - sound +- - dump/raw/cv_synthetic/noise1.scp + - noise_ref1 + - sound +allow_variable_data_keys: false +max_cache_size: 0.0 +max_cache_fd: 32 +valid_max_cache_size: null +optim: adam +optim_conf: + lr: 0.001 + eps: 1.0e-08 + weight_decay: 1.0e-07 +scheduler: reducelronplateau +scheduler_conf: + mode: min + factor: 0.7 + patience: 1 +init: null +model_conf: + loss_type: si_snr +criterions: + # The first criterion + - name: si_snr + conf: + eps: 1.0e-7 + # the wrapper for the current criterion + # for single-talker case, we simply use fixed_order wrapper + wrapper: fixed_order + wrapper_conf: + weight: 1.0 +use_preprocessor: false +encoder: stft +encoder_conf: + n_fft: 512 + win_length: 400 + hop_length: 100 +separator: dccrn +separator_conf: {} +decoder: stft +decoder_conf: + n_fft: 512 + win_length: 400 + hop_length: 100 +required: +- output_dir +version: 0.10.5a1 +distributed: true +``` + +
+ + + +### Citing ESPnet + +```BibTex +@inproceedings{watanabe2018espnet, + author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai}, + title={{ESPnet}: End-to-End Speech Processing Toolkit}, + year={2018}, + booktitle={Proceedings of Interspeech}, + pages={2207--2211}, + doi={10.21437/Interspeech.2018-1456}, + url={http://dx.doi.org/10.21437/Interspeech.2018-1456} +} + + +@inproceedings{ESPnet-SE, + author = {Chenda Li and Jing Shi and Wangyou Zhang and Aswin Shanmugam Subramanian and Xuankai Chang and + Naoyuki Kamo and Moto Hira and Tomoki Hayashi and Christoph B{\"{o}}ddeker and Zhuo Chen and Shinji Watanabe}, + title = {ESPnet-SE: End-To-End Speech Enhancement and Separation Toolkit Designed for {ASR} Integration}, + booktitle = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021}, + pages = {785--792}, + publisher = {{IEEE}}, + year = {2021}, + url = {https://doi.org/10.1109/SLT48900.2021.9383615}, + doi = {10.1109/SLT48900.2021.9383615}, + timestamp = {Mon, 12 Apr 2021 17:08:59 +0200}, + biburl = {https://dblp.org/rec/conf/slt/Li0ZSCKHHBC021.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +``` + +or arXiv: + +```bibtex +@misc{watanabe2018espnet, + title={ESPnet: End-to-End Speech Processing Toolkit}, + author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai}, + year={2018}, + eprint={1804.00015}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_stats_16k/train/feats_stats.npz
b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_stats_16k/train/feats_stats.npz new file mode 100644 index 0000000000000000000000000000000000000000..b3897d1d82a1399f52dfd8587780a0bb0f204d0e --- /dev/null +++ b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_stats_16k/train/feats_stats.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7656503aec842704079093ee013b03d3cd2369bd5003e31474f7bd9dba1ab66a +size 778 diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/48epoch.pth b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/48epoch.pth new file mode 100644 index 0000000000000000000000000000000000000000..6e17c0891dc051e81b18872004c3557341722978 --- /dev/null +++ b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/48epoch.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbb0b8e7643ca5fd70afb293f74d26ebd0fcae2f36df40b649f622371636a17c +size 14742669 diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/RESULTS.md b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/RESULTS.md new file mode 100644 index 0000000000000000000000000000000000000000..10088198c11b5ed4351b5e5f8a1f6fd01909d929 --- /dev/null +++ b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/RESULTS.md @@ -0,0 +1,21 @@ + +# RESULTS +## Environments +- date: `Thu Feb 10 23:11:40 CST 2022` +- python version: `3.8.12 (default, Oct 12 2021, 13:49:34) [GCC 7.5.0]` +- espnet version: `espnet 0.10.5a1` +- pytorch version: `pytorch 1.9.1` +- Git hash: `6f66283b9eed7b0d5e5643feb18d8f60118a4afc` + - Commit date: `Mon Dec 13 15:30:29 2021 +0800` + + +## 
enh_train_enh_dccrn_batch_size_raw + +config: ./conf/tuning/train_enh_dccrn_batch_size.yaml + +|dataset|STOI|SAR|SDR|SIR| +|---|---|---|---|---| +|enhanced_cv_synthetic|0.98|24.69|24.69|0.00| +|enhanced_tt_synthetic_no_reverb|0.96|17.69|17.69|0.00| +|enhanced_tt_synthetic_with_reverb|0.81|10.45|10.45|0.00| + diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/config.yaml b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84f3eb56d462a142c89182bcb22d06a41399d34b --- /dev/null +++ b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/config.yaml @@ -0,0 +1,154 @@ +config: ./conf/tuning/train_enh_dccrn_batch_size.yaml +print_config: false +log_level: INFO +dry_run: false +iterator_type: chunk +output_dir: exp/enh_train_enh_dccrn_batch_size_raw +ngpu: 1 +seed: 0 +num_workers: 4 +num_att_plot: 3 +dist_backend: nccl +dist_init_method: env:// +dist_world_size: 4 +dist_rank: 0 +local_rank: 0 +dist_master_addr: localhost +dist_master_port: 46366 +dist_launcher: null +multiprocessing_distributed: true +unused_parameters: false +sharded_ddp: false +cudnn_enabled: true +cudnn_benchmark: false +cudnn_deterministic: true +collect_stats: false +write_collected_feats: false +max_epoch: 100 +patience: 10 +val_scheduler_criterion: +- valid +- loss +early_stopping_criterion: +- valid +- loss +- min +best_model_criterion: +- - valid + - si_snr + - max +- - valid + - loss + - min +keep_nbest_models: 1 +nbest_averaging_interval: 0 +grad_clip: 5.0 +grad_clip_type: 2.0 +grad_noise: false +accum_grad: 1 +no_forward_run: false +resume: true +train_dtype: float32 +use_amp: false +log_interval: null +use_tensorboard: true +use_wandb: false +wandb_project: null +wandb_id: null +wandb_entity: null +wandb_name: null +wandb_model_log_interval: -1 
+detect_anomaly: false +pretrain_path: null +init_param: [] +ignore_init_mismatch: false +freeze_param: [] +num_iters_per_epoch: null +batch_size: 32 +valid_batch_size: null +batch_bins: 1000000 +valid_batch_bins: null +train_shape_file: +- exp/enh_stats_16k/train/speech_mix_shape +- exp/enh_stats_16k/train/speech_ref1_shape +- exp/enh_stats_16k/train/noise_ref1_shape +valid_shape_file: +- exp/enh_stats_16k/valid/speech_mix_shape +- exp/enh_stats_16k/valid/speech_ref1_shape +- exp/enh_stats_16k/valid/noise_ref1_shape +batch_type: folded +valid_batch_type: null +fold_length: +- 80000 +- 80000 +- 80000 +sort_in_batch: descending +sort_batch: descending +multiple_iterator: false +chunk_length: 64000 +chunk_shift_ratio: 0.5 +num_cache_chunks: 1024 +train_data_path_and_name_and_type: +- - dump/raw/tr_synthetic/wav.scp + - speech_mix + - sound +- - dump/raw/tr_synthetic/spk1.scp + - speech_ref1 + - sound +- - dump/raw/tr_synthetic/noise1.scp + - noise_ref1 + - sound +valid_data_path_and_name_and_type: +- - dump/raw/cv_synthetic/wav.scp + - speech_mix + - sound +- - dump/raw/cv_synthetic/spk1.scp + - speech_ref1 + - sound +- - dump/raw/cv_synthetic/noise1.scp + - noise_ref1 + - sound +allow_variable_data_keys: false +max_cache_size: 0.0 +max_cache_fd: 32 +valid_max_cache_size: null +optim: adam +optim_conf: + lr: 0.001 + eps: 1.0e-08 + weight_decay: 1.0e-07 +scheduler: reducelronplateau +scheduler_conf: + mode: min + factor: 0.7 + patience: 1 +init: null +model_conf: + loss_type: si_snr +criterions: + # The first criterion + - name: si_snr + conf: + eps: 1.0e-7 + # the wrapper for the current criterion + # for single-talker case, we simply use fixed_order wrapper + wrapper: fixed_order + wrapper_conf: + weight: 1.0 +use_preprocessor: false +encoder: stft +encoder_conf: + n_fft: 512 + win_length: 400 + hop_length: 100 +separator: dccrn +separator_conf: {} +decoder: stft +decoder_conf: + n_fft: 512 + win_length: 400 + hop_length: 100 +required: +- output_dir +version:
0.10.5a1 +distributed: true diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/backward_time.png b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/backward_time.png new file mode 100644 index 0000000000000000000000000000000000000000..2e313c2c877bd095de7b440761586d6e1ae482d0 Binary files /dev/null and b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/backward_time.png differ diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/forward_time.png b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/forward_time.png new file mode 100644 index 0000000000000000000000000000000000000000..41f565cc21ca8ef55eabd810249a4bb6a07b74a5 Binary files /dev/null and b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/forward_time.png differ diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/gpu_max_cached_mem_GB.png b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/gpu_max_cached_mem_GB.png new file mode 100644 index 0000000000000000000000000000000000000000..7e9f5bf9192ce7a17ba9aa660585de36a72e6298 Binary files /dev/null and b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/gpu_max_cached_mem_GB.png differ diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/iter_time.png b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/iter_time.png new file mode 100644 index 
0000000000000000000000000000000000000000..b5704f9a3645d0075d6cf50b68db1970055d629e Binary files /dev/null and b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/iter_time.png differ diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/loss.png b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/loss.png new file mode 100644 index 0000000000000000000000000000000000000000..0abcc4e3893043278fefc0314cd7fbca4ca5222f Binary files /dev/null and b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/loss.png differ diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/optim0_lr0.png b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/optim0_lr0.png new file mode 100644 index 0000000000000000000000000000000000000000..e6a7153c084ba8bd5676dd30ee6a669ced84fb60 Binary files /dev/null and b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/optim0_lr0.png differ diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/optim_step_time.png b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/optim_step_time.png new file mode 100644 index 0000000000000000000000000000000000000000..dd7fb9e1747beb29a6a8c940eef701e7c266e2f0 Binary files /dev/null and b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/optim_step_time.png differ diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/si_snr.png 
b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/si_snr.png new file mode 100644 index 0000000000000000000000000000000000000000..7cac6c3f21e69caffda69845ec2396b62698dd69 Binary files /dev/null and b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/si_snr.png differ diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/train_time.png b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/train_time.png new file mode 100644 index 0000000000000000000000000000000000000000..23615e7d10ed7b7ea1f6eae7628d978ea98f0162 Binary files /dev/null and b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/exp/enh_train_enh_dccrn_batch_size_raw/images/train_time.png differ diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/meta.yaml b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/meta.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e532c9fc0c0b96517e2a91725fba52c06a4f9745 --- /dev/null +++ b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/meta.yaml @@ -0,0 +1,8 @@ +espnet: 0.10.7a1 +files: + model_file: exp/enh_train_enh_dccrn_batch_size_raw/48epoch.pth +python: "3.8.12 (default, Oct 12 2021, 13:49:34) \n[GCC 7.5.0]" +timestamp: 1646303990.064911 +torch: 1.9.1 +yaml_files: + train_config: exp/enh_train_enh_dccrn_batch_size_raw/config.yaml diff --git a/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/source.txt b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..38622918261654d84cac953901b916fb0dd7c44a --- /dev/null +++ b/DCCRN/models/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw/source.txt @@ -0,0 +1 @@ 
+https://huggingface.co/Johnson-Lsx/Shaoxiong_Lin_dns_ins20_enh_enh_train_enh_dccrn_raw \ No newline at end of file diff --git a/DCCRN/models/denosising_model (mcaramba563)/dccrn_model.pt b/DCCRN/models/denosising_model (mcaramba563)/dccrn_model.pt new file mode 100644 index 0000000000000000000000000000000000000000..b94c9ee4e59d93ba7174709afe6c37591eb74ba1 --- /dev/null +++ b/DCCRN/models/denosising_model (mcaramba563)/dccrn_model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a1eca68f1e7de92689bba5ac992792e06b1782dad345d42ce2a78c9b77e4ce1 +size 106199766 diff --git a/DCCRN/models/denosising_model (mcaramba563)/source.txt b/DCCRN/models/denosising_model (mcaramba563)/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..edf302a617ac54441792bdbe71f644b88a6004d4 --- /dev/null +++ b/DCCRN/models/denosising_model (mcaramba563)/source.txt @@ -0,0 +1 @@ +https://github.com/mcaramba563/denosising_model \ No newline at end of file diff --git a/DCCRN/models/mayavoz-dccrn-valentini-28spk/.gitattributes b/DCCRN/models/mayavoz-dccrn-valentini-28spk/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..c7d9f3332a950355d5a77d85000f05e6f45435ea --- /dev/null +++ b/DCCRN/models/mayavoz-dccrn-valentini-28spk/.gitattributes @@ -0,0 +1,34 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx 
filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/DCCRN/models/mayavoz-dccrn-valentini-28spk/README.md b/DCCRN/models/mayavoz-dccrn-valentini-28spk/README.md new file mode 100644 index 0000000000000000000000000000000000000000..154df8298fab5ecf322016157858e08cd1bccbe1 --- /dev/null +++ b/DCCRN/models/mayavoz-dccrn-valentini-28spk/README.md @@ -0,0 +1,3 @@ +--- +license: apache-2.0 +--- diff --git a/DCCRN/models/mayavoz-dccrn-valentini-28spk/config.yaml b/DCCRN/models/mayavoz-dccrn-valentini-28spk/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12942180c3831ce7cd5172b15cd684aebc58ccd9 --- /dev/null +++ b/DCCRN/models/mayavoz-dccrn-valentini-28spk/config.yaml @@ -0,0 +1,106 @@ +model: + _target_: mayavoz.models.dccrn.DCCRN + num_channels: 1 + sampling_rate: 16000 + complex_lstm: true + complex_norm: true + complex_relu: true + masking_mode: E + encoder_decoder: + initial_output_channels: 32 + depth: 6 + kernel_size: 5 + growth_factor: 2 + stride: 2 + padding: 2 + output_padding: 1 + lstm: + num_layers: 2 + hidden_size: 256 + stft: + window_len: 400 + hop_size: 100 + nfft: 512 +dataset: + _target_: mayavoz.data.dataset.MayaDataset + name: vctk + 
root_dir: /scratch/c.sistc3/DS_10283_2791 + duration: 2 + stride: 1 + sampling_rate: 16000 + batch_size: 32 + min_valid_minutes: 25 + files: + train_clean: clean_trainset_28spk_wav + test_clean: clean_testset_wav + train_noisy: noisy_trainset_28spk_wav + test_noisy: noisy_testset_wav +optimizer: + _target_: torch.optim.Adam + lr: 0.001 + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + weight_decay: 0 + amsgrad: false +hyperparameters: + loss: si-snr + metric: + - stoi + - pesq + lr: 0.001 + ReduceLr_patience: 10 + Early_stop: false + ReduceLr_factor: 0.5 + min_lr: 1.0e-07 + EarlyStopping_patience: 10 +trainer: + _target_: pytorch_lightning.Trainer + accelerator: gpu + accumulate_grad_batches: 1 + amp_backend: native + auto_lr_find: false + auto_scale_batch_size: false + auto_select_gpus: true + benchmark: false + check_val_every_n_epoch: 1 + detect_anomaly: false + deterministic: false + devices: 1 + enable_checkpointing: true + enable_model_summary: true + enable_progress_bar: true + fast_dev_run: false + gpus: null + gradient_clip_val: 0 + gradient_clip_algorithm: norm + ipus: null + limit_predict_batches: 1.0 + limit_test_batches: 1.0 + limit_train_batches: 1.0 + limit_val_batches: 1.0 + log_every_n_steps: 50 + max_epochs: 100 + max_steps: -1 + max_time: null + min_epochs: 1 + min_steps: null + move_metrics_to_cpu: false + multiple_trainloader_mode: max_size_cycle + num_nodes: 1 + num_processes: 1 + num_sanity_val_steps: 2 + overfit_batches: 0.0 + precision: 32 + profiler: null + reload_dataloaders_every_n_epochs: 0 + replace_sampler_ddp: true + strategy: null + sync_batchnorm: false + tpu_cores: null + track_grad_norm: -1 + val_check_interval: 1.0 +mlflow: + experiment_name: shahules/mayavoz + run_name: DCCRN-VCTK diff --git a/DCCRN/models/mayavoz-dccrn-valentini-28spk/pytorch_model.ckpt b/DCCRN/models/mayavoz-dccrn-valentini-28spk/pytorch_model.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..2f799629c48373896a29362d428b269ff52232a7 --- 
/dev/null +++ b/DCCRN/models/mayavoz-dccrn-valentini-28spk/pytorch_model.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d32eb4e89a9dda820026695dff4c08149b81858b4262e5784687e0bbdc2b3c7 +size 46555717 diff --git a/DCCRN/models/mayavoz-dccrn-valentini-28spk/source.txt b/DCCRN/models/mayavoz-dccrn-valentini-28spk/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..b5ba3fbf4e919f6f91581d0398a32d4b950aeb92 --- /dev/null +++ b/DCCRN/models/mayavoz-dccrn-valentini-28spk/source.txt @@ -0,0 +1 @@ +https://huggingface.co/shahules786/mayavoz-dccrn-valentini-28spk \ No newline at end of file diff --git a/Distil-DCCRN/Distil-DCCRN. A Small-footprint DCCRN Leveraging Feature-based Knowledge Distillation in Speech Enhancement.pdf b/Distil-DCCRN/Distil-DCCRN. A Small-footprint DCCRN Leveraging Feature-based Knowledge Distillation in Speech Enhancement.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3490ae5e3266b3497d5447d738089cfe81fd22c4 --- /dev/null +++ b/Distil-DCCRN/Distil-DCCRN. A Small-footprint DCCRN Leveraging Feature-based Knowledge Distillation in Speech Enhancement.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b1981fd816dfb1caa075787552b4f6f165eb2ea33d5b2bedcdd2223f8b42465 +size 355349 diff --git a/Distil-DCCRN/code/Distil_DCCRN_demo.zip b/Distil-DCCRN/code/Distil_DCCRN_demo.zip new file mode 100644 index 0000000000000000000000000000000000000000..2e6ee7c3478ae3ad5b187f9cfaee95c8cb028e48 --- /dev/null +++ b/Distil-DCCRN/code/Distil_DCCRN_demo.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0adbb3c7ab78b76cffa5e7f1f1b976ee8fc0e2d7069f3c81e29080988172d62 +size 189242762