MetricGAN (code, colab, models, paper)
Browse files- .gitattributes +5 -0
- CGA-MGAN. Metric GAN Based on Convolution-Augmented Gated Attention for Speech Enhancement.pdf +3 -0
- MetricGAN+. An Improved Version of MetricGAN for Speech Enhancement.pdf +3 -0
- MetricGAN-OKD. Multi-Metric Optimization of MetricGAN via Online Knowledge Distillation for Speech Enhancement.pdf +3 -0
- MetricGAN. Generative Adversarial Networks based Black-box Metric Scores Optimization for Speech Enhancement.pdf +3 -0
- code/Audio_procesing.zip +3 -0
- code/Conv-TasNet-MetricGAN-Multi-Discriminator.zip +3 -0
- code/Dereverb_MetricGAN-U.zip +3 -0
- code/MetricGAN-KAN.zip +3 -0
- code/MetricGAN-OKD [u-hyszk] +1.zip +3 -0
- code/MetricGAN-OKD.zip +3 -0
- code/MetricGAN-Plus.zip +3 -0
- code/MetricGAN-Reloaded.zip +3 -0
- code/MetricGAN-plus-pytorch.zip +3 -0
- code/MetricGAN.zip +3 -0
- code/MetricGAN_v1.zip +3 -0
- code/SpeechBrain_MetricGAN.zip +3 -0
- code/Vk_MetricGan.zip +3 -0
- code/metricgan-plus-minus.zip +3 -0
- code/speech-enhancement.zip +3 -0
- colab/SpeechEnhancement.ipynb +0 -0
- models/MetricGAN-OKD/1_PE_CS_Table2.pth +3 -0
- models/MetricGAN-OKD/2_CS_PE_Table2.pth +3 -0
- models/MetricGAN-OKD/3_PE_CS+CB+CO_Table3.pth +3 -0
- models/MetricGAN-OKD/4_PE_CS+CB_Table4.pth +3 -0
- models/MetricGAN-OKD/MetricGAN-OKD-1.weights.zip +3 -0
- models/MetricGAN-OKD/source.txt +1 -0
- models/MetricGAN-Reloaded/README.md +11 -0
- models/MetricGAN-Reloaded/checkpt-dis-8.pt +3 -0
- models/MetricGAN-Reloaded/checkpt-disopt-8.pt +3 -0
- models/MetricGAN-Reloaded/checkpt-gen-8.pt +3 -0
- models/MetricGAN-Reloaded/checkpt-genopt-8.pt +3 -0
- models/MetricGAN-Reloaded/epoch7.zip +3 -0
- models/MetricGAN-Reloaded/npairs_8.pkl +3 -0
- models/MetricGAN-Reloaded/source.txt +1 -0
- models/metricgan-plus-pytorch/CSIG-GAN_trial1.pth +3 -0
- models/metricgan-plus-pytorch/PESQ-GAN_trial1.pth +3 -0
- models/metricgan-plus-pytorch/PESQ-GAN_trial2.pth +3 -0
- models/metricgan-plus-pytorch/PESQ-GAN_trial3.pth +3 -0
- models/metricgan-plus-pytorch/source.txt +1 -0
- models/metricgan-plus-voicebank/.gitattributes +16 -0
- models/metricgan-plus-voicebank/README.md +132 -0
- models/metricgan-plus-voicebank/config.json +3 -0
- models/metricgan-plus-voicebank/enhance_model.ckpt +3 -0
- models/metricgan-plus-voicebank/example.wav +3 -0
- models/metricgan-plus-voicebank/hyperparams.yaml +40 -0
- models/metricgan-plus-voicebank/source.txt +1 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
CGA-MGAN.[[:space:]]Metric[[:space:]]GAN[[:space:]]Based[[:space:]]on[[:space:]]Convolution-Augmented[[:space:]]Gated[[:space:]]Attention[[:space:]]for[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
MetricGAN-OKD.[[:space:]]Multi-Metric[[:space:]]Optimization[[:space:]]of[[:space:]]MetricGAN[[:space:]]via[[:space:]]Online[[:space:]]Knowledge[[:space:]]Distillation[[:space:]]for[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
MetricGAN.[[:space:]]Generative[[:space:]]Adversarial[[:space:]]Networks[[:space:]]based[[:space:]]Black-box[[:space:]]Metric[[:space:]]Scores[[:space:]]Optimization[[:space:]]for[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
MetricGAN+.[[:space:]]An[[:space:]]Improved[[:space:]]Version[[:space:]]of[[:space:]]MetricGAN[[:space:]]for[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
models/metricgan-plus-voicebank/example.wav filter=lfs diff=lfs merge=lfs -text
|
CGA-MGAN. Metric GAN Based on Convolution-Augmented Gated Attention for Speech Enhancement.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c52c8a95bc76a9e07d6f381d28e82da5fff1036df7a5f5af1d1e11b212f12b2f
|
| 3 |
+
size 3329413
|
MetricGAN+. An Improved Version of MetricGAN for Speech Enhancement.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d67b76f70f41c5892090d1b407b4731cc3d0000cfee118aa23230fad73739aad
|
| 3 |
+
size 987063
|
MetricGAN-OKD. Multi-Metric Optimization of MetricGAN via Online Knowledge Distillation for Speech Enhancement.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1dffeb81749c0ef46a39b8d380f7d21921c801549c2e8b00e1663bd46704507a
|
| 3 |
+
size 2827824
|
MetricGAN. Generative Adversarial Networks based Black-box Metric Scores Optimization for Speech Enhancement.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb5342f6197c6e5fea3440a5fac0cb1ec955713b7ab5393707fa05df9e959dcc
|
| 3 |
+
size 3092851
|
code/Audio_procesing.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9006bb21dd71002bb18581cf42267748229dbc638c9be3d5057695e0a9c9824a
|
| 3 |
+
size 362533
|
code/Conv-TasNet-MetricGAN-Multi-Discriminator.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4dc204a4c94637fcf5efaa21463b927b3451ffb35ba89705ff40e3c7a380937
|
| 3 |
+
size 517306083
|
code/Dereverb_MetricGAN-U.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6b3a8d73c2f24bd66d2c1c7e0ba47b59018d79a0bd50fe4f77e529fdb32ee60
|
| 3 |
+
size 315373
|
code/MetricGAN-KAN.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:326b4a88b72067ec2a9692e3ff1c7bcf689230e9ec2935e6d7960037ea885aa7
|
| 3 |
+
size 299490
|
code/MetricGAN-OKD [u-hyszk] +1.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56777dde4600042d83fc130da1c64f95b8600c9c2537ae2a0deb13e26115d0c8
|
| 3 |
+
size 102434
|
code/MetricGAN-OKD.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a626c8feeeca38d93a50e66e9fe0eb17cbf1456afa6eedf0f16207439b1454c
|
| 3 |
+
size 85140837
|
code/MetricGAN-Plus.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:852cd71426ce6b3a485fec4d2aeee819efcab56c1fc376f5f363c3c131e25357
|
| 3 |
+
size 14651401
|
code/MetricGAN-Reloaded.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1abdbab5a541f727ed2521661cfa54b347dbc32801d4a254710aaa492530b717
|
| 3 |
+
size 3826437521
|
code/MetricGAN-plus-pytorch.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70254879aadba4576007564ff18a0b6365ee6af398ff0f241f7fef7c560abd96
|
| 3 |
+
size 97064073
|
code/MetricGAN.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6b2aeafd337cacb3ff719706e368925e8b4311e796b693810e4aae3b8e7ebec
|
| 3 |
+
size 5340258
|
code/MetricGAN_v1.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43c01492f0d5059c5674d67020215a6387e58de8d52dbaa0f5f3ef7c6dfd30d4
|
| 3 |
+
size 60974
|
code/SpeechBrain_MetricGAN.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c304a093673a90de779a1280f10d95a546b4574e8cb568b30f0ebfc9412b1dc5
|
| 3 |
+
size 33691
|
code/Vk_MetricGan.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0179ede8b7d55a385ea08dbaf613dc0be221b9ea0177673e2093d4e0cf7cc708
|
| 3 |
+
size 1711799
|
code/metricgan-plus-minus.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35e5de1ba2b63d68a50a172ed90159de7a7525960bcaa4d2233da4c56d9b64d9
|
| 3 |
+
size 53530
|
code/speech-enhancement.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c5a8447a4a8ad8ff5443b7e7bb4a0d3975fef59d90dffb926dbf051220f7b84
|
| 3 |
+
size 34450
|
colab/SpeechEnhancement.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
models/MetricGAN-OKD/1_PE_CS_Table2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4075b22087d30fd4dc01b96411d35d4e6a62a8ddf5bd316e9e6eb192b1553fcc
|
| 3 |
+
size 26117151
|
models/MetricGAN-OKD/2_CS_PE_Table2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:904af3c229f207c5f31227048e3be404191756368e872687d7246f8d1643661b
|
| 3 |
+
size 26117151
|
models/MetricGAN-OKD/3_PE_CS+CB+CO_Table3.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d86f7625b8bd8e2272a2b9738f67f1fdf8aeea6616e4db56931685c4d6fbf81
|
| 3 |
+
size 26117343
|
models/MetricGAN-OKD/4_PE_CS+CB_Table4.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0c2965f6babe681fa0af53ed3aa69b216e17d96877eface2bdc06e52b9a3b36
|
| 3 |
+
size 13285890
|
models/MetricGAN-OKD/MetricGAN-OKD-1.weights.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45ace569ab2ccf3d24e5643d666d203416ec195755780bccf0b822dcd035def8
|
| 3 |
+
size 32362
|
models/MetricGAN-OKD/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://github.com/wooseok-shin/MetricGAN-OKD
|
models/MetricGAN-Reloaded/README.md
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Note
|
| 2 |
+
|
| 3 |
+
Note that the epochX directory uses 0-indexing, while the checkpoints use 1-indexing.
|
| 4 |
+
This is somewhat unfortunate, but we decided not to change it so as not to lose progress
|
| 5 |
+
(the new_pairs array does not depend on whether we use 0- or 1- indexing, since it is
|
| 6 |
+
saved across epochs -- hence thankfully this does not affect correctness).
|
| 7 |
+
|
| 8 |
+
The files in `epoch7/` are the cleaned versions of those in `data/MS-SNSD-dataset-30/train/noisy`
|
| 9 |
+
produced by our model after epoch 8 (1-indexed 8).
|
| 10 |
+
|
| 11 |
+
We did not include previous checkpoints in this repository to save space.
|
models/MetricGAN-Reloaded/checkpt-dis-8.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2515651bb6e40b93cc4f094de7b78c77c1c37e34d0a8d5ef7955f793b102e61
|
| 3 |
+
size 3754370
|
models/MetricGAN-Reloaded/checkpt-disopt-8.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ca286b59d303f75e51afcbe04cfc9cdf38e809dfb080e80b7d862fc1642364f
|
| 3 |
+
size 7399493
|
models/MetricGAN-Reloaded/checkpt-gen-8.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d32a9081edbe0c7965939279ef4aa61907798794c0723ca7a3da48a2d1911a7e
|
| 3 |
+
size 7584219
|
models/MetricGAN-Reloaded/checkpt-genopt-8.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84cd4459c28bb0fcc15836ebbe1cc4d4aa7af615d552ea8655d9460cdb0fec93
|
| 3 |
+
size 15172973
|
models/MetricGAN-Reloaded/epoch7.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff252f7ffc76731bd7678d3f0a9da87c554e9af35610b20a834e5dc280e82e9c
|
| 3 |
+
size 1455558142
|
models/MetricGAN-Reloaded/npairs_8.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a08927cc94da8b7c7d2d48d41b29cb6db5e308c9e333a0e4bd2cb340738b91ac
|
| 3 |
+
size 1116728
|
models/MetricGAN-Reloaded/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://github.com/testzer0/MetricGAN-Reloaded
|
models/metricgan-plus-pytorch/CSIG-GAN_trial1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10925ae4d094e320b2faa9bbd0a0263a1598438e0e1f5c097186167620ab1fe0
|
| 3 |
+
size 26120424
|
models/metricgan-plus-pytorch/PESQ-GAN_trial1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb6f6ab151f5029a84d7c9ce699ad51dfbff0f1a2d83ac135eb089629473babc
|
| 3 |
+
size 26116959
|
models/metricgan-plus-pytorch/PESQ-GAN_trial2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d9aba66256f6af982a73e38bae85b7ead3e4f857403cb7d0516316f6d99a622
|
| 3 |
+
size 26116895
|
models/metricgan-plus-pytorch/PESQ-GAN_trial3.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90c4b2785dcffc392a61a7fa2a9defa1ee14a58edd69d1c755b3dc15d5626ede
|
| 3 |
+
size 26116959
|
models/metricgan-plus-pytorch/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://github.com/wooseok-shin/MetricGAN-plus-pytorch/releases/tag/v1.weights
|
models/metricgan-plus-voicebank/.gitattributes
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.tar.gz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
models/metricgan-plus-voicebank/README.md
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language: "en"
|
| 3 |
+
tags:
|
| 4 |
+
- audio-to-audio
|
| 5 |
+
- speech-enhancement
|
| 6 |
+
- PyTorch
|
| 7 |
+
- speechbrain
|
| 8 |
+
license: "apache-2.0"
|
| 9 |
+
datasets:
|
| 10 |
+
- Voicebank
|
| 11 |
+
- DEMAND
|
| 12 |
+
metrics:
|
| 13 |
+
- PESQ
|
| 14 |
+
- STOI
|
| 15 |
+
inference: false
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
<iframe src="https://ghbtns.com/github-btn.html?user=speechbrain&repo=speechbrain&type=star&count=true&size=large&v=2" frameborder="0" scrolling="0" width="170" height="30" title="GitHub"></iframe>
|
| 19 |
+
<br/><br/>
|
| 20 |
+
|
| 21 |
+
# MetricGAN-trained model for Enhancement
|
| 22 |
+
|
| 23 |
+
This repository provides all the necessary tools to perform enhancement with
|
| 24 |
+
SpeechBrain. For a better experience we encourage you to learn more about
|
| 25 |
+
[SpeechBrain](https://speechbrain.github.io). The model performance is:
|
| 26 |
+
|
| 27 |
+
| Release | Test PESQ | Test STOI |
|
| 28 |
+
|:-----------:|:-----:| :-----:|
|
| 29 |
+
| 21-04-27 | 3.15 | 93.0 |
|
| 30 |
+
|
| 31 |
+
## Install SpeechBrain
|
| 32 |
+
|
| 33 |
+
First of all, please install SpeechBrain with the following command:
|
| 34 |
+
|
| 35 |
+
```
|
| 36 |
+
pip install speechbrain
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
Please note that we encourage you to read our tutorials and learn more about
|
| 40 |
+
[SpeechBrain](https://speechbrain.github.io).
|
| 41 |
+
|
| 42 |
+
## Pretrained Usage
|
| 43 |
+
|
| 44 |
+
To use the MetricGAN+-trained model for enhancement, use the following simple code:
|
| 45 |
+
|
| 46 |
+
```python
|
| 47 |
+
import torch
|
| 48 |
+
import torchaudio
|
| 49 |
+
from speechbrain.inference.enhancement import SpectralMaskEnhancement
|
| 50 |
+
|
| 51 |
+
enhance_model = SpectralMaskEnhancement.from_hparams(
|
| 52 |
+
source="speechbrain/metricgan-plus-voicebank",
|
| 53 |
+
savedir="pretrained_models/metricgan-plus-voicebank",
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
# Load and add fake batch dimension
|
| 57 |
+
noisy = enhance_model.load_audio(
|
| 58 |
+
"speechbrain/metricgan-plus-voicebank/example.wav"
|
| 59 |
+
).unsqueeze(0)
|
| 60 |
+
|
| 61 |
+
# Add relative length tensor
|
| 62 |
+
enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
|
| 63 |
+
|
| 64 |
+
# Saving enhanced signal on disk
|
| 65 |
+
torchaudio.save('enhanced.wav', enhanced.cpu(), 16000)
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
The system is trained with recordings sampled at 16kHz (single channel).
|
| 69 |
+
The code will automatically normalize your audio (i.e., resampling + mono channel selection) when calling *enhance_file* if needed. Make sure your input tensor is compliant with the expected sampling rate if you use *enhance_batch* as in the example.
|
| 70 |
+
|
| 71 |
+
### Inference on GPU
|
| 72 |
+
To perform inference on the GPU, add `run_opts={"device":"cuda"}` when calling the `from_hparams` method.
|
| 73 |
+
|
| 74 |
+
### Training
|
| 75 |
+
The model was trained with SpeechBrain (d0accc8).
|
| 76 |
+
To train it from scratch, follow these steps:
|
| 77 |
+
1. Clone SpeechBrain:
|
| 78 |
+
```bash
|
| 79 |
+
git clone https://github.com/speechbrain/speechbrain/
|
| 80 |
+
```
|
| 81 |
+
2. Install it:
|
| 82 |
+
```
|
| 83 |
+
cd speechbrain
|
| 84 |
+
pip install -r requirements.txt
|
| 85 |
+
pip install -e .
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
3. Run Training:
|
| 89 |
+
```
|
| 90 |
+
cd recipes/Voicebank/enhance/MetricGAN
|
| 91 |
+
python train.py hparams/train.yaml --data_folder=your_data_folder
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
You can find our training results (models, logs, etc) [here](https://drive.google.com/drive/folders/1fcVP52gHgoMX9diNN1JxX_My5KaRNZWs?usp=sharing).
|
| 95 |
+
|
| 96 |
+
### Limitations
|
| 97 |
+
The SpeechBrain team does not provide any warranty on the performance achieved by this model when used on other datasets.
|
| 98 |
+
|
| 99 |
+
## Referencing MetricGAN+
|
| 100 |
+
|
| 101 |
+
If you find MetricGAN+ useful, please cite:
|
| 102 |
+
|
| 103 |
+
```
|
| 104 |
+
@article{fu2021metricgan+,
|
| 105 |
+
title={MetricGAN+: An Improved Version of MetricGAN for Speech Enhancement},
|
| 106 |
+
author={Fu, Szu-Wei and Yu, Cheng and Hsieh, Tsun-An and Plantinga, Peter and Ravanelli, Mirco and Lu, Xugang and Tsao, Yu},
|
| 107 |
+
journal={arXiv preprint arXiv:2104.03538},
|
| 108 |
+
year={2021}
|
| 109 |
+
}
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
# **About SpeechBrain**
|
| 113 |
+
- Website: https://speechbrain.github.io/
|
| 114 |
+
- Code: https://github.com/speechbrain/speechbrain/
|
| 115 |
+
- HuggingFace: https://huggingface.co/speechbrain/
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
# **Citing SpeechBrain**
|
| 119 |
+
Please cite SpeechBrain if you use it for your research or business.
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
```bibtex
|
| 123 |
+
@misc{speechbrain,
|
| 124 |
+
title={{SpeechBrain}: A General-Purpose Speech Toolkit},
|
| 125 |
+
author={Mirco Ravanelli and Titouan Parcollet and Peter Plantinga and Aku Rouhe and Samuele Cornell and Loren Lugosch and Cem Subakan and Nauman Dawalatabad and Abdelwahab Heba and Jianyuan Zhong and Ju-Chieh Chou and Sung-Lin Yeh and Szu-Wei Fu and Chien-Feng Liao and Elena Rastorgueva and François Grondin and William Aris and Hwidong Na and Yan Gao and Renato De Mori and Yoshua Bengio},
|
| 126 |
+
year={2021},
|
| 127 |
+
eprint={2106.04624},
|
| 128 |
+
archivePrefix={arXiv},
|
| 129 |
+
primaryClass={eess.AS},
|
| 130 |
+
note={arXiv:2106.04624}
|
| 131 |
+
}
|
| 132 |
+
```
|
models/metricgan-plus-voicebank/config.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"speechbrain_interface": "SpectralMaskEnhancement"
|
| 3 |
+
}
|
models/metricgan-plus-voicebank/enhance_model.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:147bfb866bac8264603546e035bf283370e716ed2f4b7412d308d2bcee88304f
|
| 3 |
+
size 7586021
|
models/metricgan-plus-voicebank/example.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72bded2034fc64a0de36250e08bd2ff3f21391e1bd941ea96b22f07e407773eb
|
| 3 |
+
size 276040
|
models/metricgan-plus-voicebank/hyperparams.yaml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# STFT parameters
|
| 2 |
+
sample_rate: 16000
|
| 3 |
+
win_length: 32
|
| 4 |
+
hop_length: 16
|
| 5 |
+
n_fft: 512
|
| 6 |
+
window_fn: !name:torch.hamming_window
|
| 7 |
+
|
| 8 |
+
compute_stft: !new:speechbrain.processing.features.STFT
|
| 9 |
+
sample_rate: !ref <sample_rate>
|
| 10 |
+
n_fft: !ref <n_fft>
|
| 11 |
+
win_length: !ref <win_length>
|
| 12 |
+
hop_length: !ref <hop_length>
|
| 13 |
+
window_fn: !ref <window_fn>
|
| 14 |
+
|
| 15 |
+
compute_istft: !new:speechbrain.processing.features.ISTFT
|
| 16 |
+
sample_rate: !ref <sample_rate>
|
| 17 |
+
n_fft: !ref <n_fft>
|
| 18 |
+
win_length: !ref <win_length>
|
| 19 |
+
hop_length: !ref <hop_length>
|
| 20 |
+
window_fn: !ref <window_fn>
|
| 21 |
+
|
| 22 |
+
spectral_magnitude: !name:speechbrain.processing.features.spectral_magnitude
|
| 23 |
+
power: 0.5
|
| 24 |
+
|
| 25 |
+
resynth: !name:speechbrain.processing.signal_processing.resynthesize
|
| 26 |
+
stft: !ref <compute_stft>
|
| 27 |
+
istft: !ref <compute_istft>
|
| 28 |
+
|
| 29 |
+
enhance_model: !new:speechbrain.lobes.models.MetricGAN.EnhancementGenerator
|
| 30 |
+
input_size: !ref <n_fft> // 2 + 1
|
| 31 |
+
hidden_size: 200
|
| 32 |
+
num_layers: 2
|
| 33 |
+
dropout: 0
|
| 34 |
+
|
| 35 |
+
modules:
|
| 36 |
+
enhance_model: !ref <enhance_model>
|
| 37 |
+
|
| 38 |
+
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
|
| 39 |
+
loadables:
|
| 40 |
+
enhance_model: !ref <enhance_model>
|
models/metricgan-plus-voicebank/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://huggingface.co/speechbrain/metricgan-plus-voicebank
|