niobures committed on
Commit
2f311dc
·
verified ·
1 Parent(s): 9ab3e79

MetricGAN (code, colab, models, paper)

Browse files
Files changed (47) hide show
  1. .gitattributes +5 -0
  2. CGA-MGAN. Metric GAN Based on Convolution-Augmented Gated Attention for Speech Enhancement.pdf +3 -0
  3. MetricGAN+. An Improved Version of MetricGAN for Speech Enhancement.pdf +3 -0
  4. MetricGAN-OKD. Multi-Metric Optimization of MetricGAN via Online Knowledge Distillation for Speech Enhancement.pdf +3 -0
  5. MetricGAN. Generative Adversarial Networks based Black-box Metric Scores Optimization for Speech Enhancement.pdf +3 -0
  6. code/Audio_procesing.zip +3 -0
  7. code/Conv-TasNet-MetricGAN-Multi-Discriminator.zip +3 -0
  8. code/Dereverb_MetricGAN-U.zip +3 -0
  9. code/MetricGAN-KAN.zip +3 -0
  10. code/MetricGAN-OKD [u-hyszk] +1.zip +3 -0
  11. code/MetricGAN-OKD.zip +3 -0
  12. code/MetricGAN-Plus.zip +3 -0
  13. code/MetricGAN-Reloaded.zip +3 -0
  14. code/MetricGAN-plus-pytorch.zip +3 -0
  15. code/MetricGAN.zip +3 -0
  16. code/MetricGAN_v1.zip +3 -0
  17. code/SpeechBrain_MetricGAN.zip +3 -0
  18. code/Vk_MetricGan.zip +3 -0
  19. code/metricgan-plus-minus.zip +3 -0
  20. code/speech-enhancement.zip +3 -0
  21. colab/SpeechEnhancement.ipynb +0 -0
  22. models/MetricGAN-OKD/1_PE_CS_Table2.pth +3 -0
  23. models/MetricGAN-OKD/2_CS_PE_Table2.pth +3 -0
  24. models/MetricGAN-OKD/3_PE_CS+CB+CO_Table3.pth +3 -0
  25. models/MetricGAN-OKD/4_PE_CS+CB_Table4.pth +3 -0
  26. models/MetricGAN-OKD/MetricGAN-OKD-1.weights.zip +3 -0
  27. models/MetricGAN-OKD/source.txt +1 -0
  28. models/MetricGAN-Reloaded/README.md +11 -0
  29. models/MetricGAN-Reloaded/checkpt-dis-8.pt +3 -0
  30. models/MetricGAN-Reloaded/checkpt-disopt-8.pt +3 -0
  31. models/MetricGAN-Reloaded/checkpt-gen-8.pt +3 -0
  32. models/MetricGAN-Reloaded/checkpt-genopt-8.pt +3 -0
  33. models/MetricGAN-Reloaded/epoch7.zip +3 -0
  34. models/MetricGAN-Reloaded/npairs_8.pkl +3 -0
  35. models/MetricGAN-Reloaded/source.txt +1 -0
  36. models/metricgan-plus-pytorch/CSIG-GAN_trial1.pth +3 -0
  37. models/metricgan-plus-pytorch/PESQ-GAN_trial1.pth +3 -0
  38. models/metricgan-plus-pytorch/PESQ-GAN_trial2.pth +3 -0
  39. models/metricgan-plus-pytorch/PESQ-GAN_trial3.pth +3 -0
  40. models/metricgan-plus-pytorch/source.txt +1 -0
  41. models/metricgan-plus-voicebank/.gitattributes +16 -0
  42. models/metricgan-plus-voicebank/README.md +132 -0
  43. models/metricgan-plus-voicebank/config.json +3 -0
  44. models/metricgan-plus-voicebank/enhance_model.ckpt +3 -0
  45. models/metricgan-plus-voicebank/example.wav +3 -0
  46. models/metricgan-plus-voicebank/hyperparams.yaml +40 -0
  47. models/metricgan-plus-voicebank/source.txt +1 -0
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ CGA-MGAN.[[:space:]]Metric[[:space:]]GAN[[:space:]]Based[[:space:]]on[[:space:]]Convolution-Augmented[[:space:]]Gated[[:space:]]Attention[[:space:]]for[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
37
+ MetricGAN-OKD.[[:space:]]Multi-Metric[[:space:]]Optimization[[:space:]]of[[:space:]]MetricGAN[[:space:]]via[[:space:]]Online[[:space:]]Knowledge[[:space:]]Distillation[[:space:]]for[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
38
+ MetricGAN.[[:space:]]Generative[[:space:]]Adversarial[[:space:]]Networks[[:space:]]based[[:space:]]Black-box[[:space:]]Metric[[:space:]]Scores[[:space:]]Optimization[[:space:]]for[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
39
+ MetricGAN+.[[:space:]]An[[:space:]]Improved[[:space:]]Version[[:space:]]of[[:space:]]MetricGAN[[:space:]]for[[:space:]]Speech[[:space:]]Enhancement.pdf filter=lfs diff=lfs merge=lfs -text
40
+ models/metricgan-plus-voicebank/example.wav filter=lfs diff=lfs merge=lfs -text
CGA-MGAN. Metric GAN Based on Convolution-Augmented Gated Attention for Speech Enhancement.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c52c8a95bc76a9e07d6f381d28e82da5fff1036df7a5f5af1d1e11b212f12b2f
3
+ size 3329413
MetricGAN+. An Improved Version of MetricGAN for Speech Enhancement.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d67b76f70f41c5892090d1b407b4731cc3d0000cfee118aa23230fad73739aad
3
+ size 987063
MetricGAN-OKD. Multi-Metric Optimization of MetricGAN via Online Knowledge Distillation for Speech Enhancement.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dffeb81749c0ef46a39b8d380f7d21921c801549c2e8b00e1663bd46704507a
3
+ size 2827824
MetricGAN. Generative Adversarial Networks based Black-box Metric Scores Optimization for Speech Enhancement.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb5342f6197c6e5fea3440a5fac0cb1ec955713b7ab5393707fa05df9e959dcc
3
+ size 3092851
code/Audio_procesing.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9006bb21dd71002bb18581cf42267748229dbc638c9be3d5057695e0a9c9824a
3
+ size 362533
code/Conv-TasNet-MetricGAN-Multi-Discriminator.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4dc204a4c94637fcf5efaa21463b927b3451ffb35ba89705ff40e3c7a380937
3
+ size 517306083
code/Dereverb_MetricGAN-U.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6b3a8d73c2f24bd66d2c1c7e0ba47b59018d79a0bd50fe4f77e529fdb32ee60
3
+ size 315373
code/MetricGAN-KAN.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:326b4a88b72067ec2a9692e3ff1c7bcf689230e9ec2935e6d7960037ea885aa7
3
+ size 299490
code/MetricGAN-OKD [u-hyszk] +1.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56777dde4600042d83fc130da1c64f95b8600c9c2537ae2a0deb13e26115d0c8
3
+ size 102434
code/MetricGAN-OKD.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a626c8feeeca38d93a50e66e9fe0eb17cbf1456afa6eedf0f16207439b1454c
3
+ size 85140837
code/MetricGAN-Plus.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:852cd71426ce6b3a485fec4d2aeee819efcab56c1fc376f5f363c3c131e25357
3
+ size 14651401
code/MetricGAN-Reloaded.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1abdbab5a541f727ed2521661cfa54b347dbc32801d4a254710aaa492530b717
3
+ size 3826437521
code/MetricGAN-plus-pytorch.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70254879aadba4576007564ff18a0b6365ee6af398ff0f241f7fef7c560abd96
3
+ size 97064073
code/MetricGAN.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6b2aeafd337cacb3ff719706e368925e8b4311e796b693810e4aae3b8e7ebec
3
+ size 5340258
code/MetricGAN_v1.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43c01492f0d5059c5674d67020215a6387e58de8d52dbaa0f5f3ef7c6dfd30d4
3
+ size 60974
code/SpeechBrain_MetricGAN.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c304a093673a90de779a1280f10d95a546b4574e8cb568b30f0ebfc9412b1dc5
3
+ size 33691
code/Vk_MetricGan.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0179ede8b7d55a385ea08dbaf613dc0be221b9ea0177673e2093d4e0cf7cc708
3
+ size 1711799
code/metricgan-plus-minus.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35e5de1ba2b63d68a50a172ed90159de7a7525960bcaa4d2233da4c56d9b64d9
3
+ size 53530
code/speech-enhancement.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c5a8447a4a8ad8ff5443b7e7bb4a0d3975fef59d90dffb926dbf051220f7b84
3
+ size 34450
colab/SpeechEnhancement.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
models/MetricGAN-OKD/1_PE_CS_Table2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4075b22087d30fd4dc01b96411d35d4e6a62a8ddf5bd316e9e6eb192b1553fcc
3
+ size 26117151
models/MetricGAN-OKD/2_CS_PE_Table2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:904af3c229f207c5f31227048e3be404191756368e872687d7246f8d1643661b
3
+ size 26117151
models/MetricGAN-OKD/3_PE_CS+CB+CO_Table3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d86f7625b8bd8e2272a2b9738f67f1fdf8aeea6616e4db56931685c4d6fbf81
3
+ size 26117343
models/MetricGAN-OKD/4_PE_CS+CB_Table4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0c2965f6babe681fa0af53ed3aa69b216e17d96877eface2bdc06e52b9a3b36
3
+ size 13285890
models/MetricGAN-OKD/MetricGAN-OKD-1.weights.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ace569ab2ccf3d24e5643d666d203416ec195755780bccf0b822dcd035def8
3
+ size 32362
models/MetricGAN-OKD/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://github.com/wooseok-shin/MetricGAN-OKD
models/MetricGAN-Reloaded/README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Note
2
+
3
+ Note that the epochX directory uses 0-indexing, while the checkpoints use 1-indexing.
4
+ This is somewhat unfortunate, but we decided to not change it so as to not lose progress
5
+ (the new_pairs array does not depend on whether we use 0- or 1- indexing, since it is
6
+ saved across epochs -- hence thankfully this does not affect correctness).
7
+
8
+ The files in `epoch7/` are the cleaned versions of those in `data/MS-SNSD-dataset-30/train/noisy`
9
+ produced by our model after epoch 8 (1-indexed 8).
10
+
11
+ We did not include previous checkpoints in this repository to save space.
models/MetricGAN-Reloaded/checkpt-dis-8.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2515651bb6e40b93cc4f094de7b78c77c1c37e34d0a8d5ef7955f793b102e61
3
+ size 3754370
models/MetricGAN-Reloaded/checkpt-disopt-8.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ca286b59d303f75e51afcbe04cfc9cdf38e809dfb080e80b7d862fc1642364f
3
+ size 7399493
models/MetricGAN-Reloaded/checkpt-gen-8.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d32a9081edbe0c7965939279ef4aa61907798794c0723ca7a3da48a2d1911a7e
3
+ size 7584219
models/MetricGAN-Reloaded/checkpt-genopt-8.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84cd4459c28bb0fcc15836ebbe1cc4d4aa7af615d552ea8655d9460cdb0fec93
3
+ size 15172973
models/MetricGAN-Reloaded/epoch7.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff252f7ffc76731bd7678d3f0a9da87c554e9af35610b20a834e5dc280e82e9c
3
+ size 1455558142
models/MetricGAN-Reloaded/npairs_8.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a08927cc94da8b7c7d2d48d41b29cb6db5e308c9e333a0e4bd2cb340738b91ac
3
+ size 1116728
models/MetricGAN-Reloaded/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://github.com/testzer0/MetricGAN-Reloaded
models/metricgan-plus-pytorch/CSIG-GAN_trial1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10925ae4d094e320b2faa9bbd0a0263a1598438e0e1f5c097186167620ab1fe0
3
+ size 26120424
models/metricgan-plus-pytorch/PESQ-GAN_trial1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb6f6ab151f5029a84d7c9ce699ad51dfbff0f1a2d83ac135eb089629473babc
3
+ size 26116959
models/metricgan-plus-pytorch/PESQ-GAN_trial2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d9aba66256f6af982a73e38bae85b7ead3e4f857403cb7d0516316f6d99a622
3
+ size 26116895
models/metricgan-plus-pytorch/PESQ-GAN_trial3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90c4b2785dcffc392a61a7fa2a9defa1ee14a58edd69d1c755b3dc15d5626ede
3
+ size 26116959
models/metricgan-plus-pytorch/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://github.com/wooseok-shin/MetricGAN-plus-pytorch/releases/tag/v1.weights
models/metricgan-plus-voicebank/.gitattributes ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
2
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.h5 filter=lfs diff=lfs merge=lfs -text
5
+ *.tflite filter=lfs diff=lfs merge=lfs -text
6
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.ot filter=lfs diff=lfs merge=lfs -text
8
+ *.onnx filter=lfs diff=lfs merge=lfs -text
9
+ *.arrow filter=lfs diff=lfs merge=lfs -text
10
+ *.ftz filter=lfs diff=lfs merge=lfs -text
11
+ *.joblib filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.pb filter=lfs diff=lfs merge=lfs -text
15
+ *.pt filter=lfs diff=lfs merge=lfs -text
16
+ *.pth filter=lfs diff=lfs merge=lfs -text
models/metricgan-plus-voicebank/README.md ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: "en"
3
+ tags:
4
+ - audio-to-audio
5
+ - speech-enhancement
6
+ - PyTorch
7
+ - speechbrain
8
+ license: "apache-2.0"
9
+ datasets:
10
+ - Voicebank
11
+ - DEMAND
12
+ metrics:
13
+ - PESQ
14
+ - STOI
15
+ inference: false
16
+ ---
17
+
18
+ <iframe src="https://ghbtns.com/github-btn.html?user=speechbrain&repo=speechbrain&type=star&count=true&size=large&v=2" frameborder="0" scrolling="0" width="170" height="30" title="GitHub"></iframe>
19
+ <br/><br/>
20
+
21
+ # MetricGAN-trained model for Enhancement
22
+
23
+ This repository provides all the necessary tools to perform enhancement with
24
+ SpeechBrain. For a better experience we encourage you to learn more about
25
+ [SpeechBrain](https://speechbrain.github.io). The model performance is:
26
+
27
+ | Release | Test PESQ | Test STOI |
28
+ |:-----------:|:-----:| :-----:|
29
+ | 21-04-27 | 3.15 | 93.0 |
30
+
31
+ ## Install SpeechBrain
32
+
33
+ First of all, please install SpeechBrain with the following command:
34
+
35
+ ```
36
+ pip install speechbrain
37
+ ```
38
+
39
+ Please note that we encourage you to read our tutorials and learn more about
40
+ [SpeechBrain](https://speechbrain.github.io).
41
+
42
+ ## Pretrained Usage
43
+
44
+ To use the MetricGAN+-trained model for enhancement, use the following simple code:
45
+
46
+ ```python
47
+ import torch
48
+ import torchaudio
49
+ from speechbrain.inference.enhancement import SpectralMaskEnhancement
50
+
51
+ enhance_model = SpectralMaskEnhancement.from_hparams(
52
+ source="speechbrain/metricgan-plus-voicebank",
53
+ savedir="pretrained_models/metricgan-plus-voicebank",
54
+ )
55
+
56
+ # Load and add fake batch dimension
57
+ noisy = enhance_model.load_audio(
58
+ "speechbrain/metricgan-plus-voicebank/example.wav"
59
+ ).unsqueeze(0)
60
+
61
+ # Add relative length tensor
62
+ enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
63
+
64
+ # Saving enhanced signal on disk
65
+ torchaudio.save('enhanced.wav', enhanced.cpu(), 16000)
66
+ ```
67
+
68
+ The system is trained with recordings sampled at 16kHz (single channel).
69
+ The code will automatically normalize your audio (i.e., resampling + mono channel selection) when calling *enhance_file* if needed. Make sure your input tensor is compliant with the expected sampling rate if you use *enhance_batch* as in the example.
70
+
71
+ ### Inference on GPU
72
+ To perform inference on the GPU, add `run_opts={"device":"cuda"}` when calling the `from_hparams` method.
73
+
74
+ ### Training
75
+ The model was trained with SpeechBrain (d0accc8).
76
+ To train it from scratch, follow these steps:
77
+ 1. Clone SpeechBrain:
78
+ ```bash
79
+ git clone https://github.com/speechbrain/speechbrain/
80
+ ```
81
+ 2. Install it:
82
+ ```
83
+ cd speechbrain
84
+ pip install -r requirements.txt
85
+ pip install -e .
86
+ ```
87
+
88
+ 3. Run Training:
89
+ ```
90
+ cd recipes/Voicebank/enhance/MetricGAN
91
+ python train.py hparams/train.yaml --data_folder=your_data_folder
92
+ ```
93
+
94
+ You can find our training results (models, logs, etc) [here](https://drive.google.com/drive/folders/1fcVP52gHgoMX9diNN1JxX_My5KaRNZWs?usp=sharing).
95
+
96
+ ### Limitations
97
+ The SpeechBrain team does not provide any warranty on the performance achieved by this model when used on other datasets.
98
+
99
+ ## Referencing MetricGAN+
100
+
101
+ If you find MetricGAN+ useful, please cite:
102
+
103
+ ```
104
+ @article{fu2021metricgan+,
105
+ title={MetricGAN+: An Improved Version of MetricGAN for Speech Enhancement},
106
+ author={Fu, Szu-Wei and Yu, Cheng and Hsieh, Tsun-An and Plantinga, Peter and Ravanelli, Mirco and Lu, Xugang and Tsao, Yu},
107
+ journal={arXiv preprint arXiv:2104.03538},
108
+ year={2021}
109
+ }
110
+ ```
111
+
112
+ # **About SpeechBrain**
113
+ - Website: https://speechbrain.github.io/
114
+ - Code: https://github.com/speechbrain/speechbrain/
115
+ - HuggingFace: https://huggingface.co/speechbrain/
116
+
117
+
118
+ # **Citing SpeechBrain**
119
+ Please cite SpeechBrain if you use it for your research or business.
120
+
121
+
122
+ ```bibtex
123
+ @misc{speechbrain,
124
+ title={{SpeechBrain}: A General-Purpose Speech Toolkit},
125
+ author={Mirco Ravanelli and Titouan Parcollet and Peter Plantinga and Aku Rouhe and Samuele Cornell and Loren Lugosch and Cem Subakan and Nauman Dawalatabad and Abdelwahab Heba and Jianyuan Zhong and Ju-Chieh Chou and Sung-Lin Yeh and Szu-Wei Fu and Chien-Feng Liao and Elena Rastorgueva and François Grondin and William Aris and Hwidong Na and Yan Gao and Renato De Mori and Yoshua Bengio},
126
+ year={2021},
127
+ eprint={2106.04624},
128
+ archivePrefix={arXiv},
129
+ primaryClass={eess.AS},
130
+ note={arXiv:2106.04624}
131
+ }
132
+ ```
models/metricgan-plus-voicebank/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "speechbrain_interface": "SpectralMaskEnhancement"
3
+ }
models/metricgan-plus-voicebank/enhance_model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:147bfb866bac8264603546e035bf283370e716ed2f4b7412d308d2bcee88304f
3
+ size 7586021
models/metricgan-plus-voicebank/example.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72bded2034fc64a0de36250e08bd2ff3f21391e1bd941ea96b22f07e407773eb
3
+ size 276040
models/metricgan-plus-voicebank/hyperparams.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # STFT parameters
2
+ sample_rate: 16000
3
+ win_length: 32
4
+ hop_length: 16
5
+ n_fft: 512
6
+ window_fn: !name:torch.hamming_window
7
+
8
+ compute_stft: !new:speechbrain.processing.features.STFT
9
+ sample_rate: !ref <sample_rate>
10
+ n_fft: !ref <n_fft>
11
+ win_length: !ref <win_length>
12
+ hop_length: !ref <hop_length>
13
+ window_fn: !ref <window_fn>
14
+
15
+ compute_istft: !new:speechbrain.processing.features.ISTFT
16
+ sample_rate: !ref <sample_rate>
17
+ n_fft: !ref <n_fft>
18
+ win_length: !ref <win_length>
19
+ hop_length: !ref <hop_length>
20
+ window_fn: !ref <window_fn>
21
+
22
+ spectral_magnitude: !name:speechbrain.processing.features.spectral_magnitude
23
+ power: 0.5
24
+
25
+ resynth: !name:speechbrain.processing.signal_processing.resynthesize
26
+ stft: !ref <compute_stft>
27
+ istft: !ref <compute_istft>
28
+
29
+ enhance_model: !new:speechbrain.lobes.models.MetricGAN.EnhancementGenerator
30
+ input_size: !ref <n_fft> // 2 + 1
31
+ hidden_size: 200
32
+ num_layers: 2
33
+ dropout: 0
34
+
35
+ modules:
36
+ enhance_model: !ref <enhance_model>
37
+
38
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
39
+ loadables:
40
+ enhance_model: !ref <enhance_model>
models/metricgan-plus-voicebank/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/speechbrain/metricgan-plus-voicebank