Kyle Howells committed on
Commit
fc73d82
·
1 Parent(s): dffd7c4

Add all DeepFilterNet versions (v1, v2, v3) in subdirectories

Browse files

Consolidate DeepFilterNet v1, v2, and v3 MLX weights into a single
repository with v1/, v2/, v3/ subdirectories and a combined README.

README.md CHANGED
@@ -1,3 +1,123 @@
1
  ---
2
  license: mit
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: mit
3
+ library_name: mlx
4
+ tags:
5
+ - mlx
6
+ - audio
7
+ - speech-enhancement
8
+ - noise-suppression
9
+ - deepfilternet
10
+ - apple-silicon
11
+ base_model:
12
+ - DeepFilterNet/DeepFilterNet
13
+ - DeepFilterNet/DeepFilterNet2
14
+ - DeepFilterNet/DeepFilterNet3
15
+ pipeline_tag: audio-to-audio
16
  ---
17
+
18
+ # DeepFilterNet — MLX
19
+
20
+ MLX-compatible weights for [DeepFilterNet](https://github.com/Rikorose/DeepFilterNet), a real-time speech enhancement framework that suppresses background noise from full-band 48 kHz audio.
21
+
22
+ This repository contains all three model versions (v1, v2, v3), converted directly from the original PyTorch checkpoints to `safetensors` format for use with [MLX](https://github.com/ml-explore/mlx) on Apple Silicon. No fine-tuning or quantization was applied — the weights are numerically identical to the originals.
23
+
24
+ ## Models
25
+
26
+ Each version is stored in its own subfolder:
27
+
28
+ | Version | Subfolder | Weights | Paper |
29
+ |---------|-----------|---------|-------|
30
+ | DeepFilterNet v1 | `v1/` | ~7.2 MiB (float32) | [arXiv:2110.05588](https://arxiv.org/abs/2110.05588) |
31
+ | DeepFilterNet v2 | `v2/` | ~8.9 MiB (float32) | [arXiv:2205.05474](https://arxiv.org/abs/2205.05474) |
32
+ | DeepFilterNet v3 | `v3/` | ~8.3 MiB (float32) | [arXiv:2305.08227](https://arxiv.org/abs/2305.08227) |
33
+
34
+ ## Model Details
35
+
36
+ All versions share the same audio parameters:
37
+
38
+ | Parameter | Value |
39
+ |-----------|-------|
40
+ | Sample rate | 48 kHz |
41
+ | FFT size | 960 |
42
+ | Hop size | 480 |
43
+ | ERB bands | 32 |
44
+ | DF bins | 96 |
45
+ | DF order | 5 |
46
+
47
+ | Version | Embedding hidden dim |
48
+ |---------|---------------------|
49
+ | v1 | 512 |
50
+ | v2 | 256 |
51
+ | v3 | 256 |
52
+
53
+ ## Files
54
+
55
+ ```
56
+ v1/
57
+ config.json # v1 architecture configuration
58
+ model.safetensors # v1 weights
59
+ v2/
60
+ config.json # v2 architecture configuration
61
+ model.safetensors # v2 weights
62
+ v3/
63
+ config.json # v3 architecture configuration
64
+ model.safetensors # v3 weights
65
+ ```
66
+
67
+ ## Usage
68
+
69
+ ### Python (mlx-audio)
70
+
71
+ ```python
72
+ from mlx_audio.sts.models.deepfilternet import DeepFilterNetModel
73
+
74
+ # Load v3 (default)
75
+ model = DeepFilterNetModel.from_pretrained("mlx-community/DeepFilterNet-mlx")
76
+
77
+ # Load a specific version
78
+ model = DeepFilterNetModel.from_pretrained("mlx-community/DeepFilterNet-mlx", subfolder="v1")
79
+
80
+ # Enhance a file
81
+ enhanced = model.enhance("noisy.wav")
82
+ ```
83
+
84
+ ### Swift (mlx-audio-swift)
85
+
86
+ ```swift
87
+ import MLXAudioSTS
88
+
89
+ let model = try await DeepFilterNetModel.fromPretrained("mlx-community/DeepFilterNet-mlx", subfolder: "v3")
90
+ let enhanced = try model.enhance(audioArray)
91
+ ```
92
+
93
+ ## Origin
94
+
95
+ - **Original model:** [DeepFilterNet](https://github.com/Rikorose/DeepFilterNet) by Hendrik Schroeter
96
+ - **License:** MIT (same as the original)
97
+ - **Conversion:** PyTorch → `safetensors`
98
+
99
+ ## Citations
100
+
101
+ ```bibtex
102
+ @inproceedings{schroeter2022deepfilternet,
103
+ title={{DeepFilterNet}: A Low Complexity Speech Enhancement Framework for Full-Band Audio based on Deep Filtering},
104
+ author={Schr{\"o}ter, Hendrik and Escalante-B., Alberto N. and Rosenkranz, Tobias and Maier, Andreas},
105
+ booktitle={ICASSP 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
106
+ year={2022},
107
+ organization={IEEE}
108
+ }
109
+
110
+ @inproceedings{schroeter2022deepfilternet2,
111
+ title={{DeepFilterNet2}: Towards Real-Time Speech Enhancement on Embedded Devices for Full-Band Audio},
112
+ author={Schr{\"o}ter, Hendrik and Escalante-B., Alberto N. and Rosenkranz, Tobias and Maier, Andreas},
113
+ booktitle={17th International Workshop on Acoustic Signal Enhancement (IWAENC 2022)},
114
+ year={2022},
115
+ }
116
+
117
+ @inproceedings{schroeter2023deepfilternet3,
118
+ title={{DeepFilterNet}: Perceptually Motivated Real-Time Speech Enhancement},
119
+ author={Schr{\"o}ter, Hendrik and Rosenkranz, Tobias and Escalante-B., Alberto N. and Maier, Andreas},
120
+ booktitle={INTERSPEECH},
121
+ year={2023}
122
+ }
123
+ ```
v1/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_rate": 48000,
3
+ "fft_size": 960,
4
+ "hop_size": 480,
5
+ "nb_erb": 32,
6
+ "nb_df": 96,
7
+ "df_order": 5,
8
+ "df_lookahead": 1,
9
+ "lsnr_max": 35,
10
+ "lsnr_min": -15,
11
+ "conv_ch": 64,
12
+ "conv_k_enc": 2,
13
+ "conv_k_dec": 2,
14
+ "conv_width_factor": 1,
15
+ "conv_dec_mode": "transposed",
16
+ "emb_hidden_dim": 512,
17
+ "emb_num_layers": 3,
18
+ "df_hidden_dim": 512,
19
+ "df_num_layers": 2,
20
+ "gru_groups": 8,
21
+ "linear_groups": 8,
22
+ "enc_linear_groups": 8,
23
+ "group_shuffle": true,
24
+ "mask_pf": false,
25
+ "conv_lookahead": 2,
26
+ "conv_depthwise": true,
27
+ "convt_depthwise": true,
28
+ "enc_concat": false,
29
+ "emb_gru_skip_enc": "none",
30
+ "emb_gru_skip": "none",
31
+ "df_gru_skip": "groupedlinear",
32
+ "dfop_method": "real_unfold",
33
+ "conv_kernel": [
34
+ 1,
35
+ 3
36
+ ],
37
+ "convt_kernel": [
38
+ 1,
39
+ 3
40
+ ],
41
+ "conv_kernel_inp": [
42
+ 3,
43
+ 3
44
+ ],
45
+ "model_version": "DeepFilterNet"
46
+ }
v1/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ebf22a26e37a849769e1d5573c0bfd6e7337f63e4a583c16d40fb83b8643a72
3
+ size 7536928
v2/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_rate": 48000,
3
+ "fft_size": 960,
4
+ "hop_size": 480,
5
+ "nb_erb": 32,
6
+ "nb_df": 96,
7
+ "df_order": 5,
8
+ "df_lookahead": 2,
9
+ "lsnr_max": 35,
10
+ "lsnr_min": -15,
11
+ "conv_ch": 64,
12
+ "conv_k_enc": 1,
13
+ "conv_k_dec": 1,
14
+ "conv_width_factor": 1,
15
+ "conv_dec_mode": "transposed",
16
+ "emb_hidden_dim": 256,
17
+ "emb_num_layers": 3,
18
+ "df_hidden_dim": 256,
19
+ "df_num_layers": 2,
20
+ "gru_groups": 8,
21
+ "linear_groups": 8,
22
+ "enc_linear_groups": 8,
23
+ "group_shuffle": false,
24
+ "mask_pf": false,
25
+ "conv_lookahead": 2,
26
+ "conv_depthwise": true,
27
+ "convt_depthwise": true,
28
+ "enc_concat": true,
29
+ "emb_gru_skip_enc": "none",
30
+ "emb_gru_skip": "none",
31
+ "df_gru_skip": "none",
32
+ "dfop_method": "df",
33
+ "conv_kernel": [
34
+ 1,
35
+ 3
36
+ ],
37
+ "convt_kernel": [
38
+ 1,
39
+ 3
40
+ ],
41
+ "conv_kernel_inp": [
42
+ 3,
43
+ 3
44
+ ],
45
+ "model_version": "DeepFilterNet2"
46
+ }
v2/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd7e6c8dc47957c72b450308ffeca1f1396705ab6eebf8bddf46a1f9022c5335
3
+ size 9365936
v3/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_rate": 48000,
3
+ "fft_size": 960,
4
+ "hop_size": 480,
5
+ "nb_erb": 32,
6
+ "nb_df": 96,
7
+ "df_order": 5,
8
+ "df_lookahead": 2,
9
+ "lsnr_max": 35,
10
+ "lsnr_min": -15,
11
+ "conv_ch": 64,
12
+ "conv_k_enc": 1,
13
+ "conv_k_dec": 1,
14
+ "conv_width_factor": 1,
15
+ "conv_dec_mode": "transposed",
16
+ "emb_hidden_dim": 256,
17
+ "emb_num_layers": 3,
18
+ "df_hidden_dim": 256,
19
+ "df_num_layers": 2,
20
+ "gru_groups": 8,
21
+ "linear_groups": 16,
22
+ "enc_linear_groups": 32,
23
+ "group_shuffle": false,
24
+ "mask_pf": false,
25
+ "conv_lookahead": 2,
26
+ "conv_depthwise": true,
27
+ "convt_depthwise": false,
28
+ "enc_concat": false,
29
+ "emb_gru_skip_enc": "none",
30
+ "emb_gru_skip": "none",
31
+ "df_gru_skip": "groupedlinear",
32
+ "dfop_method": "df",
33
+ "conv_kernel": [
34
+ 1,
35
+ 3
36
+ ],
37
+ "convt_kernel": [
38
+ 1,
39
+ 3
40
+ ],
41
+ "conv_kernel_inp": [
42
+ 3,
43
+ 3
44
+ ],
45
+ "model_version": "DeepFilterNet3"
46
+ }
v3/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fca0af2f25cad49d74fc9ac5f9155813416e4b350ec344bb433b4a48a9a76d38
3
+ size 8682709