Add all DeepFilterNet versions (v1, v2, v3) in subdirectories

Consolidate DeepFilterNet v1, v2, and v3 MLX weights into a single
repository with v1/, v2/, v3/ subdirectories and a combined README.

Files changed (7) hide show

README.md +120 -0
v1/config.json +46 -0
v1/model.safetensors +3 -0
v2/config.json +46 -0
v2/model.safetensors +3 -0
v3/config.json +46 -0
v3/model.safetensors +3 -0

README.md CHANGED Viewed

@@ -1,3 +1,123 @@
 ---
 license: mit
 ---

 ---
 license: mit
+library_name: mlx
+tags:
+  - mlx
+  - audio
+  - speech-enhancement
+  - noise-suppression
+  - deepfilternet
+  - apple-silicon
+base_model:
+  - DeepFilterNet/DeepFilterNet
+  - DeepFilterNet/DeepFilterNet2
+  - DeepFilterNet/DeepFilterNet3
+pipeline_tag: audio-to-audio
 ---
+# DeepFilterNet — MLX
+MLX-compatible weights for [DeepFilterNet](https://github.com/Rikorose/DeepFilterNet), a real-time speech enhancement framework that suppresses background noise in full-band 48 kHz audio.
+This repository contains all three model versions (v1, v2, v3), converted directly from the original PyTorch checkpoints to `safetensors` format for use with [MLX](https://github.com/ml-explore/mlx) on Apple Silicon. No fine-tuning or quantization was applied — the weights are numerically identical to the originals.
+## Models
+Each version is stored in its own subfolder:
+| Version | Subfolder | Weights | Paper |
+|---------|-----------|---------|-------|
+| DeepFilterNet v1 | `v1/` | ~7.2 MiB (float32) | [arXiv:2110.05588](https://arxiv.org/abs/2110.05588) |
+| DeepFilterNet v2 | `v2/` | ~8.9 MiB (float32) | [arXiv:2205.05474](https://arxiv.org/abs/2205.05474) |
+| DeepFilterNet v3 | `v3/` | ~8.3 MiB (float32) | [arXiv:2305.08227](https://arxiv.org/abs/2305.08227) |
+## Model Details
+All versions share the same audio parameters:
+| Parameter | Value |
+|-----------|-------|
+| Sample rate | 48 kHz |
+| FFT size | 960 |
+| Hop size | 480 |
+| ERB bands | 32 |
+| DF bins | 96 |
+| DF order | 5 |
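+The embedding hidden dimension differs between versions (see each version's `config.json` for the full architecture settings):
+| Version | Embedding hidden dim |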
+|---------|---------------------|
+| v1 | 512 |
+| v2 | 256 |
+| v3 | 256 |
+## Files
+```
+v1/
+  config.json          # v1 architecture configuration
+  model.safetensors    # v1 weights
+v2/
+  config.json          # v2 architecture configuration
+  model.safetensors    # v2 weights
+v3/
+  config.json          # v3 architecture configuration
+  model.safetensors    # v3 weights
+```
+## Usage
+### Python (mlx-audio)
+```python
+from mlx_audio.sts.models.deepfilternet import DeepFilterNetModel
+# Load v3 (default)
+model = DeepFilterNetModel.from_pretrained("mlx-community/DeepFilterNet-mlx")
+# Load a specific version
+model = DeepFilterNetModel.from_pretrained("mlx-community/DeepFilterNet-mlx", subfolder="v1")
+# Enhance a file
+enhanced = model.enhance("noisy.wav")
+```
+### Swift (mlx-audio-swift)
+```swift
+import MLXAudioSTS
+let model = try await DeepFilterNetModel.fromPretrained("mlx-community/DeepFilterNet-mlx", subfolder: "v3")
+let enhanced = try model.enhance(audioArray)
+```
+## Origin
+- **Original model:** [DeepFilterNet](https://github.com/Rikorose/DeepFilterNet) by Hendrik Schröter
+- **License:** MIT (same as the original)
+- **Conversion:** PyTorch → `safetensors`
+## Citations
+```bibtex
+@inproceedings{schroeter2022deepfilternet,
+  title={{DeepFilterNet}: A Low Complexity Speech Enhancement Framework for Full-Band Audio based on Deep Filtering},
+  author={Schr{\"o}ter, Hendrik and Escalante-B., Alberto N. and Rosenkranz, Tobias and Maier, Andreas},
+  booktitle={ICASSP 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
+  year={2022},
+  organization={IEEE}
+}
+@inproceedings{schroeter2022deepfilternet2,
+  title={{DeepFilterNet2}: Towards Real-Time Speech Enhancement on Embedded Devices for Full-Band Audio},
+  author={Schr{\"o}ter, Hendrik and Escalante-B., Alberto N. and Rosenkranz, Tobias and Maier, Andreas},
+  booktitle={17th International Workshop on Acoustic Signal Enhancement (IWAENC 2022)},
+  year={2022},
+}
+@inproceedings{schroeter2023deepfilternet3,
+  title={{DeepFilterNet}: Perceptually Motivated Real-Time Speech Enhancement},
+  author={Schr{\"o}ter, Hendrik and Rosenkranz, Tobias and Escalante-B., Alberto N. and Maier, Andreas},
+  booktitle={INTERSPEECH},
+  year={2023}
+}
+```

v1/config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "sample_rate": 48000,
+  "fft_size": 960,
+  "hop_size": 480,
+  "nb_erb": 32,
+  "nb_df": 96,
+  "df_order": 5,
+  "df_lookahead": 1,
+  "lsnr_max": 35,
+  "lsnr_min": -15,
+  "conv_ch": 64,
+  "conv_k_enc": 2,
+  "conv_k_dec": 2,
+  "conv_width_factor": 1,
+  "conv_dec_mode": "transposed",
+  "emb_hidden_dim": 512,
+  "emb_num_layers": 3,
+  "df_hidden_dim": 512,
+  "df_num_layers": 2,
+  "gru_groups": 8,
+  "linear_groups": 8,
+  "enc_linear_groups": 8,
+  "group_shuffle": true,
+  "mask_pf": false,
+  "conv_lookahead": 2,
+  "conv_depthwise": true,
+  "convt_depthwise": true,
+  "enc_concat": false,
+  "emb_gru_skip_enc": "none",
+  "emb_gru_skip": "none",
+  "df_gru_skip": "groupedlinear",
+  "dfop_method": "real_unfold",
+  "conv_kernel": [
+    1,
+    3
+  ],
+  "convt_kernel": [
+    1,
+    3
+  ],
+  "conv_kernel_inp": [
+    3,
+    3
+  ],
+  "model_version": "DeepFilterNet"
+}

v1/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ebf22a26e37a849769e1d5573c0bfd6e7337f63e4a583c16d40fb83b8643a72
+size 7536928

v2/config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "sample_rate": 48000,
+  "fft_size": 960,
+  "hop_size": 480,
+  "nb_erb": 32,
+  "nb_df": 96,
+  "df_order": 5,
+  "df_lookahead": 2,
+  "lsnr_max": 35,
+  "lsnr_min": -15,
+  "conv_ch": 64,
+  "conv_k_enc": 1,
+  "conv_k_dec": 1,
+  "conv_width_factor": 1,
+  "conv_dec_mode": "transposed",
+  "emb_hidden_dim": 256,
+  "emb_num_layers": 3,
+  "df_hidden_dim": 256,
+  "df_num_layers": 2,
+  "gru_groups": 8,
+  "linear_groups": 8,
+  "enc_linear_groups": 8,
+  "group_shuffle": false,
+  "mask_pf": false,
+  "conv_lookahead": 2,
+  "conv_depthwise": true,
+  "convt_depthwise": true,
+  "enc_concat": true,
+  "emb_gru_skip_enc": "none",
+  "emb_gru_skip": "none",
+  "df_gru_skip": "none",
+  "dfop_method": "df",
+  "conv_kernel": [
+    1,
+    3
+  ],
+  "convt_kernel": [
+    1,
+    3
+  ],
+  "conv_kernel_inp": [
+    3,
+    3
+  ],
+  "model_version": "DeepFilterNet2"
+}

v2/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd7e6c8dc47957c72b450308ffeca1f1396705ab6eebf8bddf46a1f9022c5335
+size 9365936

v3/config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "sample_rate": 48000,
+  "fft_size": 960,
+  "hop_size": 480,
+  "nb_erb": 32,
+  "nb_df": 96,
+  "df_order": 5,
+  "df_lookahead": 2,
+  "lsnr_max": 35,
+  "lsnr_min": -15,
+  "conv_ch": 64,
+  "conv_k_enc": 1,
+  "conv_k_dec": 1,
+  "conv_width_factor": 1,
+  "conv_dec_mode": "transposed",
+  "emb_hidden_dim": 256,
+  "emb_num_layers": 3,
+  "df_hidden_dim": 256,
+  "df_num_layers": 2,
+  "gru_groups": 8,
+  "linear_groups": 16,
+  "enc_linear_groups": 32,
+  "group_shuffle": false,
+  "mask_pf": false,
+  "conv_lookahead": 2,
+  "conv_depthwise": true,
+  "convt_depthwise": false,
+  "enc_concat": false,
+  "emb_gru_skip_enc": "none",
+  "emb_gru_skip": "none",
+  "df_gru_skip": "groupedlinear",
+  "dfop_method": "df",
+  "conv_kernel": [
+    1,
+    3
+  ],
+  "convt_kernel": [
+    1,
+    3
+  ],
+  "conv_kernel_inp": [
+    3,
+    3
+  ],
+  "model_version": "DeepFilterNet3"
+}

v3/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fca0af2f25cad49d74fc9ac5f9155813416e4b350ec344bb433b4a48a9a76d38
+size 8682709