lainlives committed on
Commit
ff87041
·
verified ·
1 Parent(s): 2b965c7

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +33 -0
  2. 0d19c1c6-0f06f20e.th +3 -0
  3. 17_HP-Wind_Inst-UVR.pth +3 -0
  4. 5d2d6c55-db83574e.th +3 -0
  5. 75fc33f5-1941ce65.th +3 -0
  6. 7d865c68-3d5dd56b.th +3 -0
  7. 7ecf8ec1-70f50cc9.th +3 -0
  8. 902315c2-b39ce9c9.th +3 -0
  9. 9a6b4851-03af0aa6.th +3 -0
  10. BS_Inst_EXP_VRL.yaml +124 -0
  11. UVR-De-Reverb-aufr33-jarredou.pth +3 -0
  12. UVR-MDX-NET-Inst_1.onnx +3 -0
  13. UVR-MDX-NET-Inst_2.onnx +3 -0
  14. UVR-MDX-NET-Inst_3.onnx +3 -0
  15. UVR-MDX-NET-Inst_HQ_2.onnx +3 -0
  16. UVR-MDX-NET-Inst_Main.onnx +3 -0
  17. UVR-MDX-NET-Inst_full_292.onnx +3 -0
  18. UVR-MDX-NET-Voc_FT.onnx +3 -0
  19. UVR-MDX-NET_Inst_187_beta.onnx +3 -0
  20. UVR-MDX-NET_Inst_82_beta.onnx +3 -0
  21. UVR-MDX-NET_Inst_90_beta.onnx +3 -0
  22. UVR-MDX-NET_Main_340.onnx +3 -0
  23. UVR-MDX-NET_Main_390.onnx +3 -0
  24. UVR-MDX-NET_Main_406.onnx +3 -0
  25. UVR-MDX-NET_Main_427.onnx +3 -0
  26. UVR-MDX-NET_Main_438.onnx +3 -0
  27. UVR_MDXNET_2_9682.onnx +3 -0
  28. UVR_MDXNET_9482.onnx +3 -0
  29. assets/download_models.py +26 -0
  30. assets/json_link_processor.py +7 -0
  31. assets/luvr5-ui-models.txt +248 -0
  32. assets/model_url_sorter.py +5 -0
  33. assets/upload_models.py +33 -0
  34. aufr33-jarredou_DrumSep_model_mdx23c_ep_141_sdr_10.8059.yaml +87 -0
  35. calculate-model-hashes.py +105 -0
  36. config_bs_roformer_karaoke_anvuew.yaml +131 -0
  37. config_chorus_male_female_bs_roformer.yaml +125 -0
  38. config_dereverb-echo_mel_band_roformer_sdr_13.4843_v2.yaml +64 -0
  39. config_dereverb_echo_mel_band_roformer_v2.yaml +64 -0
  40. config_dereverb_echo_mel_band_roformer_v2.yaml.1 +64 -0
  41. config_dereverb_echo_mel_band_roformer_v2.yaml.2 +64 -0
  42. config_dereverb_mdx23c.yaml +135 -0
  43. config_dnr_bandit_bsrnn_multi_mus64.yaml +78 -0
  44. config_dnr_bandit_v2_mus64.yaml +78 -0
  45. config_drumsep_mdx23c.yaml +87 -0
  46. config_mel_band_roformer_bleed_suppressor_v1.yaml +51 -0
  47. config_mel_band_roformer_karaoke.yaml +71 -0
  48. config_mel_band_roformer_karaoke_gabox.yaml +72 -0
  49. config_melband_roformer_big_beta5e.yaml +51 -0
  50. config_melbandroformer_big_beta4.yaml +51 -0
.gitattributes CHANGED
@@ -4,3 +4,36 @@ UVR_MDXNET_3_9662.onnx filter=lfs diff=lfs merge=lfs -text
4
  UVR_MDXNET_1_9703.onnx filter=lfs diff=lfs merge=lfs -text
5
  scnet_checkpoint_musdb18.ckpt filter=lfs diff=lfs merge=lfs -text
6
  UVR-MDX-NET_Crowd_HQ_1.onnx filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  UVR_MDXNET_1_9703.onnx filter=lfs diff=lfs merge=lfs -text
5
  scnet_checkpoint_musdb18.ckpt filter=lfs diff=lfs merge=lfs -text
6
  UVR-MDX-NET_Crowd_HQ_1.onnx filter=lfs diff=lfs merge=lfs -text
7
+ 7d865c68-3d5dd56b.th filter=lfs diff=lfs merge=lfs -text
8
+ UVR_MDXNET_2_9682.onnx filter=lfs diff=lfs merge=lfs -text
9
+ 0d19c1c6-0f06f20e.th filter=lfs diff=lfs merge=lfs -text
10
+ 902315c2-b39ce9c9.th filter=lfs diff=lfs merge=lfs -text
11
+ 5d2d6c55-db83574e.th filter=lfs diff=lfs merge=lfs -text
12
+ 75fc33f5-1941ce65.th filter=lfs diff=lfs merge=lfs -text
13
+ 17_HP-Wind_Inst-UVR.pth filter=lfs diff=lfs merge=lfs -text
14
+ UVR-MDX-NET_Main_390.onnx filter=lfs diff=lfs merge=lfs -text
15
+ UVR-MDX-NET_Main_406.onnx filter=lfs diff=lfs merge=lfs -text
16
+ UVR-MDX-NET_Inst_187_beta.onnx filter=lfs diff=lfs merge=lfs -text
17
+ UVR-MDX-NET_Inst_82_beta.onnx filter=lfs diff=lfs merge=lfs -text
18
+ UVR-MDX-NET_Main_340.onnx filter=lfs diff=lfs merge=lfs -text
19
+ UVR-MDX-NET_Inst_90_beta.onnx filter=lfs diff=lfs merge=lfs -text
20
+ UVR-MDX-NET-Voc_FT.onnx filter=lfs diff=lfs merge=lfs -text
21
+ 7ecf8ec1-70f50cc9.th filter=lfs diff=lfs merge=lfs -text
22
+ UVR-MDX-NET-Inst_1.onnx filter=lfs diff=lfs merge=lfs -text
23
+ UVR-MDX-NET-Inst_2.onnx filter=lfs diff=lfs merge=lfs -text
24
+ UVR_MDXNET_9482.onnx filter=lfs diff=lfs merge=lfs -text
25
+ 9a6b4851-03af0aa6.th filter=lfs diff=lfs merge=lfs -text
26
+ UVR-MDX-NET-Inst_3.onnx filter=lfs diff=lfs merge=lfs -text
27
+ f7e0c4bc-ba3fe64a.th filter=lfs diff=lfs merge=lfs -text
28
+ UVR-MDX-NET_Main_427.onnx filter=lfs diff=lfs merge=lfs -text
29
+ UVR-MDX-NET-Inst_Main.onnx filter=lfs diff=lfs merge=lfs -text
30
+ UVR-MDX-NET-Inst_full_292.onnx filter=lfs diff=lfs merge=lfs -text
31
+ UVR-MDX-NET_Main_438.onnx filter=lfs diff=lfs merge=lfs -text
32
+ UVR-De-Reverb-aufr33-jarredou.pth filter=lfs diff=lfs merge=lfs -text
33
+ kuielab_b_drums.onnx filter=lfs diff=lfs merge=lfs -text
34
+ kuielab_a_drums.onnx filter=lfs diff=lfs merge=lfs -text
35
+ d12395a8-e57c48e6.th filter=lfs diff=lfs merge=lfs -text
36
+ kuielab_a_vocals.onnx filter=lfs diff=lfs merge=lfs -text
37
+ kuielab_a_other.onnx filter=lfs diff=lfs merge=lfs -text
38
+ kuielab_b_bass.onnx filter=lfs diff=lfs merge=lfs -text
39
+ UVR-MDX-NET-Inst_HQ_2.onnx filter=lfs diff=lfs merge=lfs -text
0d19c1c6-0f06f20e.th ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f06f20ed6ddc8058fa72ccc4845f3a88916eff7d007b623924193de217bbcf4
3
+ size 178048329
17_HP-Wind_Inst-UVR.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acc6d472b4b478da9c9ab5af45b167749e05a7f65b30c7d5988b3700a513aeee
3
+ size 223661285
5d2d6c55-db83574e.th ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db83574e05b2308f76e2764819da673f2d16d437b9e619f5fcb72f275fc0e24f
3
+ size 167391595
75fc33f5-1941ce65.th ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1941ce654b11df4132b9f4eae408556b4c83fad6fe26b4bc0dbcb36b975befb3
3
+ size 167407275
7d865c68-3d5dd56b.th ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d5dd56b5bc986f136dff98655ded22b2b033f465ccec7a28640a6b15fd71ed6
3
+ size 167918783
7ecf8ec1-70f50cc9.th ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70f50cc947d08f32e6dd8e2b687d398fa5ef9e51d1bd7600e32205d1f44be6b9
3
+ size 178048329
902315c2-b39ce9c9.th ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b39ce9c97b5603fbaee99ec59fe1d2db570c77a37356c9c85542d881517c3302
3
+ size 167405611
9a6b4851-03af0aa6.th ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03af0aa64af2c4f0795659c265fdead238fab31c064a8a4b5b051c1b22c8ad48
3
+ size 267602121
BS_Inst_EXP_VRL.yaml ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100 #352800 #485100
3
+ dim_f: 1024
4
+ dim_t: 801
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 384
13
+ depth: 12
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ freqs_per_bands: !!python/tuple
19
+ - 2
20
+ - 2
21
+ - 2
22
+ - 2
23
+ - 2
24
+ - 2
25
+ - 2
26
+ - 2
27
+ - 2
28
+ - 2
29
+ - 2
30
+ - 2
31
+ - 2
32
+ - 2
33
+ - 2
34
+ - 2
35
+ - 2
36
+ - 2
37
+ - 2
38
+ - 2
39
+ - 2
40
+ - 2
41
+ - 2
42
+ - 2
43
+ - 4
44
+ - 4
45
+ - 4
46
+ - 4
47
+ - 4
48
+ - 4
49
+ - 4
50
+ - 4
51
+ - 4
52
+ - 4
53
+ - 4
54
+ - 4
55
+ - 12
56
+ - 12
57
+ - 12
58
+ - 12
59
+ - 12
60
+ - 12
61
+ - 12
62
+ - 12
63
+ - 24
64
+ - 24
65
+ - 24
66
+ - 24
67
+ - 24
68
+ - 24
69
+ - 24
70
+ - 24
71
+ - 48
72
+ - 48
73
+ - 48
74
+ - 48
75
+ - 48
76
+ - 48
77
+ - 48
78
+ - 48
79
+ - 128
80
+ - 129
81
+ dim_head: 64
82
+ heads: 8
83
+ attn_dropout: 0
84
+ ff_dropout: 0
85
+ flash_attn: true
86
+ dim_freqs_in: 1025
87
+ stft_n_fft: 2048
88
+ stft_hop_length: 441
89
+ stft_win_length: 2048
90
+ stft_normalized: false
91
+ mask_estimator_depth: 2
92
+ multi_stft_resolution_loss_weight: 1.0
93
+ multi_stft_resolutions_window_sizes: !!python/tuple
94
+ - 4096
95
+ - 2048
96
+ - 1024
97
+ - 512
98
+ - 256
99
+ multi_stft_hop_size: 147
100
+ multi_stft_normalized: False
101
+ training:
102
+ batch_size: 1
103
+ gradient_accumulation_steps: 1
104
+ grad_clip: 0
105
+ instruments:
106
+ - Vocals
107
+ - Instrumental
108
+ lr: 1.0e-04
109
+ patience: 2
110
+ reduce_factor: 0.95
111
+ target_instrument: Instrumental
112
+ num_epochs: 1
113
+ num_steps: 1000
114
+ q: 0.95
115
+ coarse_loss_clip: true
116
+ ema_momentum: 0.999
117
+ optimizer: adamw
118
+ other_fix: true # needed for checking on the multisong dataset if "other" is actually the instrumental
119
+ use_amp: true # enable or disable mixed precision (float16); usually this should be true
120
+
121
+ inference:
122
+ batch_size: 1
123
+ dim_t: 1101
124
+ num_overlap: 2
UVR-De-Reverb-aufr33-jarredou.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fff726b0adf09f2eadc2151645557db81e1a01fe15d91f9bbbfa9b50a007f1fd
3
+ size 58928133
UVR-MDX-NET-Inst_1.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ca53f94b7a0cbb04fcfcc8f3ea5ec1ae22cd8ad044f5e673588859f83976f5e
3
+ size 66759214
UVR-MDX-NET-Inst_2.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3a96a664d28b52db9def0a9cae9a16dbb524d8325bfe8f0ac64ac5d231456bc
3
+ size 66759214
UVR-MDX-NET-Inst_3.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b7834e2972158d8c9864e7376e3a7d084079c80a23f38dc31c4b0a4e901a1cb
3
+ size 66759214
UVR-MDX-NET-Inst_HQ_2.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:197f8ab296df850f961e68c595f6649acb7d9e621b5600b460f3458967299112
3
+ size 66759214
UVR-MDX-NET-Inst_Main.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ab401dfe4a548b87deb64f975294bd56ff946aa32903f53b4b24bb13b2cce1e
3
+ size 52786726
UVR-MDX-NET-Inst_full_292.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:020f6b65fa219fb7c285e4f3fc2863bf22daf03c4c93e547b6d13d5f2757a7ec
3
+ size 66759214
UVR-MDX-NET-Voc_FT.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:534b2070fcc7df514b13ef660dc8cbb328679c2374d04354a5c42bb14ecce111
3
+ size 66762490
UVR-MDX-NET_Inst_187_beta.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c74566f3c3033cacba996328b2ee90bf77ef79ea6c35b7841df183b7906f54a5
3
+ size 66759214
UVR-MDX-NET_Inst_82_beta.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6c268302f09ab53687072618e056a611272a7e2c3fd9b3b59164da152f3588e
3
+ size 66759214
UVR-MDX-NET_Inst_90_beta.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d902868a46575aea6ee2335736ff3b53faf497a6bdaa1b864e0fd84eb1b42a5
3
+ size 66759214
UVR-MDX-NET_Main_340.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78792633b4007755af12ecde20f709b4f0b99563b1d25fe0a501ed2122aff218
3
+ size 66759214
UVR-MDX-NET_Main_390.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286c4f0847ca837e2c3f4c4058f756d5f150cbf080506aa6f33a2847aba92e8c
3
+ size 66759214
UVR-MDX-NET_Main_406.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f158816a44eef1f0ba0f48b813cbfcf460ed1c70a754af3609ade44aaf7d1b23
3
+ size 66759214
UVR-MDX-NET_Main_427.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95275802a27801b97e3c0552b6eaa69f9bb3bd7df53cdf0536cce0a753f702cc
3
+ size 66759214
UVR-MDX-NET_Main_438.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5e1ad93587a163a0987a0168b99a2ad875c0d9bfc3afb596b7c36b09c7f5c26
3
+ size 66759214
UVR_MDXNET_2_9682.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1deb7295acd3206bc9582a5d92f1b0a74bf3f41c7c1fb78a0ac0123cde4372db
3
+ size 29704436
UVR_MDXNET_9482.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4f365207c56deb115bceedff3ad8fe98a751c745f9e370cecec6226b8b47184
3
+ size 29704436
assets/download_models.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import os
3
+ import argparse
4
+ from huggingface_hub import snapshot_download
5
+ from model_tools import download_files_from_txt, download_hf_repo
6
+
7
+ REPO_OWNER = "lainlives"
8
+ REPO_NAME = "audio-separator-models"
9
+ repo_type = "model"
10
+
11
+
12
+ if __name__ == "__main__":
13
+ token = os.getenv("HF_TOKEN")
14
+ repo_id = f"{REPO_OWNER}/{REPO_NAME}"
15
+ parser = argparse.ArgumentParser(description="Model Download Tool")
16
+ local_dir = os.getcwd()
17
+ parser.add_argument(
18
+ "--fromtxt",
19
+ action="store_true",
20
+ help="Download or process based on a text file instead of cloning Hugging Face repo",
21
+ )
22
+ args = parser.parse_args()
23
+ if args.fromtxt:
24
+ download_files_from_txt("assets/links.txt", local_dir)
25
+ else:
26
+ download_hf_repo(repo_id, local_dir, token)
assets/json_link_processor.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import json
2
+ import urllib.request
3
+ import os
4
+ import model_tools as mt
5
+
6
+ if __name__ == "__main__":
7
+ mt.get_links_from_json("models.json")
assets/luvr5-ui-models.txt ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_aspiration_mel_band_roformer.yaml
2
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_aspiration_mel_band_roformer.yaml
3
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_bs_roformer_instrumental_resurrection_unwa.yaml
4
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_bs_roformer_vocals_gabox.yaml
5
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_bs_roformer_vocals_resurrection_unwa.yaml
6
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_bs_roformer_vocals_revive_unwa.yaml
7
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_bs_roformer_vocals_revive_unwa.yaml
8
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_bs_roformer_vocals_revive_unwa.yaml
9
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_chorus_male_female_bs_roformer.yaml
10
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_chorus_male_female_bs_roformer.yaml
11
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_dereverb-echo_mel_band_roformer.yaml
12
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_dereverb-echo_mel_band_roformer_sdr_13.4843_v2.yaml
13
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_dereverb_echo_mel_band_roformer_v2.yaml
14
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_dereverb_echo_mel_band_roformer_v2.yaml
15
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_dereverb_echo_mel_band_roformer_v2.yaml
16
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_dereverb_mdx23c.yaml
17
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_drumsep_mdx23c.yaml
18
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_bleed_suppressor_v1.yaml
19
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_becruily.yaml
20
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
21
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
22
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
23
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
24
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
25
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
26
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
27
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
28
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
29
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
30
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
31
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
32
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
33
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
34
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
35
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
36
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
37
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
38
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
39
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
40
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
41
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_instrumental_gabox.yaml
42
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_karaoke_becruily.yaml
43
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_karaoke_gabox.yaml
44
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_karaoke_gabox.yaml
45
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_kim_ft_unwa.yaml
46
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_kim_ft_unwa.yaml
47
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_kim_ft_unwa.yaml
48
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_kim_ft_unwa.yaml
49
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_vocal_fullness_aname.yaml
50
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_vocals_becruily.yaml
51
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_vocals_gabox.yaml
52
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_vocals_gabox.yaml
53
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_vocals_gabox.yaml
54
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_vocals_gabox.yaml
55
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_vocals_gabox.yaml
56
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_vocals_gabox.yaml
57
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_vocals_gabox.yaml
58
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_mel_band_roformer_vocals_gabox.yaml
59
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_melband_roformer_big_beta5e.yaml
60
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_melbandroformer_big_beta4.yaml
61
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_melbandroformer_big_beta6.yaml
62
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_melbandroformer_big_beta6x.yaml
63
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_melbandroformer_inst.yaml
64
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_melbandroformer_inst.yaml
65
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_melbandroformer_inst.yaml
66
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_melbandroformer_inst.yaml
67
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_melbandroformer_inst_v2.yaml
68
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_melbandroformer_instvoc_duality.yaml
69
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_melbandroformer_instvoc_duality.yaml
70
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_vocals_mel_band_roformer_big_v1_ft.yaml
71
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_vocals_mel_band_roformer_ft.yaml
72
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_vocals_mel_band_roformer_ft.yaml
73
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_vocals_mel_band_roformer_ft.yaml
74
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/config_vocals_mel_band_roformer_ft.yaml
75
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768_config.yaml
76
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/denoise_mel_band_roformer_aufr33_sdr_27.9959_config.yaml
77
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/dereverb_mel_band_roformer_anvuew.yaml
78
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/dereverb_mel_band_roformer_anvuew.yaml
79
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/dereverb_mel_band_roformer_anvuew.yaml
80
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/deverb_bs_roformer_8_384dim_10depth_config.yaml
81
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144_config.yaml
82
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956_config.yaml
83
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/model_2_stem_061321.yaml
84
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/model_2_stem_full_band_8k.yaml
85
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/model_2_stem_full_band_8k.yaml
86
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/model_bs_roformer_ep_317_sdr_12.9755.yaml
87
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/model_bs_roformer_ep_368_sdr_12.9628.yaml
88
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/model_bs_roformer_ep_937_sdr_10.5309.yaml
89
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml
90
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/vocals_mel_band_roformer.yaml
91
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/10_SP-UVR-2B-32000-1.pth
92
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/11_SP-UVR-2B-32000-2.pth
93
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/12_SP-UVR-3B-44100.pth
94
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/13_SP-UVR-4B-44100-1.pth
95
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/14_SP-UVR-4B-44100-2.pth
96
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/15_SP-UVR-MID-44100-1.pth
97
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/16_SP-UVR-MID-44100-2.pth
98
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/17_HP-Wind_Inst-UVR.pth
99
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/1_HP-UVR.pth
100
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/2_HP-UVR.pth
101
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/3_HP-Vocal-UVR.pth
102
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/4_HP-Vocal-UVR.pth
103
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/5_HP-Karaoke-UVR.pth
104
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/6_HP-Karaoke-UVR.pth
105
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/7_HP2-UVR.pth
106
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/8_HP2-UVR.pth
107
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/9_HP2-UVR.pth
108
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/MGM_HIGHEND_v4.pth
109
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/MGM_LOWEND_A_v4.pth
110
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/MGM_LOWEND_B_v4.pth
111
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/MGM_MAIN_v4.pth
112
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-BVE-4B_SN-44100-1.pth
113
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-BVE-4B_SN-44100-2.pth
114
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-De-Echo-Aggressive.pth
115
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-De-Echo-Normal.pth
116
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-De-Reverb-aufr33-jarredou.pth
117
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-DeEcho-DeReverb.pth
118
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-DeNoise-Lite.pth
119
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-DeNoise.pth
120
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/MDX23C-8KFFT-InstVoc_HQ.ckpt
121
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/MDX23C-8KFFT-InstVoc_HQ_2.ckpt
122
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/MDX23C-De-Reverb-aufr33-jarredou.ckpt
123
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/MDX23C-DrumSep-aufr33-jarredou.ckpt
124
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/MDX23C_D1581.ckpt
125
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/MelBandRoformerBigSYHFTV1.ckpt
126
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/MelBandRoformerSYHFT.ckpt
127
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/MelBandRoformerSYHFTV2.5.ckpt
128
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/MelBandRoformerSYHFTV2.ckpt
129
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/MelBandRoformerSYHFTV3Epsilon.ckpt
130
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt
131
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/aspiration_mel_band_roformer_sdr_18.9845.ckpt
132
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/bs_roformer_instrumental_resurrection_unwa.ckpt
133
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt
134
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/bs_roformer_vocals_gabox.ckpt
135
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/bs_roformer_vocals_resurrection_unwa.ckpt
136
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/bs_roformer_vocals_revive_unwa.ckpt
137
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/bs_roformer_vocals_revive_v2_unwa.ckpt
138
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/bs_roformer_vocals_revive_v3e_unwa.ckpt
139
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt
140
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt
141
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt
142
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt
143
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/dereverb_big_mbr_ep_362.ckpt
144
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/dereverb_echo_mbr_fused.ckpt
145
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt
146
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt
147
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/dereverb_mel_band_roformer_mono_anvuew.ckpt
148
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/dereverb_super_big_mbr_ep_346.ckpt
149
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/deverb_bs_roformer_8_384dim_10depth.ckpt
150
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_bleed_suppressor_v1.ckpt
151
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt
152
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_denoise_debleed_gabox.ckpt
153
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_2_gabox.ckpt
154
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_3_gabox.ckpt
155
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_becruily.ckpt
156
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_bleedless_v1_gabox.ckpt
157
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_bleedless_v2_gabox.ckpt
158
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_bleedless_v3_gabox.ckpt
159
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_fullness_noise_v4_gabox.ckpt
160
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_fullness_v1_gabox.ckpt
161
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_fullness_v2_gabox.ckpt
162
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_fullness_v3_gabox.ckpt
163
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_fv7z_gabox.ckpt
164
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_fv8_gabox.ckpt
165
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_fvx_gabox.ckpt
166
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_gabox.ckpt
167
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_instv5_gabox.ckpt
168
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_instv5n_gabox.ckpt
169
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_instv6_gabox.ckpt
170
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_instv6n_gabox.ckpt
171
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_instv7_gabox.ckpt
172
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_instv7n_gabox.ckpt
173
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_instv8_gabox.ckpt
174
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_instrumental_instv8n_gabox.ckpt
175
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt
176
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_karaoke_becruily.ckpt
177
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_karaoke_gabox.ckpt
178
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_karaoke_gabox_v2.ckpt
179
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_kim_ft2_bleedless_unwa.ckpt
180
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_kim_ft2_unwa.ckpt
181
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_kim_ft3_unwa.ckpt
182
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_kim_ft_unwa.ckpt
183
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_vocal_fullness_aname.ckpt
184
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_vocals_becruily.ckpt
185
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_vocals_fv1_gabox.ckpt
186
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_vocals_fv2_gabox.ckpt
187
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_vocals_fv3_gabox.ckpt
188
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_vocals_fv4_gabox.ckpt
189
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_vocals_fv5_gabox.ckpt
190
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_vocals_fv6_gabox.ckpt
191
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_vocals_gabox.ckpt
192
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/mel_band_roformer_vocals_v2_gabox.ckpt
193
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/melband_roformer_big_beta4.ckpt
194
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/melband_roformer_big_beta5e.ckpt
195
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/melband_roformer_big_beta6.ckpt
196
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/melband_roformer_big_beta6x.ckpt
197
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/melband_roformer_inst_v1.ckpt
198
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/melband_roformer_inst_v1_plus.ckpt
199
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/melband_roformer_inst_v1e.ckpt
200
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/melband_roformer_inst_v1e_plus.ckpt
201
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/melband_roformer_inst_v2.ckpt
202
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/melband_roformer_instvoc_duality_v1.ckpt
203
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/melband_roformer_instvox_duality_v2.ckpt
204
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/model_bs_roformer_ep_317_sdr_12.9755.ckpt
205
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/model_bs_roformer_ep_368_sdr_12.9628.ckpt
206
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/model_bs_roformer_ep_937_sdr_10.5309.ckpt
207
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt
208
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt
209
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/vocals_mel_band_roformer.ckpt
210
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/Kim_Inst.onnx
211
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/Kim_Vocal_1.onnx
212
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/Kim_Vocal_2.onnx
213
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/Reverb_HQ_By_FoxJoy.onnx
214
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET-Inst_1.onnx
215
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET-Inst_2.onnx
216
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET-Inst_3.onnx
217
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET-Inst_HQ_1.onnx
218
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET-Inst_HQ_2.onnx
219
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET-Inst_HQ_3.onnx
220
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET-Inst_HQ_4.onnx
221
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET-Inst_HQ_5.onnx
222
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET-Inst_Main.onnx
223
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET-Inst_full_292.onnx
224
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET-Voc_FT.onnx
225
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET_Crowd_HQ_1.onnx
226
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET_Inst_187_beta.onnx
227
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET_Inst_82_beta.onnx
228
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET_Inst_90_beta.onnx
229
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET_Main_340.onnx
230
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET_Main_390.onnx
231
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET_Main_406.onnx
232
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET_Main_427.onnx
233
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR-MDX-NET_Main_438.onnx
234
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR_MDXNET_1_9703.onnx
235
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR_MDXNET_2_9682.onnx
236
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR_MDXNET_3_9662.onnx
237
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR_MDXNET_9482.onnx
238
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR_MDXNET_KARA.onnx
239
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR_MDXNET_KARA_2.onnx
240
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/UVR_MDXNET_Main.onnx
241
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/kuielab_a_bass.onnx
242
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/kuielab_a_drums.onnx
243
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/kuielab_a_other.onnx
244
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/kuielab_a_vocals.onnx
245
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/kuielab_b_bass.onnx
246
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/kuielab_b_drums.onnx
247
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/kuielab_b_other.onnx
248
+ https://huggingface.co/lainlives/audio-separator-models/resolve/main/kuielab_b_vocals.onnx
assets/model_url_sorter.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import os
2
+ from model_tools import sort_links_by_extension
3
+
4
+ if __name__ == "__main__":
5
+ sort_links_by_extension("links.txt", "sorted_links.txt")
assets/upload_models.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #! /usr/bin/env python3
2
+ import os
3
+ import argparse
4
+ from huggingface_hub import HfApi
5
+ import model_tools as mt
6
+
7
+ REPO_OWNER = "lainlives"
8
+ REPO_NAME = "audio-separator-models"
9
+ repo_type = "model"
10
+ HF_TOKEN = os.getenv("HF_TOKEN")
11
+
12
+
13
if __name__ == "__main__":
    # Fully qualified Hub repository ("owner/name") that receives the upload.
    repo_id = f"{REPO_OWNER}/{REPO_NAME}"

    parser = argparse.ArgumentParser(description="Model Upload Tool")
    parser.add_argument(
        "--large",
        action="store_true",
        help="Upload using upload-large-folder",
    )
    parser.add_argument(
        "--token",
        type=str,
        default=HF_TOKEN,
        dest="HF_TOKEN",
        help="Your access token.",
    )
    args = parser.parse_args()

    # The parsed token used to be discarded, so --token had no effect.
    # Export it so that huggingface_hub's environment-based token lookup
    # can see it. This is a no-op when the value already came from the
    # HF_TOKEN environment variable.
    # NOTE(review): the push helpers below take no token argument, so
    # presumably model_tools relies on ambient credentials - confirm that it
    # does not capture HF_TOKEN at import time.
    if args.HF_TOKEN:
        os.environ["HF_TOKEN"] = args.HF_TOKEN

    if args.large:
        mt.push_large_folder_to_hf(repo_id, repo_type)
    else:
        mt.push_to_hf(repo_id, repo_type)
aufr33-jarredou_DrumSep_model_mdx23c_ep_141_sdr_10.8059.yaml ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 130560
3
+ dim_f: 1024
4
+ dim_t: 256
5
+ hop_length: 512
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+
11
+ model:
12
+ act: gelu
13
+ bottleneck_factor: 4
14
+ growth: 128
15
+ norm: InstanceNorm
16
+ num_blocks_per_scale: 2
17
+ num_channels: 128
18
+ num_scales: 5
19
+ num_subbands: 4
20
+ scale:
21
+ - 2
22
+ - 2
23
+
24
+ training:
25
+ batch_size: 12
26
+ gradient_accumulation_steps: 1
27
+ grad_clip: 0
28
+ instruments:
29
+ - Kick
30
+ - Snare
31
+ - Toms
32
+ - Hh
33
+ - Ride
34
+ - Crash
35
+ lr: 9.0e-05
36
+ patience: 30
37
+ reduce_factor: 0.95
38
+ target_instrument: null
39
+ num_epochs: 1000
40
+ num_steps: 1268
41
+ q: 0.95
42
+ coarse_loss_clip: true
43
+ ema_momentum: 0.999
44
+ optimizer: adam
45
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
46
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
47
+
48
+ augmentations:
49
+ enable: true # enable or disable all augmentations (to quickly disable them if needed)
50
+ loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
51
+ loudness_min: 0.5
52
+ loudness_max: 1.5
53
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
54
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
55
+ - 0.2
56
+ - 0.02
57
+ mixup_loudness_min: 0.5
58
+ mixup_loudness_max: 1.5
59
+
60
+ # apply mp3 compression to mixture only (emulate downloading mp3 from internet)
61
+ mp3_compression_on_mixture: 0.0
62
+ mp3_compression_on_mixture_bitrate_min: 32
63
+ mp3_compression_on_mixture_bitrate_max: 320
64
+ mp3_compression_on_mixture_backend: "lameenc"
65
+
66
+ all:
67
+ channel_shuffle: 0.5 # Set 0 or lower to disable
68
+ random_inverse: 0.01 # inverse track (better lower probability)
69
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
70
+ mp3_compression: 0.0
71
+ mp3_compression_min_bitrate: 32
72
+ mp3_compression_max_bitrate: 320
73
+ mp3_compression_backend: "lameenc"
74
+ pitch_shift: 0.1
75
+ pitch_shift_min_semitones: -3
76
+ pitch_shift_max_semitones: 3
77
+ seven_band_parametric_eq: 0.5
78
+ seven_band_parametric_eq_min_gain_db: -6
79
+ seven_band_parametric_eq_max_gain_db: 6
80
+ tanh_distortion: 0.2
81
+ tanh_distortion_min: 0.1
82
+ tanh_distortion_max: 0.5
83
+
84
+ inference:
85
+ batch_size: 1
86
+ dim_t: 256
87
+ num_overlap: 4
calculate-model-hashes.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import os
4
+ import sys
5
+ import json
6
+ import hashlib
7
+ import requests
8
+
9
+ MODEL_CACHE_PATH = "/tmp/audio-separator-models"
10
+ VR_MODEL_DATA_LOCAL_PATH = f"{MODEL_CACHE_PATH}/vr_model_data.json"
11
+ MDX_MODEL_DATA_LOCAL_PATH = f"{MODEL_CACHE_PATH}/mdx_model_data.json"
12
+
13
+ MODEL_DATA_URL_PREFIX = "https://raw.githubusercontent.com/TRvlvr/application_data/main"
14
+ VR_MODEL_DATA_URL = f"{MODEL_DATA_URL_PREFIX}/vr_model_data/model_data_new.json"
15
+ MDX_MODEL_DATA_URL = f"{MODEL_DATA_URL_PREFIX}/mdx_model_data/model_data_new.json"
16
+
17
+ OUTPUT_PATH = f"{MODEL_CACHE_PATH}/model_hashes.json"
18
+
19
+
20
def get_model_hash(model_path):
    """
    Get the MD5 hash of a model file.

    Only the trailing 10000 * 1024 bytes of the file are hashed. A file
    that is shorter than that window is hashed in full.

    Args:
        model_path: Path of the model file to hash.

    Returns:
        The hexadecimal MD5 digest as a string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    tail_bytes = 10000 * 1024  # size of the trailing window that is hashed

    with open(model_path, "rb") as f:
        # Seeking to the end returns the absolute position, i.e. the size.
        file_size = f.seek(0, 2)
        # Clamp the start offset to 0 so short files are read from the
        # beginning instead of relying on a failed negative seek; this keeps
        # genuine I/O errors from being retried and masked.
        f.seek(max(file_size - tail_bytes, 0))
        return hashlib.md5(f.read()).hexdigest()
36
+
37
+
38
def download_file_if_missing(url, local_path):
    """
    Download a file from a URL if it doesn't exist locally.

    The payload is streamed into a temporary "<local_path>.part" file and
    only renamed to local_path once the download has completed, so an
    interrupted transfer never leaves a truncated file that a later run
    would mistake for a finished download.

    Args:
        url: Address to fetch the file from.
        local_path: Destination path; nothing is downloaded if it exists.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        OSError: If the destination cannot be written.
    """
    print(f"Checking if {local_path} needs to be downloaded from {url}")
    if os.path.exists(local_path):
        print(f"{local_path} already exists. Skipping download.")
        return

    print(f"Downloading {url} to {local_path}")

    # Make sure the destination directory exists before writing into it.
    parent_dir = os.path.dirname(local_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    partial_path = f"{local_path}.part"
    try:
        with requests.get(url, stream=True, timeout=10) as r:
            r.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Atomic rename: local_path appears only when fully written.
        os.replace(partial_path, local_path)
    finally:
        # After a successful rename the partial file is gone; on any failure
        # remove the leftover so it cannot accumulate.
        if os.path.exists(partial_path):
            os.remove(partial_path)

    print(f"Downloaded {url} to {local_path}")
53
+
54
+
55
def load_json_data(file_path):
    """
    Read a UTF-8 encoded JSON file and return the decoded data.

    Prints a message and terminates the process with exit status 1 when
    the file does not exist.
    """
    print(f"Loading JSON data from {file_path}")
    try:
        handle = open(file_path, "r", encoding="utf-8")
    except FileNotFoundError:
        print(f"{file_path} not found.")
        sys.exit(1)

    with handle:
        parsed = json.load(handle)
    print(f"Loaded JSON data successfully from {file_path}")
    return parsed
68
+
69
+
70
def iterate_and_hash(directory):
    """
    Hash every model file below a directory and write a JSON report.

    Files ending in ".pth" or ".onnx" are collected recursively, the VR and
    MDX model-data JSON files are fetched if missing, and each model's hash
    is looked up in the merged data. The resulting list of
    {"file", "hash", "params"} records is written to OUTPUT_PATH.
    """
    print(f"Iterating through directory {directory} to hash model files")

    # Gather (file name, full path) pairs for all model weight files.
    model_suffixes = (".pth", ".onnx")
    discovered = []
    for current_dir, _subdirs, names in os.walk(directory):
        for name in names:
            if name.endswith(model_suffixes):
                discovered.append((name, os.path.join(current_dir, name)))

    download_file_if_missing(VR_MODEL_DATA_URL, VR_MODEL_DATA_LOCAL_PATH)
    download_file_if_missing(MDX_MODEL_DATA_URL, MDX_MODEL_DATA_LOCAL_PATH)

    vr_params = load_json_data(VR_MODEL_DATA_LOCAL_PATH)
    mdx_params = load_json_data(MDX_MODEL_DATA_LOCAL_PATH)

    # Later unpacking wins, so MDX entries override VR entries that share a key.
    known_params = {**vr_params, **mdx_params}

    discovered.sort()
    report = []
    for name, path in discovered:
        digest = get_model_hash(path)
        entry = {
            "file": name,
            "hash": digest,
            "params": known_params.get(digest, "Parameters not found"),
        }
        report.append(entry)

    print(f"Writing model info list to {OUTPUT_PATH}")
    with open(OUTPUT_PATH, "w", encoding="utf-8") as out_file:
        json.dump(report, out_file, indent=4)
    print(f"Successfully wrote model info list to {OUTPUT_PATH}")
102
+
103
+
104
+ if __name__ == "__main__":
105
+ iterate_and_hash(MODEL_CACHE_PATH)
config_bs_roformer_karaoke_anvuew.yaml ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 640000
3
+ dim_f: 1024
4
+ dim_t: 801 # don't work (use in model)
5
+ hop_length: 441 # don't work (use in model)
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 256
13
+ depth: 12
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ linear_transformer_depth: 0
19
+ freqs_per_bands: !!python/tuple
20
+ - 2
21
+ - 2
22
+ - 2
23
+ - 2
24
+ - 2
25
+ - 2
26
+ - 2
27
+ - 2
28
+ - 2
29
+ - 2
30
+ - 2
31
+ - 2
32
+ - 2
33
+ - 2
34
+ - 2
35
+ - 2
36
+ - 2
37
+ - 2
38
+ - 2
39
+ - 2
40
+ - 2
41
+ - 2
42
+ - 2
43
+ - 2
44
+ - 4
45
+ - 4
46
+ - 4
47
+ - 4
48
+ - 4
49
+ - 4
50
+ - 4
51
+ - 4
52
+ - 4
53
+ - 4
54
+ - 4
55
+ - 4
56
+ - 12
57
+ - 12
58
+ - 12
59
+ - 12
60
+ - 12
61
+ - 12
62
+ - 12
63
+ - 12
64
+ - 24
65
+ - 24
66
+ - 24
67
+ - 24
68
+ - 24
69
+ - 24
70
+ - 24
71
+ - 24
72
+ - 48
73
+ - 48
74
+ - 48
75
+ - 48
76
+ - 48
77
+ - 48
78
+ - 48
79
+ - 48
80
+ - 128
81
+ - 129
82
+ dim_head: 64
83
+ heads: 8
84
+ attn_dropout: 0.0
85
+ ff_dropout: 0.0
86
+ flash_attn: true
87
+ dim_freqs_in: 1025
88
+ stft_n_fft: 2048
89
+ stft_hop_length: 512
90
+ stft_win_length: 2048
91
+ stft_normalized: false
92
+ mask_estimator_depth: 2
93
+ multi_stft_resolution_loss_weight: 1.0
94
+ multi_stft_resolutions_window_sizes: !!python/tuple
95
+ - 4096
96
+ - 2048
97
+ - 1024
98
+ - 512
99
+ - 256
100
+ multi_stft_hop_size: 147
101
+ multi_stft_normalized: False
102
+ mlp_expansion_factor: 4
103
+ use_torch_checkpoint: True
104
+ skip_connection: False
105
+
106
+
107
+ training:
108
+ batch_size: 1
109
+ gradient_accumulation_steps: 1
110
+ grad_clip: 0
111
+ instruments: ['Vocals', 'Instrumental']
112
+ lr: 5.0e-5
113
+ patience: 7
114
+ reduce_factor: 0.75
115
+ target_instrument: Vocals
116
+ num_epochs: 1000
117
+ num_steps: 1000
118
+ q: 0.95
119
+ coarse_loss_clip: true
120
+ ema_momentum: 0.999
121
+ optimizer: adam
122
+ other_fix: False # it's needed for checking on multisong dataset if other is actually instrumental
123
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
124
+
125
+
126
+
127
+
128
+ inference:
129
+ batch_size: 2
130
+ dim_t: 1251
131
+ num_overlap: 4
config_chorus_male_female_bs_roformer.yaml ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 801 # don't work (use in model)
5
+ hop_length: 441 # don't work (use in model)
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 384
13
+ depth: 8
14
+ stereo: true
15
+ num_stems: 2
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ freqs_per_bands: !!python/tuple
19
+ - 2
20
+ - 2
21
+ - 2
22
+ - 2
23
+ - 2
24
+ - 2
25
+ - 2
26
+ - 2
27
+ - 2
28
+ - 2
29
+ - 2
30
+ - 2
31
+ - 2
32
+ - 2
33
+ - 2
34
+ - 2
35
+ - 2
36
+ - 2
37
+ - 2
38
+ - 2
39
+ - 2
40
+ - 2
41
+ - 2
42
+ - 2
43
+ - 4
44
+ - 4
45
+ - 4
46
+ - 4
47
+ - 4
48
+ - 4
49
+ - 4
50
+ - 4
51
+ - 4
52
+ - 4
53
+ - 4
54
+ - 4
55
+ - 12
56
+ - 12
57
+ - 12
58
+ - 12
59
+ - 12
60
+ - 12
61
+ - 12
62
+ - 12
63
+ - 24
64
+ - 24
65
+ - 24
66
+ - 24
67
+ - 24
68
+ - 24
69
+ - 24
70
+ - 24
71
+ - 48
72
+ - 48
73
+ - 48
74
+ - 48
75
+ - 48
76
+ - 48
77
+ - 48
78
+ - 48
79
+ - 128
80
+ - 129
81
+ dim_head: 64
82
+ heads: 8
83
+ attn_dropout: 0.0
84
+ ff_dropout: 0.0
85
+ flash_attn: true
86
+ dim_freqs_in: 1025
87
+ stft_n_fft: 2048
88
+ stft_hop_length: 441
89
+ stft_win_length: 2048
90
+ stft_normalized: false
91
+ mask_estimator_depth: 2
92
+ multi_stft_resolution_loss_weight: 1.0
93
+ multi_stft_resolutions_window_sizes: !!python/tuple
94
+ - 4096
95
+ - 2048
96
+ - 1024
97
+ - 512
98
+ - 256
99
+ multi_stft_hop_size: 147
100
+ multi_stft_normalized: False
101
+
102
+ training:
103
+ batch_size: 1
104
+ gradient_accumulation_steps: 1
105
+ grad_clip: 0
106
+ instruments:
107
+ - male
108
+ - female
109
+ lr: 1.0e-05
110
+ patience: 2
111
+ reduce_factor: 0.95
112
+ target_instrument: null
113
+ num_epochs: 1000
114
+ num_steps: 1000
115
+ q: 0.95
116
+ coarse_loss_clip: true
117
+ ema_momentum: 0.999
118
+ optimizer: adam
119
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
120
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
121
+
122
+ inference:
123
+ batch_size: 1
124
+ dim_t: 801
125
+ num_overlap: 2
config_dereverb-echo_mel_band_roformer_sdr_13.4843_v2.yaml ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 801
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 256
13
+ depth: 8
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0.1
22
+ ff_dropout: 0.1
23
+ flash_attn: True
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: False
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: False
40
+
41
+ training:
42
+ batch_size: 1
43
+ gradient_accumulation_steps: 8
44
+ grad_clip: 0
45
+ instruments:
46
+ - dry
47
+ - No dry
48
+ lr: 1.0e-05
49
+ patience: 2
50
+ reduce_factor: 0.95
51
+ target_instrument: dry
52
+ num_epochs: 1000
53
+ num_steps: 1000
54
+ q: 0.95
55
+ coarse_loss_clip: true
56
+ ema_momentum: 0.999
57
+ optimizer: adam
58
+ other_fix: false
59
+ use_amp: true
60
+
61
+ inference:
62
+ batch_size: 1
63
+ dim_t: 801
64
+ num_overlap: 4
config_dereverb_echo_mel_band_roformer_v2.yaml ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 801
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 256
13
+ depth: 8
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0.1
22
+ ff_dropout: 0.1
23
+ flash_attn: True
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: False
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: False
40
+
41
+ training:
42
+ batch_size: 1
43
+ gradient_accumulation_steps: 8
44
+ grad_clip: 0
45
+ instruments:
46
+ - dry
47
+ - other
48
+ lr: 1.0e-05
49
+ patience: 2
50
+ reduce_factor: 0.95
51
+ target_instrument: dry
52
+ num_epochs: 1000
53
+ num_steps: 1000
54
+ q: 0.95
55
+ coarse_loss_clip: true
56
+ ema_momentum: 0.999
57
+ optimizer: adam
58
+ other_fix: false
59
+ use_amp: true
60
+
61
+ inference:
62
+ batch_size: 1
63
+ dim_t: 801
64
+ num_overlap: 4
config_dereverb_echo_mel_band_roformer_v2.yaml.1 ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 801
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 256
13
+ depth: 8
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0.1
22
+ ff_dropout: 0.1
23
+ flash_attn: True
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: False
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: False
40
+
41
+ training:
42
+ batch_size: 1
43
+ gradient_accumulation_steps: 8
44
+ grad_clip: 0
45
+ instruments:
46
+ - dry
47
+ - other
48
+ lr: 1.0e-05
49
+ patience: 2
50
+ reduce_factor: 0.95
51
+ target_instrument: dry
52
+ num_epochs: 1000
53
+ num_steps: 1000
54
+ q: 0.95
55
+ coarse_loss_clip: true
56
+ ema_momentum: 0.999
57
+ optimizer: adam
58
+ other_fix: false
59
+ use_amp: true
60
+
61
+ inference:
62
+ batch_size: 1
63
+ dim_t: 801
64
+ num_overlap: 4
config_dereverb_echo_mel_band_roformer_v2.yaml.2 ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 801
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 256
13
+ depth: 8
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0.1
22
+ ff_dropout: 0.1
23
+ flash_attn: True
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: False
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: False
40
+
41
+ training:
42
+ batch_size: 1
43
+ gradient_accumulation_steps: 8
44
+ grad_clip: 0
45
+ instruments:
46
+ - dry
47
+ - other
48
+ lr: 1.0e-05
49
+ patience: 2
50
+ reduce_factor: 0.95
51
+ target_instrument: dry
52
+ num_epochs: 1000
53
+ num_steps: 1000
54
+ q: 0.95
55
+ coarse_loss_clip: true
56
+ ema_momentum: 0.999
57
+ optimizer: adam
58
+ other_fix: false
59
+ use_amp: true
60
+
61
+ inference:
62
+ batch_size: 1
63
+ dim_t: 801
64
+ num_overlap: 4
config_dereverb_mdx23c.yaml ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 261120
3
+ dim_f: 4096
4
+ dim_t: 256
5
+ hop_length: 1024
6
+ n_fft: 8192
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+
11
+ model:
12
+ act: gelu
13
+ bottleneck_factor: 4
14
+ growth: 128
15
+ norm: InstanceNorm
16
+ num_blocks_per_scale: 2
17
+ num_channels: 128
18
+ num_scales: 5
19
+ num_subbands: 4
20
+ scale:
21
+ - 2
22
+ - 2
23
+
24
+ training:
25
+ batch_size: 2
26
+ gradient_accumulation_steps: 1
27
+ grad_clip: 0
28
+ instruments:
29
+ - dry
30
+ - No dry
31
+ lr: 1.0e-06
32
+ patience: 4
33
+ reduce_factor: 0.93
34
+ target_instrument: null
35
+ num_epochs: 40
36
+ num_steps: 1000
37
+ q: 0.95
38
+ coarse_loss_clip: true
39
+ ema_momentum: 0.999
40
+ optimizer: adamw
41
+ read_metadata_procs: 8 # Number of processes to use during metadata reading for dataset. Can speed up metadata generation
42
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
43
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
44
+
45
+ augmentations:
46
+ enable: false # enable or disable all augmentations (to quickly disable them if needed)
47
+ loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
48
+ loudness_min: 0.5
49
+ loudness_max: 1.5
50
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
51
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
52
+ - 0.2
53
+ - 0.02
54
+ mixup_loudness_min: 0.5
55
+ mixup_loudness_max: 1.5
56
+
57
+ all:
58
+ channel_shuffle: 0.5 # Set 0 or lower to disable
59
+ random_inverse: 0.05 # inverse track (better lower probability)
60
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
61
+
62
+ # pedalboard chorus block
63
+ pedalboard_chorus: 0.001
64
+ pedalboard_chorus_rate_hz_min: 1.0
65
+ pedalboard_chorus_rate_hz_max: 7.0
66
+ pedalboard_chorus_depth_min: 0.25
67
+ pedalboard_chorus_depth_max: 0.95
68
+ pedalboard_chorus_centre_delay_ms_min: 3
69
+ pedalboard_chorus_centre_delay_ms_max: 10
70
+ pedalboard_chorus_feedback_min: 0.0
71
+ pedalboard_chorus_feedback_max: 0.01
72
+ pedalboard_chorus_mix_min: 0.1
73
+ pedalboard_chorus_mix_max: 0.9
74
+
75
+ # pedalboard phazer block
76
+ pedalboard_phazer: 0.001
77
+ pedalboard_phazer_rate_hz_min: 1.0
78
+ pedalboard_phazer_rate_hz_max: 10.0
79
+ pedalboard_phazer_depth_min: 0.25
80
+ pedalboard_phazer_depth_max: 0.95
81
+ pedalboard_phazer_centre_frequency_hz_min: 200
82
+ pedalboard_phazer_centre_frequency_hz_max: 12000
83
+ pedalboard_phazer_feedback_min: 0.0
84
+ pedalboard_phazer_feedback_max: 0.5
85
+ pedalboard_phazer_mix_min: 0.1
86
+ pedalboard_phazer_mix_max: 0.9
87
+
88
+ # pedalboard pitch shift block
89
+ pedalboard_pitch_shift: 0.01
90
+ pedalboard_pitch_shift_semitones_min: -7
91
+ pedalboard_pitch_shift_semitones_max: 7
92
+
93
+ # pedalboard resample block
94
+ pedalboard_resample: 0.001
95
+ pedalboard_resample_target_sample_rate_min: 4000
96
+ pedalboard_resample_target_sample_rate_max: 44100
97
+
98
+ mp3_compression_min_bitrate: 32
99
+ mp3_compression_max_bitrate: 320
100
+ mp3_compression_backend: "lameenc"
101
+
102
+ dry:
103
+ # pedalboard distortion block
104
+ pedalboard_distortion: 0.001
105
+ pedalboard_distortion_drive_db_min: 1.0
106
+ pedalboard_distortion_drive_db_max: 25.0
107
+
108
+ tanh_distortion: 0.05
109
+ tanh_distortion_min: 0.1
110
+ tanh_distortion_max: 0.7
111
+ # pedalboard bitcrash block
112
+ pedalboard_bitcrash: 0.005
113
+ pedalboard_bitcrash_bit_depth_min: 4
114
+ pedalboard_bitcrash_bit_depth_max: 16
115
+
116
+ seven_band_parametric_eq: 0.24
117
+ seven_band_parametric_eq_min_gain_db: -9
118
+ seven_band_parametric_eq_max_gain_db: 9
119
+
120
+ gaussian_noise: 0.005
121
+ gaussian_noise_min_amplitude: 0.001
122
+ gaussian_noise_max_amplitude: 0.01
123
+
124
+ time_stretch: 0.01
125
+ time_stretch_min_rate: 0.8
126
+ time_stretch_max_rate: 1.25
127
+ other:
128
+ seven_band_parametric_eq: 0.24
129
+ seven_band_parametric_eq_min_gain_db: -9
130
+ seven_band_parametric_eq_max_gain_db: 9
131
+
132
+ inference:
133
+ batch_size: 2
134
+ dim_t: 256
135
+ num_overlap: 4
config_dnr_bandit_bsrnn_multi_mus64.yaml ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "MultiMaskMultiSourceBandSplitRNN"
2
+ audio:
3
+ chunk_size: 264600
4
+ num_channels: 2
5
+ sample_rate: 44100
6
+ min_mean_abs: 0.001
7
+
8
+ model:
9
+ in_channel: 1
10
+ stems: ['speech', 'music', 'effects']
11
+ band_specs: "musical"
12
+ n_bands: 64
13
+ fs: 44100
14
+ require_no_overlap: false
15
+ require_no_gap: true
16
+ normalize_channel_independently: false
17
+ treat_channel_as_feature: true
18
+ n_sqm_modules: 8
19
+ emb_dim: 128
20
+ rnn_dim: 256
21
+ bidirectional: true
22
+ rnn_type: "GRU"
23
+ mlp_dim: 512
24
+ hidden_activation: "Tanh"
25
+ hidden_activation_kwargs: null
26
+ complex_mask: true
27
+ n_fft: 2048
28
+ win_length: 2048
29
+ hop_length: 512
30
+ window_fn: "hann_window"
31
+ wkwargs: null
32
+ power: null
33
+ center: true
34
+ normalized: true
35
+ pad_mode: "constant"
36
+ onesided: true
37
+
38
+ training:
39
+ batch_size: 4
40
+ gradient_accumulation_steps: 4
41
+ grad_clip: 0
42
+ instruments:
43
+ - Speech
44
+ - Music
45
+ - Effects
46
+ lr: 9.0e-05
47
+ patience: 2
48
+ reduce_factor: 0.95
49
+ target_instrument: null
50
+ num_epochs: 1000
51
+ num_steps: 1000
52
+ q: 0.95
53
+ coarse_loss_clip: true
54
+ ema_momentum: 0.999
55
+ optimizer: adam
56
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
57
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
58
+
59
+ augmentations:
60
+ enable: true # enable or disable all augmentations (a quick way to turn them all off if needed)
61
+ loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
62
+ loudness_min: 0.5
63
+ loudness_max: 1.5
64
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
65
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
66
+ - 0.2
67
+ - 0.02
68
+ mixup_loudness_min: 0.5
69
+ mixup_loudness_max: 1.5
70
+ all:
71
+ channel_shuffle: 0.5 # Set 0 or lower to disable
72
+ random_inverse: 0.1 # inverse track (better lower probability)
73
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
74
+
75
+ inference:
76
+ batch_size: 1
77
+ dim_t: 256
78
+ num_overlap: 4
config_dnr_bandit_v2_mus64.yaml ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cls: Bandit
2
+
3
+ audio:
4
+ chunk_size: 384000
5
+ num_channels: 2
6
+ sample_rate: 48000
7
+ min_mean_abs: 0.000
8
+
9
+ kwargs:
10
+ in_channels: 1
11
+ stems: ['speech', 'music', 'sfx']
12
+ band_type: musical
13
+ n_bands: 64
14
+ normalize_channel_independently: false
15
+ treat_channel_as_feature: true
16
+ n_sqm_modules: 8
17
+ emb_dim: 128
18
+ rnn_dim: 256
19
+ bidirectional: true
20
+ rnn_type: "GRU"
21
+ mlp_dim: 512
22
+ hidden_activation: "Tanh"
23
+ hidden_activation_kwargs:
24
+ complex_mask: true
25
+ use_freq_weights: true
26
+ n_fft: 2048
27
+ win_length: 2048
28
+ hop_length: 512
29
+ window_fn: "hann_window"
30
+ wkwargs:
31
+ power:
32
+ center: true
33
+ normalized: true
34
+ pad_mode: "reflect"
35
+ onesided: true
36
+
37
+ training:
38
+ batch_size: 4
39
+ gradient_accumulation_steps: 4
40
+ grad_clip: 0
41
+ instruments:
42
+ - Speech
43
+ - Music
44
+ - Sfx
45
+ lr: 9.0e-05
46
+ patience: 2
47
+ reduce_factor: 0.95
48
+ target_instrument:
49
+ num_epochs: 1000
50
+ num_steps: 1000
51
+ q: 0.95
52
+ coarse_loss_clip: true
53
+ ema_momentum: 0.999
54
+ optimizer: adam
55
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
56
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
57
+
58
+ augmentations:
59
+ enable: true # enable or disable all augmentations (a quick way to turn them all off if needed)
60
+ loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
61
+ loudness_min: 0.5
62
+ loudness_max: 1.5
63
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
64
+ mixup_probs: !!python/tuple
65
+ # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
66
+ - 0.2
67
+ - 0.02
68
+ mixup_loudness_min: 0.5
69
+ mixup_loudness_max: 1.5
70
+ all:
71
+ channel_shuffle: 0.5 # Set 0 or lower to disable
72
+ random_inverse: 0.1 # inverse track (better lower probability)
73
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
74
+
75
+ inference:
76
+ batch_size: 8
77
+ dim_t: 256
78
+ num_overlap: 4
config_drumsep_mdx23c.yaml ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 130560
3
+ dim_f: 1024
4
+ dim_t: 256
5
+ hop_length: 512
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+
11
+ model:
12
+ act: gelu
13
+ bottleneck_factor: 4
14
+ growth: 128
15
+ norm: InstanceNorm
16
+ num_blocks_per_scale: 2
17
+ num_channels: 128
18
+ num_scales: 5
19
+ num_subbands: 4
20
+ scale:
21
+ - 2
22
+ - 2
23
+
24
+ training:
25
+ batch_size: 12
26
+ gradient_accumulation_steps: 1
27
+ grad_clip: 0
28
+ instruments:
29
+ - kick
30
+ - snare
31
+ - toms
32
+ - hh
33
+ - ride
34
+ - crash
35
+ lr: 9.0e-05
36
+ patience: 30
37
+ reduce_factor: 0.95
38
+ target_instrument: null
39
+ num_epochs: 1000
40
+ num_steps: 1268
41
+ q: 0.95
42
+ coarse_loss_clip: true
43
+ ema_momentum: 0.999
44
+ optimizer: adam
45
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
46
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
47
+
48
+ augmentations:
49
+ enable: true # enable or disable all augmentations (a quick way to turn them all off if needed)
50
+ loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
51
+ loudness_min: 0.5
52
+ loudness_max: 1.5
53
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
54
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
55
+ - 0.2
56
+ - 0.02
57
+ mixup_loudness_min: 0.5
58
+ mixup_loudness_max: 1.5
59
+
60
+ # apply mp3 compression to mixture only (emulate downloading mp3 from internet)
61
+ mp3_compression_on_mixture: 0.0
62
+ mp3_compression_on_mixture_bitrate_min: 32
63
+ mp3_compression_on_mixture_bitrate_max: 320
64
+ mp3_compression_on_mixture_backend: "lameenc"
65
+
66
+ all:
67
+ channel_shuffle: 0.5 # Set 0 or lower to disable
68
+ random_inverse: 0.01 # inverse track (better lower probability)
69
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
70
+ mp3_compression: 0.0
71
+ mp3_compression_min_bitrate: 32
72
+ mp3_compression_max_bitrate: 320
73
+ mp3_compression_backend: "lameenc"
74
+ pitch_shift: 0.1
75
+ pitch_shift_min_semitones: -3
76
+ pitch_shift_max_semitones: 3
77
+ seven_band_parametric_eq: 0.5
78
+ seven_band_parametric_eq_min_gain_db: -6
79
+ seven_band_parametric_eq_max_gain_db: 6
80
+ tanh_distortion: 0.2
81
+ tanh_distortion_min: 0.1
82
+ tanh_distortion_max: 0.5
83
+
84
+ inference:
85
+ batch_size: 1
86
+ dim_t: 256
87
+ num_overlap: 4
config_mel_band_roformer_bleed_suppressor_v1.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100
3
+ dim_f: 1024
4
+ dim_t: 801
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 384
13
+ depth: 6
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0
22
+ ff_dropout: 0
23
+ flash_attn: True
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100 # needed for mel filter bank from librosa
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: False
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: False
40
+
41
+ training:
42
+ instruments:
43
+ - Instrumental
44
+ - Bleed
45
+ target_instrument: Instrumental
46
+ use_amp: True
47
+
48
+ inference:
49
+ batch_size: 1
50
+ dim_t: 801
51
+ num_overlap: 2
config_mel_band_roformer_karaoke.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 256
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 000
10
+
11
+ model:
12
+ dim: 384
13
+ depth: 6
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0
22
+ ff_dropout: 0
23
+ flash_attn: true
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100 # needed for mel filter bank from librosa
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: false
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: false
40
+
41
+ training:
42
+ batch_size: 4
43
+ gradient_accumulation_steps: 1
44
+ grad_clip: 0
45
+ instruments:
46
+ - Vocals
47
+ - Instrumental
48
+ lr: 1.0e-05
49
+ patience: 2
50
+ reduce_factor: 0.95
51
+ target_instrument: Vocals
52
+ num_epochs: 1000
53
+ num_steps: 2000
54
+ augmentation: false # enable augmentations by audiomentations and pedalboard
55
+ augmentation_type:
56
+ use_mp3_compress: false # Deprecated
57
+ augmentation_mix: false # Mix several stems of the same type with some probability
58
+ augmentation_loudness: false # randomly change loudness of each stem
59
+ augmentation_loudness_type: 1 # Type 1 or 2
60
+ augmentation_loudness_min: 0
61
+ augmentation_loudness_max: 0
62
+ q: 0.95
63
+ coarse_loss_clip: false
64
+ ema_momentum: 0.999
65
+ optimizer: adam
66
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
67
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
68
+ inference:
69
+ batch_size: 1
70
+ dim_t: 1101
71
+ num_overlap: 4
config_mel_band_roformer_karaoke_gabox.yaml ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100
3
+ dim_f: 1024
4
+ dim_t: 256
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 384
13
+ depth: 6
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0
22
+ ff_dropout: 0
23
+ flash_attn: true
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100 # needed for mel filter bank from librosa
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: false
30
+ mask_estimator_depth: 2
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: true
40
+
41
+ training:
42
+ batch_size: 1
43
+ gradient_accumulation_steps: 1
44
+ grad_clip: 0
45
+ instruments:
46
+ - Vocals
47
+ - Instrumental
48
+ lr: 0.0005
49
+ patience: 2
50
+ reduce_factor: 0.95
51
+ target_instrument: Vocals
52
+ num_epochs: 1000
53
+ num_steps: 1000
54
+ augmentation: false # enable augmentations by audiomentations and pedalboard
55
+ augmentation_type:
56
+ use_mp3_compress: false # Deprecated
57
+ augmentation_mix: false # Mix several stems of the same type with some probability
58
+ augmentation_loudness: false # randomly change loudness of each stem
59
+ augmentation_loudness_type: 1 # Type 1 or 2
60
+ augmentation_loudness_min: 0
61
+ augmentation_loudness_max: 0
62
+ q: 0.95
63
+ coarse_loss_clip: false
64
+ ema_momentum: 0.999
65
+ optimizer: adamw
66
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
67
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
68
+
69
+ inference:
70
+ batch_size: 1
71
+ dim_t: 1101
72
+ num_overlap: 8
config_melband_roformer_big_beta5e.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100
3
+ dim_f: 1024
4
+ dim_t: 801
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 384
13
+ depth: 6
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0
22
+ ff_dropout: 0
23
+ flash_attn: True
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100 # needed for mel filter bank from librosa
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: False
30
+ mask_estimator_depth: 3
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: False
40
+
41
+ training:
42
+ instruments:
43
+ - vocals
44
+ - other
45
+ target_instrument: vocals
46
+ use_amp: True
47
+
48
+ inference:
49
+ batch_size: 1
50
+ dim_t: 801
51
+ num_overlap: 2
config_melbandroformer_big_beta4.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100
3
+ dim_f: 1024
4
+ dim_t: 1101
5
+ hop_length: 441
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 384
13
+ depth: 12
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ num_bands: 60
19
+ dim_head: 64
20
+ heads: 8
21
+ attn_dropout: 0
22
+ ff_dropout: 0
23
+ flash_attn: True
24
+ dim_freqs_in: 1025
25
+ sample_rate: 44100 # needed for mel filter bank from librosa
26
+ stft_n_fft: 2048
27
+ stft_hop_length: 441
28
+ stft_win_length: 2048
29
+ stft_normalized: False
30
+ mask_estimator_depth: 3
31
+ multi_stft_resolution_loss_weight: 1.0
32
+ multi_stft_resolutions_window_sizes: !!python/tuple
33
+ - 4096
34
+ - 2048
35
+ - 1024
36
+ - 512
37
+ - 256
38
+ multi_stft_hop_size: 147
39
+ multi_stft_normalized: False
40
+
41
+ training:
42
+ instruments:
43
+ - vocals
44
+ - other
45
+ target_instrument: vocals
46
+ use_amp: True
47
+
48
+ inference:
49
+ batch_size: 1
50
+ dim_t: 1101
51
+ num_overlap: 2