diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..22e16159a46b870d3cc6d637fdd6de5149af7a00
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,190 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+Demucs_models/04573f0d-f3cf25b2.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/0d19c1c6-0f06f20e.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/14fc6a69-a89dd0ee.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/1ef250f1-592467ce.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/305bc58f-18378783.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/42e558d4-196e0e1b.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/464b36d7-e5a9386e.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/5c90dfd2-34c22ccb.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/5d2d6c55-db83574e.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/6b9c2ca1-3fd82607.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/75fc33f5-1941ce65.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/7d865c68-3d5dd56b.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/7ecf8ec1-70f50cc9.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/7fd6ef75-a905dd85.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/83fc094f-4a16d450.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/902315c2-b39ce9c9.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/92cfc3b6-ef3bcb9c.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/955717e8-8726e21a.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/9a6b4851-03af0aa6.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/a1d90b5c-ae9d2452.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/b72baf4e-8778635e.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/c511e2ab-fe698775.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/cfa93e08-61801ae1.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/d12395a8-e57c48e6.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/demucs_extra-3646af93.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/demucs_extra.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/demucs_unittest-09ebc15f.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/demucs-e07c671f.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/demucs.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/demucs48_hq-28a1282c.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/e51eebcc-c1b80bdd.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/ebf34a2db.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/f7e0c4bc-ba3fe64a.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/fa0cb7f9-100d8bf4.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/light_extra.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/light.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/tasnet_extra-df3777b2.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/tasnet_extra.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/tasnet-beb46fac.th filter=lfs diff=lfs merge=lfs -text
+Demucs_models/tasnet.th filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/100-Reverb.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/101-LargeHall.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/102-SmallHall.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/103-Strings.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/104-PianoHall.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/105-OrchRoom.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/106-VocalRoom.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/107-MediumRm.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/108-LargeRoom.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/109-CoolPlate.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/110-ShortPlt.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/111-VocalPlt.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/112-SoftAmb.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/113-RoomAmb.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/114-Cathedral.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/115-LongCave.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/116-GarageDr.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/117-RockKick.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-1/118-RockSnare.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/200-R1_Reverb1.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/201-R1_LargeHall.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/202-R1_SmallHall.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/203-R1_Strings.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/204-R1_PianoHall.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/205-R1_OrchRoom.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/206-R1_VocalRoom.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/207-R1_MediumRm.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/208-R1_LargeRoom.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/209-R1_CoolPlate.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/210-R1_ShortPlt.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/211-R1_VocalPlt.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/212-R1_SoftAmb.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/213-R1_RoomAmb.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/214-R1_Cathedral.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/215-R1_LongCave.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/216-R1_GarageDr.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/217-R1_RockKick.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/218-R1_RockSnare.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/220-R2_Reverb2.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/221-R2_LargeHall.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/222-R2_SmallHall.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/223-R2_Strings.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/224-R2_PianoHall.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/225-R2_OrchRoom.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/226-R2_VocalRoom.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/227-R2_MediumRm.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/228-R2_LargeRoom.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/229-R2_CoolPlate.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/230-R2_ShortPlt.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/231-R2_VocalPlt.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/232-R2_SoftAmb.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/233-R2_RoomAmb.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/234-R2_Cathedral.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/235-R2_LongCave.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/236-R2_GarageDr.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/237-R2_RockKick.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-2/238-R2_RockSnare.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/301-LargeHall.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/302-SmallHall.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/303-Strings.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/304-PianoHall.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/305-OrchRoom.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/306-VocalRoom.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/307-MediumRm.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/308-LargeRoom.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/309-CoolPlate.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/310-ShortPlt.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/311-VocalPlt.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/312-SoftAmb.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/313-RoomAmb.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/314-Cathedral.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/315-LongCave.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/316-GarageDr.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/317-RockKick.wav filter=lfs diff=lfs merge=lfs -text
+impulse/VS8F-3/318-RockSnare.wav filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v1/demucs_extra.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v1/demucs.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v1/light_extra.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v1/light.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v1/tasnet_extra.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v1/tasnet.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v2/demucs_extra-3646af93.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v2/demucs_unittest-09ebc15f.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v2/demucs-e07c671f.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v2/demucs48_hq-28a1282c.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v2/tasnet_extra-df3777b2.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v2/tasnet-beb46fac.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/0d19c1c6-0f06f20e.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/14fc6a69-a89dd0ee.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/1ef250f1-592467ce.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/305bc58f-18378783.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/42e558d4-196e0e1b.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/464b36d7-e5a9386e.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/5d2d6c55-db83574e.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/6b9c2ca1-3fd82607.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/7d865c68-3d5dd56b.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/7ecf8ec1-70f50cc9.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/7fd6ef75-a905dd85.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/83fc094f-4a16d450.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/902315c2-b39ce9c9.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/9a6b4851-03af0aa6.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/a1d90b5c-ae9d2452.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/b72baf4e-8778635e.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/c511e2ab-fe698775.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/cfa93e08-61801ae1.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/e51eebcc-c1b80bdd.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/ebf34a2db.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v3/fa0cb7f9-100d8bf4.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v4/04573f0d-f3cf25b2.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v4/5c90dfd2-34c22ccb.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v4/75fc33f5-1941ce65.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v4/92cfc3b6-ef3bcb9c.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v4/955717e8-8726e21a.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v4/d12395a8-e57c48e6.th filter=lfs diff=lfs merge=lfs -text
+models/Demucs/Demucs_v4/f7e0c4bc-ba3fe64a.th filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9813e45f73b237d23c41b8413ae90bfbeadff876
--- /dev/null
+++ b/README.md
@@ -0,0 +1,18 @@
+---
+license: mit
+tags:
+- uvr
+- uvr5
+- ultimatevocalremover
+- demucs
+- vr-arch
+- mdx-net
+- mdx23c
+- roformer
+- scnet
+- bandit
+---
+
+
\ No newline at end of file
diff --git a/impulse/VS8F-1/100-Reverb.wav b/impulse/VS8F-1/100-Reverb.wav
new file mode 100644
index 0000000000000000000000000000000000000000..f57f2620bec4b99d1459ab80fd25c88d63f1e316
--- /dev/null
+++ b/impulse/VS8F-1/100-Reverb.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0095d99301acc815fafd915bffd6478dfb34ead49cf06e3b53f7d98d382305c
+size 1192672
diff --git a/impulse/VS8F-1/101-LargeHall.wav b/impulse/VS8F-1/101-LargeHall.wav
new file mode 100644
index 0000000000000000000000000000000000000000..eb49cea0f134f670f8a53aa78bf015b86bbd2ec3
--- /dev/null
+++ b/impulse/VS8F-1/101-LargeHall.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a093f82ab098a1ef004865af68767da88b91a66c418d4f449c268c0d3527ddac
+size 1324768
diff --git a/impulse/VS8F-1/102-SmallHall.wav b/impulse/VS8F-1/102-SmallHall.wav
new file mode 100644
index 0000000000000000000000000000000000000000..6e9c6cc6b2450ccc6cff096acb5fc0b22b88b27d
--- /dev/null
+++ b/impulse/VS8F-1/102-SmallHall.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dde33ecb751b0858e722ba9543e37bae0561c45167432173a544690253d8d393
+size 796384
diff --git a/impulse/VS8F-1/103-Strings.wav b/impulse/VS8F-1/103-Strings.wav
new file mode 100644
index 0000000000000000000000000000000000000000..ef998972a63c3c72efac5e3366b3df0b520f97f9
--- /dev/null
+++ b/impulse/VS8F-1/103-Strings.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:941d1d3a15a4167bda81598ebdd5c904626a3c1f165fba85c1194bee61740398
+size 1192672
diff --git a/impulse/VS8F-1/104-PianoHall.wav b/impulse/VS8F-1/104-PianoHall.wav
new file mode 100644
index 0000000000000000000000000000000000000000..8271841372fd4b4f192c691754bd79d09227b6d7
--- /dev/null
+++ b/impulse/VS8F-1/104-PianoHall.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5c675d5549e519b19ff5925253f6efd0cb634c218b399b838ebb30375b59317
+size 1060576
diff --git a/impulse/VS8F-1/105-OrchRoom.wav b/impulse/VS8F-1/105-OrchRoom.wav
new file mode 100644
index 0000000000000000000000000000000000000000..e30cee045ae908ac2d09dc28b38f26c49bad7b90
--- /dev/null
+++ b/impulse/VS8F-1/105-OrchRoom.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73ed632ca818fbe3309420ecef4623fdf7df346db53a179b9fe3556031db1a00
+size 664288
diff --git a/impulse/VS8F-1/106-VocalRoom.wav b/impulse/VS8F-1/106-VocalRoom.wav
new file mode 100644
index 0000000000000000000000000000000000000000..1cefd02d89fd0743ec2a75c4a407d47ee683bc7c
--- /dev/null
+++ b/impulse/VS8F-1/106-VocalRoom.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c74a371b9043eef9fa7c806ecc27ec3b949f7990946d3e80fbb4d9d0136c38b
+size 796384
diff --git a/impulse/VS8F-1/107-MediumRm.wav b/impulse/VS8F-1/107-MediumRm.wav
new file mode 100644
index 0000000000000000000000000000000000000000..11cc491cef053997e854c6e7bb50d9a562bb44a5
--- /dev/null
+++ b/impulse/VS8F-1/107-MediumRm.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d77584d14207333194a860a9b188117282833928c289ec815184966a6b7163c
+size 424672
diff --git a/impulse/VS8F-1/108-LargeRoom.wav b/impulse/VS8F-1/108-LargeRoom.wav
new file mode 100644
index 0000000000000000000000000000000000000000..3381e9d6cda08ac08f12e782d617e6b26fa6b36b
--- /dev/null
+++ b/impulse/VS8F-1/108-LargeRoom.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9af5c5297ac56d2bd73a970c0028b0c087b854a86fb27a79b054836833cedb57
+size 664288
diff --git a/impulse/VS8F-1/109-CoolPlate.wav b/impulse/VS8F-1/109-CoolPlate.wav
new file mode 100644
index 0000000000000000000000000000000000000000..1d3d89de79ce8c10dc059125212ccb0ebdff3ef8
--- /dev/null
+++ b/impulse/VS8F-1/109-CoolPlate.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e220c9e1349889f909d13727e19e898b57a994a585137b9c16d5d6bec6473536
+size 1192672
diff --git a/impulse/VS8F-1/110-ShortPlt.wav b/impulse/VS8F-1/110-ShortPlt.wav
new file mode 100644
index 0000000000000000000000000000000000000000..a5dabd3902ec613a6e85b59cf9afdcc9678e9ea4
--- /dev/null
+++ b/impulse/VS8F-1/110-ShortPlt.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b80b405c3113144caa136ed2de380d385b9514c7f3df8deae538a8034fbc4e82
+size 842464
diff --git a/impulse/VS8F-1/111-VocalPlt.wav b/impulse/VS8F-1/111-VocalPlt.wav
new file mode 100644
index 0000000000000000000000000000000000000000..afcbb97f717b468bd1d9f7cf83df6b5303c46843
--- /dev/null
+++ b/impulse/VS8F-1/111-VocalPlt.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8abd4d940b1d3340f0d5e24dd76726e2b138c187a02490e0e57e08c22c1c8fd6
+size 1060576
diff --git a/impulse/VS8F-1/112-SoftAmb.wav b/impulse/VS8F-1/112-SoftAmb.wav
new file mode 100644
index 0000000000000000000000000000000000000000..b452fc64d6e3c0e121098a39423ed175518ea3ce
--- /dev/null
+++ b/impulse/VS8F-1/112-SoftAmb.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a5a1e591dc51a9a5b32b632df8b6c36a7a74372a1ada4daf9eaf91d6a509925
+size 268000
diff --git a/impulse/VS8F-1/113-RoomAmb.wav b/impulse/VS8F-1/113-RoomAmb.wav
new file mode 100644
index 0000000000000000000000000000000000000000..e08be09f024a8b2d7b918decef42875d3da80fd2
--- /dev/null
+++ b/impulse/VS8F-1/113-RoomAmb.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:692dc26f2c18f6ba60834e4f8c9e3ccd58585cc19ec958ec9caad110d7a5ffa0
+size 268000
diff --git a/impulse/VS8F-1/114-Cathedral.wav b/impulse/VS8F-1/114-Cathedral.wav
new file mode 100644
index 0000000000000000000000000000000000000000..c96020c963825b9fb2f88db86a070162e6acbdde
--- /dev/null
+++ b/impulse/VS8F-1/114-Cathedral.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ceb429f76b75df3f39349cea8a1502c766564418dbe575cf393102c81be6b124
+size 1853152
diff --git a/impulse/VS8F-1/115-LongCave.wav b/impulse/VS8F-1/115-LongCave.wav
new file mode 100644
index 0000000000000000000000000000000000000000..fcc7a0f7b402fed1fd69dc0ee532f84bf282947d
--- /dev/null
+++ b/impulse/VS8F-1/115-LongCave.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a98545c4647eecad4d2b15047fa04c4123844ed1eeabc5a0a813f82248ff3dcc
+size 2120416
diff --git a/impulse/VS8F-1/116-GarageDr.wav b/impulse/VS8F-1/116-GarageDr.wav
new file mode 100644
index 0000000000000000000000000000000000000000..31c4cb3100256b4664b99d3eec0a1381fc302969
--- /dev/null
+++ b/impulse/VS8F-1/116-GarageDr.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fde75ec1c9a927001f796611294c3c0e8429b5a3d9c1816ae6bea30efada6ab
+size 268000
diff --git a/impulse/VS8F-1/117-RockKick.wav b/impulse/VS8F-1/117-RockKick.wav
new file mode 100644
index 0000000000000000000000000000000000000000..7e452ec11b0ccb225d89f172276f9eeaedce585c
--- /dev/null
+++ b/impulse/VS8F-1/117-RockKick.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57dd8a02f8e109c11db81259b8f89abf7e810262e18e39d9020a4fad73dddf4c
+size 326368
diff --git a/impulse/VS8F-1/118-RockSnare.wav b/impulse/VS8F-1/118-RockSnare.wav
new file mode 100644
index 0000000000000000000000000000000000000000..27300f0f688191f1b5cdc865193d09f21d52080e
--- /dev/null
+++ b/impulse/VS8F-1/118-RockSnare.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1fbcb7539689bae885f9a3a9f23dfe1cdb88cd8be30cc57c15520ffbd1f458b
+size 400096
diff --git a/impulse/VS8F-2/200-R1_Reverb1.wav b/impulse/VS8F-2/200-R1_Reverb1.wav
new file mode 100644
index 0000000000000000000000000000000000000000..d0beb713cb124733f260edb976a2efafcf82fdbe
--- /dev/null
+++ b/impulse/VS8F-2/200-R1_Reverb1.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a81291b6857eed249af8e8448a47d5bb15b2afd2e0f79cfaaf1f1025f466f4a9
+size 1192672
diff --git a/impulse/VS8F-2/201-R1_LargeHall.wav b/impulse/VS8F-2/201-R1_LargeHall.wav
new file mode 100644
index 0000000000000000000000000000000000000000..f4bfb4402879e161bb98d27d523937f9ab534f0e
--- /dev/null
+++ b/impulse/VS8F-2/201-R1_LargeHall.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e45afd1d7fa363ff8e4dbe0298b0f1eca0a895e24df548c9d0acf1d496f92243
+size 1324768
diff --git a/impulse/VS8F-2/202-R1_SmallHall.wav b/impulse/VS8F-2/202-R1_SmallHall.wav
new file mode 100644
index 0000000000000000000000000000000000000000..c4ed0a3f9c3e84c2c3392edad2d9e6e0b829b1d5
--- /dev/null
+++ b/impulse/VS8F-2/202-R1_SmallHall.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:917d198e75991fb43cdcb0bca26cc9053df6220859b71264d1cc7cf02ae5551b
+size 796384
diff --git a/impulse/VS8F-2/203-R1_Strings.wav b/impulse/VS8F-2/203-R1_Strings.wav
new file mode 100644
index 0000000000000000000000000000000000000000..bf087a122b5d6d56cc45bf8df2ce00fff4c62317
--- /dev/null
+++ b/impulse/VS8F-2/203-R1_Strings.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70987aaaf7f6a02cecbd402b252ccc7614ff05c314a3148a7dcdb8c12b59b6b5
+size 1192672
diff --git a/impulse/VS8F-2/204-R1_PianoHall.wav b/impulse/VS8F-2/204-R1_PianoHall.wav
new file mode 100644
index 0000000000000000000000000000000000000000..d0c6e16798b85c84e3067b244a7c521cb83e7f29
--- /dev/null
+++ b/impulse/VS8F-2/204-R1_PianoHall.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5492cf3884335da6dcff319511be1814a5fdda07295d7dcac6cbd13696007a8b
+size 1060576
diff --git a/impulse/VS8F-2/205-R1_OrchRoom.wav b/impulse/VS8F-2/205-R1_OrchRoom.wav
new file mode 100644
index 0000000000000000000000000000000000000000..3b64be55bb6928dd569052bb42f2cf0d8f1e7605
--- /dev/null
+++ b/impulse/VS8F-2/205-R1_OrchRoom.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:953850822a2773af67c5c8e98f7bf4ff24de9018ccce3182d17558f45f8f8f6e
+size 664288
diff --git a/impulse/VS8F-2/206-R1_VocalRoom.wav b/impulse/VS8F-2/206-R1_VocalRoom.wav
new file mode 100644
index 0000000000000000000000000000000000000000..247593c6a29e786d1a3f95b39f51c0f2fb40a49e
--- /dev/null
+++ b/impulse/VS8F-2/206-R1_VocalRoom.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21cc8996b5d7d98c89c81d4cadeafc2d957112eea9a34fa673f1d0893e4833ea
+size 796384
diff --git a/impulse/VS8F-2/207-R1_MediumRm.wav b/impulse/VS8F-2/207-R1_MediumRm.wav
new file mode 100644
index 0000000000000000000000000000000000000000..086801214a6a5dca60067363b144162d6a083aa0
--- /dev/null
+++ b/impulse/VS8F-2/207-R1_MediumRm.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23207fc3a9273ffe1f632215f9a7581e5b39e9ca192c6a3bd8d495bf1f51dd48
+size 464608
diff --git a/impulse/VS8F-2/208-R1_LargeRoom.wav b/impulse/VS8F-2/208-R1_LargeRoom.wav
new file mode 100644
index 0000000000000000000000000000000000000000..090c6051bf442d9281fbfab0a2b9e41ca333efd8
--- /dev/null
+++ b/impulse/VS8F-2/208-R1_LargeRoom.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6342d02c2f13ed80ef4301c3cbb2c4365abc175d94ab52c584ffbbc2bc572a31
+size 664288
diff --git a/impulse/VS8F-2/209-R1_CoolPlate.wav b/impulse/VS8F-2/209-R1_CoolPlate.wav
new file mode 100644
index 0000000000000000000000000000000000000000..3ed4759a745992f423bc19c377f146e21016d35f
--- /dev/null
+++ b/impulse/VS8F-2/209-R1_CoolPlate.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a0ca4248a3d94b4f01a51385b750a6af75f30f61d752964c945c7a8a7b67d08
+size 1192672
diff --git a/impulse/VS8F-2/210-R1_ShortPlt.wav b/impulse/VS8F-2/210-R1_ShortPlt.wav
new file mode 100644
index 0000000000000000000000000000000000000000..99f1f42499232fbe40c1accb180ae05b39765d8b
--- /dev/null
+++ b/impulse/VS8F-2/210-R1_ShortPlt.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89134a620aa94ce25919cef194e9c0b5db784992cb9154a6ba5a35ccadfb4015
+size 928480
diff --git a/impulse/VS8F-2/211-R1_VocalPlt.wav b/impulse/VS8F-2/211-R1_VocalPlt.wav
new file mode 100644
index 0000000000000000000000000000000000000000..d277b00c91a7aa7e5e8e0d921644c4674620df90
--- /dev/null
+++ b/impulse/VS8F-2/211-R1_VocalPlt.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9dd177835bdc84c196af55a359242dadbb09dfb0a207e287f44d8ef5153bc44
+size 1060576
diff --git a/impulse/VS8F-2/212-R1_SoftAmb.wav b/impulse/VS8F-2/212-R1_SoftAmb.wav
new file mode 100644
index 0000000000000000000000000000000000000000..cdd4d920a783a3518aed091d18b308be7d0b7889
--- /dev/null
+++ b/impulse/VS8F-2/212-R1_SoftAmb.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6819ffb1645a84ac2d667543ad0bc6a18043169d491bbf4d00c84bf1b0487e2b
+size 268000
diff --git a/impulse/VS8F-2/213-R1_RoomAmb.wav b/impulse/VS8F-2/213-R1_RoomAmb.wav
new file mode 100644
index 0000000000000000000000000000000000000000..2e5942857e3003ed8f813d778d7a45d35b1cc4e9
--- /dev/null
+++ b/impulse/VS8F-2/213-R1_RoomAmb.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cad314849ba00d9aaeaa6cad04119dd1410de5f230acc50c7b9194a6ff0f0661
+size 400096
diff --git a/impulse/VS8F-2/214-R1_Cathedral.wav b/impulse/VS8F-2/214-R1_Cathedral.wav
new file mode 100644
index 0000000000000000000000000000000000000000..8813fe1c1eff65f4a9bcd4650327e500ba3bae74
--- /dev/null
+++ b/impulse/VS8F-2/214-R1_Cathedral.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c6ea13edd5e809ade967efb974ed46a14bffadb3b31435f86d126a68a60a292
+size 1721056
diff --git a/impulse/VS8F-2/215-R1_LongCave.wav b/impulse/VS8F-2/215-R1_LongCave.wav
new file mode 100644
index 0000000000000000000000000000000000000000..33c3b87f32f04a283b8460ad4d7f38a20c850fd6
--- /dev/null
+++ b/impulse/VS8F-2/215-R1_LongCave.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce187f05b6341f961e9928d6b25dff9a28cbe0bbbfd38ec88aca79cc38d6be9f
+size 1853152
diff --git a/impulse/VS8F-2/216-R1_GarageDr.wav b/impulse/VS8F-2/216-R1_GarageDr.wav
new file mode 100644
index 0000000000000000000000000000000000000000..804476c9a45b1785a1e6026c8a475e1dce9085f3
--- /dev/null
+++ b/impulse/VS8F-2/216-R1_GarageDr.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bece90c3192d23e735e6eb6ffde1c5654d3ed89f4c6e254fed6c460a129be4e6
+size 268000
diff --git a/impulse/VS8F-2/217-R1_RockKick.wav b/impulse/VS8F-2/217-R1_RockKick.wav
new file mode 100644
index 0000000000000000000000000000000000000000..dfd6a1810a711ecc6dd1684b04de10cdb9bcc9aa
--- /dev/null
+++ b/impulse/VS8F-2/217-R1_RockKick.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fd8052da61a8dd155cc06c7d745e51b03daed0ae6774270eecff991a2b15ac3
+size 400096
diff --git a/impulse/VS8F-2/218-R1_RockSnare.wav b/impulse/VS8F-2/218-R1_RockSnare.wav
new file mode 100644
index 0000000000000000000000000000000000000000..b2bef81a04fe74cde1be914e60f1f8a3bdf4895c
--- /dev/null
+++ b/impulse/VS8F-2/218-R1_RockSnare.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c72f02fe715fbaf3df7b823e428e1b8992a84671adcbd15a503c34ee0939d4b
+size 532192
diff --git a/impulse/VS8F-2/220-R2_Reverb2.wav b/impulse/VS8F-2/220-R2_Reverb2.wav
new file mode 100644
index 0000000000000000000000000000000000000000..9369a7b44309190b5540e68ddf6e55c9b2623654
--- /dev/null
+++ b/impulse/VS8F-2/220-R2_Reverb2.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a25afb701c0259bec8b910a3a88612b216fc5a394f8133c0f398cec06dc4ca94
+size 1324768
diff --git a/impulse/VS8F-2/221-R2_LargeHall.wav b/impulse/VS8F-2/221-R2_LargeHall.wav
new file mode 100644
index 0000000000000000000000000000000000000000..7397d34a1f81198adac52d6e22519ccccaaadae0
--- /dev/null
+++ b/impulse/VS8F-2/221-R2_LargeHall.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:147a51b02dd12172db2e5913ab2b9e16397260693b9e0d4e29e13893c8114e4d
+size 1588960
diff --git a/impulse/VS8F-2/222-R2_SmallHall.wav b/impulse/VS8F-2/222-R2_SmallHall.wav
new file mode 100644
index 0000000000000000000000000000000000000000..974a468332b76e6b00a96cb3ff817927baedd8b7
--- /dev/null
+++ b/impulse/VS8F-2/222-R2_SmallHall.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5824f3701bbaac2e9e95dfeacb2191f929fbaf29814cb77d60349b5bd9012ea7
+size 928480
diff --git a/impulse/VS8F-2/223-R2_Strings.wav b/impulse/VS8F-2/223-R2_Strings.wav
new file mode 100644
index 0000000000000000000000000000000000000000..c1a012a06d13fdb6b5a5f8ec0d2dcc4c186d4fd7
--- /dev/null
+++ b/impulse/VS8F-2/223-R2_Strings.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0859b2d3bfe2005c0e608b4ae5cacb51fdc7626cf71d821bd6c677871027589
+size 1192672
diff --git a/impulse/VS8F-2/224-R2_PianoHall.wav b/impulse/VS8F-2/224-R2_PianoHall.wav
new file mode 100644
index 0000000000000000000000000000000000000000..95126727180e3bc22f23d593d085a2d73965e9a6
--- /dev/null
+++ b/impulse/VS8F-2/224-R2_PianoHall.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95564127c49365e165facc791e1a6660e72fb84b26ecbf291d8c68f9b583e093
+size 1060576
diff --git a/impulse/VS8F-2/225-R2_OrchRoom.wav b/impulse/VS8F-2/225-R2_OrchRoom.wav
new file mode 100644
index 0000000000000000000000000000000000000000..5e60a5274ab39979e0791b68575c728bed540c26
--- /dev/null
+++ b/impulse/VS8F-2/225-R2_OrchRoom.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b118c8455760ec7907ba88ee1dfe731fded25cb16cc254ab9887a4b8cf859c74
+size 664288
diff --git a/impulse/VS8F-2/226-R2_VocalRoom.wav b/impulse/VS8F-2/226-R2_VocalRoom.wav
new file mode 100644
index 0000000000000000000000000000000000000000..ed85baab6e4b085e302d390e22e1146d293deb3b
--- /dev/null
+++ b/impulse/VS8F-2/226-R2_VocalRoom.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1f580a6fdbba51cccdbd72243c90c0cf0f7c4b0356fda2e80ef5e9bc98cb63a
+size 796384
diff --git a/impulse/VS8F-2/227-R2_MediumRm.wav b/impulse/VS8F-2/227-R2_MediumRm.wav
new file mode 100644
index 0000000000000000000000000000000000000000..3021db677aca40aeac5cc7b83891b0412e50dd73
--- /dev/null
+++ b/impulse/VS8F-2/227-R2_MediumRm.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84ea78d6048d5b206b9f9970d38cecd1e722dde7e9edd89bf81eb1bc78ea02ab
+size 532192
diff --git a/impulse/VS8F-2/228-R2_LargeRoom.wav b/impulse/VS8F-2/228-R2_LargeRoom.wav
new file mode 100644
index 0000000000000000000000000000000000000000..7eadd0cf7ea457ed58d55a164796ad489e7593a4
--- /dev/null
+++ b/impulse/VS8F-2/228-R2_LargeRoom.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afd70f92e128fc81cd94a54309e87891f542cabf557abdc4d61b43b9e5a19956
+size 796384
diff --git a/impulse/VS8F-2/229-R2_CoolPlate.wav b/impulse/VS8F-2/229-R2_CoolPlate.wav
new file mode 100644
index 0000000000000000000000000000000000000000..28d49423300ba3e63013beca721c136b03fc30f8
--- /dev/null
+++ b/impulse/VS8F-2/229-R2_CoolPlate.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06369334b5b61eb94d51b8d4989776ed163fe7a35da0ff4c2d3abe37ebbdd3f4
+size 1192672
diff --git a/impulse/VS8F-2/230-R2_ShortPlt.wav b/impulse/VS8F-2/230-R2_ShortPlt.wav
new file mode 100644
index 0000000000000000000000000000000000000000..93ae7c5e760ae5df065662a1ae6860446a2c1577
--- /dev/null
+++ b/impulse/VS8F-2/230-R2_ShortPlt.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d02a0506ca0c79b86e1562386b30ba14eb20363a5da0e467e8eac393f12d7bb
+size 928480
diff --git a/impulse/VS8F-2/231-R2_VocalPlt.wav b/impulse/VS8F-2/231-R2_VocalPlt.wav
new file mode 100644
index 0000000000000000000000000000000000000000..e6afe1d4f5c4f6e83cb7542e39dc719bfae3187c
--- /dev/null
+++ b/impulse/VS8F-2/231-R2_VocalPlt.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2b9a17cf73f3406632d75ea6195a5c2898d176c377585a00ba8fbd7a6e9626b
+size 1192672
diff --git a/impulse/VS8F-2/232-R2_SoftAmb.wav b/impulse/VS8F-2/232-R2_SoftAmb.wav
new file mode 100644
index 0000000000000000000000000000000000000000..0af18e4608848872ad3bbd9788865ca607c077d8
--- /dev/null
+++ b/impulse/VS8F-2/232-R2_SoftAmb.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f632ff15e1d0debd699dff84c8d4fae6a19511cdf9f0f3e17c4303b4d8c56ac9
+size 200416
diff --git a/impulse/VS8F-2/233-R2_RoomAmb.wav b/impulse/VS8F-2/233-R2_RoomAmb.wav
new file mode 100644
index 0000000000000000000000000000000000000000..8086ef83c26cc054d201be3ada3badfcc4d4a8d1
--- /dev/null
+++ b/impulse/VS8F-2/233-R2_RoomAmb.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:852f6b054aa80f93087a2b18ed5e57c640c65a578ce5827eb6c345a564779b99
+size 268000
diff --git a/impulse/VS8F-2/234-R2_Cathedral.wav b/impulse/VS8F-2/234-R2_Cathedral.wav
new file mode 100644
index 0000000000000000000000000000000000000000..99bd52d06e9d30bc25bf3a0274ace686b2610b01
--- /dev/null
+++ b/impulse/VS8F-2/234-R2_Cathedral.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ad5c259921e281a85aa685c42b8e96f96a48d73ef0faa3b53dafc4048c1c16a
+size 2120416
diff --git a/impulse/VS8F-2/235-R2_LongCave.wav b/impulse/VS8F-2/235-R2_LongCave.wav
new file mode 100644
index 0000000000000000000000000000000000000000..39373c44463dfcc2673797a6332e2413a1672313
--- /dev/null
+++ b/impulse/VS8F-2/235-R2_LongCave.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a50c986253585947b7e215c56c5f747f830c4a9db39bf443aaf8d6bab0a8d778
+size 2120416
diff --git a/impulse/VS8F-2/236-R2_GarageDr.wav b/impulse/VS8F-2/236-R2_GarageDr.wav
new file mode 100644
index 0000000000000000000000000000000000000000..ccb1cdf875364b6b859bb492fc13273c4c05a58f
--- /dev/null
+++ b/impulse/VS8F-2/236-R2_GarageDr.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:489eb5cf1e468ccadc7268ca93d50c48587e3edb18499e893626edaebbe2dceb
+size 200416
diff --git a/impulse/VS8F-2/237-R2_RockKick.wav b/impulse/VS8F-2/237-R2_RockKick.wav
new file mode 100644
index 0000000000000000000000000000000000000000..a26b3be2db0f239c60d1ec2aeaf6b467999815bc
--- /dev/null
+++ b/impulse/VS8F-2/237-R2_RockKick.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:298bb9200cf32fa2ad8315d226173897a6bc4a6b921a9092e868a6b2bf0a1800
+size 400096
diff --git a/impulse/VS8F-2/238-R2_RockSnare.wav b/impulse/VS8F-2/238-R2_RockSnare.wav
new file mode 100644
index 0000000000000000000000000000000000000000..b605a12ae5d01eb7f124ba11d94a77b3d3dd31be
--- /dev/null
+++ b/impulse/VS8F-2/238-R2_RockSnare.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd2643f393c3275f50382237b6d8b8aebb4cc84f50b53f6389fcca2ec21192b6
+size 532192
diff --git a/impulse/VS8F-3/301-LargeHall.wav b/impulse/VS8F-3/301-LargeHall.wav
new file mode 100644
index 0000000000000000000000000000000000000000..2a09cd7f2de0e4e10f3cea05422cec0ffe84b02a
--- /dev/null
+++ b/impulse/VS8F-3/301-LargeHall.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72c4b6473a119c3d7e573253591b14de31bff9c25cb8ee8cc6372629930ccb34
+size 1324768
diff --git a/impulse/VS8F-3/302-SmallHall.wav b/impulse/VS8F-3/302-SmallHall.wav
new file mode 100644
index 0000000000000000000000000000000000000000..d5984f1cdd7820da72fc5ea13ee020063180ee31
--- /dev/null
+++ b/impulse/VS8F-3/302-SmallHall.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9b0f0b9c797bf1f2b4e4e92c84108b194fa11eb32cbe07ccd39f079f3bc03ee
+size 664288
diff --git a/impulse/VS8F-3/303-Strings.wav b/impulse/VS8F-3/303-Strings.wav
new file mode 100644
index 0000000000000000000000000000000000000000..40fc688c97fd07fea568335f0303b3c221755904
--- /dev/null
+++ b/impulse/VS8F-3/303-Strings.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b4b28231e9c5a358ff71db34557bc97e4ca4ba5f36411b599c38b799fc6fdd0
+size 1324768
diff --git a/impulse/VS8F-3/304-PianoHall.wav b/impulse/VS8F-3/304-PianoHall.wav
new file mode 100644
index 0000000000000000000000000000000000000000..6d2463ec938deb139f8f0b3134ed09861a3cdef2
--- /dev/null
+++ b/impulse/VS8F-3/304-PianoHall.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e02dad4b6d5e6b47c81a6d7c98e584bb59c8bb56a5b6e4eddd0fab559473523
+size 928480
diff --git a/impulse/VS8F-3/305-OrchRoom.wav b/impulse/VS8F-3/305-OrchRoom.wav
new file mode 100644
index 0000000000000000000000000000000000000000..3fed3f8b8383d2825e74b52e0e03408214bee982
--- /dev/null
+++ b/impulse/VS8F-3/305-OrchRoom.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:387cd5ece2de38dbac0c4422e817b488745c6c32515fa7c10e0654dff3ba53ba
+size 664288
diff --git a/impulse/VS8F-3/306-VocalRoom.wav b/impulse/VS8F-3/306-VocalRoom.wav
new file mode 100644
index 0000000000000000000000000000000000000000..3fcc42de97d43c4ea9b609b7b75d4779a2c6ce7c
--- /dev/null
+++ b/impulse/VS8F-3/306-VocalRoom.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33170ff87c4774da91bba3582b5edac758570b003d949287f24528fdcfd31bd8
+size 664288
diff --git a/impulse/VS8F-3/307-MediumRm.wav b/impulse/VS8F-3/307-MediumRm.wav
new file mode 100644
index 0000000000000000000000000000000000000000..7bf2835ac5cf88db3fa9c0aa87c78d61cceb3a3e
--- /dev/null
+++ b/impulse/VS8F-3/307-MediumRm.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:735f6fe014cab1f59c2cf3a02645c6bb72f6d6a4e86cc421e95543166c3b9425
+size 400096
diff --git a/impulse/VS8F-3/308-LargeRoom.wav b/impulse/VS8F-3/308-LargeRoom.wav
new file mode 100644
index 0000000000000000000000000000000000000000..a697fda620d4bc19f3cbae2465b00a98e3f449d6
--- /dev/null
+++ b/impulse/VS8F-3/308-LargeRoom.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76ebe2e7c74fb5cedab4921d5951baaef1f5cbffd3942ae2ec4a25619c4499fd
+size 664288
diff --git a/impulse/VS8F-3/309-CoolPlate.wav b/impulse/VS8F-3/309-CoolPlate.wav
new file mode 100644
index 0000000000000000000000000000000000000000..6a141a866af56588f576765c4594dfbcdbe012c9
--- /dev/null
+++ b/impulse/VS8F-3/309-CoolPlate.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3de3dc425d622c740d86074e95dac55acbf19425d350046bd80b5a8fe1adeac4
+size 1324768
diff --git a/impulse/VS8F-3/310-ShortPlt.wav b/impulse/VS8F-3/310-ShortPlt.wav
new file mode 100644
index 0000000000000000000000000000000000000000..19e102a4ff4c62ef15e3ada31a9ae4180329e419
--- /dev/null
+++ b/impulse/VS8F-3/310-ShortPlt.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07754e1a61c9da33ace8bddcc0bb2dc5067d525a9b08caa2633bb98f5acfb531
+size 1060576
diff --git a/impulse/VS8F-3/311-VocalPlt.wav b/impulse/VS8F-3/311-VocalPlt.wav
new file mode 100644
index 0000000000000000000000000000000000000000..a47c33e43e2002e3996529d656f889ca79b28460
--- /dev/null
+++ b/impulse/VS8F-3/311-VocalPlt.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da838a15e1b11d60b133c3b9adaf6bddcdad8a17bfdeade7ea25bc43e424acf4
+size 1192672
diff --git a/impulse/VS8F-3/312-SoftAmb.wav b/impulse/VS8F-3/312-SoftAmb.wav
new file mode 100644
index 0000000000000000000000000000000000000000..bc0c83c30e3177c17ab6bf1a4607755ff85124ed
--- /dev/null
+++ b/impulse/VS8F-3/312-SoftAmb.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90067c097e5b6c234c3874fff893845fccc1d826eeb2118c20b10fcbc4eab1f9
+size 268000
diff --git a/impulse/VS8F-3/313-RoomAmb.wav b/impulse/VS8F-3/313-RoomAmb.wav
new file mode 100644
index 0000000000000000000000000000000000000000..cb1cd064ab5012c7d08d18a1f14c2a1115f2981b
--- /dev/null
+++ b/impulse/VS8F-3/313-RoomAmb.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba9debc9221c9a4756ea8b6bcaa2ba6f8a1e99183378e53c05ced80fab3d56a3
+size 268000
diff --git a/impulse/VS8F-3/314-Cathedral.wav b/impulse/VS8F-3/314-Cathedral.wav
new file mode 100644
index 0000000000000000000000000000000000000000..71a5cbf6e420fff50b123b84129d7dd58e5bcf12
--- /dev/null
+++ b/impulse/VS8F-3/314-Cathedral.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45939810b11ea3f73fdcd760bdacf97f563097354952f4e5c2a56d6f3a2c9b7e
+size 1853152
diff --git a/impulse/VS8F-3/315-LongCave.wav b/impulse/VS8F-3/315-LongCave.wav
new file mode 100644
index 0000000000000000000000000000000000000000..79a424e9a9860d60feed33fad582f95f3dbc32d8
--- /dev/null
+++ b/impulse/VS8F-3/315-LongCave.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:869ecafeed39a22a4fc02a127ae831cc4300b1785299e5d05bc2df671933db55
+size 2384608
diff --git a/impulse/VS8F-3/316-GarageDr.wav b/impulse/VS8F-3/316-GarageDr.wav
new file mode 100644
index 0000000000000000000000000000000000000000..0ac81defae9dc6f745c560dd554d38dce3bef8f1
--- /dev/null
+++ b/impulse/VS8F-3/316-GarageDr.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f78bb7250730665ed5ccc8fc036e73f6fb525ffa14dc4554715d407d5eec4f1
+size 268000
diff --git a/impulse/VS8F-3/317-RockKick.wav b/impulse/VS8F-3/317-RockKick.wav
new file mode 100644
index 0000000000000000000000000000000000000000..74b0b77d79ca59a1b91c2ba927eac9ed0bc3ca9b
--- /dev/null
+++ b/impulse/VS8F-3/317-RockKick.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e51289aac3c5630ac37e5208d6613a58e1023b38d8ff035369879cca814f297
+size 400096
diff --git a/impulse/VS8F-3/318-RockSnare.wav b/impulse/VS8F-3/318-RockSnare.wav
new file mode 100644
index 0000000000000000000000000000000000000000..89bd27c721e81ea214735ef94685accbdfedb133
--- /dev/null
+++ b/impulse/VS8F-3/318-RockSnare.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86032367538db5e717652b58f57ec41ae74e003c96ce6b6434fb94871ffb5d65
+size 596704
diff --git a/models/Apollo/apollo_edm_big_by_essid.ckpt b/models/Apollo/apollo_edm_big_by_essid.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..382ffe2f145fe4527e3b91f46ad659a5c633e446
--- /dev/null
+++ b/models/Apollo/apollo_edm_big_by_essid.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d3cbc482040d053e72212c55145116b05b617f2e1edf4cf6350bfdb93d66ff5
+size 232903951
diff --git a/models/Apollo/apollo_edm_big_by_essid.yaml b/models/Apollo/apollo_edm_big_by_essid.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..98269119d426e3f6245244cd0f6ee904125d95ec
--- /dev/null
+++ b/models/Apollo/apollo_edm_big_by_essid.yaml
@@ -0,0 +1,114 @@
+exp:
+ dir: ./exps # directory to save the experiment
+ name: bluearchive # name of the experiment
+
+datas:
+ _target_: look2hear.datas.DataModule
+ dataset_type: 1 # 1 or 2. see README for more details
+ sr: 44100 # sample rate
+ segments: 4 # cropped audio in seconds. chunksize = sr * segments
+ num_steps: 1000 # number of samples to be used for training in one epoch.
+ batch_size: 1 # batch size
+ num_workers: 0 # number of workers for data loading
+ pin_memory: true # pin memory for data loading
+
+ stems:
+ original: original # key for the original audio files, don't change it
+ codec: codec # key for the codec audio files, don't change it
+
+ train:
+ dir: # dataset where the training audio files are stored
+ - output # list of directories
+ original_format: wav # the format of the original audio files
+ codec_format: mp3 # the format of the codec audio files
+
+ valid:
+ dir: # dataset where the validation audio files are stored
+ - output_v # list of directories
+ original_format: wav # the format of the original audio files
+ codec_format: mp3 # the format of the codec audio files
+
+model:
+ _target_: look2hear.models.apollo.Apollo
+ sr: 44100 # sample rate
+ win: 20 # window size in milliseconds
+ feature_dim: 256 # feature dimension
+ layer: 6 # number of layers
+
+discriminator:
+ _target_: look2hear.discriminators.frequencydis.MultiFrequencyDiscriminator
+ nch: 2
+ window: [32, 64, 128, 256, 512, 1024, 2048]
+
+optimizer_g:
+ _target_: torch.optim.AdamW
+ lr: 0.001
+ weight_decay: 0.01
+
+optimizer_d:
+ _target_: torch.optim.AdamW
+ lr: 0.0001
+ weight_decay: 0.01
+ betas: [0.5, 0.99]
+
+scheduler_g:
+ _target_: torch.optim.lr_scheduler.StepLR
+ step_size: 2
+ gamma: 0.98
+
+scheduler_d:
+ _target_: torch.optim.lr_scheduler.StepLR
+ step_size: 2
+ gamma: 0.98
+
+loss_g:
+ _target_: look2hear.losses.gan_losses.MultiFrequencyGenLoss
+ eps: 1e-8
+
+loss_d:
+ _target_: look2hear.losses.gan_losses.MultiFrequencyDisLoss
+ eps: 1e-8
+
+metrics:
+ _target_: look2hear.losses.MultiSrcNegSDR
+ sdr_type: sisdr # metric for validation, one of [snr, sisdr, sdsdr]
+
+system:
+ _target_: look2hear.system.audio_litmodule.AudioLightningModule
+
+# comment out the early_stopping section below if you do not wish to use early stopping
+early_stopping:
+ _target_: pytorch_lightning.callbacks.EarlyStopping
+ monitor: val_loss # metric to monitor
+ patience: 50 # number of epochs with no improvement after which training will be stopped
+ mode: min
+ verbose: true
+
+checkpoint:
+ _target_: pytorch_lightning.callbacks.ModelCheckpoint
+ dirpath: ${exp.dir}/${exp.name}/checkpoints
+ monitor: val_loss # metric to monitor
+ mode: min
+ verbose: true
+ save_top_k: 10 # number of best models to save
+ save_last: true # save the last checkpoint
+ filename: '{epoch}-{val_loss:.4f}'
+
+logger:
+ _target_: pytorch_lightning.loggers.WandbLogger
+ name: ${exp.name}
+ save_dir: ${exp.dir}/${exp.name}/logs
+ offline: false # if true, the logs will not be uploaded to wandb
+ project: Audio-Restoration
+
+trainer:
+ _target_: pytorch_lightning.Trainer
+ devices: [0] # list of GPU indices to use (here: only GPU 0)
+ max_epochs: 1000 # max number of epochs
+ sync_batchnorm: true
+ default_root_dir: ${exp.dir}/${exp.name}/
+ accelerator: cuda
+ limit_train_batches: 1.0
+ fast_dev_run: false
+ precision: bf16 # [16, bf16, 32, 64]
+ enable_model_summary: true
\ No newline at end of file
diff --git a/models/Apollo/apollo_edm_by_essid.ckpt b/models/Apollo/apollo_edm_by_essid.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..00e15c659091338de5ca7e5eb0a45045828c273f
--- /dev/null
+++ b/models/Apollo/apollo_edm_by_essid.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f6bd0abb4251c7adc1cec20e0de20a1bc9c5fe98168a50b627f5c72a993be92
+size 86418321
diff --git a/models/Apollo/apollo_edm_by_essid.yaml b/models/Apollo/apollo_edm_by_essid.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7865a976cf46666db65b5d8a835fc1d10076a57f
--- /dev/null
+++ b/models/Apollo/apollo_edm_by_essid.yaml
@@ -0,0 +1,114 @@
+exp:
+ dir: ./exps # directory to save the experiment
+ name: bluearchive # name of the experiment
+
+datas:
+ _target_: look2hear.datas.DataModule
+ dataset_type: 1 # 1 or 2. see README for more details
+ sr: 44100 # sample rate
+ segments: 4 # cropped audio in seconds. chunksize = sr * segments
+ num_steps: 1000 # number of samples to be used for training in one epoch.
+ batch_size: 1 # batch size
+ num_workers: 0 # number of workers for data loading
+ pin_memory: true # pin memory for data loading
+
+ stems:
+ original: original # key for the original audio files, don't change it
+ codec: codec # key for the codec audio files, don't change it
+
+ train:
+ dir: # dataset where the training audio files are stored
+ - output # list of directories
+ original_format: wav # the format of the original audio files
+ codec_format: mp3 # the format of the codec audio files
+
+ valid:
+ dir: # dataset where the validation audio files are stored
+ - output_v # list of directories
+ original_format: wav # the format of the original audio files
+ codec_format: mp3 # the format of the codec audio files
+
+model:
+ _target_: look2hear.models.apollo.Apollo
+ sr: 44100 # sample rate
+ win: 20 # window size in milliseconds
+ feature_dim: 128 # feature dimension
+ layer: 6 # number of layers
+
+discriminator:
+ _target_: look2hear.discriminators.frequencydis.MultiFrequencyDiscriminator
+ nch: 2
+ window: [32, 64, 128, 256, 512, 1024, 2048]
+
+optimizer_g:
+ _target_: torch.optim.AdamW
+ lr: 0.001
+ weight_decay: 0.01
+
+optimizer_d:
+ _target_: torch.optim.AdamW
+ lr: 0.0001
+ weight_decay: 0.01
+ betas: [0.5, 0.99]
+
+scheduler_g:
+ _target_: torch.optim.lr_scheduler.StepLR
+ step_size: 2
+ gamma: 0.98
+
+scheduler_d:
+ _target_: torch.optim.lr_scheduler.StepLR
+ step_size: 2
+ gamma: 0.98
+
+loss_g:
+ _target_: look2hear.losses.gan_losses.MultiFrequencyGenLoss
+ eps: 1e-8
+
+loss_d:
+ _target_: look2hear.losses.gan_losses.MultiFrequencyDisLoss
+ eps: 1e-8
+
+metrics:
+ _target_: look2hear.losses.MultiSrcNegSDR
+ sdr_type: sisdr # metric for validation, one of [snr, sisdr, sdsdr]
+
+system:
+ _target_: look2hear.system.audio_litmodule.AudioLightningModule
+
+# comment out the early_stopping section below if you do not wish to use early stopping
+early_stopping:
+ _target_: pytorch_lightning.callbacks.EarlyStopping
+ monitor: val_loss # metric to monitor
+ patience: 50 # number of epochs with no improvement after which training will be stopped
+ mode: min
+ verbose: true
+
+checkpoint:
+ _target_: pytorch_lightning.callbacks.ModelCheckpoint
+ dirpath: ${exp.dir}/${exp.name}/checkpoints
+ monitor: val_loss # metric to monitor
+ mode: min
+ verbose: true
+ save_top_k: 10 # number of best models to save
+ save_last: true # save the last checkpoint
+ filename: '{epoch}-{val_loss:.4f}'
+
+logger:
+ _target_: pytorch_lightning.loggers.WandbLogger
+ name: ${exp.name}
+ save_dir: ${exp.dir}/${exp.name}/logs
+ offline: false # if true, the logs will not be uploaded to wandb
+ project: Audio-Restoration
+
+trainer:
+ _target_: pytorch_lightning.Trainer
+ devices: [0] # list of GPU indices to use (here: only GPU 0)
+ max_epochs: 1000 # max number of epochs
+ sync_batchnorm: true
+ default_root_dir: ${exp.dir}/${exp.name}/
+ accelerator: cuda
+ limit_train_batches: 1.0
+ fast_dev_run: false
+ precision: bf16 # [16, bf16, 32, 64]
+ enable_model_summary: true
\ No newline at end of file
diff --git a/models/Bandit/Bandit_Plus/config_dnr_bandit_bsrnn_multi_mus64.yaml b/models/Bandit/Bandit_Plus/config_dnr_bandit_bsrnn_multi_mus64.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5f67bf9d966f88a30ba82bf47e6f109497375979
--- /dev/null
+++ b/models/Bandit/Bandit_Plus/config_dnr_bandit_bsrnn_multi_mus64.yaml
@@ -0,0 +1,78 @@
+name: "MultiMaskMultiSourceBandSplitRNN"
+audio:
+ chunk_size: 264600
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+
+model:
+ in_channel: 1
+ stems: ['speech', 'music', 'effects']
+ band_specs: "musical"
+ n_bands: 64
+ fs: 44100
+ require_no_overlap: false
+ require_no_gap: true
+ normalize_channel_independently: false
+ treat_channel_as_feature: true
+ n_sqm_modules: 8
+ emb_dim: 128
+ rnn_dim: 256
+ bidirectional: true
+ rnn_type: "GRU"
+ mlp_dim: 512
+ hidden_activation: "Tanh"
+ hidden_activation_kwargs: null
+ complex_mask: true
+ n_fft: 2048
+ win_length: 2048
+ hop_length: 512
+ window_fn: "hann_window"
+ wkwargs: null
+ power: null
+ center: true
+ normalized: true
+ pad_mode: "constant"
+ onesided: true
+
+training:
+ batch_size: 4
+ gradient_accumulation_steps: 4
+ grad_clip: 0
+ instruments:
+ - Speech
+ - Music
+ - Effects
+ lr: 9.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: true # enable or disable all augmentations (a quick way to turn them all off if needed)
+ loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+ - 0.2
+ - 0.02
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+ all:
+ channel_shuffle: 0.5 # Set 0 or lower to disable
+ random_inverse: 0.1 # inverse track (better lower probability)
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
+
+inference:
+ batch_size: 1
+ dim_t: 256
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Bandit/Bandit_Plus/model_bandit_plus_dnr_sdr_11.47.ckpt b/models/Bandit/Bandit_Plus/model_bandit_plus_dnr_sdr_11.47.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..f0b50711c499ac1971506ec2254f172cd5f01ca1
--- /dev/null
+++ b/models/Bandit/Bandit_Plus/model_bandit_plus_dnr_sdr_11.47.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c48284779f7d1258a6527d3aaa18a532d45c1f506e2dcc25d5ab179a8c5e2573
+size 148891175
diff --git a/models/Bandit/Bandit_v2/checkpoint-multi_fixed.ckpt b/models/Bandit/Bandit_v2/checkpoint-multi_fixed.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..3778bca329292abcc1a3b60d30782e379bf1c19d
--- /dev/null
+++ b/models/Bandit/Bandit_v2/checkpoint-multi_fixed.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20bcd513dc7eb0541dd045909a4e7dff8dab474cc2efba4904101c76524aee85
+size 149133378
diff --git a/models/Bandit/Bandit_v2/config_dnr_bandit_v2_mus64.yaml b/models/Bandit/Bandit_v2/config_dnr_bandit_v2_mus64.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8691ea03cd880bd4da59fae1952eb5b2cf1defab
--- /dev/null
+++ b/models/Bandit/Bandit_v2/config_dnr_bandit_v2_mus64.yaml
@@ -0,0 +1,78 @@
+cls: Bandit
+
+audio:
+ chunk_size: 384000
+ num_channels: 2
+ sample_rate: 48000
+ min_mean_abs: 0.000
+
+kwargs:
+ in_channels: 1
+ stems: ['speech', 'music', 'sfx']
+ band_type: musical
+ n_bands: 64
+ normalize_channel_independently: false
+ treat_channel_as_feature: true
+ n_sqm_modules: 8
+ emb_dim: 128
+ rnn_dim: 256
+ bidirectional: true
+ rnn_type: "GRU"
+ mlp_dim: 512
+ hidden_activation: "Tanh"
+ hidden_activation_kwargs:
+ complex_mask: true
+ use_freq_weights: true
+ n_fft: 2048
+ win_length: 2048
+ hop_length: 512
+ window_fn: "hann_window"
+ wkwargs:
+ power:
+ center: true
+ normalized: true
+ pad_mode: "reflect"
+ onesided: true
+
+training:
+ batch_size: 4
+ gradient_accumulation_steps: 4
+ grad_clip: 0
+ instruments:
+ - Speech
+ - Music
+ - Sfx
+ lr: 9.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument:
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: true # enable or disable all augmentations (a quick way to turn them all off if needed)
+ loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs: !!python/tuple
+ # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+ - 0.2
+ - 0.02
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+ all:
+ channel_shuffle: 0.5 # Set 0 or lower to disable
+ random_inverse: 0.1 # inverse track (better lower probability)
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
+
+inference:
+ batch_size: 8
+ dim_t: 256
+ num_overlap: 4
diff --git a/models/Demucs/Demucs_v1/demucs.th b/models/Demucs/Demucs_v1/demucs.th
new file mode 100644
index 0000000000000000000000000000000000000000..faf85b1eca8b00dfb1c22b9addd3a1a24f706385
--- /dev/null
+++ b/models/Demucs/Demucs_v1/demucs.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6c4148ba0dc92242d82d7b3f2af55c77bd7cb4ff1a0a3028a523986f36a3cfd
+size 2594202563
diff --git a/models/Demucs/Demucs_v1/demucs_extra.th b/models/Demucs/Demucs_v1/demucs_extra.th
new file mode 100644
index 0000000000000000000000000000000000000000..81c8e5092e83af410b9933571c5c94dc512724f2
--- /dev/null
+++ b/models/Demucs/Demucs_v1/demucs_extra.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3331bcc5d09ba1d791c3cf851970242b0bb229ce81dbada557b6d39e8c6a6a87
+size 2594202563
diff --git a/models/Demucs/Demucs_v1/light.th b/models/Demucs/Demucs_v1/light.th
new file mode 100644
index 0000000000000000000000000000000000000000..f807ba7d0a19b21ce9ed1bedee8914c19c7d259f
--- /dev/null
+++ b/models/Demucs/Demucs_v1/light.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79d1ee3c1541c729c552327756954340a1a46a11ce0009dea77dc583e4b6269c
+size 1062727712
diff --git a/models/Demucs/Demucs_v1/light_extra.th b/models/Demucs/Demucs_v1/light_extra.th
new file mode 100644
index 0000000000000000000000000000000000000000..e9e3437567835f972c1a7a179dac370a5e17027e
--- /dev/null
+++ b/models/Demucs/Demucs_v1/light_extra.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e9b4af564229c80cc73c95d02d2058235bb054c6874b3cba4d5b26943a5ddcb
+size 1062727712
diff --git a/models/Demucs/Demucs_v1/tasnet.th b/models/Demucs/Demucs_v1/tasnet.th
new file mode 100644
index 0000000000000000000000000000000000000000..41c4c00dc7910003584dd5aa921856899f5922e0
--- /dev/null
+++ b/models/Demucs/Demucs_v1/tasnet.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be56693f6a5c4854b124f95bb9dd043f3167614898493738ab52e25648bec8a2
+size 43973949
diff --git a/models/Demucs/Demucs_v1/tasnet_extra.th b/models/Demucs/Demucs_v1/tasnet_extra.th
new file mode 100644
index 0000000000000000000000000000000000000000..b10117546e19559449b33ac4d5a1a4d6addf0bfc
--- /dev/null
+++ b/models/Demucs/Demucs_v1/tasnet_extra.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ccbece3acd98785a367211c9c35b1eadae8d148b0d37fe5a5494d6d335269b5
+size 43973949
diff --git a/models/Demucs/Demucs_v2/demucs-e07c671f.th b/models/Demucs/Demucs_v2/demucs-e07c671f.th
new file mode 100644
index 0000000000000000000000000000000000000000..c815f961dfd091478311782ab0cda288256e4b95
--- /dev/null
+++ b/models/Demucs/Demucs_v2/demucs-e07c671f.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e07c671f8da9f16450730df0e4815e97c07555f3dc0a06d236201c268b372e33
+size 1062738817
diff --git a/models/Demucs/Demucs_v2/demucs48_hq-28a1282c.th b/models/Demucs/Demucs_v2/demucs48_hq-28a1282c.th
new file mode 100644
index 0000000000000000000000000000000000000000..3932ef5510af903d40b5699be989567459e3b4d3
--- /dev/null
+++ b/models/Demucs/Demucs_v2/demucs48_hq-28a1282c.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28a1282c89ced9b343d284f61ed32cc3d03ce3e9d356076539924189d94c4eb5
+size 597870849
diff --git a/models/Demucs/Demucs_v2/demucs_extra-3646af93.th b/models/Demucs/Demucs_v2/demucs_extra-3646af93.th
new file mode 100644
index 0000000000000000000000000000000000000000..1ef99fc7f1d42262feabdec9174461dd3a61af35
--- /dev/null
+++ b/models/Demucs/Demucs_v2/demucs_extra-3646af93.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3646af93cb495138aefd44801dc792cbfdd6e31ebced741c5707ce4461fbc4c6
+size 1062738817
diff --git a/models/Demucs/Demucs_v2/demucs_unittest-09ebc15f.th b/models/Demucs/Demucs_v2/demucs_unittest-09ebc15f.th
new file mode 100644
index 0000000000000000000000000000000000000000..60353ff6c788eb5fbfdb41d880569f27d31f03dc
--- /dev/null
+++ b/models/Demucs/Demucs_v2/demucs_unittest-09ebc15f.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09ebc15ff947a7a8ed10ed6d8a1210f8c33189ea3bb3d0202310cb8d12968157
+size 4194049
diff --git a/models/Demucs/Demucs_v2/tasnet-beb46fac.th b/models/Demucs/Demucs_v2/tasnet-beb46fac.th
new file mode 100644
index 0000000000000000000000000000000000000000..6acabb4e84cce888081f9401698426b829ad1021
--- /dev/null
+++ b/models/Demucs/Demucs_v2/tasnet-beb46fac.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:beb46fac044130652e64e0316447478762d76e03596cabf1cf483f4ac7cfbb0a
+size 44035981
diff --git a/models/Demucs/Demucs_v2/tasnet_extra-df3777b2.th b/models/Demucs/Demucs_v2/tasnet_extra-df3777b2.th
new file mode 100644
index 0000000000000000000000000000000000000000..5040bae34e38f791358cec7cbaa196a7419e72f6
--- /dev/null
+++ b/models/Demucs/Demucs_v2/tasnet_extra-df3777b2.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df3777b2f8a5499ea45b5b29f52c00a013240a333d70fdcd4bc7396325b5e3b8
+size 44035981
diff --git a/models/Demucs/Demucs_v3/0d19c1c6-0f06f20e.th b/models/Demucs/Demucs_v3/0d19c1c6-0f06f20e.th
new file mode 100644
index 0000000000000000000000000000000000000000..dadcebb0f5a2beb40367834688f751c4289634bd
--- /dev/null
+++ b/models/Demucs/Demucs_v3/0d19c1c6-0f06f20e.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f06f20ed6ddc8058fa72ccc4845f3a88916eff7d007b623924193de217bbcf4
+size 178048329
diff --git a/models/Demucs/Demucs_v3/14fc6a69-a89dd0ee.th b/models/Demucs/Demucs_v3/14fc6a69-a89dd0ee.th
new file mode 100644
index 0000000000000000000000000000000000000000..b6e69d3269881c5c823bcbf8973bd40790053cc1
--- /dev/null
+++ b/models/Demucs/Demucs_v3/14fc6a69-a89dd0ee.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a89dd0eeb547221dcfd6c0a47baa768ce0bce548eaf86b9a404d6b1088b5d22f
+size 38491885
diff --git a/models/Demucs/Demucs_v3/1ef250f1-592467ce.th b/models/Demucs/Demucs_v3/1ef250f1-592467ce.th
new file mode 100644
index 0000000000000000000000000000000000000000..f913ca14132baa329bafe8e879d955556dbe43da
--- /dev/null
+++ b/models/Demucs/Demucs_v3/1ef250f1-592467ce.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:592467cef0b0d878bc66fe38c8456353ff435334331baab08e1dc82beeefd596
+size 267602121
diff --git a/models/Demucs/Demucs_v3/305bc58f-18378783.th b/models/Demucs/Demucs_v3/305bc58f-18378783.th
new file mode 100644
index 0000000000000000000000000000000000000000..fec42848ccd1bf97713f104412e119e7ee7f3ce2
--- /dev/null
+++ b/models/Demucs/Demucs_v3/305bc58f-18378783.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18378783cf76d44213f7a0c87da872a35b976c31ac11c4b367438b4674a2f0fe
+size 46847123
diff --git a/models/Demucs/Demucs_v3/42e558d4-196e0e1b.th b/models/Demucs/Demucs_v3/42e558d4-196e0e1b.th
new file mode 100644
index 0000000000000000000000000000000000000000..c1d1cadb008cd5be64410368e635c782b65ce37e
--- /dev/null
+++ b/models/Demucs/Demucs_v3/42e558d4-196e0e1b.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:196e0e1bc5e83ea6ffbf0750b1abae1305b83de12ec98e5b50915a7139c52759
+size 58227087
diff --git a/models/Demucs/Demucs_v3/464b36d7-e5a9386e.th b/models/Demucs/Demucs_v3/464b36d7-e5a9386e.th
new file mode 100644
index 0000000000000000000000000000000000000000..293e970712d239a7c8ca5362cc71d954b28c036f
--- /dev/null
+++ b/models/Demucs/Demucs_v3/464b36d7-e5a9386e.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5a9386ecbf6f30bb2bdc7ae162f471d87f3b5bffba1a8cb2cebe4403280967e
+size 38893153
diff --git a/models/Demucs/Demucs_v3/5d2d6c55-db83574e.th b/models/Demucs/Demucs_v3/5d2d6c55-db83574e.th
new file mode 100644
index 0000000000000000000000000000000000000000..567bd6fda5a4aacaa3c06a7d49c19c06b4bf2f3b
--- /dev/null
+++ b/models/Demucs/Demucs_v3/5d2d6c55-db83574e.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db83574e05b2308f76e2764819da673f2d16d437b9e619f5fcb72f275fc0e24f
+size 167391595
diff --git a/models/Demucs/Demucs_v3/6b9c2ca1-3fd82607.th b/models/Demucs/Demucs_v3/6b9c2ca1-3fd82607.th
new file mode 100644
index 0000000000000000000000000000000000000000..01a4edb986fb6b1d1e45ab7614a62b6ef0ea61f9
--- /dev/null
+++ b/models/Demucs/Demucs_v3/6b9c2ca1-3fd82607.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fd82607b051e9f8ed0e86a30791023a3b589a427fb994c8bbfdff8c1903eeff
+size 59648321
diff --git a/models/Demucs/Demucs_v3/7d865c68-3d5dd56b.th b/models/Demucs/Demucs_v3/7d865c68-3d5dd56b.th
new file mode 100644
index 0000000000000000000000000000000000000000..29a3bbc32d4aa5fe87611734dbb4e4e16386f9d2
--- /dev/null
+++ b/models/Demucs/Demucs_v3/7d865c68-3d5dd56b.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d5dd56b5bc986f136dff98655ded22b2b033f465ccec7a28640a6b15fd71ed6
+size 167918783
diff --git a/models/Demucs/Demucs_v3/7ecf8ec1-70f50cc9.th b/models/Demucs/Demucs_v3/7ecf8ec1-70f50cc9.th
new file mode 100644
index 0000000000000000000000000000000000000000..e711c6c99ec4bcd406590accf2e3ed559861673d
--- /dev/null
+++ b/models/Demucs/Demucs_v3/7ecf8ec1-70f50cc9.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70f50cc947d08f32e6dd8e2b687d398fa5ef9e51d1bd7600e32205d1f44be6b9
+size 178048329
diff --git a/models/Demucs/Demucs_v3/7fd6ef75-a905dd85.th b/models/Demucs/Demucs_v3/7fd6ef75-a905dd85.th
new file mode 100644
index 0000000000000000000000000000000000000000..1468b0e3c5a6a61e87b7928041ec093439c5b978
--- /dev/null
+++ b/models/Demucs/Demucs_v3/7fd6ef75-a905dd85.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a905dd8548f7389f1a3686c24c09d7bee61b716e3372e778e2acec88333ecdf4
+size 39436529
diff --git a/models/Demucs/Demucs_v3/83fc094f-4a16d450.th b/models/Demucs/Demucs_v3/83fc094f-4a16d450.th
new file mode 100644
index 0000000000000000000000000000000000000000..aa77798ab8936883f3807af9ee815259c6a6d894
--- /dev/null
+++ b/models/Demucs/Demucs_v3/83fc094f-4a16d450.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a16d450fd8c9277494e23865cadaa4a3e64141c45411d84243b8c95ea4b7cae
+size 50756993
diff --git a/models/Demucs/Demucs_v3/902315c2-b39ce9c9.th b/models/Demucs/Demucs_v3/902315c2-b39ce9c9.th
new file mode 100644
index 0000000000000000000000000000000000000000..5193661d2f83a3856c08a696fb2db92c37bf5b83
--- /dev/null
+++ b/models/Demucs/Demucs_v3/902315c2-b39ce9c9.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b39ce9c97b5603fbaee99ec59fe1d2db570c77a37356c9c85542d881517c3302
+size 167405611
diff --git a/models/Demucs/Demucs_v3/9a6b4851-03af0aa6.th b/models/Demucs/Demucs_v3/9a6b4851-03af0aa6.th
new file mode 100644
index 0000000000000000000000000000000000000000..3d23dd61aedad298a375c6fd44c5b4d7f81adc23
--- /dev/null
+++ b/models/Demucs/Demucs_v3/9a6b4851-03af0aa6.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03af0aa64af2c4f0795659c265fdead238fab31c064a8a4b5b051c1b22c8ad48
+size 267602121
diff --git a/models/Demucs/Demucs_v3/UVR_Demucs_Model_1.yaml b/models/Demucs/Demucs_v3/UVR_Demucs_Model_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0995b75dd7149595388255185ab68a1a81ea9477
--- /dev/null
+++ b/models/Demucs/Demucs_v3/UVR_Demucs_Model_1.yaml
@@ -0,0 +1,2 @@
+models: ['ebf34a2db']
+segment: 44
\ No newline at end of file
diff --git a/models/Demucs/Demucs_v3/a1d90b5c-ae9d2452.th b/models/Demucs/Demucs_v3/a1d90b5c-ae9d2452.th
new file mode 100644
index 0000000000000000000000000000000000000000..1e278992fc105bd35bd9ba7fc6426f52f367214c
--- /dev/null
+++ b/models/Demucs/Demucs_v3/a1d90b5c-ae9d2452.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae9d245283bf24b552913ee233a1101dcd0aeaed59b1c0a2da0e1f6eda15101b
+size 167391595
diff --git a/models/Demucs/Demucs_v3/b72baf4e-8778635e.th b/models/Demucs/Demucs_v3/b72baf4e-8778635e.th
new file mode 100644
index 0000000000000000000000000000000000000000..24c706aacecca159bfbedd0987fe69d84290fb24
--- /dev/null
+++ b/models/Demucs/Demucs_v3/b72baf4e-8778635e.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8778635e98a9d4b34b3d132300597758f8f3f2978510eb20c202fd0d54a7bbf0
+size 44368175
diff --git a/models/Demucs/Demucs_v3/c511e2ab-fe698775.th b/models/Demucs/Demucs_v3/c511e2ab-fe698775.th
new file mode 100644
index 0000000000000000000000000000000000000000..b72d9db5f03bfcc1922b7ae359331c062c483af5
--- /dev/null
+++ b/models/Demucs/Demucs_v3/c511e2ab-fe698775.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe6987756a7087d339bf63b19bb481b12cea02d3bc0de7583df7597210209649
+size 167334095
diff --git a/models/Demucs/Demucs_v3/cfa93e08-61801ae1.th b/models/Demucs/Demucs_v3/cfa93e08-61801ae1.th
new file mode 100644
index 0000000000000000000000000000000000000000..ae63ff8217e90e0549f57cfc43e388e6b27e645f
--- /dev/null
+++ b/models/Demucs/Demucs_v3/cfa93e08-61801ae1.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61801ae1567d606c97a9c3469e943ae306d0a873eeb60d623ae7cfc7042b3f68
+size 167399275
diff --git a/models/Demucs/Demucs_v3/e51eebcc-c1b80bdd.th b/models/Demucs/Demucs_v3/e51eebcc-c1b80bdd.th
new file mode 100644
index 0000000000000000000000000000000000000000..a786c99190033721bbdb524823ae601a768eb012
--- /dev/null
+++ b/models/Demucs/Demucs_v3/e51eebcc-c1b80bdd.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1b80bdd6de58274abf359e66822a76f49ce2b9f086fc5dc917ac14598e6bebf
+size 167399275
diff --git a/models/Demucs/Demucs_v3/ebf34a2db.th b/models/Demucs/Demucs_v3/ebf34a2db.th
new file mode 100644
index 0000000000000000000000000000000000000000..1b95ef445d00f2b9f43d61727c76351d8bb2213f
--- /dev/null
+++ b/models/Demucs/Demucs_v3/ebf34a2db.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbdd521017d706829716055df8a86e7ee5d05a590508488c82ffb551c6edb918
+size 167390507
diff --git a/models/Demucs/Demucs_v3/fa0cb7f9-100d8bf4.th b/models/Demucs/Demucs_v3/fa0cb7f9-100d8bf4.th
new file mode 100644
index 0000000000000000000000000000000000000000..0d3bea1cb75cda1c081f00e829a2733a4e509e66
--- /dev/null
+++ b/models/Demucs/Demucs_v3/fa0cb7f9-100d8bf4.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:100d8bf4dc5e228edfd9bb7a2ec44ad9f44d8f0a845964d38ee8c11ea8ca1399
+size 167405675
diff --git a/models/Demucs/Demucs_v3/mdx.yaml b/models/Demucs/Demucs_v3/mdx.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4e81a5064b86698bf322a4fe11c23451924a5fc5
--- /dev/null
+++ b/models/Demucs/Demucs_v3/mdx.yaml
@@ -0,0 +1,8 @@
+models: ['0d19c1c6', '7ecf8ec1', 'c511e2ab', '7d865c68']
+weights: [
+ [1., 1., 0., 0.],
+ [0., 1., 0., 0.],
+ [1., 0., 1., 1.],
+ [1., 0., 1., 1.],
+]
+segment: 44
diff --git a/models/Demucs/Demucs_v3/mdx_extra.yaml b/models/Demucs/Demucs_v3/mdx_extra.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..847bf6659acce8c31aa9535923e2fd317a05a9e6
--- /dev/null
+++ b/models/Demucs/Demucs_v3/mdx_extra.yaml
@@ -0,0 +1,2 @@
+models: ['e51eebcc', 'a1d90b5c', '5d2d6c55', 'cfa93e08']
+segment: 44
\ No newline at end of file
diff --git a/models/Demucs/Demucs_v3/mdx_extra_q.yaml b/models/Demucs/Demucs_v3/mdx_extra_q.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..87702bc87e4a32acee816e9ffa9a73e8d18404ac
--- /dev/null
+++ b/models/Demucs/Demucs_v3/mdx_extra_q.yaml
@@ -0,0 +1,2 @@
+models: ['83fc094f', '464b36d7', '14fc6a69', '7fd6ef75']
+segment: 44
diff --git a/models/Demucs/Demucs_v3/mdx_q.yaml b/models/Demucs/Demucs_v3/mdx_q.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..827d2c66515e01d74f374e53438e73c1cd927f96
--- /dev/null
+++ b/models/Demucs/Demucs_v3/mdx_q.yaml
@@ -0,0 +1,8 @@
+models: ['6b9c2ca1', 'b72baf4e', '42e558d4', '305bc58f']
+weights: [
+ [1., 1., 0., 0.],
+ [0., 1., 0., 0.],
+ [1., 0., 1., 1.],
+ [1., 0., 1., 1.],
+]
+segment: 44
diff --git a/models/Demucs/Demucs_v3/repro_mdx_a.yaml b/models/Demucs/Demucs_v3/repro_mdx_a.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..691abc2c80fc959a4381c3b716c6f21adfc61fb4
--- /dev/null
+++ b/models/Demucs/Demucs_v3/repro_mdx_a.yaml
@@ -0,0 +1,2 @@
+models: ['9a6b4851', '1ef250f1', 'fa0cb7f9', '902315c2']
+segment: 44
diff --git a/models/Demucs/Demucs_v3/repro_mdx_a_hybrid_only.yaml b/models/Demucs/Demucs_v3/repro_mdx_a_hybrid_only.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..78eb8e0c9a51828de1dd52c82743d2983353a8e6
--- /dev/null
+++ b/models/Demucs/Demucs_v3/repro_mdx_a_hybrid_only.yaml
@@ -0,0 +1,2 @@
+models: ['fa0cb7f9', '902315c2', 'fa0cb7f9', '902315c2']
+segment: 44
diff --git a/models/Demucs/Demucs_v3/repro_mdx_a_time_only.yaml b/models/Demucs/Demucs_v3/repro_mdx_a_time_only.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d5d16ea8bc419198692fc993b560c1bd3f8eb8c9
--- /dev/null
+++ b/models/Demucs/Demucs_v3/repro_mdx_a_time_only.yaml
@@ -0,0 +1,2 @@
+models: ['9a6b4851', '9a6b4851', '1ef250f1', '1ef250f1']
+segment: 44
diff --git a/models/Demucs/Demucs_v4/04573f0d-f3cf25b2.th b/models/Demucs/Demucs_v4/04573f0d-f3cf25b2.th
new file mode 100644
index 0000000000000000000000000000000000000000..87e7befdc8b254d6cd7bfc8c2739e405783b7c3b
--- /dev/null
+++ b/models/Demucs/Demucs_v4/04573f0d-f3cf25b2.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3cf25b222c4eed7cd49dd8b2c9597d50c18bd154090f7b919cfa5f93cf22c49
+size 84141271
diff --git a/models/Demucs/Demucs_v4/5c90dfd2-34c22ccb.th b/models/Demucs/Demucs_v4/5c90dfd2-34c22ccb.th
new file mode 100644
index 0000000000000000000000000000000000000000..43e9de1c47734b05e2e2d9e8f2a70a548970326a
--- /dev/null
+++ b/models/Demucs/Demucs_v4/5c90dfd2-34c22ccb.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34c22ccb381c6f9fdbf324f04e1e2fe21aaaf293f5ded163a162697ff9a02ddd
+size 54996327
diff --git a/models/Demucs/Demucs_v4/75fc33f5-1941ce65.th b/models/Demucs/Demucs_v4/75fc33f5-1941ce65.th
new file mode 100644
index 0000000000000000000000000000000000000000..7401e71bfbf0266d8004647514c8019101a4590b
--- /dev/null
+++ b/models/Demucs/Demucs_v4/75fc33f5-1941ce65.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1941ce654b11df4132b9f4eae408556b4c83fad6fe26b4bc0dbcb36b975befb3
+size 167407275
diff --git a/models/Demucs/Demucs_v4/92cfc3b6-ef3bcb9c.th b/models/Demucs/Demucs_v4/92cfc3b6-ef3bcb9c.th
new file mode 100644
index 0000000000000000000000000000000000000000..dc5aeb5ce23edf05faa98f84e7f8df6e3d121f1f
--- /dev/null
+++ b/models/Demucs/Demucs_v4/92cfc3b6-ef3bcb9c.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef3bcb9c8b40d14ae5d51b6db2587339cc12c6b77c0be151ce6d69002e087bf2
+size 84141271
diff --git a/models/Demucs/Demucs_v4/955717e8-8726e21a.th b/models/Demucs/Demucs_v4/955717e8-8726e21a.th
new file mode 100644
index 0000000000000000000000000000000000000000..94c5ce7eda92bb105307197a7be1d9635b417a1f
--- /dev/null
+++ b/models/Demucs/Demucs_v4/955717e8-8726e21a.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8726e21a993978c7ba086d3872e7608d7d5bfca646ca4aca459ffda844faa8b4
+size 84141911
diff --git a/models/Demucs/Demucs_v4/d12395a8-e57c48e6.th b/models/Demucs/Demucs_v4/d12395a8-e57c48e6.th
new file mode 100644
index 0000000000000000000000000000000000000000..b2516f0ba249a05c119eba7e3c52addc11785d41
--- /dev/null
+++ b/models/Demucs/Demucs_v4/d12395a8-e57c48e6.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e57c48e6b0e38af4f7118d7bd08c49f0a0c0edf7d09143bdd902ea0d237303e6
+size 84141271
diff --git a/models/Demucs/Demucs_v4/f7e0c4bc-ba3fe64a.th b/models/Demucs/Demucs_v4/f7e0c4bc-ba3fe64a.th
new file mode 100644
index 0000000000000000000000000000000000000000..1d2f2cebac207dbb924b9011860f22621184d741
--- /dev/null
+++ b/models/Demucs/Demucs_v4/f7e0c4bc-ba3fe64a.th
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba3fe64ae8ef66ac9a4857222ce48efbdc5eb3ad375cb79dd13debee5aaa4066
+size 84141271
diff --git a/models/Demucs/Demucs_v4/hdemucs_mmi.yaml b/models/Demucs/Demucs_v4/hdemucs_mmi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0ea089139bfbef4a1126ab25e93c3dc380a90b46
--- /dev/null
+++ b/models/Demucs/Demucs_v4/hdemucs_mmi.yaml
@@ -0,0 +1,2 @@
+models: ['75fc33f5']
+segment: 44
diff --git a/models/Demucs/Demucs_v4/htdemucs.yaml b/models/Demucs/Demucs_v4/htdemucs.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0d5f2089fa3e1a0335d93de070f6802598cd4a4d
--- /dev/null
+++ b/models/Demucs/Demucs_v4/htdemucs.yaml
@@ -0,0 +1 @@
+models: ['955717e8']
diff --git a/models/Demucs/Demucs_v4/htdemucs_6s.yaml b/models/Demucs/Demucs_v4/htdemucs_6s.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..651a0fa536038a3e6d650f7b2bcc0b50ff7a4be9
--- /dev/null
+++ b/models/Demucs/Demucs_v4/htdemucs_6s.yaml
@@ -0,0 +1 @@
+models: ['5c90dfd2']
diff --git a/models/Demucs/Demucs_v4/htdemucs_ft.yaml b/models/Demucs/Demucs_v4/htdemucs_ft.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ba5c69c272770f5e5db3dd5fcda75b94ba523250
--- /dev/null
+++ b/models/Demucs/Demucs_v4/htdemucs_ft.yaml
@@ -0,0 +1,7 @@
+models: ['f7e0c4bc', 'd12395a8', '92cfc3b6', '04573f0d']
+weights: [
+ [1., 0., 0., 0.],
+ [0., 1., 0., 0.],
+ [0., 0., 1., 0.],
+ [0., 0., 0., 1.],
+]
\ No newline at end of file
diff --git a/models/MDX23C/MDX23C-8KFFT-InstVoc_HQ.ckpt b/models/MDX23C/MDX23C-8KFFT-InstVoc_HQ.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..d19b34ba845c464fd607a4ce4c9dd3f794d0bd23
--- /dev/null
+++ b/models/MDX23C/MDX23C-8KFFT-InstVoc_HQ.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49d51472769e34a2501cd1da782346a3212555c3a5619fc2c53507445528d816
+size 448101203
diff --git a/models/MDX23C/MDX23C-8KFFT-InstVoc_HQ_2.ckpt b/models/MDX23C/MDX23C-8KFFT-InstVoc_HQ_2.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..486c55bd1538c5471f02f9e08af5ceb8baec08e2
--- /dev/null
+++ b/models/MDX23C/MDX23C-8KFFT-InstVoc_HQ_2.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d960d8e40a458120412c1bd807e013d2dbca7b959cc9da2bbcb0eb203d1daea
+size 448093770
diff --git a/models/MDX23C/MDX23C-De-Reverb-aufr33-jarredou.ckpt b/models/MDX23C/MDX23C-De-Reverb-aufr33-jarredou.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..dca4c63709a57308193ba52a2d3309efed5915bc
--- /dev/null
+++ b/models/MDX23C/MDX23C-De-Reverb-aufr33-jarredou.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eae2471b707758d74db38ac1b1d5800e12f57c4e9d1ebbb2faf004b8e086e914
+size 448098867
diff --git a/models/MDX23C/MDX23C-DrumSep-aufr33-jarredou.ckpt b/models/MDX23C/MDX23C-DrumSep-aufr33-jarredou.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..2f97198d19fcae11a05adbd6b2a746e1f466833f
--- /dev/null
+++ b/models/MDX23C/MDX23C-DrumSep-aufr33-jarredou.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2a4aa53eb584d21eead358a4e66d1882ad182911be018f052b5da73be9096d0
+size 437652699
diff --git a/models/MDX23C/MDX23C_D1581.ckpt b/models/MDX23C/MDX23C_D1581.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..62aeea1ec485760c60a560dbcc87e44995964d23
--- /dev/null
+++ b/models/MDX23C/MDX23C_D1581.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7d4d2137f12764950205b095da20032fef1d41f077bacc8582f20ed40e8cb28
+size 183379219
diff --git a/models/MDX23C/config_dereverb_mdx23c.yaml b/models/MDX23C/config_dereverb_mdx23c.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dda1844a2a7562fca0341a6c7cefc2ec11ec8cac
--- /dev/null
+++ b/models/MDX23C/config_dereverb_mdx23c.yaml
@@ -0,0 +1,135 @@
+audio:
+ chunk_size: 261120
+ dim_f: 4096
+ dim_t: 256
+ hop_length: 1024
+ n_fft: 8192
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+
+model:
+ act: gelu
+ bottleneck_factor: 4
+ growth: 128
+ norm: InstanceNorm
+ num_blocks_per_scale: 2
+ num_channels: 128
+ num_scales: 5
+ num_subbands: 4
+ scale:
+ - 2
+ - 2
+
+training:
+ batch_size: 2
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - dry
+ - No dry
+ lr: 1.0e-06
+ patience: 4
+ reduce_factor: 0.93
+ target_instrument: null
+ num_epochs: 40
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adamw
+ read_metadata_procs: 8 # Number of processes to use during metadata reading for dataset. Can speed up metadata generation
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: false # enable or disable all augmentations (a quick way to turn them all off if needed)
+ loudness: true # randomly change the loudness of each stem within the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+ - 0.2
+ - 0.02
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+
+ all:
+ channel_shuffle: 0.5 # Set 0 or lower to disable
+ random_inverse: 0.05 # inverse track (a lower probability is better)
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
+
+ # pedalboard chorus block
+ pedalboard_chorus: 0.001
+ pedalboard_chorus_rate_hz_min: 1.0
+ pedalboard_chorus_rate_hz_max: 7.0
+ pedalboard_chorus_depth_min: 0.25
+ pedalboard_chorus_depth_max: 0.95
+ pedalboard_chorus_centre_delay_ms_min: 3
+ pedalboard_chorus_centre_delay_ms_max: 10
+ pedalboard_chorus_feedback_min: 0.0
+ pedalboard_chorus_feedback_max: 0.01
+ pedalboard_chorus_mix_min: 0.1
+ pedalboard_chorus_mix_max: 0.9
+
+ # pedalboard phazer block
+ pedalboard_phazer: 0.001
+ pedalboard_phazer_rate_hz_min: 1.0
+ pedalboard_phazer_rate_hz_max: 10.0
+ pedalboard_phazer_depth_min: 0.25
+ pedalboard_phazer_depth_max: 0.95
+ pedalboard_phazer_centre_frequency_hz_min: 200
+ pedalboard_phazer_centre_frequency_hz_max: 12000
+ pedalboard_phazer_feedback_min: 0.0
+ pedalboard_phazer_feedback_max: 0.5
+ pedalboard_phazer_mix_min: 0.1
+ pedalboard_phazer_mix_max: 0.9
+
+ # pedalboard pitch shift block
+ pedalboard_pitch_shift: 0.01
+ pedalboard_pitch_shift_semitones_min: -7
+ pedalboard_pitch_shift_semitones_max: 7
+
+ # pedalboard resample block
+ pedalboard_resample: 0.001
+ pedalboard_resample_target_sample_rate_min: 4000
+ pedalboard_resample_target_sample_rate_max: 44100
+
+ mp3_compression_min_bitrate: 32
+ mp3_compression_max_bitrate: 320
+ mp3_compression_backend: "lameenc"
+
+ dry:
+ # pedalboard distortion block
+ pedalboard_distortion: 0.001
+ pedalboard_distortion_drive_db_min: 1.0
+ pedalboard_distortion_drive_db_max: 25.0
+
+ tanh_distortion: 0.05
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 0.7
+ # pedalboard bitcrash block
+ pedalboard_bitcrash: 0.005
+ pedalboard_bitcrash_bit_depth_min: 4
+ pedalboard_bitcrash_bit_depth_max: 16
+
+ seven_band_parametric_eq: 0.24
+ seven_band_parametric_eq_min_gain_db: -9
+ seven_band_parametric_eq_max_gain_db: 9
+
+ gaussian_noise: 0.005
+ gaussian_noise_min_amplitude: 0.001
+ gaussian_noise_max_amplitude: 0.01
+
+ time_stretch: 0.01
+ time_stretch_min_rate: 0.8
+ time_stretch_max_rate: 1.25
+ other:
+ seven_band_parametric_eq: 0.24
+ seven_band_parametric_eq_min_gain_db: -9
+ seven_band_parametric_eq_max_gain_db: 9
+
+inference:
+ batch_size: 2
+ dim_t: 256
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/MDX23C/config_drumsep_mdx23c.yaml b/models/MDX23C/config_drumsep_mdx23c.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7b06ddb77b6d2df982922cabaa9805a189f4c6c6
--- /dev/null
+++ b/models/MDX23C/config_drumsep_mdx23c.yaml
@@ -0,0 +1,87 @@
+audio:
+ chunk_size: 130560
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 512
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+
+model:
+ act: gelu
+ bottleneck_factor: 4
+ growth: 128
+ norm: InstanceNorm
+ num_blocks_per_scale: 2
+ num_channels: 128
+ num_scales: 5
+ num_subbands: 4
+ scale:
+ - 2
+ - 2
+
+training:
+ batch_size: 12
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - kick
+ - snare
+ - toms
+ - hh
+ - ride
+ - crash
+ lr: 9.0e-05
+ patience: 30
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 1268
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: true # enable or disable all augmentations (a quick way to turn them all off if needed)
+ loudness: true # randomly change the loudness of each stem within the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+ - 0.2
+ - 0.02
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+
+ # apply mp3 compression to mixture only (emulate downloading mp3 from internet)
+ mp3_compression_on_mixture: 0.0
+ mp3_compression_on_mixture_bitrate_min: 32
+ mp3_compression_on_mixture_bitrate_max: 320
+ mp3_compression_on_mixture_backend: "lameenc"
+
+ all:
+ channel_shuffle: 0.5 # Set 0 or lower to disable
+ random_inverse: 0.01 # inverse track (a lower probability is better)
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
+ mp3_compression: 0.0
+ mp3_compression_min_bitrate: 32
+ mp3_compression_max_bitrate: 320
+ mp3_compression_backend: "lameenc"
+ pitch_shift: 0.1
+ pitch_shift_min_semitones: -3
+ pitch_shift_max_semitones: 3
+ seven_band_parametric_eq: 0.5
+ seven_band_parametric_eq_min_gain_db: -6
+ seven_band_parametric_eq_max_gain_db: 6
+ tanh_distortion: 0.2
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 0.5
+
+inference:
+ batch_size: 1
+ dim_t: 256
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/MDX23C/config_mdx23c_similarity.yaml b/models/MDX23C/config_mdx23c_similarity.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9ce8239fc926820db231cb1a240d20a1ff3eca0e
--- /dev/null
+++ b/models/MDX23C/config_mdx23c_similarity.yaml
@@ -0,0 +1,47 @@
+audio:
+ chunk_size: 130560
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 512
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+
+model:
+ act: gelu
+ bottleneck_factor: 4
+ growth: 128
+ norm: InstanceNorm
+ num_blocks_per_scale: 2
+ num_channels: 128
+ num_scales: 5
+ num_subbands: 4
+ scale:
+ - 2
+ - 2
+
+training:
+ batch_size: 2
+ gradient_accumulation_steps: 3
+ grad_clip: 0
+ instruments:
+ - Similarity
+ - Difference
+ lr: 1.0
+ patience: 15
+ reduce_factor: 0.95
+ target_instrument: Similarity
+ num_epochs: 1000
+ num_steps: 2235
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: prodigy
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 8
+ dim_t: 256
+ num_overlap: 8
diff --git a/models/MDX23C/model_2_stem_061321.yaml b/models/MDX23C/model_2_stem_061321.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d5412e0e16ea287b59ef6c84435fd81169d81d53
--- /dev/null
+++ b/models/MDX23C/model_2_stem_061321.yaml
@@ -0,0 +1,36 @@
+audio:
+ chunk_size: 260096
+ dim_f: 4096
+ dim_t: 256
+ hop_length: 2048
+ n_fft: 12288
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+model:
+ act: gelu
+ bottleneck_factor: 4
+ growth: 64
+ norm: InstanceNorm
+ num_blocks_per_scale: 2
+ num_channels: 128
+ num_scales: 5
+ num_subbands: 4
+ scale:
+ - 2
+ - 2
+ name: epoch_10.ckpt
+training:
+ batch_size: 16
+ grad_clip: 0
+ instruments:
+ - Vocals
+ - Instrumental
+ lr: 5.0e-05
+ target_instrument: null
+ num_epochs: 100
+ num_steps: 1000
+inference:
+ batch_size: 1
+ dim_t: 256
+ num_overlap: 8
diff --git a/models/MDX23C/model_2_stem_full_band_8k.yaml b/models/MDX23C/model_2_stem_full_band_8k.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..207aa712b561221c136576db6a6e7d6e35915ba4
--- /dev/null
+++ b/models/MDX23C/model_2_stem_full_band_8k.yaml
@@ -0,0 +1,43 @@
+audio:
+ chunk_size: 261120
+ dim_f: 4096
+ dim_t: 256
+ hop_length: 1024
+ n_fft: 8192
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+model:
+ act: gelu
+ bottleneck_factor: 4
+ growth: 128
+ norm: InstanceNorm
+ num_blocks_per_scale: 2
+ num_channels: 128
+ num_scales: 5
+ num_subbands: 4
+ scale:
+ - 2
+ - 2
+training:
+ batch_size: 6
+ grad_clip: 0
+ instruments:
+ - Vocals
+ - Instrumental
+ lr: 1.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: 1
+ augmentation_type: simple1
+ augmentation_mix: true
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+inference:
+ batch_size: 1
+ dim_t: 256
+ num_overlap: 8
\ No newline at end of file
diff --git a/models/MDX23C/model_mdx23c_ep_271_l1_freq_72.2383.ckpt b/models/MDX23C/model_mdx23c_ep_271_l1_freq_72.2383.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..d0268f2ad093d5b27ca484f20df986a025794a11
--- /dev/null
+++ b/models/MDX23C/model_mdx23c_ep_271_l1_freq_72.2383.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1227f8d97c7436004d03e46091427393abefebcc08ce53ef30082742c4e482f7
+size 437613512
diff --git a/models/MDXNet/Kim_Inst.onnx b/models/MDXNet/Kim_Inst.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..a1cfbc17713f1caef9d11696002bb78aae781ff2
--- /dev/null
+++ b/models/MDXNet/Kim_Inst.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86b1940e7122fbdd2beadc65507cbff6c352d79012a8a7e60d56db98532af5f7
+size 66759214
diff --git a/models/MDXNet/Kim_Vocal_1.onnx b/models/MDXNet/Kim_Vocal_1.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..4d6e31ac4c3e53021d67bf71f7aa576bbff391b5
--- /dev/null
+++ b/models/MDXNet/Kim_Vocal_1.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f313140ef8fecc3041881b60ecb993d985a0281a138b2fb634aa8901aebc38cb
+size 66759214
diff --git a/models/MDXNet/Kim_Vocal_2.onnx b/models/MDXNet/Kim_Vocal_2.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..89d344b3926e4d0d7ee41b78e7f387558ec2acdc
--- /dev/null
+++ b/models/MDXNet/Kim_Vocal_2.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce74ef3b6a6024ce44211a07be9cf8bc6d87728cc852a68ab34eb8e58cde9c8b
+size 66759214
diff --git a/models/MDXNet/Reverb_HQ_By_FoxJoy.onnx b/models/MDXNet/Reverb_HQ_By_FoxJoy.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..0af4086572259731e9237a7d5acc2254f2d30cf5
--- /dev/null
+++ b/models/MDXNet/Reverb_HQ_By_FoxJoy.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:233bb5c6aaa365e568659a0a81211746fa881f8f47f82d9e864fce1f7692db80
+size 66780123
diff --git a/models/MDXNet/UVR-MDX-NET-Inst_1.onnx b/models/MDXNet/UVR-MDX-NET-Inst_1.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..86732ec554ea5624474d9d109f6a72b3570fdb34
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET-Inst_1.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ca53f94b7a0cbb04fcfcc8f3ea5ec1ae22cd8ad044f5e673588859f83976f5e
+size 66759214
diff --git a/models/MDXNet/UVR-MDX-NET-Inst_2.onnx b/models/MDXNet/UVR-MDX-NET-Inst_2.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..3d7d5ea74c2caf49634f847217378bb626e2e77a
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET-Inst_2.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3a96a664d28b52db9def0a9cae9a16dbb524d8325bfe8f0ac64ac5d231456bc
+size 66759214
diff --git a/models/MDXNet/UVR-MDX-NET-Inst_3.onnx b/models/MDXNet/UVR-MDX-NET-Inst_3.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..548cacd28b73e2f76dde5f4281f1e0d0dcf537e5
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET-Inst_3.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b7834e2972158d8c9864e7376e3a7d084079c80a23f38dc31c4b0a4e901a1cb
+size 66759214
diff --git a/models/MDXNet/UVR-MDX-NET-Inst_HQ_1.onnx b/models/MDXNet/UVR-MDX-NET-Inst_HQ_1.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..3302ace57202067fd1e9c709d76bc8d347658f5d
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET-Inst_HQ_1.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38a045c4ded87e3bf97b609ec5be7910e8a7cecec455f507227ab12b5e29f7f9
+size 66759214
diff --git a/models/MDXNet/UVR-MDX-NET-Inst_HQ_2.onnx b/models/MDXNet/UVR-MDX-NET-Inst_HQ_2.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..1340d10933420baf427d06cbf9a51ac7903b5398
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET-Inst_HQ_2.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:197f8ab296df850f961e68c595f6649acb7d9e621b5600b460f3458967299112
+size 66759214
diff --git a/models/MDXNet/UVR-MDX-NET-Inst_HQ_3.onnx b/models/MDXNet/UVR-MDX-NET-Inst_HQ_3.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..f2b6b241d9246f392067e0717c8b252857022e6c
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET-Inst_HQ_3.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:317554b07fe1ea5279a77f2b1520a41ea4b93432560c4ffd08792c30fddf9adc
+size 66759214
diff --git a/models/MDXNet/UVR-MDX-NET-Inst_HQ_4.onnx b/models/MDXNet/UVR-MDX-NET-Inst_HQ_4.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..45b11285dea638eeecfe4487dfc39775da075b14
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET-Inst_HQ_4.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c4b5b9b05090fdf238f38ba5046813982d50e2a652e9cb3324ea79720c3c9c8
+size 59074342
diff --git a/models/MDXNet/UVR-MDX-NET-Inst_HQ_5.onnx b/models/MDXNet/UVR-MDX-NET-Inst_HQ_5.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..e674c464504ee1cddb1371129a4314eacabbca64
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET-Inst_HQ_5.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:811cb24095d865763752310848b7ec86aeede0626cb05749ab35350e46897000
+size 59074342
diff --git a/models/MDXNet/UVR-MDX-NET-Inst_Main.onnx b/models/MDXNet/UVR-MDX-NET-Inst_Main.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..3c1ab77a4f5874c013ff0269cc926fcc9a7ab466
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET-Inst_Main.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ab401dfe4a548b87deb64f975294bd56ff946aa32903f53b4b24bb13b2cce1e
+size 52786726
diff --git a/models/MDXNet/UVR-MDX-NET-Inst_full_292.onnx b/models/MDXNet/UVR-MDX-NET-Inst_full_292.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..9f6c60def7873f03a26391afabb238103fc5c318
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET-Inst_full_292.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:020f6b65fa219fb7c285e4f3fc2863bf22daf03c4c93e547b6d13d5f2757a7ec
+size 66759214
diff --git a/models/MDXNet/UVR-MDX-NET-Voc_FT.onnx b/models/MDXNet/UVR-MDX-NET-Voc_FT.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..3048949a6f427d212f310e9a13494306da6e00ec
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET-Voc_FT.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:534b2070fcc7df514b13ef660dc8cbb328679c2374d04354a5c42bb14ecce111
+size 66762490
diff --git a/models/MDXNet/UVR-MDX-NET_Crowd_HQ_1.onnx b/models/MDXNet/UVR-MDX-NET_Crowd_HQ_1.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..466c3fa69b05f5b27c19cc11eb23c99909d2a4d0
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET_Crowd_HQ_1.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:313b7bf869c411fdafe005cf0d5a635c405cb3d0df137178a64091952d75225c
+size 59074342
diff --git a/models/MDXNet/UVR-MDX-NET_Inst_187_beta.onnx b/models/MDXNet/UVR-MDX-NET_Inst_187_beta.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..b2ff188d7a8ea6ed25bcf1916359853b8fd0cb8f
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET_Inst_187_beta.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c74566f3c3033cacba996328b2ee90bf77ef79ea6c35b7841df183b7906f54a5
+size 66759214
diff --git a/models/MDXNet/UVR-MDX-NET_Inst_82_beta.onnx b/models/MDXNet/UVR-MDX-NET_Inst_82_beta.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..f91e15a00d9d805623d70dc7d95b69e2e8329ba2
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET_Inst_82_beta.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6c268302f09ab53687072618e056a611272a7e2c3fd9b3b59164da152f3588e
+size 66759214
diff --git a/models/MDXNet/UVR-MDX-NET_Inst_90_beta.onnx b/models/MDXNet/UVR-MDX-NET_Inst_90_beta.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..db0d80d538ff6ecf3a15cabaa0c84500ff0b5ccc
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET_Inst_90_beta.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d902868a46575aea6ee2335736ff3b53faf497a6bdaa1b864e0fd84eb1b42a5
+size 66759214
diff --git a/models/MDXNet/UVR-MDX-NET_Main_340.onnx b/models/MDXNet/UVR-MDX-NET_Main_340.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..75ef0024acd46a2900ea78e948325d044ec7580a
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET_Main_340.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78792633b4007755af12ecde20f709b4f0b99563b1d25fe0a501ed2122aff218
+size 66759214
diff --git a/models/MDXNet/UVR-MDX-NET_Main_390.onnx b/models/MDXNet/UVR-MDX-NET_Main_390.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..a752391e7cfb5b134ebef388734cd1da9eb5dfb5
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET_Main_390.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:286c4f0847ca837e2c3f4c4058f756d5f150cbf080506aa6f33a2847aba92e8c
+size 66759214
diff --git a/models/MDXNet/UVR-MDX-NET_Main_406.onnx b/models/MDXNet/UVR-MDX-NET_Main_406.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..c0f58423f248611332f6a3c33382d0d426c67449
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET_Main_406.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f158816a44eef1f0ba0f48b813cbfcf460ed1c70a754af3609ade44aaf7d1b23
+size 66759214
diff --git a/models/MDXNet/UVR-MDX-NET_Main_427.onnx b/models/MDXNet/UVR-MDX-NET_Main_427.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..f6731f9dcc99c5b1ad0af85dd845280e833acf46
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET_Main_427.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95275802a27801b97e3c0552b6eaa69f9bb3bd7df53cdf0536cce0a753f702cc
+size 66759214
diff --git a/models/MDXNet/UVR-MDX-NET_Main_438.onnx b/models/MDXNet/UVR-MDX-NET_Main_438.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..a6a17015453da90871426317c547193196bd6f1e
--- /dev/null
+++ b/models/MDXNet/UVR-MDX-NET_Main_438.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5e1ad93587a163a0987a0168b99a2ad875c0d9bfc3afb596b7c36b09c7f5c26
+size 66759214
diff --git a/models/MDXNet/UVR_MDXNET_1_9703.onnx b/models/MDXNet/UVR_MDXNET_1_9703.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..50d00b7e34e7763954283b9fc13f2d903072be03
--- /dev/null
+++ b/models/MDXNet/UVR_MDXNET_1_9703.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:229ad3bb96a037e89d8ed86732d6d3675856e6a07c3e3f02896eac01ec7ee4be
+size 29704436
diff --git a/models/MDXNet/UVR_MDXNET_2_9682.onnx b/models/MDXNet/UVR_MDXNET_2_9682.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..1d4c430b6d6a8a7d8abea9bccac1959e4ea69ab0
--- /dev/null
+++ b/models/MDXNet/UVR_MDXNET_2_9682.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1deb7295acd3206bc9582a5d92f1b0a74bf3f41c7c1fb78a0ac0123cde4372db
+size 29704436
diff --git a/models/MDXNet/UVR_MDXNET_3_9662.onnx b/models/MDXNet/UVR_MDXNET_3_9662.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..140b8e3eb273df75384c691462998774b3928a52
--- /dev/null
+++ b/models/MDXNet/UVR_MDXNET_3_9662.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e02220e80d8253f4c2209f8924298b2b686bbdf2868b788ff5500fb9bd94aadc
+size 29704436
diff --git a/models/MDXNet/UVR_MDXNET_9482.onnx b/models/MDXNet/UVR_MDXNET_9482.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..c40c9257378536385f625a75f7bc74cb35c1eaa5
--- /dev/null
+++ b/models/MDXNet/UVR_MDXNET_9482.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4f365207c56deb115bceedff3ad8fe98a751c745f9e370cecec6226b8b47184
+size 29704436
diff --git a/models/MDXNet/UVR_MDXNET_KARA.onnx b/models/MDXNet/UVR_MDXNET_KARA.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..4bfb51872cc19150179df558fb7212e5359427ea
--- /dev/null
+++ b/models/MDXNet/UVR_MDXNET_KARA.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3167c87333a48548413e972a286bf40bf5694001d2853861eb1435953f02d63
+size 29704436
diff --git a/models/MDXNet/UVR_MDXNET_KARA_2.onnx b/models/MDXNet/UVR_MDXNET_KARA_2.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..18aeb6a8d05b84fa74fdd0de7ed917f6df273ebd
--- /dev/null
+++ b/models/MDXNet/UVR_MDXNET_KARA_2.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf32e15105a09c0f7dddd2b67346146334d6f3ecb399ed7638eba2ab07cbf5f4
+size 52786726
diff --git a/models/MDXNet/UVR_MDXNET_Main.onnx b/models/MDXNet/UVR_MDXNET_Main.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..d9645fd239fd211619aabeb431d3093a45ed8b85
--- /dev/null
+++ b/models/MDXNet/UVR_MDXNET_Main.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8289784cda38543ff431add4070662813311a8cccfc0112ca82f76d9dba2b4ca
+size 66759214
diff --git a/models/MDXNet/kuielab_a_bass.onnx b/models/MDXNet/kuielab_a_bass.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..0fe816090f69e7198dc780025d2c37b9dc9a272f
--- /dev/null
+++ b/models/MDXNet/kuielab_a_bass.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c3e77b9963185b1ea6bb46a4b8924137d9370fc1ccdefec7b1b416ef550dcaa
+size 29703204
diff --git a/models/MDXNet/kuielab_a_drums.onnx b/models/MDXNet/kuielab_a_drums.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..e29380d6f0c92fe7a38a201709342a56931489df
--- /dev/null
+++ b/models/MDXNet/kuielab_a_drums.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40f586b7091934dd6f5563f0cba8f14bad57ce88440da1098bf388ea716c2901
+size 29703204
diff --git a/models/MDXNet/kuielab_a_other.onnx b/models/MDXNet/kuielab_a_other.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..7f2c3285075afc5e9ed9ba2fd6882d8df9921763
--- /dev/null
+++ b/models/MDXNet/kuielab_a_other.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b67a1dcb5f232153528c59960b4c7bf8dc736b8114de360af0e719633f53358
+size 29703204
diff --git a/models/MDXNet/kuielab_a_vocals.onnx b/models/MDXNet/kuielab_a_vocals.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..8d80e6d5ea023a73a002c15005551a37c7cfa21c
--- /dev/null
+++ b/models/MDXNet/kuielab_a_vocals.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:daba83c2ee1afee9139766ad64c9b6808d6b6f092fff04bed3338be50baac721
+size 29703204
diff --git a/models/MDXNet/kuielab_b_bass.onnx b/models/MDXNet/kuielab_b_bass.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..1670d9993ab4080cc702ba397b629caae20ac699
--- /dev/null
+++ b/models/MDXNet/kuielab_b_bass.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4b7080fe501d0bece62076c5d4eda4d6590c5207ed78ec84a57bac0740a061d
+size 29703204
diff --git a/models/MDXNet/kuielab_b_drums.onnx b/models/MDXNet/kuielab_b_drums.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..a00f9c84fd964d45c2cdd0470ff9e4b5a7f4ad27
--- /dev/null
+++ b/models/MDXNet/kuielab_b_drums.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6fecee758059b33ed99f6dabba297439b3e7cacfac4b1097bd324aff8052208
+size 21930313
diff --git a/models/MDXNet/kuielab_b_other.onnx b/models/MDXNet/kuielab_b_other.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..e17baa7f40b3dbd1299ee7ba7e6db461682e8320
--- /dev/null
+++ b/models/MDXNet/kuielab_b_other.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0d0b63950ac332333fea2d58f68c92fd3ab0aae071398c2a8beeae1ad15b655
+size 29703204
diff --git a/models/MDXNet/kuielab_b_vocals.onnx b/models/MDXNet/kuielab_b_vocals.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..683e555b8a91e3035cc24bee5b47b7347390eb8a
--- /dev/null
+++ b/models/MDXNet/kuielab_b_vocals.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b7dcb9d878acb0f3e64ff3fd27750faae96577013f6d50f5996875bf4250713
+size 29703204
diff --git a/models/Roformer/BandSplit/BS_Inst_EXP_VRL.ckpt b/models/Roformer/BandSplit/BS_Inst_EXP_VRL.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..72934845a8e32e8a5dc85c35b531767f4b964bcb
--- /dev/null
+++ b/models/Roformer/BandSplit/BS_Inst_EXP_VRL.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c035e2a102243405e45bf33faa175f62fd7118f63b62771fafdf81062b804131
+size 393351501
diff --git a/models/Roformer/BandSplit/BandSplit_Roformer_4stems_FT_by_SYH99999.pth b/models/Roformer/BandSplit/BandSplit_Roformer_4stems_FT_by_SYH99999.pth
new file mode 100644
index 0000000000000000000000000000000000000000..776c9a51208964dd72564cd742d3416a60d02eac
--- /dev/null
+++ b/models/Roformer/BandSplit/BandSplit_Roformer_4stems_FT_by_SYH99999.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a64c61c5972fe3fdf331d98f06bb4d6e237147c83a4bc71fd4e00e254c9b7652
+size 527245586
diff --git a/models/Roformer/BandSplit/bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt b/models/Roformer/BandSplit/bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..201059f3d93490e5cc91f20ea0bb74ae7c0dd20a
--- /dev/null
+++ b/models/Roformer/BandSplit/bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf11736d1b42a11ae55d8299316585921477dd2a671b24b663660846ca9861b
+size 527119779
diff --git a/models/Roformer/BandSplit/bs_roformer_revive_by_unwa.ckpt b/models/Roformer/BandSplit/bs_roformer_revive_by_unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..5df342a5d9eb8dcdc04f5d04fbedef415a7835d3
--- /dev/null
+++ b/models/Roformer/BandSplit/bs_roformer_revive_by_unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1d7e4bfdfef07c6b2bc1d65283a7d03c3c38f8c7dbc8d729b785f93c8b8699a
+size 639326600
diff --git a/models/Roformer/BandSplit/bs_roformer_revive_v2_by_unwa.ckpt b/models/Roformer/BandSplit/bs_roformer_revive_v2_by_unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..6826b2658d30a6b6aa5f3896311f7cd4c3160bb9
--- /dev/null
+++ b/models/Roformer/BandSplit/bs_roformer_revive_v2_by_unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58098850c882a7472dad39f99fb8040ce6eaafe671cfe9881d89aea276bbb5f5
+size 639326600
diff --git a/models/Roformer/BandSplit/bs_roformer_revive_v3_by_unwa.ckpt b/models/Roformer/BandSplit/bs_roformer_revive_v3_by_unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..56be9dd0e47d16ef81bb1be38566cf0a35c8ad43
--- /dev/null
+++ b/models/Roformer/BandSplit/bs_roformer_revive_v3_by_unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b0751b9a15c591407c3b77f08eb4ad3005e42e96051f3f2b39760f1130c467b
+size 639326600
diff --git a/models/Roformer/BandSplit/bs_roformer_voc_gabox.ckpt b/models/Roformer/BandSplit/bs_roformer_voc_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..cb4a0c8ef7ec4378b27e79a01eb491a2d699a535
--- /dev/null
+++ b/models/Roformer/BandSplit/bs_roformer_voc_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18d58efe5e949e70fab11b875329af6d06ef11ccc29574bfe943fb57cc827f38
+size 639254584
diff --git a/models/Roformer/BandSplit/config_BandSplit-Roformer_FNO_by-Unwa.yaml b/models/Roformer/BandSplit/config_BandSplit-Roformer_FNO_by-Unwa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7341e694abeb1ee8cad6f0063eb02295c7dae973
--- /dev/null
+++ b/models/Roformer/BandSplit/config_BandSplit-Roformer_FNO_by-Unwa.yaml
@@ -0,0 +1,136 @@
+audio:
+ chunk_size: 749259
+ dim_f: 1024
+ dim_t: 1700 # don't work (use in model)
+ hop_length: 441 # don't work (use in model)
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 256
+ depth: 12
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ linear_transformer_depth: 0
+ freqs_per_bands: !!python/tuple
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 128
+ - 129
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.
+ ff_dropout: 0.
+ flash_attn: true
+ dim_freqs_in: 1025
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+ mlp_expansion_factor: 4
+
+training:
+ batch_size: 2
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments: ['vocals', 'other']
+ patience: 3
+ reduce_factor: 0.95
+ target_instrument: other
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: simple1
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: true # Mix several stems of the same type with some probability
+ augmentation_loudness: true # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0.5
+ augmentation_loudness_max: 1.5
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ # optimizer: prodigy
+ optimizer: adam
+ # lr: 1.0
+ lr: 1.0e-5
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 2
+ dim_t: 1700
+ num_overlap: 2
+ normalize: false
diff --git a/models/Roformer/BandSplit/config_BandSplit-Roformer_Karaoke_Frazer_by-becruily.yaml b/models/Roformer/BandSplit/config_BandSplit-Roformer_Karaoke_Frazer_by-becruily.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a0170aeacb3d3a55d9c598ed7faacbdc8799071f
--- /dev/null
+++ b/models/Roformer/BandSplit/config_BandSplit-Roformer_Karaoke_Frazer_by-becruily.yaml
@@ -0,0 +1,129 @@
+audio:
+ chunk_size: 882000
+ dim_f: 1024
+ dim_t: 801
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 256
+ depth: 12
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ linear_transformer_depth: 0
+ freqs_per_bands: !!python/tuple
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 128
+ - 129
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: true
+ dim_freqs_in: 1025
+ stft_n_fft: 2048
+ stft_hop_length: 512
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: false
+ mlp_expansion_factor: 4
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Vocals
+ - Instrumental
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: Vocals
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ # optimizer: prodigy
+ optimizer: adam
+ lr: 1.0e-5
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 2
+ dim_t: 2001
+ num_overlap: 4
+ normalize: false
diff --git a/models/Roformer/BandSplit/config_BandSplit-Roformer_Resurrection_Instrumental_by-Unwa.yaml b/models/Roformer/BandSplit/config_BandSplit-Roformer_Resurrection_Instrumental_by-Unwa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8ec68fdf1b7cde78d1982d9cb5e332fe01ff55f6
--- /dev/null
+++ b/models/Roformer/BandSplit/config_BandSplit-Roformer_Resurrection_Instrumental_by-Unwa.yaml
@@ -0,0 +1,138 @@
+audio:
+ chunk_size: 749259
+ dim_f: 1024
+ dim_t: 1700 # don't work (use in model)
+ hop_length: 441 # don't work (use in model)
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 256
+ depth: 12
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ linear_transformer_depth: 0
+ freqs_per_bands: !!python/tuple
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 128
+ - 129
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.
+ ff_dropout: 0.
+ flash_attn: true
+ dim_freqs_in: 1025
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+ mlp_expansion_factor: 4
+ use_torch_checkpoint: False # greatly reduces GPU memory consumption during training (not fully tested)
+ skip_connection: False # enable skip connections between transformer blocks - can help with gradient problems and may speed up training
+
+training:
+ batch_size: 2
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments: ['vocals', 'other']
+ patience: 3
+ reduce_factor: 0.95
+ target_instrument: other
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: simple1
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: true # Mix several stems of the same type with some probability
+ augmentation_loudness: true # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0.5
+ augmentation_loudness_max: 1.5
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ # optimizer: prodigy
+ optimizer: adam
+ # lr: 1.0
+ lr: 1.0e-5
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 2
+ dim_t: 1700
+ num_overlap: 2
+ normalize: false
diff --git a/models/Roformer/BandSplit/config_BandSplit-Roformer_Resurrection_Vocals_by-Unwa.yaml b/models/Roformer/BandSplit/config_BandSplit-Roformer_Resurrection_Vocals_by-Unwa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a5fce38df0b996d87e52ffe695a33eb839f74612
--- /dev/null
+++ b/models/Roformer/BandSplit/config_BandSplit-Roformer_Resurrection_Vocals_by-Unwa.yaml
@@ -0,0 +1,138 @@
+audio:
+ chunk_size: 785920
+ dim_f: 1024
+ dim_t: 1536 # don't work (use in model)
+ hop_length: 441 # don't work (use in model)
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 256
+ depth: 12
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ linear_transformer_depth: 0
+ freqs_per_bands: !!python/tuple
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 128
+ - 129
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.
+ ff_dropout: 0.
+ flash_attn: true
+ dim_freqs_in: 1025
+ stft_n_fft: 2048
+ stft_hop_length: 512
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+ mlp_expansion_factor: 4
+ use_torch_checkpoint: False # greatly reduces GPU memory consumption during training (not fully tested)
+ skip_connection: False # enable skip connections between transformer blocks - can help with gradient problems and may speed up training
+
+training:
+ batch_size: 2
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments: ['vocals', 'other']
+ patience: 3
+ reduce_factor: 0.95
+ target_instrument: vocals
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: simple1
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: true # Mix several stems of the same type with some probability
+ augmentation_loudness: true # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0.5
+ augmentation_loudness_max: 1.5
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ # optimizer: prodigy
+ optimizer: adam
+ # lr: 1.0
+ lr: 1.0e-5
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 2
+ dim_t: 1536
+ num_overlap: 2
+ normalize: false
diff --git a/models/Roformer/BandSplit/config_BandSplit-Roformer_SW_by-jarredou.yaml b/models/Roformer/BandSplit/config_BandSplit-Roformer_SW_by-jarredou.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..12564e347f6f244f81d948ab60d34f7196a8627f
--- /dev/null
+++ b/models/Roformer/BandSplit/config_BandSplit-Roformer_SW_by-jarredou.yaml
@@ -0,0 +1,197 @@
+audio:
+ chunk_size: 588800 #882000
+ dim_f: 1024
+ dim_t: 801 # don't work (use in model)
+ hop_length: 441 # don't work (use in model)
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 256
+ depth: 12
+ stereo: true
+ num_stems: 6
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ linear_transformer_depth: 0
+ freqs_per_bands: !!python/tuple
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 128
+ - 129
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.1
+ ff_dropout: 0.1
+ flash_attn: true
+ dim_freqs_in: 1025
+ stft_n_fft: 2048
+ stft_hop_length: 512
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+ mlp_expansion_factor: 4
+ use_torch_checkpoint: False # greatly reduces GPU memory consumption during training (not fully tested)
+ skip_connection: False # enable skip connections between transformer blocks - can help with gradient problems and may speed up training
+
+training:
+ batch_size: 2
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments: ['bass', 'drums', 'other', 'vocals', 'guitar', 'piano']
+ patience: 3
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: simple1
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: true # Mix several stems of the same type with some probability
+ augmentation_loudness: true # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0.5
+ augmentation_loudness_max: 1.5
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ # optimizer: prodigy
+ optimizer: adam
+ # lr: 1.0
+ lr: 1.0e-5
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: true # enable or disable all augmentations (to quickly disable if needed)
+ loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+ - 0.2
+ - 0.02
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+
+ all:
+ channel_shuffle: 0.5 # Set 0 or lower to disable
+ random_inverse: 0.1 # inverse track (a lower probability is better)
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
+
+ vocals:
+ pitch_shift: 0.1
+ pitch_shift_min_semitones: -5
+ pitch_shift_max_semitones: 5
+ seven_band_parametric_eq: 0.1
+ seven_band_parametric_eq_min_gain_db: -9
+ seven_band_parametric_eq_max_gain_db: 9
+ tanh_distortion: 0.1
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 0.7
+ bass:
+ pitch_shift: 0.1
+ pitch_shift_min_semitones: -2
+ pitch_shift_max_semitones: 2
+ seven_band_parametric_eq: 0.1
+ seven_band_parametric_eq_min_gain_db: -3
+ seven_band_parametric_eq_max_gain_db: 6
+ tanh_distortion: 0.1
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 0.5
+ drums:
+ pitch_shift: 0.1
+ pitch_shift_min_semitones: -5
+ pitch_shift_max_semitones: 5
+ seven_band_parametric_eq: 0.1
+ seven_band_parametric_eq_min_gain_db: -9
+ seven_band_parametric_eq_max_gain_db: 9
+ tanh_distortion: 0.1
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 0.6
+ other:
+ pitch_shift: 0.1
+ pitch_shift_min_semitones: -4
+ pitch_shift_max_semitones: 4
+ gaussian_noise: 0.1
+ gaussian_noise_min_amplitude: 0.001
+ gaussian_noise_max_amplitude: 0.015
+ time_stretch: 0.1
+ time_stretch_min_rate: 0.8
+ time_stretch_max_rate: 1.25
+
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 2
+ normalize: false
\ No newline at end of file
diff --git a/models/Roformer/BandSplit/config_BandSplit_Roformer_4stems_FT_by_SYH99999.yaml b/models/Roformer/BandSplit/config_BandSplit_Roformer_4stems_FT_by_SYH99999.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2dc706b15ec068d9e03b10acba15bcb91e9082f
--- /dev/null
+++ b/models/Roformer/BandSplit/config_BandSplit_Roformer_4stems_FT_by_SYH99999.yaml
@@ -0,0 +1,196 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 801 # don't work (use in model)
+ hop_length: 441 # don't work (use in model)
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 8
+ stereo: true
+ num_stems: 4
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ linear_transformer_depth: 0
+ freqs_per_bands: !!python/tuple
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 128
+ - 129
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.1
+ ff_dropout: 0.1
+ flash_attn: true
+ dim_freqs_in: 1025
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+ mlp_expansion_factor: 2
+ use_torch_checkpoint: False # greatly reduces GPU memory consumption during training (not fully tested)
+ skip_connection: False # enable skip connections between transformer blocks - can help with gradient problems and may speed up training
+
+training:
+ batch_size: 2
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments: ['drums', 'bass', 'other', 'vocals']
+ patience: 3
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: simple1
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: true # Mix several stems of the same type with some probability
+ augmentation_loudness: true # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0.5
+ augmentation_loudness_max: 1.5
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ # optimizer: prodigy
+ optimizer: adam
+ # lr: 1.0
+ lr: 1.0e-5
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: true # enable or disable all augmentations (to quickly disable if needed)
+ loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+ - 0.2
+ - 0.02
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+
+ all:
+ channel_shuffle: 0.5 # Set 0 or lower to disable
+ random_inverse: 0.1 # inverse track (a lower probability is better)
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
+
+ vocals:
+ pitch_shift: 0.1
+ pitch_shift_min_semitones: -5
+ pitch_shift_max_semitones: 5
+ seven_band_parametric_eq: 0.1
+ seven_band_parametric_eq_min_gain_db: -9
+ seven_band_parametric_eq_max_gain_db: 9
+ tanh_distortion: 0.1
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 0.7
+ bass:
+ pitch_shift: 0.1
+ pitch_shift_min_semitones: -2
+ pitch_shift_max_semitones: 2
+ seven_band_parametric_eq: 0.1
+ seven_band_parametric_eq_min_gain_db: -3
+ seven_band_parametric_eq_max_gain_db: 6
+ tanh_distortion: 0.1
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 0.5
+ drums:
+ pitch_shift: 0.1
+ pitch_shift_min_semitones: -5
+ pitch_shift_max_semitones: 5
+ seven_band_parametric_eq: 0.1
+ seven_band_parametric_eq_min_gain_db: -9
+ seven_band_parametric_eq_max_gain_db: 9
+ tanh_distortion: 0.1
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 0.6
+ other:
+ pitch_shift: 0.1
+ pitch_shift_min_semitones: -4
+ pitch_shift_max_semitones: 4
+ gaussian_noise: 0.1
+ gaussian_noise_min_amplitude: 0.001
+ gaussian_noise_max_amplitude: 0.015
+ time_stretch: 0.1
+ time_stretch_min_rate: 0.8
+ time_stretch_max_rate: 1.25
+
+
+inference:
+ batch_size: 2
+ dim_t: 2048
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Roformer/BandSplit/config_bs_roformer_chorus_male_female.yaml b/models/Roformer/BandSplit/config_bs_roformer_chorus_male_female.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eab8413f38d13a0ee021fd752c85fb20b65a376f
--- /dev/null
+++ b/models/Roformer/BandSplit/config_bs_roformer_chorus_male_female.yaml
@@ -0,0 +1,125 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 801 # has no effect here (use the setting in the model section)
+ hop_length: 441 # has no effect here (use the setting in the model section)
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 8
+ stereo: true
+ num_stems: 2
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ freqs_per_bands: !!python/tuple
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 128
+ - 129
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.0
+ ff_dropout: 0.0
+ flash_attn: true
+ dim_freqs_in: 1025
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - male
+ - female
+ lr: 1.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 1
+ dim_t: 801
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/BandSplit/config_bs_roformer_deverb_8_384dim_10depth.yaml b/models/Roformer/BandSplit/config_bs_roformer_deverb_8_384dim_10depth.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d2c7ce0c4b424baa7731495c432102672b68cfa6
--- /dev/null
+++ b/models/Roformer/BandSplit/config_bs_roformer_deverb_8_384dim_10depth.yaml
@@ -0,0 +1,137 @@
+audio:
+ chunk_size: 352768
+ dim_f: 1024
+ dim_t: 801
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+
+model:
+ dim: 384
+ depth: 10
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ freqs_per_bands: !!python/tuple
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 128
+ - 129
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.1
+ ff_dropout: 0.1
+ flash_attn: true
+ dim_freqs_in: 1025
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - noreverb
+ - reverb
+ lr: 5.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: noreverb
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: true # enable or disable all augmentations (to quickly disable them if needed)
+ loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+ - 0.2
+ - 0.02
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+
+inference:
+ batch_size: 4
+ dim_t: 801
+ num_overlap: 4
diff --git a/models/Roformer/BandSplit/config_bs_roformer_ep_317_sdr_12.9755.yaml b/models/Roformer/BandSplit/config_bs_roformer_ep_317_sdr_12.9755.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c4a3d323322d75af7d981e9de2ef3fa29e786812
--- /dev/null
+++ b/models/Roformer/BandSplit/config_bs_roformer_ep_317_sdr_12.9755.yaml
@@ -0,0 +1,133 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 801 # has no effect here (use the setting in the model section)
+ hop_length: 441 # has no effect here (use the setting in the model section)
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+
+model:
+ dim: 512
+ depth: 12
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ freqs_per_bands: !!python/tuple
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 128
+ - 129
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.1
+ ff_dropout: 0.1
+ flash_attn: true
+ dim_freqs_in: 1025
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 16
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Vocals
+ - Instrumental
+ lr: 5.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: Vocals
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: simple1
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: true # Mix several stems of the same type with some probability
+ augmentation_loudness: true # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0.5
+ augmentation_loudness_max: 1.5
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 1
+ dim_t: 801
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Roformer/BandSplit/config_bs_roformer_ep_368_sdr_12.9628.yaml b/models/Roformer/BandSplit/config_bs_roformer_ep_368_sdr_12.9628.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fe893b1a68b8ae8ea8bb5a7ac2b7f12e0c53a826
--- /dev/null
+++ b/models/Roformer/BandSplit/config_bs_roformer_ep_368_sdr_12.9628.yaml
@@ -0,0 +1,133 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 801 # has no effect here (use the setting in the model section)
+ hop_length: 441 # has no effect here (use the setting in the model section)
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+
+model:
+ dim: 512
+ depth: 12
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ freqs_per_bands: !!python/tuple
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 128
+ - 129
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.1
+ ff_dropout: 0.1
+ flash_attn: true
+ dim_freqs_in: 1025
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 16
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Vocals
+ - Instrumental
+ lr: 5.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: Vocals
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: simple1
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: true # Mix several stems of the same type with some probability
+ augmentation_loudness: true # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0.5
+ augmentation_loudness_max: 1.5
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 1
+ dim_t: 801
+ num_overlap: 4
diff --git a/models/Roformer/BandSplit/config_bs_roformer_ep_937_sdr_10.5309.yaml b/models/Roformer/BandSplit/config_bs_roformer_ep_937_sdr_10.5309.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f623832cc06ebc5fa8a049fad6b1319c6038336d
--- /dev/null
+++ b/models/Roformer/BandSplit/config_bs_roformer_ep_937_sdr_10.5309.yaml
@@ -0,0 +1,138 @@
+audio:
+ chunk_size: 131584
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 512
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+
+model:
+ dim: 384
+ depth: 12
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ linear_transformer_depth: 0
+ freqs_per_bands: !!python/tuple
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 128
+ - 129
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.1
+ ff_dropout: 0.1
+ flash_attn: true
+ dim_freqs_in: 1025
+ stft_n_fft: 2048
+ stft_hop_length: 512
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 4
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - No Drum-Bass
+ - Drum-Bass
+ lr: 5.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: No Drum-Bass
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: true # enable or disable all augmentations (to quickly disable them if needed)
+ loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+ - 0.2
+ - 0.02
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+
+inference:
+ batch_size: 1
+ dim_t: 512
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Roformer/BandSplit/config_bs_roformer_inst_exp_vrl.yaml b/models/Roformer/BandSplit/config_bs_roformer_inst_exp_vrl.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8dc25ddd8268a169492e100b92627e38115ff055
--- /dev/null
+++ b/models/Roformer/BandSplit/config_bs_roformer_inst_exp_vrl.yaml
@@ -0,0 +1,124 @@
+audio:
+ chunk_size: 485100 #352800 #485100
+ dim_f: 1024
+ dim_t: 801
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 12
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ freqs_per_bands: !!python/tuple
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 128
+ - 129
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: true
+ dim_freqs_in: 1025
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Vocals
+ - Instrumental
+ lr: 1.0e-04
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: Instrumental
+ num_epochs: 1
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adamw
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/BandSplit/config_bs_roformer_revive_by_unwa.yaml b/models/Roformer/BandSplit/config_bs_roformer_revive_by_unwa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2d60c5d37a6c92afea42ef88d8958b7f01b64db
--- /dev/null
+++ b/models/Roformer/BandSplit/config_bs_roformer_revive_by_unwa.yaml
@@ -0,0 +1,134 @@
+audio:
+ chunk_size: 485100 #352800 #485100
+ dim_f: 1024
+ dim_t: 1101
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.
+
+model:
+ dim: 512
+ depth: 12
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ linear_transformer_depth: 0
+ freqs_per_bands: !!python/tuple
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 128
+ - 129
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.
+ ff_dropout: 0.
+ flash_attn: true
+ dim_freqs_in: 1025
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - vocals
+ - other
+ lr: 1.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: vocals
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: null
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 2
+ dim_t: 1101
+ num_overlap: 2
diff --git a/models/Roformer/BandSplit/config_bs_roformer_voc_gabox.yaml b/models/Roformer/BandSplit/config_bs_roformer_voc_gabox.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c4a3d323322d75af7d981e9de2ef3fa29e786812
--- /dev/null
+++ b/models/Roformer/BandSplit/config_bs_roformer_voc_gabox.yaml
@@ -0,0 +1,133 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 801 # has no effect here (use the setting in the model section)
+ hop_length: 441 # has no effect here (use the setting in the model section)
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+
+model:
+ dim: 512
+ depth: 12
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ freqs_per_bands: !!python/tuple
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 2
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 4
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 12
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 24
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 48
+ - 128
+ - 129
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.1
+ ff_dropout: 0.1
+ flash_attn: true
+ dim_freqs_in: 1025
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 16
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Vocals
+ - Instrumental
+ lr: 5.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: Vocals
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: simple1
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: true # Mix several stems of the same type with some probability
+ augmentation_loudness: true # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0.5
+ augmentation_loudness_max: 1.5
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 1
+ dim_t: 801
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Roformer/BandSplit/deverb_bs_roformer_8_384dim_10depth.ckpt b/models/Roformer/BandSplit/deverb_bs_roformer_8_384dim_10depth.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..17a6af84d21d7c3cecf0c627811f42fee20b6d8e
--- /dev/null
+++ b/models/Roformer/BandSplit/deverb_bs_roformer_8_384dim_10depth.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c38653aaa5e49f2f7b84dd3be2b6b679e0cbea23978e6b48389ee6f0a914768
+size 361499604
diff --git a/models/Roformer/BandSplit/model_BandSplit-Roformer_FNO_by-Unwa.ckpt b/models/Roformer/BandSplit/model_BandSplit-Roformer_FNO_by-Unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..bc3a830822d70432a654439ec811d7560d1076f7
--- /dev/null
+++ b/models/Roformer/BandSplit/model_BandSplit-Roformer_FNO_by-Unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f35bf6d87b2863372388e85c2d9679e5b7651e5c2ddd23aab1480f7af10b90ca
+size 332004435
diff --git a/models/Roformer/BandSplit/model_BandSplit-Roformer_Karaoke_Frazer_by-becruily.ckpt b/models/Roformer/BandSplit/model_BandSplit-Roformer_Karaoke_Frazer_by-becruily.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..89ef86629edab3d7580712f0feb5bba8175fd3de
--- /dev/null
+++ b/models/Roformer/BandSplit/model_BandSplit-Roformer_Karaoke_Frazer_by-becruily.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb90ee24c1154d83fbcfd27e96182f19e061557cc6e4746953125e08c29389f9
+size 204436907
diff --git a/models/Roformer/BandSplit/model_BandSplit-Roformer_Resurrection_Instrumental_by-Unwa.ckpt b/models/Roformer/BandSplit/model_BandSplit-Roformer_Resurrection_Instrumental_by-Unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..23b7fc62506629fc0ac29c1551ee7045b805dfa5
--- /dev/null
+++ b/models/Roformer/BandSplit/model_BandSplit-Roformer_Resurrection_Instrumental_by-Unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16311025a5133ae6411760ccfe9e3e66b31a01d9d8bec0a03fa7ec4bedac7a15
+size 204483033
diff --git a/models/Roformer/BandSplit/model_BandSplit-Roformer_Resurrection_Vocals_by-Unwa.ckpt b/models/Roformer/BandSplit/model_BandSplit-Roformer_Resurrection_Vocals_by-Unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..14e223aece4474ca86627c065d73e725c4466902
--- /dev/null
+++ b/models/Roformer/BandSplit/model_BandSplit-Roformer_Resurrection_Vocals_by-Unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9dbfe5cb572e4ed32a15ec727d7bd06c8d7aba97509e6fda5bc008bb1e0b2dd5
+size 204510749
diff --git a/models/Roformer/BandSplit/model_BandSplit-Roformer_SW_by-jarredou.ckpt b/models/Roformer/BandSplit/model_BandSplit-Roformer_SW_by-jarredou.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..011736ea342314bf6193d061f60929f95f4f66bd
--- /dev/null
+++ b/models/Roformer/BandSplit/model_BandSplit-Roformer_SW_by-jarredou.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24e7d35ee9c64415673d3fd33e06a67cac2c103c5df6267ba1576459c775916e
+size 699412152
diff --git a/models/Roformer/BandSplit/model_bs_roformer_ep_317_sdr_12.9755.ckpt b/models/Roformer/BandSplit/model_bs_roformer_ep_317_sdr_12.9755.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..1d4b892da79c875b3b3028f9f4d2504ebafe72e1
--- /dev/null
+++ b/models/Roformer/BandSplit/model_bs_roformer_ep_317_sdr_12.9755.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b84f37e8d444c8cb30c79d77f613a41c05868ff9c9ac6c7049c00aefae115aa
+size 639331213
diff --git a/models/Roformer/BandSplit/model_bs_roformer_ep_368_sdr_12.9628.ckpt b/models/Roformer/BandSplit/model_bs_roformer_ep_368_sdr_12.9628.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..929fbf0bc57f788a3d6b758ee4feedd61976298b
--- /dev/null
+++ b/models/Roformer/BandSplit/model_bs_roformer_ep_368_sdr_12.9628.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6c94864adfb73bbb0ca58ec14d58dd0b364549e9fb61433ae51916f3e2f8d0b
+size 639317465
diff --git a/models/Roformer/BandSplit/model_bs_roformer_ep_937_sdr_10.5309.ckpt b/models/Roformer/BandSplit/model_bs_roformer_ep_937_sdr_10.5309.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..b30e4893bf1e4b198ea8005346b01efaa135c8b7
--- /dev/null
+++ b/models/Roformer/BandSplit/model_bs_roformer_ep_937_sdr_10.5309.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2e825a03bc908cb04dbd88eddeefbf5147dd1cf1f95cebf453d9dbfabec494b
+size 393068365
diff --git a/models/Roformer/BandSplit/model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt b/models/Roformer/BandSplit/model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..4f9492413f4671a4c218f73cd6df2963e6b75fb8
--- /dev/null
+++ b/models/Roformer/BandSplit/model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:123c00786bdbc6bd462dddb35cd21fd6ae99ab8319f93f63a8abc1012e593d94
+size 527121477
diff --git a/models/Roformer/MelBand/MelBandRoformerBigSYHFTV1.ckpt b/models/Roformer/MelBand/MelBandRoformerBigSYHFTV1.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..3b07b85890712d20ad115ead701bf7799c3b9928
--- /dev/null
+++ b/models/Roformer/MelBand/MelBandRoformerBigSYHFTV1.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2327e3e81f19e67c307f8c830c54267c09ecb0e9c6ad2b40a80c310899c955f
+size 1479738496
diff --git a/models/Roformer/MelBand/MelBandRoformerSYHFT.ckpt b/models/Roformer/MelBand/MelBandRoformerSYHFT.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..fc8e114879a3761ce9e6f901cab7b0adbc7035b5
--- /dev/null
+++ b/models/Roformer/MelBand/MelBandRoformerSYHFT.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f319dfcde4396ea3106658f457f5eb0bc577e113491f61ae8bab216fe84b0c0c
+size 913096702
diff --git a/models/Roformer/MelBand/MelBandRoformerSYHFTV2.5.ckpt b/models/Roformer/MelBand/MelBandRoformerSYHFTV2.5.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..232404ba4a0d4e6d32b6f683711c58cd73b10c18
--- /dev/null
+++ b/models/Roformer/MelBand/MelBandRoformerSYHFTV2.5.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:916e3a2c1e63b1457bcad823b98ca705e4933deffd2a5ab3a370e10f68bf47e2
+size 913090472
diff --git a/models/Roformer/MelBand/MelBandRoformerSYHFTV2.ckpt b/models/Roformer/MelBand/MelBandRoformerSYHFTV2.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..6b34c66f158e43e0f2c11b6df91a040c5a11a23c
--- /dev/null
+++ b/models/Roformer/MelBand/MelBandRoformerSYHFTV2.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e99f8efa5315300c197295592bd7e56c21c1d77e1884c904b5128c54a2a4632
+size 913095346
diff --git a/models/Roformer/MelBand/MelBandRoformerSYHFTV3Epsilon.ckpt b/models/Roformer/MelBand/MelBandRoformerSYHFTV3Epsilon.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..650b62bfd05945493a6529898ca0d0023ee7637d
--- /dev/null
+++ b/models/Roformer/MelBand/MelBandRoformerSYHFTV3Epsilon.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c886092e4aae13aa089263a0d54d483643f58c16ec221aed37268e2c1031397
+size 913090472
diff --git a/models/Roformer/MelBand/MelBand_Roformer_4stems_FT_Large_v1_by_SYH99999.ckpt b/models/Roformer/MelBand/MelBand_Roformer_4stems_FT_Large_v1_by_SYH99999.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..79988facfb10e352f59e8983485a6bdfd5f636de
--- /dev/null
+++ b/models/Roformer/MelBand/MelBand_Roformer_4stems_FT_Large_v1_by_SYH99999.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:590358e6abe3255d87d7173ca2e4005de082f14cbb466ea4c5ffedbcc3964506
+size 3764030874
diff --git a/models/Roformer/MelBand/MelBand_Roformer_4stems_FT_Large_v2_by_SYH99999.ckpt b/models/Roformer/MelBand/MelBand_Roformer_4stems_FT_Large_v2_by_SYH99999.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..fe7a321b49416f94a730cd225cef4090e3ed2bb2
--- /dev/null
+++ b/models/Roformer/MelBand/MelBand_Roformer_4stems_FT_Large_v2_by_SYH99999.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:835ce9fa6651769c28ad5a2c87f9220b003a461d7d6c57ebe0898fff5479138c
+size 3764030874
diff --git a/models/Roformer/MelBand/MelBand_Roformer_4stems_Large_v1_by_Aname.ckpt b/models/Roformer/MelBand/MelBand_Roformer_4stems_Large_v1_by_Aname.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..2472823fc801f44ef77d64b29279dfb7389228fd
--- /dev/null
+++ b/models/Roformer/MelBand/MelBand_Roformer_4stems_Large_v1_by_Aname.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2ca1ac95e203d2630994c3d4a0cebabd7160e70bb010adab794843340fb704f
+size 3764030874
diff --git a/models/Roformer/MelBand/MelBand_Roformer_4stems_XL_v1_by_Aname.ckpt b/models/Roformer/MelBand/MelBand_Roformer_4stems_XL_v1_by_Aname.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..6f85c5bb3908fa4a88351032885941e98355dbae
--- /dev/null
+++ b/models/Roformer/MelBand/MelBand_Roformer_4stems_XL_v1_by_Aname.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca5f7c78543d84c369319f226b7337a6cc0b2e1a75d20409fc3ebde32686e94b
+size 6411087858
diff --git a/models/Roformer/MelBand/aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt b/models/Roformer/MelBand/aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..0dd326fa1c5c65e52583803582f154a0359a55bd
--- /dev/null
+++ b/models/Roformer/MelBand/aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83bfe991cec4fbadde9f30d1f79cd5293ad0b1f936256be327bba5cbb4883374
+size 835982664
diff --git a/models/Roformer/MelBand/aspiration_mel_band_roformer_sdr_18.9845.ckpt b/models/Roformer/MelBand/aspiration_mel_band_roformer_sdr_18.9845.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..1282613860a16200f301a12c7b8ec67d050c63be
--- /dev/null
+++ b/models/Roformer/MelBand/aspiration_mel_band_roformer_sdr_18.9845.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e791258c866c6c8da66052693d8cc3b64f1f42c01e052dbdc570cd278380cc5
+size 835983746
diff --git a/models/Roformer/MelBand/config_MelBand-Roformer_BVE_by-Gonza.yaml b/models/Roformer/MelBand/config_MelBand-Roformer_BVE_by-Gonza.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1b7569151253e22fae5b06dc8d9d8e47c92b15ff
--- /dev/null
+++ b/models/Roformer/MelBand/config_MelBand-Roformer_BVE_by-Gonza.yaml
@@ -0,0 +1,75 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 411
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: true
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: true
+
+training:
+ batch_size: 2
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Lead
+ - Back
+ lr: 5.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: Lead
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type:
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: false
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 8
diff --git a/models/Roformer/MelBand/config_MelBand-Roformer_Duality_v1_by-Aname.yaml b/models/Roformer/MelBand/config_MelBand-Roformer_Duality_v1_by-Aname.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d3626aa62a12bd47f178b17241dc7071459042ca
--- /dev/null
+++ b/models/Roformer/MelBand/config_MelBand-Roformer_Duality_v1_by-Aname.yaml
@@ -0,0 +1,72 @@
+audio:
+ chunk_size: 661500
+ dim_f: 1024
+ dim_t: 1101
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 4
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - vocals
+ - other
+ lr: 1.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: vocals
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: null
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 4
+ dim_t: 1101
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_MelBand-Roformer_Karaoke_Fusion_Total_by-Gonza.yaml b/models/Roformer/MelBand/config_MelBand-Roformer_Karaoke_Fusion_Total_by-Gonza.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..846a148cf21264e0821523d7ddee2c533f2a06ba
--- /dev/null
+++ b/models/Roformer/MelBand/config_MelBand-Roformer_Karaoke_Fusion_Total_by-Gonza.yaml
@@ -0,0 +1,75 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 411
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: true
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: true
+
+training:
+ batch_size: 2
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Vocals
+ - Instrumental
+ lr: 1.0
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: Vocals
+ num_epochs: 100
+ num_steps: 200
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type:
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: prodigy
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: false
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 8
diff --git a/models/Roformer/MelBand/config_MelBand-Roformer_Karaoke_Fusion_by-Gonza.yaml b/models/Roformer/MelBand/config_MelBand-Roformer_Karaoke_Fusion_by-Gonza.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2e9560bf491843d329b2e932cc3a1ead0d40bd39
--- /dev/null
+++ b/models/Roformer/MelBand/config_MelBand-Roformer_Karaoke_Fusion_by-Gonza.yaml
@@ -0,0 +1,83 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 411
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: true
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: true
+
+training:
+ batch_size: 2
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Vocals
+ - Instrumental
+ lr: 1.0
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: Vocals
+ num_epochs: 150
+ num_steps: 100
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type:
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: prodigy
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: false
+
+lora:
+ r: 4
+ lora_alpha: 8 # alpha / rank > 1
+ lora_dropout: 0.01
+ merge_weights: True
+ fan_in_fan_out: False
+ enable_lora: [True]
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 8
diff --git a/models/Roformer/MelBand/config_MelBand-Roformer_Karaoke_Fusion_v2_by-Gonza.yaml b/models/Roformer/MelBand/config_MelBand-Roformer_Karaoke_Fusion_v2_by-Gonza.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..14ac25ffa10f762262cc6f00a263e41a805250b0
--- /dev/null
+++ b/models/Roformer/MelBand/config_MelBand-Roformer_Karaoke_Fusion_v2_by-Gonza.yaml
@@ -0,0 +1,83 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 411
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: true
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: true
+
+training:
+ batch_size: 2
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Vocals
+ - Instrumental
+ lr: 1.0
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: Vocals
+ num_epochs: 100
+ num_steps: 100
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type:
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: prodigy
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+lora:
+ r: 8
+ lora_alpha: 16 # alpha / rank > 1
+ lora_dropout: 0.05
+ merge_weights: False
+ fan_in_fan_out: False
+ enable_lora: [True]
+
+augmentations:
+ enable: false
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 8
diff --git a/models/Roformer/MelBand/config_MelBand-Roformer_Karaoke_by-Gabox.yaml b/models/Roformer/MelBand/config_MelBand-Roformer_Karaoke_by-Gabox.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..16cb193e3651303c63d76da329b677fd5b16dae8
--- /dev/null
+++ b/models/Roformer/MelBand/config_MelBand-Roformer_Karaoke_by-Gabox.yaml
@@ -0,0 +1,72 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: true
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: true
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Vocals
+ - Instrumental
+ lr: 0.0005
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: Vocals
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type:
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adamw
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 8
diff --git a/models/Roformer/MelBand/config_MelBand_Roformer_4stems_FT_Large_by_SYH99999.yaml b/models/Roformer/MelBand/config_MelBand_Roformer_4stems_FT_Large_by_SYH99999.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5b40b450771db5422db45c2b0f9350390f944e2c
--- /dev/null
+++ b/models/Roformer/MelBand/config_MelBand_Roformer_4stems_FT_Large_by_SYH99999.yaml
@@ -0,0 +1,69 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 1101
+ hop_length: 882
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 8
+ stereo: true
+ num_stems: 4
+ linear_transformer_depth: 0
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.0
+ ff_dropout: 0.0
+ flash_attn: true
+ dim_freqs_in: 2049
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 4096
+ stft_hop_length: 882
+ stft_win_length: 4096
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+ mlp_expansion_factor: 4
+ use_torch_checkpoint: False # greatly reduces GPU memory consumption during training (not fully tested)
+ skip_connection: True # Enable skip connections between transformer blocks - can help with gradient problems and probably speeds up training
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments: ['drums', 'bass', 'other', 'vocals']
+ lr: 1.0
+ patience: 3
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: prodigy
+ read_metadata_procs: 8 # Number of processes to use during metadata reading for dataset. Can speed up metadata generation
+ normalize: false
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 4
+ dim_t: 1101
+ num_overlap: 4
+ normalize: false
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_MelBand_Roformer_4stems_Large_v1_by_Aname.yaml b/models/Roformer/MelBand/config_MelBand_Roformer_4stems_Large_v1_by_Aname.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5281b7e67f4a87eb2ac2bfe6071c5f50be766725
--- /dev/null
+++ b/models/Roformer/MelBand/config_MelBand_Roformer_4stems_Large_v1_by_Aname.yaml
@@ -0,0 +1,167 @@
+audio:
+ chunk_size: 661500
+ dim_f: 1024
+ dim_t: 1101
+ hop_length: 882
+ n_fft: 4096
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.0001
+
+model:
+ dim: 384
+ depth: 8
+ stereo: true
+ num_stems: 4
+ linear_transformer_depth: 0
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.0
+ ff_dropout: 0.0
+ flash_attn: true
+ dim_freqs_in: 2049
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 4096
+ stft_hop_length: 882
+ stft_win_length: 4096
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+ mlp_expansion_factor: 4
+ use_torch_checkpoint: False # greatly reduces GPU memory consumption during training (not fully tested)
+ skip_connection: True # Enable skip connections between transformer blocks - can help with gradient problems and probably speeds up training
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 4
+ grad_clip: 0
+ instruments: ['drums', 'bass', 'other', 'vocals']
+ lr: 2.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 300
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adamw
+ read_metadata_procs: 8 # Number of processes to use during metadata reading for dataset. Can speed up metadata generation
+ normalize: false
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: false # enable or disable all augmentations (to quickly disable them if needed)
+ loudness: false # randomly change loudness of each stem in the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs: !!python/tuple # 3 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02, 3rd with prob 0.002)
+ - 0.2
+ - 0.02
+ - 0.002
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+
+ # apply mp3 compression to mixture only (emulate downloading mp3 from internet)
+ mp3_compression_on_mixture: 0.01
+ mp3_compression_on_mixture_bitrate_min: 32
+ mp3_compression_on_mixture_bitrate_max: 320
+ mp3_compression_on_mixture_backend: "lameenc"
+
+ all:
+ channel_shuffle: 0.5 # Set 0 or lower to disable
+ random_inverse: 0.01 # inverse track (better lower probability)
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
+
+ vocals:
+ pitch_shift: 1.0
+ pitch_shift_min_semitones: -12
+ pitch_shift_max_semitones: 12
+ seven_band_parametric_eq: 0.5
+ seven_band_parametric_eq_min_gain_db: -80
+ seven_band_parametric_eq_max_gain_db: 9
+ tanh_distortion: 0.5
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 1
+ time_stretch: 1.0
+ time_stretch_min_rate: 0.5
+ time_stretch_max_rate: 2
+ bass:
+ pitch_shift: 1.0
+ pitch_shift_min_semitones: -6
+ pitch_shift_max_semitones: 6
+ seven_band_parametric_eq: 0.4
+ seven_band_parametric_eq_min_gain_db: -32
+ seven_band_parametric_eq_max_gain_db: 6
+ tanh_distortion: 1.0
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 0.5
+ time_stretch: 1.0
+ time_stretch_min_rate: 0.5
+ time_stretch_max_rate: 1.5
+ drums:
+ pitch_shift: 0.1
+ pitch_shift_min_semitones: -6
+ pitch_shift_max_semitones: 6
+ seven_band_parametric_eq: 0.5
+ seven_band_parametric_eq_min_gain_db: -24
+ seven_band_parametric_eq_max_gain_db: 12
+ tanh_distortion: 0.3
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 0.6
+ time_stretch: 1.0
+ time_stretch_min_rate: 0.333
+ time_stretch_max_rate: 1.5
+ other:
+ pitch_shift: 1.0
+ pitch_shift_min_semitones: -12
+ pitch_shift_max_semitones: 12
+ gaussian_noise: 0.4
+ gaussian_noise_min_amplitude: 0.001
+ gaussian_noise_max_amplitude: 0.15
+ time_stretch: 0.01
+ time_stretch_min_rate: 0.25
+ time_stretch_max_rate: 1.5
+
+inference:
+ batch_size: 1
+ dim_t: 256
+ num_overlap: 4
+ normalize: false
+
+loss_multistft:
+ fft_sizes:
+ - 1024
+ - 2048
+ - 4096
+ hop_sizes:
+ - 147
+ - 256
+ - 512
+ win_lengths:
+ - 1024
+ - 2048
+ - 4096
+ window: "hann_window"
+ scale: "mel"
+ n_bins: 128
+ sample_rate: 44100
+ perceptual_weighting: true
+ w_sc: 16.0
+ w_log_mag: 16.0
+ w_lin_mag: 16.0
+ w_phs: 0.0
+ mag_distance: "L1"
diff --git a/models/Roformer/MelBand/config_MelBand_Roformer_4stems_XL_v1_by_Aname.yaml b/models/Roformer/MelBand/config_MelBand_Roformer_4stems_XL_v1_by_Aname.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a3e6444365fd12a6f8f9ddc88ffe604aff4e218a
--- /dev/null
+++ b/models/Roformer/MelBand/config_MelBand_Roformer_4stems_XL_v1_by_Aname.yaml
@@ -0,0 +1,167 @@
+audio:
+ chunk_size: 661500
+ dim_f: 1024
+ dim_t: 1101
+ hop_length: 882
+ n_fft: 4096
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.0001
+
+model:
+ dim: 512
+ depth: 12
+ stereo: true
+ num_stems: 4
+ linear_transformer_depth: 0
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.0
+ ff_dropout: 0.0
+ flash_attn: true
+ dim_freqs_in: 2049
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 4096
+ stft_hop_length: 882
+ stft_win_length: 4096
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+ mlp_expansion_factor: 4
+ use_torch_checkpoint: False # greatly reduces GPU memory consumption during training (not fully tested)
+ skip_connection: True # Enable skip connections between transformer blocks - can help with gradient problems and probably speeds up training
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 4
+ grad_clip: 0
+ instruments: ['drums', 'bass', 'other', 'vocals']
+ lr: 2.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 300
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adamw
+ read_metadata_procs: 8 # Number of processes to use during metadata reading for dataset. Can speed up metadata generation
+ normalize: false
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: false # enable or disable all augmentations (to quickly disable them if needed)
+ loudness: false # randomly change loudness of each stem in the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs: !!python/tuple # 3 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02, 3rd with prob 0.002)
+ - 0.2
+ - 0.02
+ - 0.002
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+
+ # apply mp3 compression to mixture only (emulate downloading mp3 from internet)
+ mp3_compression_on_mixture: 0.01
+ mp3_compression_on_mixture_bitrate_min: 32
+ mp3_compression_on_mixture_bitrate_max: 320
+ mp3_compression_on_mixture_backend: "lameenc"
+
+ all:
+ channel_shuffle: 0.5 # Set 0 or lower to disable
+ random_inverse: 0.01 # inverse track (better lower probability)
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
+
+ vocals:
+ pitch_shift: 1.0
+ pitch_shift_min_semitones: -12
+ pitch_shift_max_semitones: 12
+ seven_band_parametric_eq: 0.5
+ seven_band_parametric_eq_min_gain_db: -80
+ seven_band_parametric_eq_max_gain_db: 9
+ tanh_distortion: 0.5
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 1
+ time_stretch: 1.0
+ time_stretch_min_rate: 0.5
+ time_stretch_max_rate: 2
+ bass:
+ pitch_shift: 1.0
+ pitch_shift_min_semitones: -6
+ pitch_shift_max_semitones: 6
+ seven_band_parametric_eq: 0.4
+ seven_band_parametric_eq_min_gain_db: -32
+ seven_band_parametric_eq_max_gain_db: 6
+ tanh_distortion: 1.0
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 0.5
+ time_stretch: 1.0
+ time_stretch_min_rate: 0.5
+ time_stretch_max_rate: 1.5
+ drums:
+ pitch_shift: 0.1
+ pitch_shift_min_semitones: -6
+ pitch_shift_max_semitones: 6
+ seven_band_parametric_eq: 0.5
+ seven_band_parametric_eq_min_gain_db: -24
+ seven_band_parametric_eq_max_gain_db: 12
+ tanh_distortion: 0.3
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 0.6
+ time_stretch: 1.0
+ time_stretch_min_rate: 0.333
+ time_stretch_max_rate: 1.5
+ other:
+ pitch_shift: 1.0
+ pitch_shift_min_semitones: -12
+ pitch_shift_max_semitones: 12
+ gaussian_noise: 0.4
+ gaussian_noise_min_amplitude: 0.001
+ gaussian_noise_max_amplitude: 0.15
+ time_stretch: 0.01
+ time_stretch_min_rate: 0.25
+ time_stretch_max_rate: 1.5
+
+inference:
+ batch_size: 1
+ dim_t: 256
+ num_overlap: 4
+ normalize: false
+
+loss_multistft:
+ fft_sizes:
+ - 1024
+ - 2048
+ - 4096
+ hop_sizes:
+ - 147
+ - 256
+ - 512
+ win_lengths:
+ - 1024
+ - 2048
+ - 4096
+ window: "hann_window"
+ scale: "mel"
+ n_bins: 128
+ sample_rate: 44100
+ perceptual_weighting: true
+ w_sc: 16.0
+ w_log_mag: 16.0
+ w_lin_mag: 16.0
+ w_phs: 0.0
+ mag_distance: "L1"
diff --git a/models/Roformer/MelBand/config_melband_roformer_aspiration.yaml b/models/Roformer/MelBand/config_melband_roformer_aspiration.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..75983773005de1549919f2f50dc456f76f199b18
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_aspiration.yaml
@@ -0,0 +1,76 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 801 # has no effect here (see the model section)
+ hop_length: 441 # has no effect here (see the model section)
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 256
+ depth: 8
+ stereo: true
+ num_stems: 2
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.1
+ ff_dropout: 0.1
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 8
+ grad_clip: 0
+ instruments:
+ - aspiration
+ - other
+ lr: 4.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: true # enable or disable all augmentations (to quickly disable them if needed)
+ loudness: true # randomly change loudness of each stem in the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+ - 0.2
+ - 0.02
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+
+inference:
+ batch_size: 4
+ dim_t: 801
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_big_beta4.yaml b/models/Roformer/MelBand/config_melband_roformer_big_beta4.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7dba2f17a3f8b3b5e7ec94bb9c4becb1acc1724a
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_big_beta4.yaml
@@ -0,0 +1,51 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 1101
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 12
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 3
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ instruments:
+ - vocals
+ - other
+ target_instrument: vocals
+ use_amp: True
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_big_beta5e.yaml b/models/Roformer/MelBand/config_melband_roformer_big_beta5e.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..66f782d66d31113f304fe07aa1ce663952030676
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_big_beta5e.yaml
@@ -0,0 +1,51 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 801
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 3
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ instruments:
+ - vocals
+ - other
+ target_instrument: vocals
+ use_amp: True
+
+inference:
+ batch_size: 1
+ dim_t: 801
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_big_beta6.yaml b/models/Roformer/MelBand/config_melband_roformer_big_beta6.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fb175852d74ba1b04ab1bcfd0fc88e8c5c26f206
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_big_beta6.yaml
@@ -0,0 +1,72 @@
+audio:
+ chunk_size: 529200
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 512
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - vocals
+ - other
+ lr: 1.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: vocals
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: null
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 2
+ dim_t: 1201
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_big_beta6x.yaml b/models/Roformer/MelBand/config_melband_roformer_big_beta6x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7537b31741f9dec3d604f33b859ce7783bb23da6
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_big_beta6x.yaml
@@ -0,0 +1,72 @@
+audio:
+ chunk_size: 529200
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 512
+ depth: 12
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - vocals
+ - other
+ lr: 1.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: vocals
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: null
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 2
+ dim_t: 1201
+ num_overlap: 2
diff --git a/models/Roformer/MelBand/config_melband_roformer_bleed_suppressor_v1.yaml b/models/Roformer/MelBand/config_melband_roformer_bleed_suppressor_v1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..333fb15b0079c324821981b54b75945f1d8440ac
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_bleed_suppressor_v1.yaml
@@ -0,0 +1,51 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 801
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ instruments:
+ - Instrumental
+ - Bleed
+ target_instrument: Instrumental
+ use_amp: True
+
+inference:
+ batch_size: 1
+ dim_t: 801
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_crowd_aufr33_viperx_sdr_8.7144.yaml b/models/Roformer/MelBand/config_melband_roformer_crowd_aufr33_viperx_sdr_8.7144.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7e44ef94c71082af3a619c9b439f808ae8eb3e1c
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_crowd_aufr33_viperx_sdr_8.7144.yaml
@@ -0,0 +1,71 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 801
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 2
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - crowd
+ - other
+ lr: 1.0e-05
+ patience: 8
+ reduce_factor: 0.95
+ target_instrument: crowd
+ num_epochs: 1000
+ num_steps: 4032
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: null
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+
+inference:
+ batch_size: 1
+ dim_t: 801
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_denoise_aufr33_aggr_sdr_27.9768.yaml b/models/Roformer/MelBand/config_melband_roformer_denoise_aufr33_aggr_sdr_27.9768.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..265e19c806778d7b2d5ffdaef9e3d503a6dba3f1
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_denoise_aufr33_aggr_sdr_27.9768.yaml
@@ -0,0 +1,71 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 801
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 2
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - dry
+ - other
+ lr: 1.0e-05
+ patience: 8
+ reduce_factor: 0.95
+ target_instrument: dry
+ num_epochs: 1000
+ num_steps: 4032
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: null
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+
+inference:
+ batch_size: 2
+ dim_t: 801
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_denoise_aufr33_sdr_27.9959.yaml b/models/Roformer/MelBand/config_melband_roformer_denoise_aufr33_sdr_27.9959.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..265e19c806778d7b2d5ffdaef9e3d503a6dba3f1
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_denoise_aufr33_sdr_27.9959.yaml
@@ -0,0 +1,71 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 801
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 2
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - dry
+ - other
+ lr: 1.0e-05
+ patience: 8
+ reduce_factor: 0.95
+ target_instrument: dry
+ num_epochs: 1000
+ num_steps: 4032
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: null
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+
+inference:
+ batch_size: 2
+ dim_t: 801
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_dereverb-echo.yaml b/models/Roformer/MelBand/config_melband_roformer_dereverb-echo.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bf766a04152c42bb2f16e6b2929a1024c6d550f5
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_dereverb-echo.yaml
@@ -0,0 +1,76 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 801 # doesn't work (use in model)
+ hop_length: 441 # doesn't work (use in model)
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 256
+ depth: 8
+ stereo: true
+ num_stems: 2
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.1
+ ff_dropout: 0.1
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 8
+ grad_clip: 0
+ instruments:
+ - dry
+ - No dry
+ lr: 4.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: true # enable or disable all augmentations (to quickly disable them if needed)
+ loudness: true # randomly change loudness of each stem within the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+ - 0.2
+ - 0.02
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+
+inference:
+ batch_size: 4
+ dim_t: 801
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_dereverb-echo_sdr_13.4843_v2.yaml b/models/Roformer/MelBand/config_melband_roformer_dereverb-echo_sdr_13.4843_v2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b9ed3ca2589954b800872e65c72b2eff115237ac
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_dereverb-echo_sdr_13.4843_v2.yaml
@@ -0,0 +1,64 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 801
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 256
+ depth: 8
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.1
+ ff_dropout: 0.1
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 8
+ grad_clip: 0
+ instruments:
+ - dry
+ - No dry
+ lr: 1.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: dry
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false
+ use_amp: true
+
+inference:
+ batch_size: 1
+ dim_t: 801
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_dereverb_anvuew.yaml b/models/Roformer/MelBand/config_melband_roformer_dereverb_anvuew.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f4be13c9f337b26a2e11f89aaf24bc873cc23ce9
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_dereverb_anvuew.yaml
@@ -0,0 +1,76 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 801
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 3
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - noreverb
+ - reverb
+ lr: 5.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: noreverb
+ num_epochs: 1000
+ num_steps: 4000
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adamw
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: true # enable or disable all augmentations (to quickly disable them if needed)
+ loudness: true # randomly change loudness of each stem within the range (loudness_min; loudness_max)
+ loudness_min: 0.1
+ loudness_max: 1.0
+ mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+ - 0.2
+ - 0.02
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+
+inference:
+ batch_size: 1
+ dim_t: 801
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_dereverb_echo_v2.yaml b/models/Roformer/MelBand/config_melband_roformer_dereverb_echo_v2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..18213b12b2e4c157bca21bd5c8dc922b634e12da
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_dereverb_echo_v2.yaml
@@ -0,0 +1,64 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 801
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 256
+ depth: 8
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.1
+ ff_dropout: 0.1
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 8
+ grad_clip: 0
+ instruments:
+ - dry
+ - other
+ lr: 1.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: dry
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false
+ use_amp: true
+
+inference:
+ batch_size: 1
+ dim_t: 801
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_ep_3005_sdr_11.4360.yaml b/models/Roformer/MelBand/config_melband_roformer_ep_3005_sdr_11.4360.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7c906f2931cbae3cf64551c231e285ca10097fe5
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_ep_3005_sdr_11.4360.yaml
@@ -0,0 +1,72 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 801 # doesn't work (use in model)
+ hop_length: 441 # doesn't work (use in model)
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+
+model:
+ dim: 384
+ depth: 12
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0.1
+ ff_dropout: 0.1
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 9
+ gradient_accumulation_steps: 8
+ grad_clip: 0
+ instruments:
+ - Vocals
+ - Instrumental
+ lr: 4.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: Vocals
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: simple1
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: true # Mix several stems of the same type with some probability
+ augmentation_loudness: true # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0.5
+ augmentation_loudness_max: 1.5
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 1
+ dim_t: 801
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_guitar_becruily.yaml b/models/Roformer/MelBand/config_melband_roformer_guitar_becruily.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a4ad9336c4a2a0f96f0239cb1bb7e5886d2fe247
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_guitar_becruily.yaml
@@ -0,0 +1,65 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 256
+ depth: 4
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: true
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: false
+ mlp_expansion_factor: 1
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 1
+ grad_clip: 1.0
+ instruments:
+ - Guitar
+ - Other
+ lr: 1.0
+ patience: 1000
+ reduce_factor: 0.95
+ target_instrument: Guitar
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: prodigy
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 2
diff --git a/models/Roformer/MelBand/config_melband_roformer_inst.yaml b/models/Roformer/MelBand/config_melband_roformer_inst.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d6bdca342644a1194427fe505e2044c5006a1213
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_inst.yaml
@@ -0,0 +1,51 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 1101
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ instruments:
+ - other
+ - vocals
+ target_instrument: other
+ use_amp: True
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_inst_gabox.yaml b/models/Roformer/MelBand/config_melband_roformer_inst_gabox.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b1395e978d64cb1c37d3015adc2feeb0805e3b94
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_inst_gabox.yaml
@@ -0,0 +1,51 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 1101
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ instruments:
+ - Instrumental
+ - Vocals
+ target_instrument: Instrumental
+ use_amp: True
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_inst_metal_prev_by_mesk.yaml b/models/Roformer/MelBand/config_melband_roformer_inst_metal_prev_by_mesk.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2abe723d25d1e8f053db2a2eeb2bb0ee59ae401f
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_inst_metal_prev_by_mesk.yaml
@@ -0,0 +1,52 @@
+audio:
+ chunk_size: 881559
+ dim_f: 1024
+ dim_t: 2000
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 8
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ instruments:
+ - vocals
+ - other
+ target_instrument: other
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 1
+ dim_t: 2000
+ num_overlap: 5
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_inst_v2.yaml b/models/Roformer/MelBand/config_melband_roformer_inst_v2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4297c088f7b8bd2f28308d8a8d1e0694cdec967
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_inst_v2.yaml
@@ -0,0 +1,51 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 1101
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 12
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 3
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ instruments:
+ - Instrumental
+ - Vocals
+ target_instrument: Instrumental
+ use_amp: True
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_instrumental_becruily.yaml b/models/Roformer/MelBand/config_melband_roformer_instrumental_becruily.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..862010f34a3765fa1ac9f22c04ba74042b2fd086
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_instrumental_becruily.yaml
@@ -0,0 +1,72 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Instrumental
+ - Vocals
+ lr: 0.0005
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: Instrumental
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: null
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adamw
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_instvoc_duality.yaml b/models/Roformer/MelBand/config_melband_roformer_instvoc_duality.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b93e721853f4d90efa7f0bead82f6a1b791fc19f
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_instvoc_duality.yaml
@@ -0,0 +1,51 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 2
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ instruments:
+ - Vocals
+ - Instrumental
+ target_instrument: null
+ use_amp: True
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_karaoke_aufr33_viperx_sdr_10.1956.yaml b/models/Roformer/MelBand/config_melband_roformer_karaoke_aufr33_viperx_sdr_10.1956.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b88403c926bc5957a54ba90271f0cced47c8366f
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_karaoke_aufr33_viperx_sdr_10.1956.yaml
@@ -0,0 +1,71 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 801
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 4
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Vocals
+ - Instrumental
+ lr: 1.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: Vocals
+ num_epochs: 1000
+ num_steps: 2000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: null
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+inference:
+ batch_size: 1
+ dim_t: 801
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_karaoke_becruily.yaml b/models/Roformer/MelBand/config_melband_roformer_karaoke_becruily.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..58cd1747a53d1695128e732aa7aa6802cb77db70
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_karaoke_becruily.yaml
@@ -0,0 +1,72 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 2
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: true
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: false
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: false
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Vocals
+ - Instrumental
+ lr: 0.0005
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type:
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adamw
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 8
diff --git a/models/Roformer/MelBand/config_melband_roformer_kim_ft_unwa.yaml b/models/Roformer/MelBand/config_melband_roformer_kim_ft_unwa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d0527f99399af7f504ead83ce75e6715cd190e56
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_kim_ft_unwa.yaml
@@ -0,0 +1,72 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 801
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - vocals
+ - other
+ lr: 1.0e-05
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: vocals
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: null
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 1
+ dim_t: 801
+ num_overlap: 8
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_small_by_aname.yaml b/models/Roformer/MelBand/config_melband_roformer_small_by_aname.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9aa577d3470dbab7d2c7aba01883f2d179521a49
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_small_by_aname.yaml
@@ -0,0 +1,52 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 1101
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.0
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+ mlp_expansion_factor: 1
+
+training:
+ instruments:
+ - Instrumental
+ - Vocals
+ target_instrument: null
+ use_amp: true
+
+inference:
+ batch_size: 2
+ dim_t: 1101
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_voc_gabox.yaml b/models/Roformer/MelBand/config_melband_roformer_voc_gabox.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..95f50893eec05b7c612734f4445301629a026f61
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_voc_gabox.yaml
@@ -0,0 +1,51 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ instruments:
+ - Vocals
+ - Instrumental
+ target_instrument: Vocals
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 1
+ chunk_size: 352800
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_vocals_becruily.yaml b/models/Roformer/MelBand/config_melband_roformer_vocals_becruily.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d42333851e31c9e5747d818efec365921358a7c
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_vocals_becruily.yaml
@@ -0,0 +1,72 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - vocals
+ - other
+ lr: 0.0005
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: vocals
+ num_epochs: 1000
+ num_steps: 1000
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: null
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: false # Mix several stems of the same type with some probability
+ augmentation_loudness: false # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adamw
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 1
+ dim_t: 1101
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_vocals_big_v1_ft.yaml b/models/Roformer/MelBand/config_melband_roformer_vocals_big_v1_ft.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..66f782d66d31113f304fe07aa1ce663952030676
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_vocals_big_v1_ft.yaml
@@ -0,0 +1,51 @@
+audio:
+ chunk_size: 485100
+ dim_f: 1024
+ dim_t: 801
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 3
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ instruments:
+ - vocals
+ - other
+ target_instrument: vocals
+ use_amp: True
+
+inference:
+ batch_size: 1
+ dim_t: 801
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_vocals_ft.yaml b/models/Roformer/MelBand/config_melband_roformer_vocals_ft.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c53d3e0912cd12d69ddd923dbb760a6c2b4d5000
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_vocals_ft.yaml
@@ -0,0 +1,72 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 8
+ grad_clip: 0
+ instruments:
+ - vocals
+ - other
+ lr: 1.0e-04
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: vocals
+ num_epochs: 1000
+ num_steps: 100
+ augmentation: true # enable augmentations by audiomentations and pedalboard
+ augmentation_type: null
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: true # Mix several stems of the same type with some probability
+ augmentation_loudness: true # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0
+ augmentation_loudness_max: 0
+ q: 0.95
+ coarse_loss_clip: false
+ ema_momentum: 0.999
+ optimizer: adamw8bit
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+ batch_size: 4
+ dim_t: 256
+ num_overlap: 2
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_vocals_fullness_aname.yaml b/models/Roformer/MelBand/config_melband_roformer_vocals_fullness_aname.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..32fee9c05a82b72931cbb9a8af7d948a538532cb
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_vocals_fullness_aname.yaml
@@ -0,0 +1,54 @@
+audio:
+ chunk_size: 661500
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - vocals
+ - other
+ target_instrument: vocals
+ use_amp: true
+
+inference:
+ batch_size: 4
+ dim_t: 1101
+ num_overlap: 4
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_vocals_kim.yaml b/models/Roformer/MelBand/config_melband_roformer_vocals_kim.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..99b1ced7032cf791eb065f1859de2e07c89a3eae
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_vocals_kim.yaml
@@ -0,0 +1,50 @@
+audio:
+ chunk_size: 352800
+ dim_f: 1024
+ dim_t: 256
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+
+model:
+ dim: 384
+ depth: 6
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 8
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 2
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ instruments:
+ - vocals
+ - other
+ target_instrument: vocals
+
+inference:
+ dim_t: 1101
+ num_overlap: 1
+ chunk_size: 352800
\ No newline at end of file
diff --git a/models/Roformer/MelBand/config_melband_roformer_vocals_test_by_aname.yaml b/models/Roformer/MelBand/config_melband_roformer_vocals_test_by_aname.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f83647f71017b0c290f625a7233a66ec300c3aef
--- /dev/null
+++ b/models/Roformer/MelBand/config_melband_roformer_vocals_test_by_aname.yaml
@@ -0,0 +1,208 @@
+audio:
+ chunk_size: 661500
+ dim_f: 1024
+ dim_t: 1101
+ hop_length: 441
+ n_fft: 2048
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.001
+
+model:
+ dim: 512
+ depth: 8
+ stereo: true
+ num_stems: 1
+ time_transformer_depth: 1
+ freq_transformer_depth: 1
+ num_bands: 60
+ dim_head: 64
+ heads: 12
+ attn_dropout: 0
+ ff_dropout: 0
+ flash_attn: True
+ dim_freqs_in: 1025
+ sample_rate: 44100 # needed for mel filter bank from librosa
+ stft_n_fft: 2048
+ stft_hop_length: 441
+ stft_win_length: 2048
+ stft_normalized: False
+ mask_estimator_depth: 3
+ multi_stft_resolution_loss_weight: 1.0
+ multi_stft_resolutions_window_sizes: !!python/tuple
+ - 4096
+ - 2048
+ - 1024
+ - 512
+ - 256
+ multi_stft_hop_size: 147
+ multi_stft_normalized: False
+
+training:
+ batch_size: 1
+ gradient_accumulation_steps: 2
+ grad_clip: 0
+ instruments:
+ - vocals
+ - instruments
+ lr: 1.0
+ patience: 2
+ reduce_factor: 0.9999999
+ target_instrument: vocals
+ num_epochs: 100000
+ num_steps: 10
+ augmentation: false # enable augmentations by audiomentations and pedalboard
+ augmentation_type: true
+ use_mp3_compress: false # Deprecated
+ augmentation_mix: true # Mix several stems of the same type with some probability
+ augmentation_loudness: true # randomly change loudness of each stem
+ augmentation_loudness_type: 1 # Type 1 or 2
+ augmentation_loudness_min: 0.25
+ augmentation_loudness_max: 1
+ q: 0.99
+ coarse_loss_clip: false
+ ema_momentum: 0.9995
+ optimizer: prodigy
+ other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+ enable: true # enable or disable all augmentations (to quickly disable them if needed)
+ loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
+ loudness_min: 0.01
+ loudness_max: 1.5
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 1, 2nd with prob 0.1)
+ - 1
+ - 0.1
+ mixup_loudness_min: 0.25
+ mixup_loudness_max: 1
+
+ # apply mp3 compression to mixture only (emulate downloading mp3 from internet)
+ mp3_compression_on_mixture: 0
+ mp3_compression_on_mixture_bitrate_min: 32
+ mp3_compression_on_mixture_bitrate_max: 320
+ mp3_compression_on_mixture_backend: "lameenc"
+
+ all:
+ channel_shuffle: 0.5 # Set 0 or lower to disable
+ random_inverse: 0.1 # inverse track (better lower probability)
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
+ mp3_compression: 0.01
+ mp3_compression_min_bitrate: 32
+ mp3_compression_max_bitrate: 320
+ mp3_compression_backend: "lameenc"
+
+ # pedalboard reverb block
+ pedalboard_reverb: 0.7
+ pedalboard_reverb_room_size_min: 0.4
+ pedalboard_reverb_room_size_max: 0.9
+ pedalboard_reverb_damping_min: 0.3
+ pedalboard_reverb_damping_max: 0.9
+ pedalboard_reverb_wet_level_min: 0.4
+ pedalboard_reverb_wet_level_max: 0.9
+ pedalboard_reverb_dry_level_min: 0.1
+ pedalboard_reverb_dry_level_max: 0.9
+ pedalboard_reverb_width_min: 0.9
+ pedalboard_reverb_width_max: 1.0
+
+ # pedalboard chorus block
+ pedalboard_chorus: 0.3
+ pedalboard_chorus_rate_hz_min: 1.0
+ pedalboard_chorus_rate_hz_max: 7.0
+ pedalboard_chorus_depth_min: 0.25
+ pedalboard_chorus_depth_max: 0.95
+ pedalboard_chorus_centre_delay_ms_min: 3
+ pedalboard_chorus_centre_delay_ms_max: 10
+ pedalboard_chorus_feedback_min: 0.0
+ pedalboard_chorus_feedback_max: 0.5
+ pedalboard_chorus_mix_min: 0.1
+ pedalboard_chorus_mix_max: 0.9
+
+ # pedalboard phazer block
+ pedalboard_phazer: 0.2
+ pedalboard_phazer_rate_hz_min: 1.0
+ pedalboard_phazer_rate_hz_max: 10.0
+ pedalboard_phazer_depth_min: 0.25
+ pedalboard_phazer_depth_max: 0.95
+ pedalboard_phazer_centre_frequency_hz_min: 200
+ pedalboard_phazer_centre_frequency_hz_max: 12000
+ pedalboard_phazer_feedback_min: 0.0
+ pedalboard_phazer_feedback_max: 0.5
+ pedalboard_phazer_mix_min: 0.1
+ pedalboard_phazer_mix_max: 0.9
+
+ # pedalboard distortion block
+ pedalboard_distortion: 0.2
+ pedalboard_distortion_drive_db_min: 1.0
+ pedalboard_distortion_drive_db_max: 12
+
+ # pedalboard pitch shift block
+ pedalboard_pitch_shift: 0
+ pedalboard_pitch_shift_semitones_min: -7
+ pedalboard_pitch_shift_semitones_max: 7
+
+ # pedalboard resample block
+ pedalboard_resample: 0.3
+ pedalboard_resample_target_sample_rate_min: 8000
+ pedalboard_resample_target_sample_rate_max: 44100
+
+ # pedalboard bitcrash block
+ pedalboard_bitcrash: 0.1
+ pedalboard_bitcrash_bit_depth_min: 8
+ pedalboard_bitcrash_bit_depth_max: 16
+
+ # pedalboard mp3 compressor block
+ pedalboard_mp3_compressor: 0
+ pedalboard_mp3_compressor_pedalboard_mp3_compressor_min: 0
+ pedalboard_mp3_compressor_pedalboard_mp3_compressor_max: 9.999
+
+ vocals:
+ pitch_shift: 0.7
+ pitch_shift_min_semitones: -6
+ pitch_shift_max_semitones: 6
+ seven_band_parametric_eq: 0.45
+ seven_band_parametric_eq_min_gain_db: -80
+ seven_band_parametric_eq_max_gain_db: 3
+ tanh_distortion: 0.3
+ tanh_distortion_min: 0.01
+ tanh_distortion_max: 0.7
+ other:
+ pitch_shift: 0.5
+ pitch_shift_min_semitones: -12
+ pitch_shift_max_semitones: 12
+ gaussian_noise: 0.5
+ gaussian_noise_min_amplitude: 0.01
+ gaussian_noise_max_amplitude: 0.5
+ time_stretch: 0.2
+ time_stretch_min_rate: 0.125
+ time_stretch_max_rate: 1.0
+
+loss_multistft:
+ fft_sizes:
+ - 1024
+ - 2048
+ - 4096
+ hop_sizes:
+ - 512
+ - 1024
+ - 2048
+ win_lengths:
+ - 1024
+ - 2048
+ - 4096
+ window: "hann_window"
+ scale: "mel"
+ n_bins: 128
+ sample_rate: 44100
+ perceptual_weighting: true
+ w_sc: 3.0
+ w_log_mag: 3.0
+ w_lin_mag: 2.0
+ w_phs: 3.0
+ mag_distance: "L1"
+
+inference:
+ batch_size: 4
+ dim_t: 1101
+ num_overlap: 4
diff --git a/models/Roformer/MelBand/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt b/models/Roformer/MelBand/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..3e232e1d5ebb6e2c74cf9803ca9fb4a3fb860d17
--- /dev/null
+++ b/models/Roformer/MelBand/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a25e3b233722cd81e2de7b8e798a3fef29d4b9799ccacda60b0dc958a1e2a5bb
+size 913097300
diff --git a/models/Roformer/MelBand/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt b/models/Roformer/MelBand/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..8d1b634e263572a5a955577112e77036279c6287
--- /dev/null
+++ b/models/Roformer/MelBand/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c1c39191edc34e942ca7f2346ce6b6c0e1208a5f76349ffce6f696bd12910de
+size 913097300
diff --git a/models/Roformer/MelBand/dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt b/models/Roformer/MelBand/dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..b2ec32944c1f4f9c802a1ad02d38682929c77a58
--- /dev/null
+++ b/models/Roformer/MelBand/dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd2b737a394cfb80cd48cc9fcbaf89f5f4062f6b93066c2911617a06d8b7860a
+size 835997896
diff --git a/models/Roformer/MelBand/dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt b/models/Roformer/MelBand/dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..5baa2b3d302d0ab984148ff1f42793220b29c7ca
--- /dev/null
+++ b/models/Roformer/MelBand/dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:396432f5af25992fe82d0286634bd879027c073721db6ab10199e75459708b9f
+size 455862568
diff --git a/models/Roformer/MelBand/dereverb_big_mbr_ep_362.ckpt b/models/Roformer/MelBand/dereverb_big_mbr_ep_362.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..814f35f8718aab66733f43e60a034fa933dd2f7f
--- /dev/null
+++ b/models/Roformer/MelBand/dereverb_big_mbr_ep_362.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0506455e74ffc02bbec700df9863ae243597034003815f1418227c6dee33b6ea
+size 455864012
diff --git a/models/Roformer/MelBand/dereverb_echo_mbr_fused.ckpt b/models/Roformer/MelBand/dereverb_echo_mbr_fused.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..2d495600912d8c596c4f37c1a902c9b918e934fd
--- /dev/null
+++ b/models/Roformer/MelBand/dereverb_echo_mbr_fused.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1596b1063238f487d54a0510a8c92cb28c000c803a271dd618ac49efc99ef3f7
+size 455776577
diff --git a/models/Roformer/MelBand/dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt b/models/Roformer/MelBand/dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..8f21995361b651a6c446cb4e1e7664d94eb42bff
--- /dev/null
+++ b/models/Roformer/MelBand/dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9262877b87e9ebb0fb808a456b0a411fa677f5df31c8383c1254af531c078970
+size 913107578
diff --git a/models/Roformer/MelBand/dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt b/models/Roformer/MelBand/dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..45d5062aae81caa8df3565e7d10c818f076cc588
--- /dev/null
+++ b/models/Roformer/MelBand/dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0db8f1b41c00cead1112e967262a12802fd32e76c0c3a8eb207e772bae25d07b
+size 913107578
diff --git a/models/Roformer/MelBand/dereverb_mel_band_roformer_mono_anvuew_sdr_20.4029.ckpt b/models/Roformer/MelBand/dereverb_mel_band_roformer_mono_anvuew_sdr_20.4029.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..06855114df2802baa49c15faf4b39a743b84e27b
--- /dev/null
+++ b/models/Roformer/MelBand/dereverb_mel_band_roformer_mono_anvuew_sdr_20.4029.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f099ee717eb57fb0ad5eb0e7c9ad6787c36168140b61ce2b158b90c2c4ecee79
+size 913097978
diff --git a/models/Roformer/MelBand/dereverb_super_big_mbr_ep_346.ckpt b/models/Roformer/MelBand/dereverb_super_big_mbr_ep_346.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..bc9d947ff83f741caa2504b673eed1983e6ac008
--- /dev/null
+++ b/models/Roformer/MelBand/dereverb_super_big_mbr_ep_346.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26dda242bce4405555f2d6086d079fe8cc23f1f04e02e501d2689bfe3ece0489
+size 455864012
diff --git a/models/Roformer/MelBand/mel_band_roformer_bleed_suppressor_v1.ckpt b/models/Roformer/MelBand/mel_band_roformer_bleed_suppressor_v1.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..4bf56278572c098907e036b63e094bd7c63c1a20
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_bleed_suppressor_v1.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9a9d10faa7f8997676a78e66d741d7acb9cc449334763f3c8f626d68ec6e575
+size 913102724
diff --git a/models/Roformer/MelBand/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt b/models/Roformer/MelBand/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..8d59dd9875ffadf10ca18f66aa3d20a0e289ce90
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca8799531fe51c94172cc047226209ed48bf7d8c02e04671795a15d2a1c318af
+size 913096801
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_bleedless_v1_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_bleedless_v1_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..881d22e5fc7fde06ca2c5de68277c4ab8bbceee7
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_bleedless_v1_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6109687febb8f18cd5a45207fee35f18ba8b9467b18a4b2e982a3b7dc04a9d72
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_bleedless_v2_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_bleedless_v2_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..65745b5be537052b903ae5f13f4e26f51912f6db
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_bleedless_v2_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de972fb724601beef237abe94c8b934c73218e9baf3e344ab4c2333276e5cfe7
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_denoise_debleed_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_denoise_debleed_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..8b595aaf271c7d97cbca025436c370b5954e7786
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_denoise_debleed_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91aa7a546ed2e93482e4629c982d35b0d258bb3de6eeab497fd91658cc86c7fd
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v1_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v1_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..e73ab6f21e270a489e7310d9fe81a150cdfceb13
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v1_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31140eccf271d2a9e8a538b092b1f70dfd6471aa5ad163b22bccc758b9f38b62
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v2_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v2_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..6d849f3079743541477aeb8748cd2dac3f05e0d0
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v2_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c446c34551333dd3d45b8d0708658a10f28c5e289f8ec27b5f0e22803681bef3
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v3_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v3_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..c90caf360ea211abf7467c96c8bde8bf656fb521
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v3_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbb229209a8942d34664e19d2f4862e357ea3108a4e8c04b69aa0aba523a4481
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v4_noise_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v4_noise_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..5a49522bec839a399c6545d2f812bae2ff5f32a1
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v4_noise_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f50296e913b9af3b5b3b961e92877ef0d4a74f9a433e796e89960c4c2b1abe53
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v5_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v5_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..1a192beafebebb2f4785e076cb13aba8a074b8ce
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v5_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38e935cf1e97afcc1de84d0bdb87dd8090bad530fa0df28e707d16448e1d38e2
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v5_noise_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v5_noise_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..5cea44b8d02d4a8379a8fef7d0dc7e3a920b6294
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v5_noise_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:175203923fac3e52ae00e7e37d41e8a7fef5020b6ee4e4144f4786daabc54b34
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v6_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v6_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..9e4e1ed19b5579f8914a2229b05a859b8cb0bf34
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v6_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:677951b8556a27abe32e39705640638826e78101fa901a51ad73d20522be6d25
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v6_noise_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v6_noise_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..edf54dd70e2304cc057d3055a46c02fbb79ef937
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v6_noise_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:802f3e5d183d7c4b50dea147c320e61634f5be6ff55fa899fdebeaf0f3cf7f42
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v7_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v7_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..5f5aa9428c851e83a36c0432a3baf218aa6ca261
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v7_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e725a860176acb475d983a1ddd9c1a99a619c69cc9ceda808dd294d10db746a5
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v7_noise_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v7_noise_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..0bb8eb969ef04c187caa14bc813e8c690d396036
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v7_noise_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0ca36af5d1314be46b56c8a53b6be02f98511fa5d7e3e196fd895755e65be3c
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v8_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v8_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..b017d3834de1d9d48cafa54dc7807d19c2ea0880
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v8_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50bfa127d21f419e0da89730867d28c7ac4484c9473e6f313d036bc8b429df80
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_vX_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_vX_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..93fa162e3077a085662452f147e5fbe0bacd108a
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_vX_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:545ef13b0cdbac505818a38db98e09c54e7c03ea17b4e0c895a531bfa352fa59
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_v1_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_v1_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..6ecff8779cfc8fa3961db8589050b44a43131203
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_v1_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b3ad6bd8bed3aaaa4d9320ea2ca910d140196a2302186db1754f3a8d8e16fb1
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_v2_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_v2_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..7758aa5f0d19e21b8cb82550a8eddb7a7ab10e23
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_v2_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e03ca459c339f88b7521c367c897d0c3f5362b38a6cdb96cb28e625ca0f9931e
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_v3_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_v3_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..1596ed4c464e4346d614bc48bb64a996975bc445
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_inst_v3_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9ec9f299cf617bf6afe1c382f4b0761cd9bee78323da94889951812328e10fb
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_instrumental_becruily.ckpt b/models/Roformer/MelBand/mel_band_roformer_instrumental_becruily.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..9aa6c9ab355e9dfc090251f9703e299b9da7893d
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_instrumental_becruily.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8da6632a1c25efb1c9be783ce9ea367d226d4b918cd6c3717c8b1d7a396041d
+size 913106900
diff --git a/models/Roformer/MelBand/mel_band_roformer_instrumental_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_instrumental_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..6ecff8779cfc8fa3961db8589050b44a43131203
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_instrumental_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b3ad6bd8bed3aaaa4d9320ea2ca910d140196a2302186db1754f3a8d8e16fb1
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt b/models/Roformer/MelBand/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..977a5c60ab478ab0b78ac5bc7e5296f9185f5549
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1de20d459332fe8869aeb01327a31df0032262706e1365114e852dc271779813
+size 913096801
diff --git a/models/Roformer/MelBand/mel_band_roformer_karaoke_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_karaoke_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..69688626b9c8b8655e6eb549e7909f64ae862819
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_karaoke_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:303fc631e7aa587e9dc1e6ac4bb3667c6ba53aacb6b6a90abcfcf57935b92bd8
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_kim_ft2_bleedless_unwa.ckpt b/models/Roformer/MelBand/mel_band_roformer_kim_ft2_bleedless_unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..eb05d0fc17641b53deabf68c1940dd237695026c
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_kim_ft2_bleedless_unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c450bd66a98b49dd03231fc5ebb84121eef8418236b179423c2b171d62b04d9
+size 913101368
diff --git a/models/Roformer/MelBand/mel_band_roformer_kim_ft2_unwa.ckpt b/models/Roformer/MelBand/mel_band_roformer_kim_ft2_unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..64a9c082838de39b001e92ac622efe3de0c810da
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_kim_ft2_unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ed7b9e4c2eebbec7a7e5e8113058f7b68ba5e6048db8eaccfbbeb884c7884c0
+size 913100690
diff --git a/models/Roformer/MelBand/mel_band_roformer_kim_ft_unwa.ckpt b/models/Roformer/MelBand/mel_band_roformer_kim_ft_unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..2912039564c3940b1fb51563a795e3f241841226
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_kim_ft_unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6bd8d333880191254a6ef6be3cb0ffa4dda9d3282e36b0cce2e88a660e00d39
+size 913100690
diff --git a/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v1_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v1_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..5b6fdc61e56b41297707de241f7c353a916c0494
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v1_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4dff354d81152d1b4321f6491f242c060919148239fbfe22a1015513de4a7fe
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v2_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v2_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..dccb5cbb2e7b8aa143e8cd8b803144646998abd5
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v2_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2888813aa5b519941fa8548efc5a4331d63c61909007eb17fe95c367be230196
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v3_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v3_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..f5b0031173c6b54221895b9d66e9553037777cd5
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v3_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49d81446b34a7848446efde7898b25bdc32fe872c2393617acb5356649f7ea93
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v4_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v4_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..144ccb6ac58988f75e37cb1e16dd29d7ae754760
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v4_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ede0504ddc55cb44b966a8212dac75a364f8157974cc40c8e92b9f5d4f17ce2
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v5_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v5_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..144ccb6ac58988f75e37cb1e16dd29d7ae754760
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v5_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ede0504ddc55cb44b966a8212dac75a364f8157974cc40c8e92b9f5d4f17ce2
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v6_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v6_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..09ba0100d657171cb6f17292df03f215102f5bef
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v6_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25033d944288cb032fc51faab044bbd7f90bb81e82cada637ecdec699c2ff773
+size 913031195
diff --git a/models/Roformer/MelBand/mel_band_roformer_voc_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_voc_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..709869d299af3267439c266eede088a1c19b0745
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_voc_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff802a67501fac70587c3ff4e8dbc89c2558e7d8911c92222dfea2aaac208517
+size 913026650
diff --git a/models/Roformer/MelBand/mel_band_roformer_vocals_becruily.ckpt b/models/Roformer/MelBand/mel_band_roformer_vocals_becruily.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..fda579c578c11b43fc8267f25fdbe0552ec36c1b
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_vocals_becruily.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a05961310cc55fbb901290c2e8be02682942f73522b6ac76bf2ec11e347ed95a
+size 913107578
diff --git a/models/Roformer/MelBand/mel_band_roformer_vocals_fullness_aname.ckpt b/models/Roformer/MelBand/mel_band_roformer_vocals_fullness_aname.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..380b1baf015018a1c4ae18ef1ade63a6ab76e441
--- /dev/null
+++ b/models/Roformer/MelBand/mel_band_roformer_vocals_fullness_aname.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a64a27a672b457de23d9decd1fc7b58b0664a9f4f24bb43af154708e2ef07d2f
+size 913090472
diff --git a/models/Roformer/MelBand/melband_roformer_big_beta4.ckpt b/models/Roformer/MelBand/melband_roformer_big_beta4.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..62cb0ed8e023394924b1108a5335daa8b91d78d4
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_big_beta4.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:700a9bd3831d4f7f44cc0019b238774e31045bcbc361fbb69235535c40fc1454
+size 1574477088
diff --git a/models/Roformer/MelBand/melband_roformer_big_beta5e.ckpt b/models/Roformer/MelBand/melband_roformer_big_beta5e.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..a59473cd797f3c5fc6d6eb420d324145dc7c1104
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_big_beta5e.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32b876e1163716a9a007438b5a5107069586aa9b9ca653a5f63013b1edf6920c
+size 1479749810
diff --git a/models/Roformer/MelBand/melband_roformer_big_beta6.ckpt b/models/Roformer/MelBand/melband_roformer_big_beta6.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..3e62cb6881f880ecadb2131661a7bffc1c651889
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_big_beta6.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f51cbb94b4ed5c36cb36fd2024236a8af3ed6886567981702ad6f094b2c6c820
+size 1557078584
diff --git a/models/Roformer/MelBand/melband_roformer_big_beta6x.ckpt b/models/Roformer/MelBand/melband_roformer_big_beta6x.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..8a45e7206470ce16a36e6c5c60726331d16221db
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_big_beta6x.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e16d702f4e20f13d60b293541c1dea75cb4414a5846b36780e28ef70352a4e5c
+size 1708527586
diff --git a/models/Roformer/MelBand/melband_roformer_guitar_becruily.ckpt b/models/Roformer/MelBand/melband_roformer_guitar_becruily.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..9d3ea9afcc2d0451a33fa9fcb920ee53d17b0343
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_guitar_becruily.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83472bbf125774af5282d2e0b86df89eaf2dd45e8a4ec8d68e820ebf3e42a83c
+size 45142183
diff --git a/models/Roformer/MelBand/melband_roformer_inst_metal_prev_by_mesk.ckpt b/models/Roformer/MelBand/melband_roformer_inst_metal_prev_by_mesk.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..b18c4240a94738ac82c3c48628b592ff0c06682b
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_inst_metal_prev_by_mesk.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46f3f92abdd76ebe192f7c9e6167bf4d75fcffd21628f7844e7e48da42bcd7eb
+size 944683368
diff --git a/models/Roformer/MelBand/melband_roformer_inst_v1.ckpt b/models/Roformer/MelBand/melband_roformer_inst_v1.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..0a5775dcf471eae22321cf27576da3155bd773e9
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_inst_v1.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f88d96958b2b7dec32286b0ced00bbcbd37e28741cad9038758b1eaf9b5c057
+size 913100690
diff --git a/models/Roformer/MelBand/melband_roformer_inst_v1e.ckpt b/models/Roformer/MelBand/melband_roformer_inst_v1e.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..f8370873342b453fb65d96b2fe069cbc0f8130e9
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_inst_v1e.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df2bcdb8838b88264f5381dbb0ccd84a9926c9775cf548c34d8846f5cd20fe96
+size 913102724
diff --git a/models/Roformer/MelBand/melband_roformer_inst_v1e_plus.ckpt b/models/Roformer/MelBand/melband_roformer_inst_v1e_plus.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..d96e91089fa7a07f5c4052f55449fd6ed4047ce2
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_inst_v1e_plus.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a4ddba739f0352407fb6e18b29206b82318ec427fe37fcedb0f83241e4e15fb
+size 913090472
diff --git a/models/Roformer/MelBand/melband_roformer_inst_v2.ckpt b/models/Roformer/MelBand/melband_roformer_inst_v2.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..e59c3365c4976b3c225f0cf9ab34342531dc6955
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_inst_v2.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd19766620f7d6f58fdf7aaada7e89907fe41bc64490ce3faa9a6dab15d6e1f2
+size 1574477088
diff --git a/models/Roformer/MelBand/melband_roformer_instvoc_duality_v1.ckpt b/models/Roformer/MelBand/melband_roformer_instvoc_duality_v1.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..2b99b0c8e53949042dce6f1db2300642212b31f7
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_instvoc_duality_v1.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4640a59d454bf9f69d67460592ab71e7cdce3afa0c0a6f0cf4500bb4ac0b8381
+size 1719116358
diff --git a/models/Roformer/MelBand/melband_roformer_instvoc_duality_v2.ckpt b/models/Roformer/MelBand/melband_roformer_instvoc_duality_v2.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..e4042424f484dbb10d5f76f807c04f9b7f307e23
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_instvoc_duality_v2.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4a69558708f2857e36ac86a0e03ed95c4e3d8b9c5b8113963987d0d7df7e20f
+size 1719116358
diff --git a/models/Roformer/MelBand/melband_roformer_karaoke_becruily.ckpt b/models/Roformer/MelBand/melband_roformer_karaoke_becruily.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..ceaad63144a44e85b64617f842686673070bf5dc
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_karaoke_becruily.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3aa262ac01df870b9fc033e9c7b6cad33fe04fc9c148b6c40841326a515a0e0
+size 1719139254
diff --git a/models/Roformer/MelBand/melband_roformer_kim_vocals_fullness_v1_by_aname.ckpt b/models/Roformer/MelBand/melband_roformer_kim_vocals_fullness_v1_by_aname.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..f22a3e48fc797be7bd2ff6c480eb99e460515322
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_kim_vocals_fullness_v1_by_aname.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d0662af510f63fe48bdc91035951c1a7ae3b41ac9ae92cf7ec88fe9d6a6cb6d
+size 913090472
diff --git a/models/Roformer/MelBand/melband_roformer_kim_vocals_fullness_v2_by_aname.ckpt b/models/Roformer/MelBand/melband_roformer_kim_vocals_fullness_v2_by_aname.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..3d6a69e88c5297c4977d8c139225c8666c47ed72
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_kim_vocals_fullness_v2_by_aname.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc4c02112d53fea925bfe362918a9cbea10b3c0893aa40506aa82874ad03138c
+size 913090472
diff --git a/models/Roformer/MelBand/melband_roformer_kim_vocals_v1_by_aname.ckpt b/models/Roformer/MelBand/melband_roformer_kim_vocals_v1_by_aname.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..c5b1f32ed2a23defa050c8301a0c928e136b576b
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_kim_vocals_v1_by_aname.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:696825ec20f6bc48add7443000def04bb1736c1098784be7cdf0756a140e9621
+size 913106158
diff --git a/models/Roformer/MelBand/melband_roformer_kim_vocals_v2_by_aname.ckpt b/models/Roformer/MelBand/melband_roformer_kim_vocals_v2_by_aname.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..84f9d8e5a29e75f2ffc02392d22fba229a56f927
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_kim_vocals_v2_by_aname.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58b1a07034e74e169e1a332c15fe63fb8da117fbeac580dbab06ad99ddb27702
+size 913090472
diff --git a/models/Roformer/MelBand/melband_roformer_kim_vocals_v3_by_aname.ckpt b/models/Roformer/MelBand/melband_roformer_kim_vocals_v3_by_aname.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..076d6f84387932bdb741a916f23becd4c71c1217
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_kim_vocals_v3_by_aname.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acf6708e4e9ead350abdeedd9f3385c5e7c08281eba24bd59c8a5fe63a446082
+size 913106158
diff --git a/models/Roformer/MelBand/melband_roformer_small_by_aname.ckpt b/models/Roformer/MelBand/melband_roformer_small_by_aname.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..5db369eedfade9297c978a4242c17b1e2769a572
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_small_by_aname.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20cc592a9dff7dc34a3fb0bf399cd68c950b03ff5f334725e74feb3d0186272f
+size 202573672
diff --git a/models/Roformer/MelBand/melband_roformer_vocals_bleedness_by_aname.ckpt b/models/Roformer/MelBand/melband_roformer_vocals_bleedness_by_aname.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..b9b07d24bf6b80f2b4b979ae3442cc2b45ecfd6b
--- /dev/null
+++ b/models/Roformer/MelBand/melband_roformer_vocals_bleedness_by_aname.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:614bcc380bafe7a39ad1c451103dbe6a487886102c6b34c0561b5d8a4cec1286
+size 1708534246
diff --git a/models/Roformer/MelBand/model_MelBand-Roformer_BVE_by-Gonza.ckpt b/models/Roformer/MelBand/model_MelBand-Roformer_BVE_by-Gonza.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..a2eca39d03b4260a9c50b809e987471e4c614b07
--- /dev/null
+++ b/models/Roformer/MelBand/model_MelBand-Roformer_BVE_by-Gonza.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e003bbb97ebef78c59cd6b46c5fc3d0f2303cd49f6fb98eb8b8f0f8075899ae3
+size 913090472
diff --git a/models/Roformer/MelBand/model_MelBand-Roformer_Duality_v1_by-Aname.ckpt b/models/Roformer/MelBand/model_MelBand-Roformer_Duality_v1_by-Aname.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..a200aeb7a350ff42686f2933c050c2c5264a521d
--- /dev/null
+++ b/models/Roformer/MelBand/model_MelBand-Roformer_Duality_v1_by-Aname.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69c244e76d7142a948f0d41c67e293112ba84f12a28c89d24b22a3fecd9ef79e
+size 913043907
diff --git a/models/Roformer/MelBand/model_MelBand-Roformer_Karaoke_Fusion_Aggressive_by-Gonza.ckpt b/models/Roformer/MelBand/model_MelBand-Roformer_Karaoke_Fusion_Aggressive_by-Gonza.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..ae98012b2e732161a906481dea349c22b4e1cf2a
--- /dev/null
+++ b/models/Roformer/MelBand/model_MelBand-Roformer_Karaoke_Fusion_Aggressive_by-Gonza.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a769b82198727e243ed95cecbfb14bec6e3ddf16ba7b5dc9ab3bc130bb2e88b8
+size 913091027
diff --git a/models/Roformer/MelBand/model_MelBand-Roformer_Karaoke_Fusion_Aggressive_v2_by-Gonza.ckpt b/models/Roformer/MelBand/model_MelBand-Roformer_Karaoke_Fusion_Aggressive_v2_by-Gonza.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..2d994959c31f35ce27efbc428943a37c3a4f0474
--- /dev/null
+++ b/models/Roformer/MelBand/model_MelBand-Roformer_Karaoke_Fusion_Aggressive_v2_by-Gonza.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da6ede321e6036d540df17a1862c17d11fabe1364e74c06c9bb8d319bd359767
+size 913091027
diff --git a/models/Roformer/MelBand/model_MelBand-Roformer_Karaoke_Fusion_Standard_by-Gonza.ckpt b/models/Roformer/MelBand/model_MelBand-Roformer_Karaoke_Fusion_Standard_by-Gonza.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..b14cfcc380d1e954bbf85a9130d6c591a5956764
--- /dev/null
+++ b/models/Roformer/MelBand/model_MelBand-Roformer_Karaoke_Fusion_Standard_by-Gonza.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b22f8715c73e5de48dd57230b46b6c08ddf90e45e6bd086e793330e12f3f2d0
+size 913091027
diff --git a/models/Roformer/MelBand/model_MelBand-Roformer_Karaoke_Fusion_Total_by-Gonza.ckpt b/models/Roformer/MelBand/model_MelBand-Roformer_Karaoke_Fusion_Total_by-Gonza.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..cb0a228286f2ba4eb5b1de23eb81c55c1009cb0d
--- /dev/null
+++ b/models/Roformer/MelBand/model_MelBand-Roformer_Karaoke_Fusion_Total_by-Gonza.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f50f2a8ffd488bfca2ba06cc532b2b6e8da22cc4552aece851e50ccebb6bb4d
+size 913090472
diff --git a/models/Roformer/MelBand/model_MelBand-Roformer_Karaoke_by-Gabox.ckpt b/models/Roformer/MelBand/model_MelBand-Roformer_Karaoke_by-Gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..389856329d46bc6df2170095dad4bb1141b59518
--- /dev/null
+++ b/models/Roformer/MelBand/model_MelBand-Roformer_Karaoke_by-Gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:296fd8c3b3dc9d8f7d7301405c001829bfafcb86d254af2e2e9095689da242ea
+size 913090472
diff --git a/models/Roformer/MelBand/model_MelBand-Roformer_Vocals_Balansed_v2_by-Gabox.ckpt b/models/Roformer/MelBand/model_MelBand-Roformer_Vocals_Balansed_v2_by-Gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..e3589e9ee8a2eba303868270993bbe81cc684625
--- /dev/null
+++ b/models/Roformer/MelBand/model_MelBand-Roformer_Vocals_Balansed_v2_by-Gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fac81dbebc0992503df55110d64d86c4fb74a1529527a819a253f3d20ef72bc1
+size 913031195
diff --git a/models/Roformer/MelBand/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt b/models/Roformer/MelBand/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..1cb36d54103c4fdd30f7e8a089164cba50cfb0b0
--- /dev/null
+++ b/models/Roformer/MelBand/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21b9d0958e35b8ebfbe2afe69bbd5444e5ffe2f5d80ae0d583b833d2f3c0d139
+size 1007816988
diff --git a/models/Roformer/MelBand/vocals_mel_band_roformer.ckpt b/models/Roformer/MelBand/vocals_mel_band_roformer.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..e9269937826d8cedf1855096bc9c1d49298bb4f8
--- /dev/null
+++ b/models/Roformer/MelBand/vocals_mel_band_roformer.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87201f4d31afb5bc79993230fc49446918425574db48c01c405e44f365c7559e
+size 913106900
diff --git a/models/SCnet/SCNet-large_starrytong_fixed.ckpt b/models/SCnet/SCNet-large_starrytong_fixed.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..ddaa50ee05e3ca837df5ec3d540f34443aa66af7
--- /dev/null
+++ b/models/SCnet/SCNet-large_starrytong_fixed.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65900dfa07d6b6e5d784c0f143920200a4bd281d6e78a806c549d0b912d5885e
+size 168852258
diff --git a/models/SCnet/config_musdb18_scnet.yaml b/models/SCnet/config_musdb18_scnet.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b74fc2f6f31deafc819aa680e5075f8e134d9793
--- /dev/null
+++ b/models/SCnet/config_musdb18_scnet.yaml
@@ -0,0 +1,83 @@
+audio:
+ chunk_size: 485100 # 44100 * 11
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ sources:
+ - drums
+ - bass
+ - other
+ - vocals
+ audio_channels: 2
+ dims:
+ - 4
+ - 32
+ - 64
+ - 128
+ nfft: 4096
+ hop_size: 1024
+ win_size: 4096
+ normalized: True
+ band_SR:
+ - 0.175
+ - 0.392
+ - 0.433
+ band_stride:
+ - 1
+ - 4
+ - 16
+ band_kernel:
+ - 3
+ - 4
+ - 16
+ conv_depths:
+ - 3
+ - 2
+ - 1
+ compress: 4
+ conv_kernel: 3
+ num_dplayer: 6
+ expand: 1
+
+training:
+ batch_size: 10
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Drums
+ - Bass
+ - Other
+ - Vocals
+ lr: 5.0e-04
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: adam
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable mixed precision (float16); should usually be true
+
+augmentations:
+ enable: true # enable or disable all augmentations (a quick way to switch them all off)
+ loudness: true # randomly change loudness of each stem within the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs:
+ !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+ - 0.2
+ - 0.02
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+
+inference:
+ batch_size: 8
+ dim_t: 256
+ num_overlap: 4
+ normalize: true
diff --git a/models/SCnet/config_musdb18_scnet_large.yaml b/models/SCnet/config_musdb18_scnet_large.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..33912dfc5614881dd2b4f56318918b4ff65d58b1
--- /dev/null
+++ b/models/SCnet/config_musdb18_scnet_large.yaml
@@ -0,0 +1,88 @@
+audio:
+ chunk_size: 485100 # 44100 * 11
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ sources:
+ - drums
+ - bass
+ - other
+ - vocals
+ audio_channels: 2
+ dims:
+ - 4
+ - 64
+ - 128
+ - 256
+ nfft: 4096
+ hop_size: 1024
+ win_size: 4096
+ normalized: True
+ band_SR:
+ - 0.225
+ - 0.372
+ - 0.403
+ band_stride:
+ - 1
+ - 4
+ - 16
+ band_kernel:
+ - 3
+ - 4
+ - 16
+ conv_depths:
+ - 3
+ - 2
+ - 1
+ compress: 4
+ conv_kernel: 3
+ num_dplayer: 6
+ expand: 1
+
+training:
+ batch_size: 6
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Drums
+ - Bass
+ - Other
+ - Vocals
+ # lr: 1.0e-04
+ lr: 1.0
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: prodigy
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable mixed precision (float16); should usually be true
+
+augmentations:
+ enable: true # enable or disable all augmentations (a quick way to switch them all off)
+ loudness: true # randomly change loudness of each stem within the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs:
+ !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+ - 0.2
+ - 0.02
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+ all:
+ channel_shuffle: 0.5 # Set to 0 or lower to disable
+ random_inverse: 0.1 # inverse track (a lower probability is recommended)
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
+
+inference:
+ batch_size: 8
+ dim_t: 256
+ num_overlap: 4
+ normalize: false
diff --git a/models/SCnet/config_musdb18_scnet_large_starrytong.yaml b/models/SCnet/config_musdb18_scnet_large_starrytong.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7fbb6e4bcb5ddf3946833a137cd61c730bca20a8
--- /dev/null
+++ b/models/SCnet/config_musdb18_scnet_large_starrytong.yaml
@@ -0,0 +1,88 @@
+audio:
+ chunk_size: 485100 # 44100 * 11
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ sources:
+ - drums
+ - bass
+ - other
+ - vocals
+ audio_channels: 2
+ dims:
+ - 4
+ - 64
+ - 128
+ - 256
+ nfft: 4096
+ hop_size: 1024
+ win_size: 4096
+ normalized: True
+ band_SR:
+ - 0.225
+ - 0.372
+ - 0.403
+ band_stride:
+ - 1
+ - 4
+ - 16
+ band_kernel:
+ - 3
+ - 4
+ - 16
+ conv_depths:
+ - 3
+ - 2
+ - 1
+ compress: 4
+ conv_kernel: 3
+ num_dplayer: 6
+ expand: 1
+
+training:
+ batch_size: 6
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Drums
+ - Bass
+ - Other
+ - Vocals
+ # lr: 1.0e-04
+ lr: 1.0
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ optimizer: prodigy
+ other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+ use_amp: true # enable or disable mixed precision (float16); should usually be true
+
+augmentations:
+ enable: true # enable or disable all augmentations (a quick way to switch them all off)
+ loudness: true # randomly change loudness of each stem within the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs:
+ !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+ - 0.2
+ - 0.02
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+ all:
+ channel_shuffle: 0.5 # Set to 0 or lower to disable
+ random_inverse: 0.1 # inverse track (a lower probability is recommended)
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
+
+inference:
+ batch_size: 8
+ dim_t: 256
+ num_overlap: 4
+ normalize: true
diff --git a/models/SCnet/config_musdb18_scnet_xl.yaml b/models/SCnet/config_musdb18_scnet_xl.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a189dc899094da0df5508a1661302fc61f0bce64
--- /dev/null
+++ b/models/SCnet/config_musdb18_scnet_xl.yaml
@@ -0,0 +1,207 @@
+audio:
+ chunk_size: 485100 # 44100 * 11
+ num_channels: 2
+ sample_rate: 44100
+ min_mean_abs: 0.000
+
+model:
+ sources:
+ - drums
+ - bass
+ - other
+ - vocals
+ audio_channels: 2
+ dims:
+ - 4
+ - 64
+ - 128
+ - 256
+ nfft: 4096
+ hop_size: 1024
+ win_size: 4096
+ normalized: True
+ band_SR:
+ - 0.230
+ - 0.370
+ - 0.400
+ band_stride:
+ - 1
+ - 4
+ - 16
+ band_kernel:
+ - 3
+ - 4
+ - 16
+ conv_depths:
+ - 3
+ - 2
+ - 1
+ compress: 4
+ conv_kernel: 3
+ num_dplayer: 8
+ expand: 1
+
+training:
+ batch_size: 4
+ gradient_accumulation_steps: 1
+ grad_clip: 0
+ instruments:
+ - Drums
+ - Bass
+ - Other
+ - Vocals
+ patience: 2
+ reduce_factor: 0.95
+ target_instrument: null
+ num_epochs: 1000
+ num_steps: 1000
+ q: 0.95
+ coarse_loss_clip: true
+ ema_momentum: 0.999
+ # optimizer: prodigy
+ optimizer: adam
+ lr: 1.0e-05
+ # lr: 1.0
+ normalize: false # normalize the model input (use the same setting for inference!)
+ other_fix: false # needed when checking on the multisong dataset whether "other" is actually the instrumental
+ use_amp: true # enable or disable mixed precision (float16); usually it should be true
+
+
+augmentations:
+ enable: false # enable or disable all augmentations (a quick way to turn them all off if needed)
+ loudness: true # randomly change the loudness of each stem within the range (loudness_min; loudness_max)
+ loudness_min: 0.5
+ loudness_max: 1.5
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+ - 0.2
+ - 0.02
+ mixup_loudness_min: 0.5
+ mixup_loudness_max: 1.5
+
+ # apply mp3 compression to mixture only (emulate downloading mp3 from internet)
+ mp3_compression_on_mixture: 0.01
+ mp3_compression_on_mixture_bitrate_min: 32
+ mp3_compression_on_mixture_bitrate_max: 320
+ mp3_compression_on_mixture_backend: "lameenc"
+
+ all:
+ channel_shuffle: 0.5 # Set 0 or lower to disable
+ random_inverse: 0.1 # inverse track (a lower probability is better)
+ random_polarity: 0.5 # polarity change (multiply waveform by -1)
+
+ mp3_compression: 0.01
+ mp3_compression_min_bitrate: 32
+ mp3_compression_max_bitrate: 320
+ mp3_compression_backend: "lameenc"
+
+ # pedalboard reverb block
+ pedalboard_reverb: 0.01
+ pedalboard_reverb_room_size_min: 0.1
+ pedalboard_reverb_room_size_max: 0.9
+ pedalboard_reverb_damping_min: 0.1
+ pedalboard_reverb_damping_max: 0.9
+ pedalboard_reverb_wet_level_min: 0.1
+ pedalboard_reverb_wet_level_max: 0.9
+ pedalboard_reverb_dry_level_min: 0.1
+ pedalboard_reverb_dry_level_max: 0.9
+ pedalboard_reverb_width_min: 0.9
+ pedalboard_reverb_width_max: 1.0
+
+ # pedalboard chorus block
+ pedalboard_chorus: 0.01
+ pedalboard_chorus_rate_hz_min: 1.0
+ pedalboard_chorus_rate_hz_max: 7.0
+ pedalboard_chorus_depth_min: 0.25
+ pedalboard_chorus_depth_max: 0.95
+ pedalboard_chorus_centre_delay_ms_min: 3
+ pedalboard_chorus_centre_delay_ms_max: 10
+ pedalboard_chorus_feedback_min: 0.0
+ pedalboard_chorus_feedback_max: 0.5
+ pedalboard_chorus_mix_min: 0.1
+ pedalboard_chorus_mix_max: 0.9
+
+ # pedalboard phazer block
+ pedalboard_phazer: 0.01
+ pedalboard_phazer_rate_hz_min: 1.0
+ pedalboard_phazer_rate_hz_max: 10.0
+ pedalboard_phazer_depth_min: 0.25
+ pedalboard_phazer_depth_max: 0.95
+ pedalboard_phazer_centre_frequency_hz_min: 200
+ pedalboard_phazer_centre_frequency_hz_max: 12000
+ pedalboard_phazer_feedback_min: 0.0
+ pedalboard_phazer_feedback_max: 0.5
+ pedalboard_phazer_mix_min: 0.1
+ pedalboard_phazer_mix_max: 0.9
+
+ # pedalboard distortion block
+ pedalboard_distortion: 0.01
+ pedalboard_distortion_drive_db_min: 1.0
+ pedalboard_distortion_drive_db_max: 25.0
+
+ # pedalboard pitch shift block
+ pedalboard_pitch_shift: 0.01
+ pedalboard_pitch_shift_semitones_min: -7
+ pedalboard_pitch_shift_semitones_max: 7
+
+ # pedalboard resample block
+ pedalboard_resample: 0.01
+ pedalboard_resample_target_sample_rate_min: 4000
+ pedalboard_resample_target_sample_rate_max: 44100
+
+ # pedalboard bitcrash block
+ pedalboard_bitcrash: 0.01
+ pedalboard_bitcrash_bit_depth_min: 4
+ pedalboard_bitcrash_bit_depth_max: 16
+
+ # pedalboard mp3 compressor block
+ pedalboard_mp3_compressor: 0.01
+ pedalboard_mp3_compressor_pedalboard_mp3_compressor_min: 0
+ pedalboard_mp3_compressor_pedalboard_mp3_compressor_max: 9.999
+
+ vocals:
+ pitch_shift: 0.1
+ pitch_shift_min_semitones: -5
+ pitch_shift_max_semitones: 5
+ seven_band_parametric_eq: 0.25
+ seven_band_parametric_eq_min_gain_db: -9
+ seven_band_parametric_eq_max_gain_db: 9
+ tanh_distortion: 0.1
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 0.7
+ bass:
+ pitch_shift: 0.1
+ pitch_shift_min_semitones: -2
+ pitch_shift_max_semitones: 2
+ seven_band_parametric_eq: 0.25
+ seven_band_parametric_eq_min_gain_db: -3
+ seven_band_parametric_eq_max_gain_db: 6
+ tanh_distortion: 0.2
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 0.5
+ drums:
+ pitch_shift: 0.33
+ pitch_shift_min_semitones: -5
+ pitch_shift_max_semitones: 5
+ seven_band_parametric_eq: 0.25
+ seven_band_parametric_eq_min_gain_db: -9
+ seven_band_parametric_eq_max_gain_db: 9
+ tanh_distortion: 0.33
+ tanh_distortion_min: 0.1
+ tanh_distortion_max: 0.6
+ other:
+ pitch_shift: 0.1
+ pitch_shift_min_semitones: -4
+ pitch_shift_max_semitones: 4
+ gaussian_noise: 0.1
+ gaussian_noise_min_amplitude: 0.001
+ gaussian_noise_max_amplitude: 0.015
+ time_stretch: 0.01
+ time_stretch_min_rate: 0.8
+ time_stretch_max_rate: 1.25
+
+inference:
+ batch_size: 4
+ dim_t: 256
+ num_overlap: 4
+ normalize: false
diff --git a/models/SCnet/model_scnet_ep_54_sdr_9.8051.ckpt b/models/SCnet/model_scnet_ep_54_sdr_9.8051.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..e876a2ed0ea8262bd2bae422e8ab02b13558cba6
--- /dev/null
+++ b/models/SCnet/model_scnet_ep_54_sdr_9.8051.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd889cc1d97619ccac59280ecc859c190cd3cc1b1557fbe3a19b1610bb67e410
+size 216189106
diff --git a/models/SCnet/model_scnet_sdr_9.3244.ckpt b/models/SCnet/model_scnet_sdr_9.3244.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..b72d6b1472f10f043fa266b4f3d6507ba55e4cc5
--- /dev/null
+++ b/models/SCnet/model_scnet_sdr_9.3244.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe550315a76e8f4aed8475d7d5952137504a3b6c63b3adcef2443bfe73aac540
+size 168868194
diff --git a/models/SCnet/scnet_checkpoint_musdb18.ckpt b/models/SCnet/scnet_checkpoint_musdb18.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..671b56f708c41055e2fd1ad71391254a8f097aac
--- /dev/null
+++ b/models/SCnet/scnet_checkpoint_musdb18.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bc0d1abb20bfdf966dcd07637bafd03e4bc13653d09ef18bc9b3e342eafe2aa
+size 42434986
diff --git a/models/VR_Arch/10_SP-UVR-2B-32000-1.pth b/models/VR_Arch/10_SP-UVR-2B-32000-1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f27302a3d3972a2fd74440c3c99ff5bba7cbd839
--- /dev/null
+++ b/models/VR_Arch/10_SP-UVR-2B-32000-1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a178d8a8bc2c992bc5e5a045ef5ab8706d55c375d1bfe34700c1946d6c2d28b6
+size 31938951
diff --git a/models/VR_Arch/11_SP-UVR-2B-32000-2.pth b/models/VR_Arch/11_SP-UVR-2B-32000-2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..27190898a57358cfba6fe0343e3432d6155204f2
--- /dev/null
+++ b/models/VR_Arch/11_SP-UVR-2B-32000-2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e80e69ecd2244496710fa77e7d2c785bad93e2a87a7def65c38272cc9fd1613
+size 31938951
diff --git a/models/VR_Arch/12_SP-UVR-3B-44100.pth b/models/VR_Arch/12_SP-UVR-3B-44100.pth
new file mode 100644
index 0000000000000000000000000000000000000000..fb5288aeaca78b613f29014c63e36f474d66c1ad
--- /dev/null
+++ b/models/VR_Arch/12_SP-UVR-3B-44100.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b826e6a44dbe007bde36c720bb2afeed008348cef1b61e35898fee5282d3918
+size 31938951
diff --git a/models/VR_Arch/13_SP-UVR-4B-44100-1.pth b/models/VR_Arch/13_SP-UVR-4B-44100-1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8d0715db510d35c4964eab275b140ef761ff4f7e
--- /dev/null
+++ b/models/VR_Arch/13_SP-UVR-4B-44100-1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afb84b3d1da4a7e24033e5a0e85dbde2ac5f5e4b1e839788748735ac3bcc5046
+size 31938951
diff --git a/models/VR_Arch/14_SP-UVR-4B-44100-2.pth b/models/VR_Arch/14_SP-UVR-4B-44100-2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..09ce4782c307d59cb4d29b9526dd39585485f0aa
--- /dev/null
+++ b/models/VR_Arch/14_SP-UVR-4B-44100-2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78404d85d6e4e11a3d2362f6577fc398f77406b7881fbac402e0c6191d6efca1
+size 31938951
diff --git a/models/VR_Arch/15_SP-UVR-MID-44100-1.pth b/models/VR_Arch/15_SP-UVR-MID-44100-1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..77078902947cbfc26779d83ed895ac0c0dcbf57c
--- /dev/null
+++ b/models/VR_Arch/15_SP-UVR-MID-44100-1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd03d4061ce34569e8df9e0a13a235af7ff0d3c0c3def491db4439f9ff344dd0
+size 31938951
diff --git a/models/VR_Arch/16_SP-UVR-MID-44100-2.pth b/models/VR_Arch/16_SP-UVR-MID-44100-2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c2650722cdecf650037a9eb409fe7f0ea83c4026
--- /dev/null
+++ b/models/VR_Arch/16_SP-UVR-MID-44100-2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0e5025987670f963a339db861b078d8526bf912f0ce2e00f732a7f0a3e846cc
+size 31938951
diff --git a/models/VR_Arch/17_HP-Wind_Inst-UVR.pth b/models/VR_Arch/17_HP-Wind_Inst-UVR.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8833720c17aaf6c73b004e5bf5cb3be37bed2184
--- /dev/null
+++ b/models/VR_Arch/17_HP-Wind_Inst-UVR.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acc6d472b4b478da9c9ab5af45b167749e05a7f65b30c7d5988b3700a513aeee
+size 223661285
diff --git a/models/VR_Arch/1_HP-UVR.pth b/models/VR_Arch/1_HP-UVR.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e82f0ea280de5f35a32bf213f45ad5b2ab4c11aa
--- /dev/null
+++ b/models/VR_Arch/1_HP-UVR.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9c654305888fe09668078ade76962b32d3782848f158192da7403a650e935a6
+size 126792647
diff --git a/models/VR_Arch/2_HP-UVR.pth b/models/VR_Arch/2_HP-UVR.pth
new file mode 100644
index 0000000000000000000000000000000000000000..75a8b276a9094cbf4e0180175b49f533b358461a
--- /dev/null
+++ b/models/VR_Arch/2_HP-UVR.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26d6400d2d2e90811ba2381828c8c3519196097375af524c3d0edd969acf5356
+size 126782699
diff --git a/models/VR_Arch/3_HP-Vocal-UVR.pth b/models/VR_Arch/3_HP-Vocal-UVR.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3a0e7a3081a92f32895d6478390e544488268f13
--- /dev/null
+++ b/models/VR_Arch/3_HP-Vocal-UVR.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d3f9be265b2f2c3d684f23a9905674627c2552e0f6b5f36325cef8dc1f0ff6b
+size 126792647
diff --git a/models/VR_Arch/4_HP-Vocal-UVR.pth b/models/VR_Arch/4_HP-Vocal-UVR.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0a58d54b24fbabb2f8e3863ec7829b1fdaaab567
--- /dev/null
+++ b/models/VR_Arch/4_HP-Vocal-UVR.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:295d1e1e3a52c17a040a4a6fe390214ad8ca0797f091b7c6d8db97d247a27271
+size 126792647
diff --git a/models/VR_Arch/5_HP-Karaoke-UVR.pth b/models/VR_Arch/5_HP-Karaoke-UVR.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e1a6d0d913c7875ea67d2a892fcec4d44a14f426
--- /dev/null
+++ b/models/VR_Arch/5_HP-Karaoke-UVR.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe00891defbb61f4261500af22f7624f1a3df8dc75fa3998d1aece02e6be4537
+size 126782699
diff --git a/models/VR_Arch/6_HP-Karaoke-UVR.pth b/models/VR_Arch/6_HP-Karaoke-UVR.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b0ab8fc7b1f85c340c608c4ffd43d964d1fb7637
--- /dev/null
+++ b/models/VR_Arch/6_HP-Karaoke-UVR.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ce7eaaa9e56f09366b788aebf6d3a72aec8145692c56f1e090e4e7e2d7ce65f
+size 126782699
diff --git a/models/VR_Arch/7_HP2-UVR.pth b/models/VR_Arch/7_HP2-UVR.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d91fc5482a76af6413dacc2a601df8b2510cd2c6
--- /dev/null
+++ b/models/VR_Arch/7_HP2-UVR.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:545ee733b704a263bf6f75328c5ed14f0fb439dcda7a6e8f53c8cecb92877afe
+size 550120123
diff --git a/models/VR_Arch/8_HP2-UVR.pth b/models/VR_Arch/8_HP2-UVR.pth
new file mode 100644
index 0000000000000000000000000000000000000000..72633ec2c1c41105756bdc46c208910e7f9045ef
--- /dev/null
+++ b/models/VR_Arch/8_HP2-UVR.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e072299a87cf3fcdd36da6adc6d2837bed0c07a208775a7678c3d5deeea58f79
+size 550131079
diff --git a/models/VR_Arch/9_HP2-UVR.pth b/models/VR_Arch/9_HP2-UVR.pth
new file mode 100644
index 0000000000000000000000000000000000000000..344c0743d56b86c4e1f1dad48bcfffc026dc3c6e
--- /dev/null
+++ b/models/VR_Arch/9_HP2-UVR.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a99d032ed8c9d59077a8a41f0ceccb279d59ec10c87102167a283151d25ad9a2
+size 550131079
diff --git a/models/VR_Arch/MGM_HIGHEND_v4.pth b/models/VR_Arch/MGM_HIGHEND_v4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..eaac0cbcfb8eb02a574c6eea6936a45c88ad1163
--- /dev/null
+++ b/models/VR_Arch/MGM_HIGHEND_v4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67bf5241072ae938ae0e6bec44d2d8307d9db81a5299382a5cfe12188131f9ce
+size 31861529
diff --git a/models/VR_Arch/MGM_LOWEND_A_v4.pth b/models/VR_Arch/MGM_LOWEND_A_v4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..60023486084acb358abf82b5a098ecd34cb03312
--- /dev/null
+++ b/models/VR_Arch/MGM_LOWEND_A_v4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5def7a35afecde33a77db214b3e9eeff0e47e8329ff5522d33332bc02b4b0e3
+size 31938951
diff --git a/models/VR_Arch/MGM_LOWEND_B_v4.pth b/models/VR_Arch/MGM_LOWEND_B_v4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f1b963c0cb054a7135ce0cfdfff9e56354f44b71
--- /dev/null
+++ b/models/VR_Arch/MGM_LOWEND_B_v4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93d6a08abf6357e5f37d4eb6892267520f5bb9e90f45486b08a0857d5fcb2ec8
+size 31938951
diff --git a/models/VR_Arch/MGM_MAIN_v4.pth b/models/VR_Arch/MGM_MAIN_v4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b2e17497cf069f024054c58b3cc8fc9743593dd1
--- /dev/null
+++ b/models/VR_Arch/MGM_MAIN_v4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e6f0c0592333a3b215f61ac1e01f6c24c059f903f0789cf634e92daffae1dce
+size 31938951
diff --git a/models/VR_Arch/UVR-BVE-4B_SN-44100-1.pth b/models/VR_Arch/UVR-BVE-4B_SN-44100-1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e8235eeed743f442d3d8c167598448ef09cabf21
--- /dev/null
+++ b/models/VR_Arch/UVR-BVE-4B_SN-44100-1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56165bdfa0dd5df7930ff76652200dc287e1bfef622bba5c812c736dc22067fe
+size 223650277
diff --git a/models/VR_Arch/UVR-De-Echo-Aggressive.pth b/models/VR_Arch/UVR-De-Echo-Aggressive.pth
new file mode 100644
index 0000000000000000000000000000000000000000..28a23d764ee4460b4d3d1fefbde7e15c9deaae95
--- /dev/null
+++ b/models/VR_Arch/UVR-De-Echo-Aggressive.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bd1d79d9c5d1b17d20f96f8a9f8aff1b55a83014f70712446bf420c0188e0a0
+size 127139365
diff --git a/models/VR_Arch/UVR-De-Echo-Normal.pth b/models/VR_Arch/UVR-De-Echo-Normal.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0ff05edc8f7b50b8911a6f179fe5a512aa924fda
--- /dev/null
+++ b/models/VR_Arch/UVR-De-Echo-Normal.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b849dd575643b075c257fb7a96c2ef5a79d7a5e7df74a2b319ad47118f1ee769
+size 127139365
diff --git a/models/VR_Arch/UVR-De-Reverb-aufr33-jarredou.pth b/models/VR_Arch/UVR-De-Reverb-aufr33-jarredou.pth
new file mode 100644
index 0000000000000000000000000000000000000000..fd229237a6cb4e4c4af6961555d8d24d63a542d0
--- /dev/null
+++ b/models/VR_Arch/UVR-De-Reverb-aufr33-jarredou.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fff726b0adf09f2eadc2151645557db81e1a01fe15d91f9bbbfa9b50a007f1fd
+size 58928133
diff --git a/models/VR_Arch/UVR-DeEcho-DeReverb.pth b/models/VR_Arch/UVR-DeEcho-DeReverb.pth
new file mode 100644
index 0000000000000000000000000000000000000000..39fa772ed9097501fd2759fddef376e27d7c3ca4
--- /dev/null
+++ b/models/VR_Arch/UVR-DeEcho-DeReverb.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e644028ec82865dc0fe082bc6fea85a43f7c71cfe375caee2da2d154aa661ee7
+size 223650277
diff --git a/models/VR_Arch/UVR-DeNoise-Lite.pth b/models/VR_Arch/UVR-DeNoise-Lite.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e6c8b8a29f6c427aec2b1a55b235e59b2838113c
--- /dev/null
+++ b/models/VR_Arch/UVR-DeNoise-Lite.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0023492fe98c406817b5253965de19ede65d1c147db015a3a428f07602e99571
+size 17922277
diff --git a/models/VR_Arch/UVR-DeNoise.pth b/models/VR_Arch/UVR-DeNoise.pth
new file mode 100644
index 0000000000000000000000000000000000000000..fbb3b042175d49dfb778a8f4c99caa3c9edf2098
--- /dev/null
+++ b/models/VR_Arch/UVR-DeNoise.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5addf43ece5bddd18da9f575a02d7ffdb32342414e6ad7ac8d1dd7a04138a628
+size 127139365