diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..8a82ee018b5ae32aaa494efdf6f6e0fce967138c
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,6 @@
+kuielab_b_other.onnx filter=lfs diff=lfs merge=lfs -text
+kuielab_b_vocals.onnx filter=lfs diff=lfs merge=lfs -text
+UVR_MDXNET_3_9662.onnx filter=lfs diff=lfs merge=lfs -text
+UVR_MDXNET_1_9703.onnx filter=lfs diff=lfs merge=lfs -text
+scnet_checkpoint_musdb18.ckpt filter=lfs diff=lfs merge=lfs -text
+UVR-MDX-NET_Crowd_HQ_1.onnx filter=lfs diff=lfs merge=lfs -text
diff --git a/BS-Roformer-SW.yaml b/BS-Roformer-SW.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..49760ce0b48d341ee70760803dd59f4cb0982728
--- /dev/null
+++ b/BS-Roformer-SW.yaml
@@ -0,0 +1,198 @@
+audio:
+  chunk_size: 588800 #882000
+  dim_f: 1024
+  dim_t: 801 # don't work (use in model)
+  hop_length: 441 # don't work (use in model)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 256
+  depth: 12
+  stereo: true
+  num_stems: 6
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  linear_transformer_depth: 0
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 512
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+  mlp_expansion_factor: 4
+  use_torch_checkpoint: false # greatly reduces GPU memory consumption during training (not fully tested)
+  skip_connection: false # enables skip connections between transformer blocks - may help with gradient problems and speed up training
+
+training:
+  batch_size: 2
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments: ['bass', 'drums', 'other', 'vocals', 'guitar', 'piano']
+  patience: 3
+  reduce_factor: 0.95
+  target_instrument: null
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: true # Mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  # optimizer: prodigy
+  optimizer: adam
+  # lr: 1.0
+  lr: 1.0e-5
+  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+  enable: true # enable or disable all augmentations (to fast disable if needed)
+  loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
+  loudness_min: 0.5
+  loudness_max: 1.5
+  mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+    - 0.2
+    - 0.02
+  mixup_loudness_min: 0.5
+  mixup_loudness_max: 1.5
+
+  all:
+    channel_shuffle: 0.5 # Set 0 or lower to disable
+    random_inverse: 0.1 # inverse track (better lower probability)
+    random_polarity: 0.5 # polarity change (multiply waveform to -1)
+
+  vocals:
+    pitch_shift: 0.1
+    pitch_shift_min_semitones: -5
+    pitch_shift_max_semitones: 5
+    seven_band_parametric_eq: 0.1
+    seven_band_parametric_eq_min_gain_db: -9
+    seven_band_parametric_eq_max_gain_db: 9
+    tanh_distortion: 0.1
+    tanh_distortion_min: 0.1
+    tanh_distortion_max: 0.7
+  bass:
+    pitch_shift: 0.1
+    pitch_shift_min_semitones: -2
+    pitch_shift_max_semitones: 2
+    seven_band_parametric_eq: 0.1
+    seven_band_parametric_eq_min_gain_db: -3
+    seven_band_parametric_eq_max_gain_db: 6
+    tanh_distortion: 0.1
+    tanh_distortion_min: 0.1
+    tanh_distortion_max: 0.5
+  drums:
+    pitch_shift: 0.1
+    pitch_shift_min_semitones: -5
+    pitch_shift_max_semitones: 5
+    seven_band_parametric_eq: 0.1
+    seven_band_parametric_eq_min_gain_db: -9
+    seven_band_parametric_eq_max_gain_db: 9
+    tanh_distortion: 0.1
+    tanh_distortion_min: 0.1
+    tanh_distortion_max: 0.6
+  other:
+    pitch_shift: 0.1
+    pitch_shift_min_semitones: -4
+    pitch_shift_max_semitones: 4
+    gaussian_noise: 0.1
+    gaussian_noise_min_amplitude: 0.001
+    gaussian_noise_max_amplitude: 0.015
+    time_stretch: 0.1
+    time_stretch_min_rate: 0.8
+    time_stretch_max_rate: 1.25
+
+
+inference:
+  batch_size: 1
+  dim_t: 801  # Changed from 1101 to match training
+  num_overlap: 2
+  normalize: false
+
diff --git a/UVR-MDX-NET_Crowd_HQ_1.onnx b/UVR-MDX-NET_Crowd_HQ_1.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..466c3fa69b05f5b27c19cc11eb23c99909d2a4d0
--- /dev/null
+++ b/UVR-MDX-NET_Crowd_HQ_1.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:313b7bf869c411fdafe005cf0d5a635c405cb3d0df137178a64091952d75225c
+size 59074342
diff --git a/UVR_Demucs_Model_1.yaml b/UVR_Demucs_Model_1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0995b75dd7149595388255185ab68a1a81ea9477
--- /dev/null
+++ b/UVR_Demucs_Model_1.yaml
@@ -0,0 +1,2 @@
+models: ['ebf34a2db']
+segment: 44
\ No newline at end of file
diff --git a/UVR_MDXNET_1_9703.onnx b/UVR_MDXNET_1_9703.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..50d00b7e34e7763954283b9fc13f2d903072be03
--- /dev/null
+++ b/UVR_MDXNET_1_9703.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:229ad3bb96a037e89d8ed86732d6d3675856e6a07c3e3f02896eac01ec7ee4be
+size 29704436
diff --git a/UVR_MDXNET_3_9662.onnx b/UVR_MDXNET_3_9662.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..140b8e3eb273df75384c691462998774b3928a52
--- /dev/null
+++ b/UVR_MDXNET_3_9662.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e02220e80d8253f4c2209f8924298b2b686bbdf2868b788ff5500fb9bd94aadc
+size 29704436
diff --git a/assets/__pycache__/model_tools.cpython-310.pyc b/assets/__pycache__/model_tools.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..10a3001f5b79176c41ce87c16a75452728edafdd
Binary files /dev/null and b/assets/__pycache__/model_tools.cpython-310.pyc differ
diff --git a/assets/__pycache__/model_tools.cpython-313.pyc b/assets/__pycache__/model_tools.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..477c4f1fda65c79917d95ce6a80b253d6897f84e
Binary files /dev/null and b/assets/__pycache__/model_tools.cpython-313.pyc differ
diff --git a/assets/calculate-model-hashes.py b/assets/calculate-model-hashes.py
new file mode 100644
index 0000000000000000000000000000000000000000..3031ba1736f36502e106faf1c984a1a3d3fe3df2
--- /dev/null
+++ b/assets/calculate-model-hashes.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import json
+import hashlib
+import requests
+import model_tools as mt
+
+MODEL_CACHE_PATH = "/tmp/audio-separator-models"
+VR_MODEL_DATA_LOCAL_PATH = f"{MODEL_CACHE_PATH}/vr_model_data.json"
+MDX_MODEL_DATA_LOCAL_PATH = f"{MODEL_CACHE_PATH}/mdx_model_data.json"
+
+MODEL_DATA_URL_PREFIX = "https://raw.githubusercontent.com/TRvlvr/application_data/main"
+VR_MODEL_DATA_URL = f"{MODEL_DATA_URL_PREFIX}/vr_model_data/model_data_new.json"
+MDX_MODEL_DATA_URL = f"{MODEL_DATA_URL_PREFIX}/mdx_model_data/model_data_new.json"
+
+OUTPUT_PATH = f"{MODEL_CACHE_PATH}/model_hashes.json"
+
+if __name__ == "__main__":
+    mt.iterate_and_hash(MODEL_CACHE_PATH)
diff --git a/assets/delete_duplicate_models.py b/assets/delete_duplicate_models.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f7193407638956006ffaea30a8113d30370fe85
--- /dev/null
+++ b/assets/delete_duplicate_models.py
@@ -0,0 +1,8 @@
+import os
+import hashlib
+from collections import defaultdict
+import model_tools as mt
+# Scans the current working directory (the default argument of find_and_remove_duplicates).
+if __name__ == "__main__":
+    print(f"Scanning directory: {os.getcwd()}")
+    mt.find_and_remove_duplicates()
diff --git a/assets/list_duplicate_remove.py b/assets/list_duplicate_remove.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4c5b2f81f722d40df9216bcbf0126cb1908f128
--- /dev/null
+++ b/assets/list_duplicate_remove.py
@@ -0,0 +1,7 @@
+from model_tools import remove_duplicate_lines
+# Source list and de-duplicated destination, both relative to the working directory.
+input_filename = "file.txt"
+output_filename = "processed_links.txt"
+# Only runs when executed as a script, not when imported.
+if __name__ == "__main__":
+    remove_duplicate_lines(input_filename, output_filename)
diff --git a/assets/model_data/mdx_model_data.json b/assets/model_data/mdx_model_data.json
new file mode 100644
index 0000000000000000000000000000000000000000..ba79b189d787ecd9b3035fca87421377f79cd67f
--- /dev/null
+++ b/assets/model_data/mdx_model_data.json
@@ -0,0 +1,384 @@
+{
+    "0ddfc0eb5792638ad5dc27850236c246": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "26d308f91f3423a67dc69a6d12a8793d": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 8192,
+        "primary_stem": "Other"
+    },
+    "2cdd429caac38f0194b133884160f2c6": {
+        "compensate": 1.045,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "2f5501189a2f6db6349916fabe8c90de": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals",
+        "is_karaoke": true
+    },
+    "398580b6d5d973af3120df54cee6759d": {
+        "compensate": 1.75,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "488b3e6f8bd3717d9d7c428476be2d75": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "4910e7827f335048bdac11fa967772f9": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 7,
+        "mdx_n_fft_scale_set": 4096,
+        "primary_stem": "Drums"
+    },
+    "53c4baf4d12c3e6c3831bb8f5b532b93": {
+        "compensate": 1.043,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "5d343409ef0df48c7d78cce9f0106781": {
+        "compensate": 1.075,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "5f6483271e1efb9bfb59e4a3e6d4d098": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "65ab5919372a128e4167f5e01a8fda85": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 8192,
+        "primary_stem": "Other"
+    },
+    "6703e39f36f18aa7855ee1047765621d": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 16384,
+        "primary_stem": "Bass"
+    },
+    "6b31de20e84392859a3d09d43f089515": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "867595e9de46f6ab699008295df62798": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "a3cd63058945e777505c01d2507daf37": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "b33d9b3950b6cbf5fe90a32608924700": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "c3b29bdce8c4fa17ec609e16220330ab": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 16384,
+        "primary_stem": "Bass"
+    },
+    "ceed671467c1f64ebdfac8a2490d0d52": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "d2a1376f310e4f7fa37fb9b5774eb701": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "d7bff498db9324db933d913388cba6be": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "d94058f8c7f1fae4164868ae8ae66b20": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "dc41ede5961d50f277eb846db17f5319": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 4096,
+        "primary_stem": "Drums"
+    },
+    "e5572e58abf111f80d8241d2e44e7fa4": {
+        "compensate": 1.028,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "e7324c873b1f615c35c1967f912db92a": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "1c56ec0224f1d559c42fd6fd2a67b154": {
+        "compensate": 1.025,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental"
+    },
+    "f2df6d6863d8f435436d8b561594ff49": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "b06327a00d5e5fbc7d96e1781bbdb596": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "94ff780b977d3ca07c7a343dab2e25dd": {
+        "compensate": 1.039,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "73492b58195c3b52d34590d5474452f6": {
+        "compensate": 1.043,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "970b3f9492014d18fefeedfe4773cb42": {
+        "compensate": 1.009,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "1d64a6d2c30f709b8c9b4ce1366d96ee": {
+        "compensate": 1.065,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental",
+        "is_karaoke": true
+    },
+    "203f2a3955221b64df85a41af87cf8f0": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "291c2049608edb52648b96e27eb80e95": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "ead8d05dab12ec571d67549b3aab03fc": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "cc63408db3d80b4d85b0287d1d7c9632": {
+        "compensate": 1.033,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "cd5b2989ad863f116c855db1dfe24e39": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Reverb"
+    },
+    "55657dd70583b0fedfba5f67df11d711": {
+        "compensate": 1.022,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "b6bccda408a436db8500083ef3491e8b": {
+        "compensate": 1.02,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "8a88db95c7fb5dbe6a095ff2ffb428b1": {
+        "compensate": 1.026,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental"
+    },
+    "b78da4afc6512f98e4756f5977f5c6b9": {
+        "compensate": 1.021,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "77d07b2667ddf05b9e3175941b4454a0": {
+        "compensate": 1.021,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "0f2a6bc5b49d87d64728ee40e23bceb1": {
+        "compensate": 1.019,
+        "mdx_dim_f_set": 2560,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental"
+    },
+    "b02be2d198d4968a121030cf8950b492": {
+        "compensate": 1.020,
+        "mdx_dim_f_set": 2560,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "No Crowd"
+    },
+    "2154254ee89b2945b97a7efed6e88820": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "063aadd735d58150722926dcbf5852a9": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "c09f714d978b41d718facfe3427e6001": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "fe96801369f6a148df2720f5ced88c19": {
+        "config_yaml": "model3.yaml"
+    },
+    "02e8b226f85fb566e5db894b9931c640": {
+        "config_yaml": "model2.yaml"
+    },
+    "e3de6d861635ab9c1d766149edd680d6": {
+        "config_yaml": "model1.yaml"
+    },
+    "3f2936c554ab73ce2e396d54636bd373": {
+        "config_yaml": "modelB.yaml"
+    },
+    "890d0f6f82d7574bca741a9e8bcb8168": {
+        "config_yaml": "modelB.yaml"
+    },
+    "63a3cb8c37c474681049be4ad1ba8815": {
+        "config_yaml": "modelB.yaml"
+    },
+    "a7fc5d719743c7fd6b61bd2b4d48b9f0": {
+        "config_yaml": "modelA.yaml"
+    },
+    "3567f3dee6e77bf366fcb1c7b8bc3745": {
+        "config_yaml": "modelA.yaml"
+    },
+    "a28f4d717bd0d34cd2ff7a3b0a3d065e": {
+        "config_yaml": "modelA.yaml"
+    },
+    "c9971a18da20911822593dc81caa8be9": {
+        "config_yaml": "sndfx.yaml"
+    },
+    "57d94d5ed705460d21c75a5ac829a605": {
+        "config_yaml": "sndfx.yaml"
+    },
+    "e7a25f8764f25a52c1b96c4946e66ba2": {
+        "config_yaml": "sndfx.yaml"
+    },
+    "104081d24e37217086ce5fde09147ee1": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "1e6165b601539f38d0a9330f3facffeb": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "fe0108464ce0d8271be5ab810891bd7c": {
+        "config_yaml": "model_2_stem_full_band.yaml"
+    },
+    "e9b82ec90ee56c507a3a982f1555714c": {
+        "config_yaml": "model_2_stem_full_band_2.yaml"
+    },
+    "99b6ceaae542265a3b6d657bf9fde79f": {
+        "config_yaml": "model_2_stem_full_band_8k.yaml"
+    },
+    "116f6f9dabb907b53d847ed9f7a9475f": {
+        "config_yaml": "model_2_stem_full_band_8k.yaml"
+    },
+    "53f707017bfcbb56f5e1bfac420d6732": {
+        "config_yaml": "model_bs_roformer_ep_317_sdr_12.9755.yaml",
+        "is_roformer": true
+    },
+    "63e41acc264bf681a73aa9f7e5f606cc": {
+        "config_yaml": "model_mel_band_roformer_ep_3005_sdr_11.4360.yaml",
+        "is_roformer": true
+    },
+    "e733736763234047587931fc35322fd9": {
+        "config_yaml": "model_bs_roformer_ep_937_sdr_10.5309.yaml",
+        "is_roformer": true
+    },
+    "d789065adfd747d6f585b27b495bcdae": {
+        "config_yaml": "model_bs_roformer_ep_368_sdr_12.9628.yaml",
+        "is_roformer": true
+    }
+}
diff --git a/assets/model_tools.py b/assets/model_tools.py
new file mode 100644
index 0000000000000000000000000000000000000000..90a382edc3e171a9b656e657a90551666b64ca52
--- /dev/null
+++ b/assets/model_tools.py
@@ -0,0 +1,380 @@
+import hashlib
+import json
+import os
+import sys
+import subprocess
+import requests
+from huggingface_hub import HfApi, snapshot_download
+import hashlib
+from collections import defaultdict
+
+
+from concurrent.futures import ThreadPoolExecutor
+
+def calculate_file_hash(filepath, block_size=65536):
+    """Calculates the SHA256 hash of a file's content."""
+    sha256 = hashlib.sha256()
+    try:
+        with open(filepath, "rb") as f:
+            while chunk := f.read(block_size):
+                sha256.update(chunk)
+    except FileNotFoundError:
+        return None  # Handle cases where a file might be deleted during the scan
+
+    return sha256.hexdigest()
+
+
+def find_and_remove_duplicates(directory="."):
+    """Finds duplicate files in the given directory and removes the one with the longer filename."""
+    hashes_to_files = defaultdict(list)
+    files_to_hash = {}
+
+    # Step 1: Hash all files in the directory
+    for filename in os.listdir(directory):
+        filepath = os.path.join(directory, filename)
+        if os.path.isfile(filepath):
+            file_hash = calculate_file_hash(filepath)
+            if file_hash:
+                hashes_to_files[file_hash].append(filepath)
+                files_to_hash[filepath] = file_hash
+
+    # Step 2: Identify duplicate groups (more than one file per hash)
+    duplicates = {h: files for h, files in hashes_to_files.items() if len(files) > 1}
+
+    if not duplicates:
+        print("No duplicate files found.")
+        return
+
+    # Step 3: Iterate over duplicates, compare filename length, and delete the longer one
+    for file_hash, file_list in duplicates.items():
+        # Sort files by filename length (ascending). The one to keep is the first item.
+        # If lengths are equal, an arbitrary one is kept.
+        files_sorted_by_length = sorted(file_list, key=len)
+        file_to_keep = files_sorted_by_length[0]
+        files_to_delete = files_sorted_by_length[1:]
+
+        print(f"\nDuplicate group (Hash: {file_hash[:10]}...):")
+        print(f"  Keeping: {file_to_keep}")
+        for file_to_delete in files_to_delete:
+            try:
+                os.remove(file_to_delete)
+                print(f"  Deleted: {file_to_delete} (longer filename)")
+            except OSError as e:
+                print(f"  Error deleting {file_to_delete}: {e}")
+
+
+def download_file(url, local_dir):
+    """Helper function to download a single file."""
+    try:
+        # Extract filename from URL (e.g., https://example.com/file.jpg -> file.jpg)
+        filename = url.split("/")[-1].split("?")[0] or "downloaded_file"
+        save_path = os.path.join(local_dir, filename)
+
+        # Download the file content
+        response = requests.get(url, stream=True, timeout=10)
+        response.raise_for_status()
+
+        with open(save_path, "wb") as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        return f"Successfully downloaded: {filename}"
+    except Exception as e:
+        return f"Failed to download {url}: {e}"
+
+
+def download_files_from_txt(filename, local_dir):
+    """Main function to read URLs and download them using 20 threads."""
+    # Ensure local directory exists
+    if not os.path.exists(local_dir):
+        os.makedirs(local_dir)
+
+    # Read URLs from the text file
+    with open(filename, "r") as f:
+        urls = [line.strip() for line in f if line.strip()]
+
+    # Use ThreadPoolExecutor to handle 20 downloads at a time
+    with ThreadPoolExecutor(max_workers=20) as executor:
+        # Submit all download tasks to the pool
+        results = [executor.submit(download_file, url, local_dir) for url in urls]
+
+        # Monitor results as they complete
+        for future in results:
+            print(future.result())
+
+
+def download_files_from_txt_aria(filename, local_dir):
+    command = [
+        "aria2c",
+        "--input-file",
+        filename,
+        "--dir",
+        local_dir,
+        "-c",  # Continue downloading a partially downloaded file
+        "-j",
+        "30",  # Set max concurrent downloads (adjust as needed)
+        "-x",
+        "16",  # Set max connections per server (adjust as needed)
+    ]
+    print(f"Starting downloads with aria2c in directory: {os.path.abspath(local_dir)}")
+    try:
+        # Execute the command
+        subprocess.run(
+            command,
+            check=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+        )
+        print("All downloads finished successfully.")
+    except subprocess.CalledProcessError as e:
+        print(f"An error occurred during aria2c execution: {e.stderr}")
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+    finally:
+        # os.remove(filename)
+        print(f"Downloaded all files: {filename}")
+
+
+def download_hf_repo(repo_id, local_dir, repo_type, token):
+    if not token:
+        token = os.getenv("HF_TOKEN")
+    """
+    Downloads an entire Hugging Face repository to a specified local directory.
+    """
+    print(f"Downloading {repo_id} to {local_dir}...")
+
+    # Ensure the target directory exists
+    os.makedirs(local_dir, exist_ok=True)
+
+    # Download the snapshot
+    downloaded_path = snapshot_download(
+        repo_id=repo_id,
+        local_dir=local_dir,
+        token=token,
+        local_dir_use_symlinks=False,  # Set to False to ensure actual files are moved to local_dir
+        repo_type=repo_type,
+    )
+
+    print(f"Download complete! Files are located in: {downloaded_path}")
+    return downloaded_path
+
+
+def remove_duplicate_lines(input_file_path, output_file_path):
+    """
+    Reads lines from input_file_path, removes duplicates, and writes
+    unique lines to output_file_path while preserving order.
+    """
+    try:
+        # Use an ordered set to maintain the original file's line order.
+        # An easy way to do this in Python 3.7+ is using a dictionary's keys.
+        unique_lines_dict = {}
+        with open(input_file_path, "r") as input_file:
+            for line in input_file:
+                # Store line as a dictionary key; duplicates will be ignored
+                unique_lines_dict[line] = None
+
+        unique_lines = unique_lines_dict.keys()
+
+        with open(output_file_path, "w") as output_file:
+            # Write all unique lines to the new file
+            output_file.writelines(unique_lines)
+
+        print(f"Duplicates removed. Unique lines saved to '{output_file_path}'")
+
+    except FileNotFoundError:
+        print(f"Error: The file '{input_file_path}' was not found.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+
+def push_to_hf(repo_id, repo_type):
+    api = HfApi()
+
+    print(f"Uploading current directory to: {repo_id}")
+
+    # Upload everything in the current directory ('.') to the repo root
+    api.upload_folder(
+        folder_path=".",
+        repo_id=repo_id,
+        repo_type=repo_type,
+        commit_message="Initial model upload",
+    )
+    print("Upload complete!")
+
+
+def push_large_folder_to_hf(repo_id, repo_type):
+    api = HfApi()
+    print(f"Starting large folder upload to: {repo_id}")
+
+    # 3. Use upload_large_folder for resilience and speed
+    # This automatically handles multi-threading and local caching for resuming
+    api.upload_large_folder(
+        folder_path=".",
+        repo_id=repo_id,
+        repo_type=repo_type,
+        # Optional: ignore large junk files to save time
+        ignore_patterns=[
+            ".git/",
+            "__pycache__/",
+            "*.tmp",
+            ".DS_Store",
+            "*.cache",
+            "*.trash",
+        ],
+    )
+
+    print(
+        "\nUpload complete! Progress was cached locally; if it failed, just run again to resume."
+    )
+
+
+def get_model_hash(model_path):
+    """
+    Get the hash of a model file
+    """
+    # print(f"Getting hash for model at {model_path}")
+    try:
+        with open(model_path, "rb") as f:
+            f.seek(
+                -10000 * 1024, 2
+            )  # Move the file pointer 10MB before the end of the file
+            hash_result = hashlib.md5(f.read()).hexdigest()
+            # print(f"Hash for {model_path}: {hash_result}")
+            return hash_result
+    except IOError:
+        with open(model_path, "rb") as f:
+            hash_result = hashlib.md5(f.read()).hexdigest()
+            # print(f"IOError encountered, hash for {model_path}: {hash_result}")
+            return hash_result
+
+
+def download_file_if_missing(url, local_path):
+    """
+    Download a file from a URL if it doesn't exist locally
+    """
+    print(f"Checking if {local_path} needs to be downloaded from {url}")
+    if not os.path.exists(local_path):
+        print(f"Downloading {url} to {local_path}")
+        with requests.get(url, stream=True, timeout=10) as r:
+            r.raise_for_status()
+            with open(local_path, "wb") as f:
+                for chunk in r.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        print(f"Downloaded {url} to {local_path}")
+    else:
+        print(f"{local_path} already exists. Skipping download.")
+
+
+def load_json_data(file_path):
+    """
+    Load JSON data from a file
+    """
+    print(f"Loading JSON data from {file_path}")
+    try:
+        with open(file_path, "r", encoding="utf-8") as file:
+            data = json.load(file)
+            print(f"Loaded JSON data successfully from {file_path}")
+            return data
+    except FileNotFoundError:
+        print(f"{file_path} not found.")
+        sys.exit(1)
+
+
+def iterate_and_hash(
+    directory,
+    vr_model_data_url,
+    mdx_model_data_url,
+    vr_model_data_local_path,
+    mdx_model_data_local_path,
+):
+    """
+    Iterate through a directory and hash all model files
+    """
+    print(f"Iterating through directory {directory} to hash model files")
+    model_files = [
+        (file, os.path.join(root, file))
+        for root, _, files in os.walk(directory)
+        for file in files
+        if file.endswith((".pth", ".onnx"))
+    ]
+
+    download_file_if_missing(vr_model_data_url, vr_model_data_local_path)
+    download_file_if_missing(mdx_model_data_url, mdx_model_data_local_path)
+
+    vr_model_data = load_json_data(vr_model_data_local_path)
+    mdx_model_data = load_json_data(mdx_model_data_local_path)
+
+    combined_model_params = {
+        **vr_model_data,
+        **mdx_model_data,
+    }
+
+    model_info_list = []
+    for file, file_path in sorted(model_files):
+        file_hash = get_model_hash(file_path)
+        model_info = {
+            "file": file,
+            "hash": file_hash,
+            "params": combined_model_params.get(file_hash, "Parameters not found"),
+        }
+        model_info_list.append(model_info)
+
+    print(f"Writing model info list to {OUTPUT_PATH}")
+    with open(OUTPUT_PATH, "w", encoding="utf-8") as json_file:
+        json.dump(model_info_list, json_file, indent=4)
+        print(f"Successfully wrote model info list to {OUTPUT_PATH}")
+
+
+def sort_links_by_extension(input_file, output_file):
+    # Define the custom priority order
+    priority = {
+        ".json": 0,
+        ".yaml": 1,
+        ".th": 2,
+        ".pth": 3,
+        ".ckpt": 4,
+        ".onnx": 5,  # Added .onnx (common typo for .onnx or .onx)
+    }
+
+    # Handle the specific user request for .onnx
+    # Example: Map .onnx to priority 5
+    # priority['.onnx'] = 5
+
+    try:
+        with open(input_file, "r") as f:
+            # Read lines and strip whitespace/newlines
+            links = [line.strip() for line in f if line.strip()]
+
+        def sort_key(link):
+            # Extract extension (case-insensitive)
+            _, ext = os.path.splitext(link.lower())
+            # Return priority index; if not in list, place at the end (index 100)
+            return priority.get(ext, 100), link
+
+        # Sort the links
+        sorted_links = sorted(links, key=sort_key)
+
+        with open(output_file, "w") as f:
+            for link in sorted_links:
+                f.write(link + "\n")
+
+        print(f"Successfully sorted links into: {output_file}")
+
+    except FileNotFoundError:
+        print(f"Error: The file '{input_file}' was not found.")
+
+
+# 1. Load the JSON data
+# Ensure 'models.json' is in your current directory
+def get_links_from_json(file_input):
+    try:
+        with open(file_input, "r") as file:
+            data = json.load(file)
+    except FileNotFoundError:
+        print("Error: 'models.json' not found.")
+        data = {}
+
+    # 2. Process and Download
+    for model_name, links in data.items():
+        if not isinstance(links, list) or len(links) == 0:
+            continue
diff --git a/config_aspiration_mel_band_roformer.yaml b/config_aspiration_mel_band_roformer.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..75983773005de1549919f2f50dc456f76f199b18
--- /dev/null
+++ b/config_aspiration_mel_band_roformer.yaml
@@ -0,0 +1,76 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801 # has no effect here (the model section's settings are used)
+  hop_length: 441 # has no effect here (the model section's settings are used)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 256
+  depth: 8
+  stereo: true
+  num_stems: 2
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 8
+  grad_clip: 0
+  instruments:
+  - aspiration
+  - other
+  lr: 4.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: null
+  num_epochs: 1000
+  num_steps: 1000
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+  enable: true # master switch for all augmentations (a quick way to turn them all off)
+  loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
+  loudness_min: 0.5
+  loudness_max: 1.5
+  mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+    - 0.2
+    - 0.02
+  mixup_loudness_min: 0.5
+  mixup_loudness_max: 1.5
+
+inference:
+  batch_size: 4
+  dim_t: 801
+  num_overlap: 2
\ No newline at end of file
diff --git a/config_bs_roformer_instrumental_resurrection_unwa.yaml b/config_bs_roformer_instrumental_resurrection_unwa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1b73093ad41459a7995340d2d8b81ecbc70dd05f
--- /dev/null
+++ b/config_bs_roformer_instrumental_resurrection_unwa.yaml
@@ -0,0 +1,135 @@
+audio:
+  chunk_size: 749259
+  dim_f: 1024
+  dim_t: 1700 # has no effect here (the model section's settings are used)
+  hop_length: 441 # has no effect here (the model section's settings are used)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 256
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  linear_transformer_depth: 0
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.
+  ff_dropout: 0.
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 2
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments: ['vocals', 'other']
+  patience: 3
+  reduce_factor: 0.95
+  target_instrument: other
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: true # Mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  # optimizer: prodigy
+  optimizer: adam
+  # lr: 1.0
+  lr: 1.0e-5
+  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 2
+  dim_t: 1700
+  num_overlap: 2
+  normalize: false
diff --git a/config_bs_roformer_karaoke_frazer_becruily.yaml b/config_bs_roformer_karaoke_frazer_becruily.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a0170aeacb3d3a55d9c598ed7faacbdc8799071f
--- /dev/null
+++ b/config_bs_roformer_karaoke_frazer_becruily.yaml
@@ -0,0 +1,129 @@
+audio:
+  chunk_size: 882000
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 256
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  linear_transformer_depth: 0
+  freqs_per_bands: !!python/tuple
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 2
+  - 4
+  - 4
+  - 4
+  - 4
+  - 4
+  - 4
+  - 4
+  - 4
+  - 4
+  - 4
+  - 4
+  - 4
+  - 12
+  - 12
+  - 12
+  - 12
+  - 12
+  - 12
+  - 12
+  - 12
+  - 24
+  - 24
+  - 24
+  - 24
+  - 24
+  - 24
+  - 24
+  - 24
+  - 48
+  - 48
+  - 48
+  - 48
+  - 48
+  - 48
+  - 48
+  - 48
+  - 128
+  - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 512
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+  mlp_expansion_factor: 4
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - Vocals
+  - Instrumental
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals
+  num_epochs: 1000
+  num_steps: 1000
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  # optimizer: prodigy
+  optimizer: adam
+  lr: 1.0e-5
+  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 2
+  dim_t: 2001
+  num_overlap: 4
+  normalize: false
diff --git a/config_bs_roformer_vocals_gabox.yaml b/config_bs_roformer_vocals_gabox.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c4a3d323322d75af7d981e9de2ef3fa29e786812
--- /dev/null
+++ b/config_bs_roformer_vocals_gabox.yaml
@@ -0,0 +1,133 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801 # has no effect here (the model section's settings are used)
+  hop_length: 441 # has no effect here (the model section's settings are used)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 512
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 16
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - Vocals
+  - Instrumental
+  lr: 5.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: true # Mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 4
\ No newline at end of file
diff --git a/config_bs_roformer_vocals_resurrection_unwa.yaml b/config_bs_roformer_vocals_resurrection_unwa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6d3c18aa4ac9b224fc549fa6df1bbeac36118016
--- /dev/null
+++ b/config_bs_roformer_vocals_resurrection_unwa.yaml
@@ -0,0 +1,135 @@
+audio:
+  chunk_size: 785920
+  dim_f: 1024
+  dim_t: 1536 # has no effect here (the model section's settings are used)
+  hop_length: 441 # has no effect here (the model section's settings are used)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 256
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  linear_transformer_depth: 0
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.
+  ff_dropout: 0.
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 2
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments: ['vocals', 'other']
+  patience: 3
+  reduce_factor: 0.95
+  target_instrument: vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: true # Mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  # optimizer: prodigy
+  optimizer: adam
+  # lr: 1.0
+  lr: 1.0e-5
+  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 2
+  dim_t: 1536
+  num_overlap: 2
+  normalize: false
diff --git a/config_bs_roformer_vocals_revive_unwa.yaml b/config_bs_roformer_vocals_revive_unwa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2d60c5d37a6c92afea42ef88d8958b7f01b64db
--- /dev/null
+++ b/config_bs_roformer_vocals_revive_unwa.yaml
@@ -0,0 +1,134 @@
+audio:
+  chunk_size: 485100 #352800 #485100
+  dim_f: 1024
+  dim_t: 1101
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.
+
+model:
+  dim: 512
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  linear_transformer_depth: 0
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.
+  ff_dropout: 0.
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - vocals
+  - other
+  lr: 1.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 2
+  dim_t: 1101
+  num_overlap: 2
diff --git a/config_dereverb-echo_mel_band_roformer.yaml b/config_dereverb-echo_mel_band_roformer.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..bf766a04152c42bb2f16e6b2929a1024c6d550f5
--- /dev/null
+++ b/config_dereverb-echo_mel_band_roformer.yaml
@@ -0,0 +1,76 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801 # has no effect here (the model section's settings are used)
+  hop_length: 441 # has no effect here (the model section's settings are used)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 256
+  depth: 8
+  stereo: true
+  num_stems: 2
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 8
+  grad_clip: 0
+  instruments:
+  - dry
+  - No dry
+  lr: 4.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: null
+  num_epochs: 1000
+  num_steps: 1000
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+  enable: true # master switch for all augmentations (a quick way to turn them all off)
+  loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
+  loudness_min: 0.5
+  loudness_max: 1.5
+  mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+    - 0.2
+    - 0.02
+  mixup_loudness_min: 0.5
+  mixup_loudness_max: 1.5
+
+inference:
+  batch_size: 4
+  dim_t: 801
+  num_overlap: 4
\ No newline at end of file
diff --git a/config_mdx23c_similarity.yaml b/config_mdx23c_similarity.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9ce8239fc926820db231cb1a240d20a1ff3eca0e
--- /dev/null
+++ b/config_mdx23c_similarity.yaml
@@ -0,0 +1,47 @@
+audio:
+  chunk_size: 130560
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 512
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  act: gelu
+  bottleneck_factor: 4
+  growth: 128
+  norm: InstanceNorm
+  num_blocks_per_scale: 2
+  num_channels: 128
+  num_scales: 5
+  num_subbands: 4
+  scale:
+  - 2
+  - 2
+
+training:
+  batch_size: 2
+  gradient_accumulation_steps: 3
+  grad_clip: 0
+  instruments:
+  - Similarity
+  - Difference
+  lr: 1.0
+  patience: 15
+  reduce_factor: 0.95
+  target_instrument: Similarity
+  num_epochs: 1000
+  num_steps: 2235
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: prodigy
+  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 8
+  dim_t: 256
+  num_overlap: 8
diff --git a/config_mel_band_roformer_instrumental_becruily.yaml b/config_mel_band_roformer_instrumental_becruily.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..862010f34a3765fa1ac9f22c04ba74042b2fd086
--- /dev/null
+++ b/config_mel_band_roformer_instrumental_becruily.yaml
@@ -0,0 +1,72 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - Instrumental
+  - Vocals
+  lr: 0.0005
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Instrumental
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adamw
+  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 2
\ No newline at end of file
diff --git a/config_mel_band_roformer_instrumental_gabox.yaml b/config_mel_band_roformer_instrumental_gabox.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b1395e978d64cb1c37d3015adc2feeb0805e3b94
--- /dev/null
+++ b/config_mel_band_roformer_instrumental_gabox.yaml
@@ -0,0 +1,51 @@
+audio:
+  chunk_size: 485100
+  dim_f: 1024
+  dim_t: 1101
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  instruments:
+  - Instrumental
+  - Vocals
+  target_instrument: Instrumental
+  use_amp: True
+
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 2
\ No newline at end of file
diff --git a/config_mel_band_roformer_karaoke_becruily.yaml b/config_mel_band_roformer_karaoke_becruily.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..58cd1747a53d1695128e732aa7aa6802cb77db70
--- /dev/null
+++ b/config_mel_band_roformer_karaoke_becruily.yaml
@@ -0,0 +1,72 @@
+audio:
+  chunk_size: 485100
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 2
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: true
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - Vocals
+  - Instrumental
+  lr: 0.0005
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: null
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null # unset; written as an explicit null rather than an empty value
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adamw
+  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 8
diff --git a/config_mel_band_roformer_kim_ft_unwa.yaml b/config_mel_band_roformer_kim_ft_unwa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d0527f99399af7f504ead83ce75e6715cd190e56
--- /dev/null
+++ b/config_mel_band_roformer_kim_ft_unwa.yaml
@@ -0,0 +1,72 @@
+audio:
+  chunk_size: 485100
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - vocals
+  - other
+  lr: 1.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 8
\ No newline at end of file
diff --git a/config_mel_band_roformer_vocal_fullness_aname.yaml b/config_mel_band_roformer_vocal_fullness_aname.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..32fee9c05a82b72931cbb9a8af7d948a538532cb
--- /dev/null
+++ b/config_mel_band_roformer_vocal_fullness_aname.yaml
@@ -0,0 +1,54 @@
+audio:
+  chunk_size: 661500
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - vocals
+  - other
+  target_instrument: vocals
+  use_amp: true
+
+inference:
+  batch_size: 4
+  dim_t: 1101
+  num_overlap: 4
\ No newline at end of file
diff --git a/config_mel_band_roformer_vocals_becruily.yaml b/config_mel_band_roformer_vocals_becruily.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d42333851e31c9e5747d818efec365921358a7c
--- /dev/null
+++ b/config_mel_band_roformer_vocals_becruily.yaml
@@ -0,0 +1,72 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - vocals
+  - other
+  lr: 0.0005
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adamw
+  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 2
\ No newline at end of file
diff --git a/config_mel_band_roformer_vocals_gabox.yaml b/config_mel_band_roformer_vocals_gabox.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8130c9958eead0d2efd27f27f4f39ea5ca051a26
--- /dev/null
+++ b/config_mel_band_roformer_vocals_gabox.yaml
@@ -0,0 +1,51 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  instruments:
+  - Vocals
+  - Instrumental
+  target_instrument: Vocals
+
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 1
+  chunk_size: 352800
\ No newline at end of file
diff --git a/config_melbandroformer_inst.yaml b/config_melbandroformer_inst.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d6bdca342644a1194427fe505e2044c5006a1213
--- /dev/null
+++ b/config_melbandroformer_inst.yaml
@@ -0,0 +1,51 @@
+audio:
+  chunk_size: 485100  # 485100 / 44100 = 11 (seconds, if counted in samples)
+  dim_f: 1024
+  dim_t: 1101  # (1101 - 1) * hop_length 441 = 485100 = chunk_size
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: true
+  dim_freqs_in: 1025  # = stft_n_fft / 2 + 1 (2048 / 2 + 1)
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441  # same value as audio.hop_length
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple  # python-specific tag; rejected by safe loaders
+    - 4096
+    - 2048
+    - 1024
+    - 512
+    - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+
+training:
+  instruments:
+    - other
+    - vocals
+  target_instrument: other  # one of the entries listed under instruments
+  use_amp: true
+
+inference:
+  batch_size: 1
+  dim_t: 1101  # same value as audio.dim_t
+  num_overlap: 2
\ No newline at end of file
diff --git a/config_melbandroformer_inst_v2.yaml b/config_melbandroformer_inst_v2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4297c088f7b8bd2f28308d8a8d1e0694cdec967
--- /dev/null
+++ b/config_melbandroformer_inst_v2.yaml
@@ -0,0 +1,51 @@
+audio:
+  chunk_size: 485100  # 485100 / 44100 = 11 (seconds, if counted in samples)
+  dim_f: 1024
+  dim_t: 1101  # (1101 - 1) * hop_length 441 = 485100 = chunk_size
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: true
+  dim_freqs_in: 1025  # = stft_n_fft / 2 + 1 (2048 / 2 + 1)
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441  # same value as audio.hop_length
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 3
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple  # python-specific tag; rejected by safe loaders
+    - 4096
+    - 2048
+    - 1024
+    - 512
+    - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+
+training:
+  instruments:
+    - Instrumental
+    - Vocals
+  target_instrument: Instrumental  # one of the entries listed under instruments
+  use_amp: true
+
+inference:
+  batch_size: 1
+  dim_t: 1101  # same value as audio.dim_t
+  num_overlap: 2
\ No newline at end of file
diff --git a/config_melbandroformer_instvoc_duality.yaml b/config_melbandroformer_instvoc_duality.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b93e721853f4d90efa7f0bead82f6a1b791fc19f
--- /dev/null
+++ b/config_melbandroformer_instvoc_duality.yaml
@@ -0,0 +1,51 @@
+audio:
+  chunk_size: 485100  # 485100 / 44100 = 11 (seconds, if counted in samples)
+  dim_f: 1024
+  dim_t: 256  # NOTE(review): inference.dim_t is 1101 - confirm which one is read
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 2  # matches the two entries under training.instruments
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: true
+  dim_freqs_in: 1025  # = stft_n_fft / 2 + 1 (2048 / 2 + 1)
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441  # same value as audio.hop_length
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple  # python-specific tag; rejected by safe loaders
+    - 4096
+    - 2048
+    - 1024
+    - 512
+    - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+
+training:
+  instruments:
+    - Vocals
+    - Instrumental
+  target_instrument: null  # no single target selected
+  use_amp: true
+
+inference:
+  batch_size: 1
+  dim_t: 1101  # (1101 - 1) * 441 = 485100 = audio.chunk_size
+  num_overlap: 2
\ No newline at end of file
diff --git a/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768_config.yaml b/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..265e19c806778d7b2d5ffdaef9e3d503a6dba3f1
--- /dev/null
+++ b/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768_config.yaml
@@ -0,0 +1,71 @@
+audio:
+  chunk_size: 352800  # 352800 / 44100 = 8 (seconds, if counted in samples)
+  dim_f: 1024
+  dim_t: 801  # (801 - 1) * hop_length 441 = 352800 = chunk_size
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: true
+  dim_freqs_in: 1025  # = stft_n_fft / 2 + 1 (2048 / 2 + 1)
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441  # same value as audio.hop_length
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple  # python-specific tag; rejected by safe loaders
+    - 4096
+    - 2048
+    - 1024
+    - 512
+    - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+
+training:
+  batch_size: 2
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+    - dry
+    - other
+  lr: 1.0e-05
+  patience: 8
+  reduce_factor: 0.95
+  target_instrument: dry  # one of the entries listed under instruments
+  num_epochs: 1000
+  num_steps: 4032
+  augmentation: false  # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false  # Deprecated
+  augmentation_mix: false  # Mix several stems of the same type with some probability
+  augmentation_loudness: false  # randomly change loudness of each stem
+  augmentation_loudness_type: 1  # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false  # it's needed for checking on multisong dataset if other is actually instrumental
+
+inference:
+  batch_size: 2
+  dim_t: 801  # same value as audio.dim_t
+  num_overlap: 4
\ No newline at end of file
diff --git a/denoise_mel_band_roformer_aufr33_sdr_27.9959_config.yaml b/denoise_mel_band_roformer_aufr33_sdr_27.9959_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..265e19c806778d7b2d5ffdaef9e3d503a6dba3f1
--- /dev/null
+++ b/denoise_mel_band_roformer_aufr33_sdr_27.9959_config.yaml
@@ -0,0 +1,71 @@
+audio:
+  chunk_size: 352800  # 352800 / 44100 = 8 (seconds, if counted in samples)
+  dim_f: 1024
+  dim_t: 801  # (801 - 1) * hop_length 441 = 352800 = chunk_size
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: true
+  dim_freqs_in: 1025  # = stft_n_fft / 2 + 1 (2048 / 2 + 1)
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441  # same value as audio.hop_length
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple  # python-specific tag; rejected by safe loaders
+    - 4096
+    - 2048
+    - 1024
+    - 512
+    - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+
+training:
+  batch_size: 2
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+    - dry
+    - other
+  lr: 1.0e-05
+  patience: 8
+  reduce_factor: 0.95
+  target_instrument: dry  # one of the entries listed under instruments
+  num_epochs: 1000
+  num_steps: 4032
+  augmentation: false  # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false  # Deprecated
+  augmentation_mix: false  # Mix several stems of the same type with some probability
+  augmentation_loudness: false  # randomly change loudness of each stem
+  augmentation_loudness_type: 1  # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false  # it's needed for checking on multisong dataset if other is actually instrumental
+
+inference:
+  batch_size: 2
+  dim_t: 801  # same value as audio.dim_t
+  num_overlap: 4
\ No newline at end of file
diff --git a/deverb_bs_roformer_8_384dim_10depth_config.yaml b/deverb_bs_roformer_8_384dim_10depth_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d2c7ce0c4b424baa7731495c432102672b68cfa6
--- /dev/null
+++ b/deverb_bs_roformer_8_384dim_10depth_config.yaml
@@ -0,0 +1,137 @@
+audio:
+  chunk_size: 352768  # NOTE(review): (801 - 1) * 441 = 352800; 352768 = 689 * 512 - confirm intended
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 384
+  depth: 10
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  freqs_per_bands: !!python/tuple  # 62 band widths; they sum to 1025 = dim_freqs_in
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025  # = stft_n_fft / 2 + 1 (2048 / 2 + 1)
+  stft_n_fft: 2048
+  stft_hop_length: 441  # same value as audio.hop_length
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple  # python-specific tag; rejected by safe loaders
+    - 4096
+    - 2048
+    - 1024
+    - 512
+    - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+    - noreverb
+    - reverb
+  lr: 5.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: noreverb  # one of the entries listed under instruments
+  num_epochs: 1000
+  num_steps: 1000
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false  # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true  # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+  enable: true  # enable or disable all augmentations (to fast disable if needed)
+  loudness: true  # randomly change loudness of each stem on the range (loudness_min; loudness_max)
+  loudness_min: 0.5
+  loudness_max: 1.5
+  mixup: false  # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+  mixup_probs: !!python/tuple  # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+    - 0.2
+    - 0.02
+  mixup_loudness_min: 0.5
+  mixup_loudness_max: 1.5
+
+inference:
+  batch_size: 4
+  dim_t: 801  # same value as audio.dim_t
+  num_overlap: 4
diff --git a/hdemucs_mmi.yaml b/hdemucs_mmi.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0ea089139bfbef4a1126ab25e93c3dc380a90b46
--- /dev/null
+++ b/hdemucs_mmi.yaml
@@ -0,0 +1,2 @@
+models: ['75fc33f5']  # single model id, quoted so it stays a string
+segment: 44  # NOTE(review): unit is not stated here - presumably seconds; confirm against the loader
diff --git a/htdemucs.yaml b/htdemucs.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0d5f2089fa3e1a0335d93de070f6802598cd4a4d
--- /dev/null
+++ b/htdemucs.yaml
@@ -0,0 +1 @@
+models: ['955717e8']  # keep quoted: a bare 955717e8 is a float under the YAML 1.2 core schema
diff --git a/htdemucs_6s.yaml b/htdemucs_6s.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..651a0fa536038a3e6d650f7b2bcc0b50ff7a4be9
--- /dev/null
+++ b/htdemucs_6s.yaml
@@ -0,0 +1 @@
+models: ['5c90dfd2']  # single model id, quoted so it stays a string
diff --git a/htdemucs_ft.yaml b/htdemucs_ft.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ba5c69c272770f5e5db3dd5fcda75b94ba523250
--- /dev/null
+++ b/htdemucs_ft.yaml
@@ -0,0 +1,7 @@
+models: ['f7e0c4bc', 'd12395a8', '92cfc3b6', '04573f0d']  # four model ids, quoted strings
+weights: [
+  [1., 0., 0., 0.],
+  [0., 1., 0., 0.],
+  [0., 0., 1., 0.],
+  [0., 0., 0., 1.],
+]  # 4x4 identity matrix; presumably one row per entry in models - verify against the loader
\ No newline at end of file
diff --git a/kuielab_b_other.onnx b/kuielab_b_other.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..e17baa7f40b3dbd1299ee7ba7e6db461682e8320
--- /dev/null
+++ b/kuielab_b_other.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0d0b63950ac332333fea2d58f68c92fd3ab0aae071398c2a8beeae1ad15b655
+size 29703204
diff --git a/kuielab_b_vocals.onnx b/kuielab_b_vocals.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..683e555b8a91e3035cc24bee5b47b7347390eb8a
--- /dev/null
+++ b/kuielab_b_vocals.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b7dcb9d878acb0f3e64ff3fd27750faae96577013f6d50f5996875bf4250713
+size 29703204
diff --git a/mdx_model_data.json b/mdx_model_data.json
new file mode 100644
index 0000000000000000000000000000000000000000..ba79b189d787ecd9b3035fca87421377f79cd67f
--- /dev/null
+++ b/mdx_model_data.json
@@ -0,0 +1,384 @@
+{
+    "0ddfc0eb5792638ad5dc27850236c246": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "26d308f91f3423a67dc69a6d12a8793d": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 8192,
+        "primary_stem": "Other"
+    },
+    "2cdd429caac38f0194b133884160f2c6": {
+        "compensate": 1.045,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "2f5501189a2f6db6349916fabe8c90de": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals",
+        "is_karaoke": true
+    },
+    "398580b6d5d973af3120df54cee6759d": {
+        "compensate": 1.75,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "488b3e6f8bd3717d9d7c428476be2d75": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "4910e7827f335048bdac11fa967772f9": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 7,
+        "mdx_n_fft_scale_set": 4096,
+        "primary_stem": "Drums"
+    },
+    "53c4baf4d12c3e6c3831bb8f5b532b93": {
+        "compensate": 1.043,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "5d343409ef0df48c7d78cce9f0106781": {
+        "compensate": 1.075,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "5f6483271e1efb9bfb59e4a3e6d4d098": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "65ab5919372a128e4167f5e01a8fda85": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 8192,
+        "primary_stem": "Other"
+    },
+    "6703e39f36f18aa7855ee1047765621d": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 16384,
+        "primary_stem": "Bass"
+    },
+    "6b31de20e84392859a3d09d43f089515": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "867595e9de46f6ab699008295df62798": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "a3cd63058945e777505c01d2507daf37": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "b33d9b3950b6cbf5fe90a32608924700": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "c3b29bdce8c4fa17ec609e16220330ab": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 16384,
+        "primary_stem": "Bass"
+    },
+    "ceed671467c1f64ebdfac8a2490d0d52": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "d2a1376f310e4f7fa37fb9b5774eb701": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "d7bff498db9324db933d913388cba6be": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "d94058f8c7f1fae4164868ae8ae66b20": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "dc41ede5961d50f277eb846db17f5319": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 4096,
+        "primary_stem": "Drums"
+    },
+    "e5572e58abf111f80d8241d2e44e7fa4": {
+        "compensate": 1.028,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "e7324c873b1f615c35c1967f912db92a": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "1c56ec0224f1d559c42fd6fd2a67b154": {
+        "compensate": 1.025,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental"
+    },
+    "f2df6d6863d8f435436d8b561594ff49": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "b06327a00d5e5fbc7d96e1781bbdb596": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "94ff780b977d3ca07c7a343dab2e25dd": {
+        "compensate": 1.039,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "73492b58195c3b52d34590d5474452f6": {
+        "compensate": 1.043,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "970b3f9492014d18fefeedfe4773cb42": {
+        "compensate": 1.009,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "1d64a6d2c30f709b8c9b4ce1366d96ee": {
+        "compensate": 1.065,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental",
+        "is_karaoke": true
+    },
+    "203f2a3955221b64df85a41af87cf8f0": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "291c2049608edb52648b96e27eb80e95": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "ead8d05dab12ec571d67549b3aab03fc": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "cc63408db3d80b4d85b0287d1d7c9632": {
+        "compensate": 1.033,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "cd5b2989ad863f116c855db1dfe24e39": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Reverb"
+    },
+    "55657dd70583b0fedfba5f67df11d711": {
+        "compensate": 1.022,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "b6bccda408a436db8500083ef3491e8b": {
+        "compensate": 1.02,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "8a88db95c7fb5dbe6a095ff2ffb428b1": {
+        "compensate": 1.026,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental"
+    },
+    "b78da4afc6512f98e4756f5977f5c6b9": {
+        "compensate": 1.021,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "77d07b2667ddf05b9e3175941b4454a0": {
+        "compensate": 1.021,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "0f2a6bc5b49d87d64728ee40e23bceb1": {
+        "compensate": 1.019,
+        "mdx_dim_f_set": 2560,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental"
+    },
+    "b02be2d198d4968a121030cf8950b492": {
+        "compensate": 1.020,
+        "mdx_dim_f_set": 2560,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "No Crowd"
+    },
+    "2154254ee89b2945b97a7efed6e88820": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "063aadd735d58150722926dcbf5852a9": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "c09f714d978b41d718facfe3427e6001": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "fe96801369f6a148df2720f5ced88c19": {
+        "config_yaml": "model3.yaml"
+    },
+    "02e8b226f85fb566e5db894b9931c640": {
+        "config_yaml": "model2.yaml"
+    },
+    "e3de6d861635ab9c1d766149edd680d6": {
+        "config_yaml": "model1.yaml"
+    },
+    "3f2936c554ab73ce2e396d54636bd373": {
+        "config_yaml": "modelB.yaml"
+    },
+    "890d0f6f82d7574bca741a9e8bcb8168": {
+        "config_yaml": "modelB.yaml"
+    },
+    "63a3cb8c37c474681049be4ad1ba8815": {
+        "config_yaml": "modelB.yaml"
+    },
+    "a7fc5d719743c7fd6b61bd2b4d48b9f0": {
+        "config_yaml": "modelA.yaml"
+    },
+    "3567f3dee6e77bf366fcb1c7b8bc3745": {
+        "config_yaml": "modelA.yaml"
+    },
+    "a28f4d717bd0d34cd2ff7a3b0a3d065e": {
+        "config_yaml": "modelA.yaml"
+    },
+    "c9971a18da20911822593dc81caa8be9": {
+        "config_yaml": "sndfx.yaml"
+    },
+    "57d94d5ed705460d21c75a5ac829a605": {
+        "config_yaml": "sndfx.yaml"
+    },
+    "e7a25f8764f25a52c1b96c4946e66ba2": {
+        "config_yaml": "sndfx.yaml"
+    },
+    "104081d24e37217086ce5fde09147ee1": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "1e6165b601539f38d0a9330f3facffeb": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "fe0108464ce0d8271be5ab810891bd7c": {
+        "config_yaml": "model_2_stem_full_band.yaml"
+    },
+    "e9b82ec90ee56c507a3a982f1555714c": {
+        "config_yaml": "model_2_stem_full_band_2.yaml"
+    },
+    "99b6ceaae542265a3b6d657bf9fde79f": {
+        "config_yaml": "model_2_stem_full_band_8k.yaml"
+    },
+    "116f6f9dabb907b53d847ed9f7a9475f": {
+        "config_yaml": "model_2_stem_full_band_8k.yaml"
+    },
+    "53f707017bfcbb56f5e1bfac420d6732": {
+        "config_yaml": "model_bs_roformer_ep_317_sdr_12.9755.yaml",
+        "is_roformer": true
+    },
+    "63e41acc264bf681a73aa9f7e5f606cc": {
+        "config_yaml": "model_mel_band_roformer_ep_3005_sdr_11.4360.yaml",
+        "is_roformer": true
+    },
+    "e733736763234047587931fc35322fd9": {
+        "config_yaml": "model_bs_roformer_ep_937_sdr_10.5309.yaml",
+        "is_roformer": true
+    },
+    "d789065adfd747d6f585b27b495bcdae": {
+        "config_yaml": "model_bs_roformer_ep_368_sdr_12.9628.yaml",
+        "is_roformer": true
+    }
+}
diff --git a/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144_config.yaml b/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7e44ef94c71082af3a619c9b439f808ae8eb3e1c
--- /dev/null
+++ b/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144_config.yaml
@@ -0,0 +1,71 @@
+audio:
+  chunk_size: 352800  # 352800 / 44100 = 8 (seconds, if counted in samples)
+  dim_f: 1024
+  dim_t: 801  # (801 - 1) * hop_length 441 = 352800 = chunk_size
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: true
+  dim_freqs_in: 1025  # = stft_n_fft / 2 + 1 (2048 / 2 + 1)
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441  # same value as audio.hop_length
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple  # python-specific tag; rejected by safe loaders
+    - 4096
+    - 2048
+    - 1024
+    - 512
+    - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+
+training:
+  batch_size: 2
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+    - crowd
+    - other
+  lr: 1.0e-05
+  patience: 8
+  reduce_factor: 0.95
+  target_instrument: crowd  # one of the entries listed under instruments
+  num_epochs: 1000
+  num_steps: 4032
+  augmentation: false  # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false  # Deprecated
+  augmentation_mix: false  # Mix several stems of the same type with some probability
+  augmentation_loudness: false  # randomly change loudness of each stem
+  augmentation_loudness_type: 1  # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false  # it's needed for checking on multisong dataset if other is actually instrumental
+
+inference:
+  batch_size: 1
+  dim_t: 801  # same value as audio.dim_t
+  num_overlap: 4
\ No newline at end of file
diff --git a/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956_config.yaml b/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b88403c926bc5957a54ba90271f0cced47c8366f
--- /dev/null
+++ b/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956_config.yaml
@@ -0,0 +1,71 @@
+audio:
+  chunk_size: 352800  # 352800 / 44100 = 8 (seconds, if counted in samples)
+  dim_f: 1024
+  dim_t: 801  # (801 - 1) * hop_length 441 = 352800 = chunk_size
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: true
+  dim_freqs_in: 1025  # = stft_n_fft / 2 + 1 (2048 / 2 + 1)
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441  # same value as audio.hop_length
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple  # python-specific tag; rejected by safe loaders
+    - 4096
+    - 2048
+    - 1024
+    - 512
+    - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+
+training:
+  batch_size: 4
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+    - Vocals
+    - Instrumental
+  lr: 1.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals  # one of the entries listed under instruments
+  num_epochs: 1000
+  num_steps: 2000
+  augmentation: false  # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false  # Deprecated
+  augmentation_mix: false  # Mix several stems of the same type with some probability
+  augmentation_loudness: false  # randomly change loudness of each stem
+  augmentation_loudness_type: 1  # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false  # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true  # enable or disable usage of mixed precision (float16) - usually it must be true
+inference:
+  batch_size: 1
+  dim_t: 801  # same value as audio.dim_t
+  num_overlap: 4
\ No newline at end of file
diff --git a/model_bs_roformer_ep_317_sdr_12.9755.yaml b/model_bs_roformer_ep_317_sdr_12.9755.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c4a3d323322d75af7d981e9de2ef3fa29e786812
--- /dev/null
+++ b/model_bs_roformer_ep_317_sdr_12.9755.yaml
@@ -0,0 +1,133 @@
+audio:
+  chunk_size: 352800  # 352800 / 44100 = 8 (seconds, if counted in samples)
+  dim_f: 1024
+  dim_t: 801  # has no effect here (set it in the model section)
+  hop_length: 441  # has no effect here (set it in the model section)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 512
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  freqs_per_bands: !!python/tuple  # 62 band widths; they sum to 1025 = dim_freqs_in
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025  # = stft_n_fft / 2 + 1 (2048 / 2 + 1)
+  stft_n_fft: 2048
+  stft_hop_length: 441  # same value as audio.hop_length
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple  # python-specific tag; rejected by safe loaders
+    - 4096
+    - 2048
+    - 1024
+    - 512
+    - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+
+training:
+  batch_size: 16
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+    - Vocals
+    - Instrumental
+  lr: 5.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals  # one of the entries listed under instruments
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false  # enable augmentations by audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false  # Deprecated
+  augmentation_mix: true  # Mix several stems of the same type with some probability
+  augmentation_loudness: true  # randomly change loudness of each stem
+  augmentation_loudness_type: 1  # Type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false  # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true  # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 1
+  dim_t: 801  # same value as audio.dim_t
+  num_overlap: 4
\ No newline at end of file
diff --git a/model_bs_roformer_ep_368_sdr_12.9628.yaml b/model_bs_roformer_ep_368_sdr_12.9628.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fe893b1a68b8ae8ea8bb5a7ac2b7f12e0c53a826
--- /dev/null
+++ b/model_bs_roformer_ep_368_sdr_12.9628.yaml
@@ -0,0 +1,133 @@
+audio:
+  chunk_size: 352800 # = 441 * 800, i.e. 8 s at sample_rate 44100
+  dim_f: 1024
+  dim_t: 801 # doesn't work here (used in model)
+  hop_length: 441 # doesn't work here (used in model); same value as model.stft_hop_length
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 512
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  freqs_per_bands: !!python/tuple # 62 band widths summing to 1025 (= dim_freqs_in)
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025 # = stft_n_fft / 2 + 1
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple # python-specific tag: cannot be read by a safe YAML loader
+    - 4096
+    - 2048
+    - 1024
+    - 512
+    - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+
+training:
+  batch_size: 16
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+    - Vocals
+    - Instrumental
+  lr: 5.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals # one of the entries listed under instruments
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enables augmentations via audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false # deprecated
+  augmentation_mix: true # mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change the loudness of each stem
+  augmentation_loudness_type: 1 # type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed when checking on the multisong dataset if "other" is actually the instrumental
+  use_amp: true # enables mixed precision (float16); usually this should be true
+
+inference:
+  batch_size: 1
+  dim_t: 801 # same value as audio.dim_t
+  num_overlap: 4 # NOTE(review): presumably the overlap factor between consecutive inference chunks - confirm
diff --git a/model_bs_roformer_ep_937_sdr_10.5309.yaml b/model_bs_roformer_ep_937_sdr_10.5309.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f623832cc06ebc5fa8a049fad6b1319c6038336d
--- /dev/null
+++ b/model_bs_roformer_ep_937_sdr_10.5309.yaml
@@ -0,0 +1,138 @@
+audio:
+  chunk_size: 131584 # = hop_length * (dim_t + 1) = 512 * 257
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 512 # same value as model.stft_hop_length
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 384
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  linear_transformer_depth: 0
+  freqs_per_bands: !!python/tuple # 62 band widths summing to 1025 (= dim_freqs_in)
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025 # = stft_n_fft / 2 + 1
+  stft_n_fft: 2048
+  stft_hop_length: 512
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple # python-specific tag: cannot be read by a safe YAML loader
+    - 4096
+    - 2048
+    - 1024
+    - 512
+    - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+
+training:
+  batch_size: 4
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+    - No Drum-Bass
+    - Drum-Bass
+  lr: 5.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: No Drum-Bass # one of the entries listed under instruments
+  num_epochs: 1000
+  num_steps: 1000
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed when checking on the multisong dataset if "other" is actually the instrumental
+  use_amp: true # enables mixed precision (float16); usually this should be true
+
+augmentations:
+  enable: true # master switch for all augmentations (lets you disable them quickly if needed)
+  loudness: true # randomly change the loudness of each stem within the range (loudness_min; loudness_max)
+  loudness_min: 0.5
+  loudness_max: 1.5
+  mixup: true # mix several stems of the same type with some probability (only works for dataset types 1, 2, 3)
+  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+    - 0.2
+    - 0.02
+  mixup_loudness_min: 0.5
+  mixup_loudness_max: 1.5
+
+inference:
+  batch_size: 1
+  dim_t: 512 # NOTE(review): differs from audio.dim_t (256) - confirm this is intentional
+  num_overlap: 4 # NOTE(review): presumably the overlap factor between consecutive inference chunks - confirm
diff --git a/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml b/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7c906f2931cbae3cf64551c231e285ca10097fe5
--- /dev/null
+++ b/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml
@@ -0,0 +1,72 @@
+audio:
+  chunk_size: 352800 # = 441 * 800, i.e. 8 s at sample_rate 44100
+  dim_f: 1024
+  dim_t: 801 # doesn't work here (used in model)
+  hop_length: 441 # doesn't work here (used in model); same value as model.stft_hop_length
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 384
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025 # = stft_n_fft / 2 + 1
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple # python-specific tag: cannot be read by a safe YAML loader
+    - 4096
+    - 2048
+    - 1024
+    - 512
+    - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+
+training:
+  batch_size: 9
+  gradient_accumulation_steps: 8
+  grad_clip: 0
+  instruments:
+    - Vocals
+    - Instrumental
+  lr: 4.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals # one of the entries listed under instruments
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enables augmentations via audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false # deprecated
+  augmentation_mix: true # mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change the loudness of each stem
+  augmentation_loudness_type: 1 # type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed when checking on the multisong dataset if "other" is actually the instrumental
+  use_amp: true # enables mixed precision (float16); usually this should be true
+
+inference:
+  batch_size: 1
+  dim_t: 801 # same value as audio.dim_t
+  num_overlap: 4 # NOTE(review): presumably the overlap factor between consecutive inference chunks - confirm
diff --git a/repro_mdx_a_time_only.yaml b/repro_mdx_a_time_only.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d5d16ea8bc419198692fc993b560c1bd3f8eb8c9
--- /dev/null
+++ b/repro_mdx_a_time_only.yaml
@@ -0,0 +1,2 @@
+models: ['9a6b4851', '9a6b4851', '1ef250f1', '1ef250f1'] # two distinct IDs, each listed twice
+segment: 44 # NOTE(review): presumably a segment length in seconds - confirm against the loader
diff --git a/scnet_checkpoint_musdb18.ckpt b/scnet_checkpoint_musdb18.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..671b56f708c41055e2fd1ad71391254a8f097aac
--- /dev/null
+++ b/scnet_checkpoint_musdb18.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bc0d1abb20bfdf966dcd07637bafd03e4bc13653d09ef18bc9b3e342eafe2aa
+size 42434986
diff --git a/vocals_mel_band_roformer.yaml b/vocals_mel_band_roformer.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9cb005e7a97c66d5fb23bba8bb36bec9619cdd8f
--- /dev/null
+++ b/vocals_mel_band_roformer.yaml
@@ -0,0 +1,50 @@
+audio:
+  chunk_size: 352800 # = hop_length * 800, i.e. 8 s at sample_rate 44100; repeated under inference.chunk_size
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441 # same value as model.stft_hop_length
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: true
+  dim_freqs_in: 1025 # = stft_n_fft / 2 + 1
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple # python-specific tag: cannot be read by a safe YAML loader
+    - 4096
+    - 2048
+    - 1024
+    - 512
+    - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+
+training:
+  instruments:
+    - vocals
+    - other
+  target_instrument: vocals # one of the entries listed under instruments
+
+inference:
+  dim_t: 1101 # NOTE(review): differs from audio.dim_t (256) - confirm this is intentional
+  num_overlap: 1 # NOTE(review): presumably the overlap factor between consecutive inference chunks - confirm
+  chunk_size: 352800 # same value as audio.chunk_size
diff --git a/vr_model_data.json b/vr_model_data.json
new file mode 100644
index 0000000000000000000000000000000000000000..025e093664e798716b177706322649be048dca24
--- /dev/null
+++ b/vr_model_data.json
@@ -0,0 +1,137 @@
+{
+    "0d0e6d143046b0eecc41a22e60224582": {
+        "vr_model_param": "3band_44100_mid",
+        "primary_stem": "Instrumental"
+    },
+    "18b52f873021a0af556fb4ecd552bb8e": {
+        "vr_model_param": "2band_32000",
+        "primary_stem": "Instrumental"
+    },
+    "1fc66027c82b499c7d8f55f79e64cadc": {
+        "vr_model_param": "2band_32000",
+        "primary_stem": "Instrumental"
+    },
+    "2aa34fbc01f8e6d2bf509726481e7142": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "No Piano"
+    },
+    "3e18f639b11abea7361db1a4a91c2559": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "570b5f50054609a17741369a35007ddd": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "Instrumental"
+    },
+    "5a6e24c1b530f2dab045a522ef89b751": {
+        "vr_model_param": "1band_sr44100_hl512",
+        "primary_stem": "Instrumental"
+    },
+    "6b5916069a49be3fe29d4397ecfd73fa": {
+        "vr_model_param": "3band_44100_msb2",
+        "primary_stem": "Instrumental",
+        "is_karaoke": true
+    },
+    "74b3bc5fa2b69f29baf7839b858bc679": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "827213b316df36b52a1f3d04fec89369": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "911d4048eee7223eca4ee0efb7d29256": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Vocals"
+    },
+    "941f3f7f0b0341f12087aacdfef644b1": {
+        "vr_model_param": "4band_v2",
+        "primary_stem": "Instrumental"
+    },
+    "a02827cf69d75781a35c0e8a327f3195": {
+        "vr_model_param": "1band_sr33075_hl384",
+        "primary_stem": "Instrumental"
+    },
+    "b165fbff113c959dba5303b74c6484bc": {
+        "vr_model_param": "3band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "b5f988cd3e891dca7253bf5f0f3427c7": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "b99c35723bc35cb11ed14a4780006a80": {
+        "vr_model_param": "1band_sr44100_hl1024",
+        "primary_stem": "Instrumental"
+    },
+    "ba02fd25b71d620eebbdb49e18e4c336": {
+        "vr_model_param": "3band_44100_mid",
+        "primary_stem": "Instrumental"
+    },
+    "c4476ef424d8cba65f38d8d04e8514e2": {
+        "vr_model_param": "3band_44100_msb2",
+        "primary_stem": "Instrumental"
+    },
+    "da2d37b8be2972e550a409bae08335aa": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Vocals"
+    },
+    "db57205d3133e39df8e050b435a78c80": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "ea83b08e32ec2303456fe50659035f69": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "Instrumental"
+    },
+    "f6ea8473ff86017b5ebd586ccacf156b": {
+        "vr_model_param": "4band_v2_sn",
+        "primary_stem": "Instrumental",
+        "is_karaoke": true
+    },
+    "fd297a61eafc9d829033f8b987c39a3d": {
+        "vr_model_param": "1band_sr32000_hl512",
+        "primary_stem": "Instrumental"
+    },
+    "0ec76fd9e65f81d8b4fbd13af4826ed8": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "No Woodwinds"
+    },
+    "0fb9249ffe4ffc38d7b16243f394c0ff": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "No Reverb"
+    },
+    "6857b2972e1754913aad0c9a1678c753": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "No Echo",
+        "nout": 48,
+        "nout_lstm": 128
+    },
+    "f200a145434efc7dcf0cd093f517ed52": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "No Echo",
+        "nout": 48,
+        "nout_lstm": 128
+    },
+    "44c55d8b5d2e3edea98c2b2bf93071c7": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "Noise",
+        "nout": 48,
+        "nout_lstm": 128
+    },
+    "51ea8c43a6928ed3c10ef5cb2707d57b": {
+        "vr_model_param": "1band_sr44100_hl1024",
+        "primary_stem": "Noise",
+        "nout": 16,
+        "nout_lstm": 128
+    },
+    "944950a9c5963a5eb70b445d67b7068a": {
+        "vr_model_param": "4band_v3_sn",
+        "primary_stem": "Vocals",
+        "nout": 64,
+        "nout_lstm": 128,
+        "is_karaoke": false,
+        "is_bv_model": true,
+        "is_bv_model_rebalanced": 0.9
+    }
+}