diff --git a/models/audio-separator-models/.gitattributes b/models/audio-separator-models/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..dab9a4e17afd2ef39d90ccb0b40ef2786fe77422
--- /dev/null
+++ b/models/audio-separator-models/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/models/audio-separator-models/README.md b/models/audio-separator-models/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7be5fc7f47d5db027d120b8024982df93db95b74
--- /dev/null
+++ b/models/audio-separator-models/README.md
@@ -0,0 +1,3 @@
+---
+license: mit
+---
diff --git a/models/audio-separator-models/roformers/MelBandRoformerBigSYHFTV1.ckpt b/models/audio-separator-models/roformers/MelBandRoformerBigSYHFTV1.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..3b07b85890712d20ad115ead701bf7799c3b9928
--- /dev/null
+++ b/models/audio-separator-models/roformers/MelBandRoformerBigSYHFTV1.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2327e3e81f19e67c307f8c830c54267c09ecb0e9c6ad2b40a80c310899c955f
+size 1479738496
diff --git a/models/audio-separator-models/roformers/MelBandRoformerSYHFT.ckpt b/models/audio-separator-models/roformers/MelBandRoformerSYHFT.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..fc8e114879a3761ce9e6f901cab7b0adbc7035b5
--- /dev/null
+++ b/models/audio-separator-models/roformers/MelBandRoformerSYHFT.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f319dfcde4396ea3106658f457f5eb0bc577e113491f61ae8bab216fe84b0c0c
+size 913096702
diff --git a/models/audio-separator-models/roformers/MelBandRoformerSYHFTV2.5.ckpt b/models/audio-separator-models/roformers/MelBandRoformerSYHFTV2.5.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..232404ba4a0d4e6d32b6f683711c58cd73b10c18
--- /dev/null
+++ b/models/audio-separator-models/roformers/MelBandRoformerSYHFTV2.5.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:916e3a2c1e63b1457bcad823b98ca705e4933deffd2a5ab3a370e10f68bf47e2
+size 913090472
diff --git a/models/audio-separator-models/roformers/MelBandRoformerSYHFTV2.ckpt b/models/audio-separator-models/roformers/MelBandRoformerSYHFTV2.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..6b34c66f158e43e0f2c11b6df91a040c5a11a23c
--- /dev/null
+++ b/models/audio-separator-models/roformers/MelBandRoformerSYHFTV2.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e99f8efa5315300c197295592bd7e56c21c1d77e1884c904b5128c54a2a4632
+size 913095346
diff --git a/models/audio-separator-models/roformers/MelBandRoformerSYHFTV3Epsilon.ckpt b/models/audio-separator-models/roformers/MelBandRoformerSYHFTV3Epsilon.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..650b62bfd05945493a6529898ca0d0023ee7637d
--- /dev/null
+++ b/models/audio-separator-models/roformers/MelBandRoformerSYHFTV3Epsilon.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c886092e4aae13aa089263a0d54d483643f58c16ec221aed37268e2c1031397
+size 913090472
diff --git a/models/audio-separator-models/roformers/aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt b/models/audio-separator-models/roformers/aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..0dd326fa1c5c65e52583803582f154a0359a55bd
--- /dev/null
+++ b/models/audio-separator-models/roformers/aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83bfe991cec4fbadde9f30d1f79cd5293ad0b1f936256be327bba5cbb4883374
+size 835982664
diff --git a/models/audio-separator-models/roformers/aspiration_mel_band_roformer_sdr_18.9845.ckpt b/models/audio-separator-models/roformers/aspiration_mel_band_roformer_sdr_18.9845.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..1282613860a16200f301a12c7b8ec67d050c63be
--- /dev/null
+++ b/models/audio-separator-models/roformers/aspiration_mel_band_roformer_sdr_18.9845.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e791258c866c6c8da66052693d8cc3b64f1f42c01e052dbdc570cd278380cc5
+size 835983746
diff --git a/models/audio-separator-models/roformers/bs_roformer_instrumental_resurrection_unwa.ckpt b/models/audio-separator-models/roformers/bs_roformer_instrumental_resurrection_unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..23b7fc62506629fc0ac29c1551ee7045b805dfa5
--- /dev/null
+++ b/models/audio-separator-models/roformers/bs_roformer_instrumental_resurrection_unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16311025a5133ae6411760ccfe9e3e66b31a01d9d8bec0a03fa7ec4bedac7a15
+size 204483033
diff --git a/models/audio-separator-models/roformers/bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt b/models/audio-separator-models/roformers/bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..201059f3d93490e5cc91f20ea0bb74ae7c0dd20a
--- /dev/null
+++ b/models/audio-separator-models/roformers/bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf11736d1b42a11ae55d8299316585921477dd2a671b24b663660846ca9861b
+size 527119779
diff --git a/models/audio-separator-models/roformers/bs_roformer_vocals_gabox.ckpt b/models/audio-separator-models/roformers/bs_roformer_vocals_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..cb4a0c8ef7ec4378b27e79a01eb491a2d699a535
--- /dev/null
+++ b/models/audio-separator-models/roformers/bs_roformer_vocals_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18d58efe5e949e70fab11b875329af6d06ef11ccc29574bfe943fb57cc827f38
+size 639254584
diff --git a/models/audio-separator-models/roformers/bs_roformer_vocals_resurrection_unwa.ckpt b/models/audio-separator-models/roformers/bs_roformer_vocals_resurrection_unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..14e223aece4474ca86627c065d73e725c4466902
--- /dev/null
+++ b/models/audio-separator-models/roformers/bs_roformer_vocals_resurrection_unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9dbfe5cb572e4ed32a15ec727d7bd06c8d7aba97509e6fda5bc008bb1e0b2dd5
+size 204510749
diff --git a/models/audio-separator-models/roformers/bs_roformer_vocals_revive_unwa.ckpt b/models/audio-separator-models/roformers/bs_roformer_vocals_revive_unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..5df342a5d9eb8dcdc04f5d04fbedef415a7835d3
--- /dev/null
+++ b/models/audio-separator-models/roformers/bs_roformer_vocals_revive_unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1d7e4bfdfef07c6b2bc1d65283a7d03c3c38f8c7dbc8d729b785f93c8b8699a
+size 639326600
diff --git a/models/audio-separator-models/roformers/bs_roformer_vocals_revive_v2_unwa.ckpt b/models/audio-separator-models/roformers/bs_roformer_vocals_revive_v2_unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..6826b2658d30a6b6aa5f3896311f7cd4c3160bb9
--- /dev/null
+++ b/models/audio-separator-models/roformers/bs_roformer_vocals_revive_v2_unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58098850c882a7472dad39f99fb8040ce6eaafe671cfe9881d89aea276bbb5f5
+size 639326600
diff --git a/models/audio-separator-models/roformers/bs_roformer_vocals_revive_v3e_unwa.ckpt b/models/audio-separator-models/roformers/bs_roformer_vocals_revive_v3e_unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..56be9dd0e47d16ef81bb1be38566cf0a35c8ad43
--- /dev/null
+++ b/models/audio-separator-models/roformers/bs_roformer_vocals_revive_v3e_unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b0751b9a15c591407c3b77f08eb4ad3005e42e96051f3f2b39760f1130c467b
+size 639326600
diff --git a/models/audio-separator-models/roformers/config_aspiration_mel_band_roformer.yaml b/models/audio-separator-models/roformers/config_aspiration_mel_band_roformer.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..882cf4f8aacb59f5445d0621aeb8ace0ffddec5b
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_aspiration_mel_band_roformer.yaml
@@ -0,0 +1,76 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801 # doesn't work (use in model)
+  hop_length: 441 # doesn't work (use in model)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 256
+  depth: 8
+  stereo: true
+  num_stems: 2
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 8
+  grad_clip: 0
+  instruments:
+  - aspiration
+  - other
+  lr: 4.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: null
+  num_epochs: 1000
+  num_steps: 1000
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed for checking on the multisong dataset whether "other" is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+  enable: true # enable or disable all augmentations (to quickly disable them if needed)
+  loudness: true # randomly change the loudness of each stem within the range (loudness_min; loudness_max)
+  loudness_min: 0.5
+  loudness_max: 1.5
+  mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+    - 0.2
+    - 0.02
+  mixup_loudness_min: 0.5
+  mixup_loudness_max: 1.5
+
+inference:
+  batch_size: 4
+  dim_t: 801
+  num_overlap: 2
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_bs_roformer_instrumental_resurrection_unwa.yaml b/models/audio-separator-models/roformers/config_bs_roformer_instrumental_resurrection_unwa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c089f84015c08719dd280dbef5147ad13296430a
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_bs_roformer_instrumental_resurrection_unwa.yaml
@@ -0,0 +1,135 @@
+audio:
+  chunk_size: 749259
+  dim_f: 1024
+  dim_t: 1700 # doesn't work (use in model)
+  hop_length: 441 # doesn't work (use in model)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 256
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  linear_transformer_depth: 0
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.
+  ff_dropout: 0.
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 2
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments: ['vocals', 'other']
+  patience: 3
+  reduce_factor: 0.95
+  target_instrument: other
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: true # Mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  # optimizer: prodigy
+  optimizer: adam
+  # lr: 1.0
+  lr: 1.0e-5
+  other_fix: false # needed for checking on the multisong dataset whether "other" is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 2
+  dim_t: 1700
+  num_overlap: 2
+  normalize: false
diff --git a/models/audio-separator-models/roformers/config_bs_roformer_vocals_gabox.yaml b/models/audio-separator-models/roformers/config_bs_roformer_vocals_gabox.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..527a3ad8a10366aaa562d007714e74d271a18cbf
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_bs_roformer_vocals_gabox.yaml
@@ -0,0 +1,133 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801 # doesn't work (use in model)
+  hop_length: 441 # doesn't work (use in model)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 512
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 16
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - Vocals
+  - Instrumental
+  lr: 5.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: true # Mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed for checking on the multisong dataset whether "other" is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 4
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_bs_roformer_vocals_resurrection_unwa.yaml b/models/audio-separator-models/roformers/config_bs_roformer_vocals_resurrection_unwa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d291dea6ba145335a3e4acd09c8df9181a8a11eb
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_bs_roformer_vocals_resurrection_unwa.yaml
@@ -0,0 +1,135 @@
+audio:
+  chunk_size: 785920
+  dim_f: 1024
+  dim_t: 1536 # doesn't work (use in model)
+  hop_length: 441 # doesn't work (use in model)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 256
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  linear_transformer_depth: 0
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.
+  ff_dropout: 0.
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 2
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments: ['vocals', 'other']
+  patience: 3
+  reduce_factor: 0.95
+  target_instrument: vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: true # Mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  # optimizer: prodigy
+  optimizer: adam
+  # lr: 1.0
+  lr: 1.0e-5
+  other_fix: false # needed for checking on the multisong dataset whether "other" is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 2
+  dim_t: 1536
+  num_overlap: 2
+  normalize: false
diff --git a/models/audio-separator-models/roformers/config_bs_roformer_vocals_revive_unwa.yaml b/models/audio-separator-models/roformers/config_bs_roformer_vocals_revive_unwa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2d60c5d37a6c92afea42ef88d8958b7f01b64db
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_bs_roformer_vocals_revive_unwa.yaml
@@ -0,0 +1,134 @@
+audio:
+  chunk_size: 485100 #352800 #485100
+  dim_f: 1024
+  dim_t: 1101
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.
+
+model:
+  dim: 512
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  linear_transformer_depth: 0
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.
+  ff_dropout: 0.
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - vocals
+  - other
+  lr: 1.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: true # needed for checking on the multisong dataset whether "other" is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 2
+  dim_t: 1101
+  num_overlap: 2
diff --git a/models/audio-separator-models/roformers/config_chorus_male_female_bs_roformer.yaml b/models/audio-separator-models/roformers/config_chorus_male_female_bs_roformer.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8090c50d3bf92b4a6f6fd7cc03f08ef834c00a64
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_chorus_male_female_bs_roformer.yaml
@@ -0,0 +1,125 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801 # doesn't work (use in model)
+  hop_length: 441 # doesn't work (use in model)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 8
+  stereo: true
+  num_stems: 2
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.0
+  ff_dropout: 0.0
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - male
+  - female
+  lr: 1.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: null
+  num_epochs: 1000
+  num_steps: 1000
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: true # needed for checking on the multisong dataset whether "other" is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 2
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_dereverb-echo_mel_band_roformer.yaml b/models/audio-separator-models/roformers/config_dereverb-echo_mel_band_roformer.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..14a76e789312f98216a73f2be18d76a3398080c2
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_dereverb-echo_mel_band_roformer.yaml
@@ -0,0 +1,76 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801 # doesn't work (use in model)
+  hop_length: 441 # doesn't work (use in model)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 256
+  depth: 8
+  stereo: true
+  num_stems: 2
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 8
+  grad_clip: 0
+  instruments:
+  - dry
+  - No dry
+  lr: 4.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: null
+  num_epochs: 1000
+  num_steps: 1000
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed for checking on the multisong dataset whether "other" is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+  enable: true # enable or disable all augmentations (to quickly disable them if needed)
+  loudness: true # randomly change the loudness of each stem within the range (loudness_min; loudness_max)
+  loudness_min: 0.5
+  loudness_max: 1.5
+  mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+    - 0.2
+    - 0.02
+  mixup_loudness_min: 0.5
+  mixup_loudness_max: 1.5
+
+inference:
+  batch_size: 4
+  dim_t: 801
+  num_overlap: 4
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_dereverb-echo_mel_band_roformer_sdr_13.4843_v2.yaml b/models/audio-separator-models/roformers/config_dereverb-echo_mel_band_roformer_sdr_13.4843_v2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..83c93db69f6f13e770905cb71a1827465b55af1c
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_dereverb-echo_mel_band_roformer_sdr_13.4843_v2.yaml
@@ -0,0 +1,64 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 256
+  depth: 8
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 8
+  grad_clip: 0
+  instruments:
+  - dry
+  - No dry
+  lr: 1.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: dry
+  num_epochs: 1000
+  num_steps: 1000
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false
+  use_amp: true
+
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 4
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_dereverb_echo_mel_band_roformer_v2.yaml b/models/audio-separator-models/roformers/config_dereverb_echo_mel_band_roformer_v2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f263f8b61fb37b757892d64d7cbeccd1db2512ca
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_dereverb_echo_mel_band_roformer_v2.yaml
@@ -0,0 +1,64 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 256
+  depth: 8
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 8
+  grad_clip: 0
+  instruments:
+  - dry
+  - other
+  lr: 1.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: dry
+  num_epochs: 1000
+  num_steps: 1000
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false
+  use_amp: true
+
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 4
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_mel_band_roformer_bleed_suppressor_v1.yaml b/models/audio-separator-models/roformers/config_mel_band_roformer_bleed_suppressor_v1.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e9d173f70a8e4cc32a2a76322d4c4c28ebff00af
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_mel_band_roformer_bleed_suppressor_v1.yaml
@@ -0,0 +1,51 @@
+audio:
+  chunk_size: 485100
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  instruments:
+  - Instrumental
+  - Bleed
+  target_instrument: Instrumental
+  use_amp: True
+
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 2
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_mel_band_roformer_instrumental_becruily.yaml b/models/audio-separator-models/roformers/config_mel_band_roformer_instrumental_becruily.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d157b9524c596edd7ff277e8914b55348520ad51
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_mel_band_roformer_instrumental_becruily.yaml
@@ -0,0 +1,72 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - Instrumental
+  - Vocals
+  lr: 0.0005
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Instrumental
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adamw
+  other_fix: false # needed when checking on the multisong dataset whether "other" is actually the instrumental
+  use_amp: true # enable or disable mixed precision (float16); should usually be true
+
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 2
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_mel_band_roformer_instrumental_gabox.yaml b/models/audio-separator-models/roformers/config_mel_band_roformer_instrumental_gabox.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9796944c69fe5fa28937eb7670e8397343ffa5ca
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_mel_band_roformer_instrumental_gabox.yaml
@@ -0,0 +1,51 @@
+audio:
+  chunk_size: 485100
+  dim_f: 1024
+  dim_t: 1101
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  instruments:
+  - Instrumental
+  - Vocals
+  target_instrument: Instrumental
+  use_amp: True
+
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 2
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_mel_band_roformer_karaoke_becruily.yaml b/models/audio-separator-models/roformers/config_mel_band_roformer_karaoke_becruily.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..afd6907645e89d36d78e933ea036850dad66fb8f
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_mel_band_roformer_karaoke_becruily.yaml
@@ -0,0 +1,72 @@
+audio:
+  chunk_size: 485100
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 2
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: true
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: false
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - Vocals
+  - Instrumental
+  lr: 0.0005
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: null
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type:
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adamw
+  other_fix: false # needed when checking on the multisong dataset whether "other" is actually the instrumental
+  use_amp: true # enable or disable mixed precision (float16); should usually be true
+
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 8
diff --git a/models/audio-separator-models/roformers/config_mel_band_roformer_karaoke_gabox.yaml b/models/audio-separator-models/roformers/config_mel_band_roformer_karaoke_gabox.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..16cb193e3651303c63d76da329b677fd5b16dae8
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_mel_band_roformer_karaoke_gabox.yaml
@@ -0,0 +1,72 @@
+audio:
+  chunk_size: 485100
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: true
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+    - 4096
+    - 2048
+    - 1024
+    - 512
+    - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: true
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+    - Vocals
+    - Instrumental
+  lr: 0.0005
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type:
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adamw
+  other_fix: false # needed when checking on the multisong dataset whether "other" is actually the instrumental
+  use_amp: true # enable or disable mixed precision (float16); should usually be true
+
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 8
diff --git a/models/audio-separator-models/roformers/config_mel_band_roformer_kim_ft_unwa.yaml b/models/audio-separator-models/roformers/config_mel_band_roformer_kim_ft_unwa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4a8a6a9040c92b78828aa987ce4daa91923953af
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_mel_band_roformer_kim_ft_unwa.yaml
@@ -0,0 +1,72 @@
+audio:
+  chunk_size: 485100
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - vocals
+  - other
+  lr: 1.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: true # needed when checking on the multisong dataset whether "other" is actually the instrumental
+  use_amp: true # enable or disable mixed precision (float16); should usually be true
+
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 8
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_mel_band_roformer_vocal_fullness_aname.yaml b/models/audio-separator-models/roformers/config_mel_band_roformer_vocal_fullness_aname.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..723d742c848aec8c67cee58d0904f419f15841cc
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_mel_band_roformer_vocal_fullness_aname.yaml
@@ -0,0 +1,54 @@
+audio:
+  chunk_size: 661500
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - vocals
+  - other
+  target_instrument: vocals
+  use_amp: true
+
+inference:
+  batch_size: 4
+  dim_t: 1101
+  num_overlap: 4
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_mel_band_roformer_vocals_becruily.yaml b/models/audio-separator-models/roformers/config_mel_band_roformer_vocals_becruily.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b314fc4e078e4fdc8df60de26862d2bbea194834
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_mel_band_roformer_vocals_becruily.yaml
@@ -0,0 +1,72 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - vocals
+  - other
+  lr: 0.0005
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adamw
+  other_fix: false # needed when checking on the multisong dataset whether "other" is actually the instrumental
+  use_amp: true # enable or disable mixed precision (float16); should usually be true
+
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 2
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_mel_band_roformer_vocals_gabox.yaml b/models/audio-separator-models/roformers/config_mel_band_roformer_vocals_gabox.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8130c9958eead0d2efd27f27f4f39ea5ca051a26
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_mel_band_roformer_vocals_gabox.yaml
@@ -0,0 +1,51 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  instruments:
+  - Vocals
+  - Instrumental
+  target_instrument: Vocals
+
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 1
+  chunk_size: 352800
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_melband_roformer_big_beta5e.yaml b/models/audio-separator-models/roformers/config_melband_roformer_big_beta5e.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f19339dd96cdbbd843f1ec3a25bd64cd5d00bbce
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_melband_roformer_big_beta5e.yaml
@@ -0,0 +1,51 @@
+audio:
+  chunk_size: 485100
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 3
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  instruments:
+  - vocals
+  - other
+  target_instrument: vocals
+  use_amp: True
+
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 2
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_melbandroformer_big_beta4.yaml b/models/audio-separator-models/roformers/config_melbandroformer_big_beta4.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b187cdf1c454b56531ae02c047df2f8f721b7da2
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_melbandroformer_big_beta4.yaml
@@ -0,0 +1,51 @@
+audio:
+  chunk_size: 485100
+  dim_f: 1024
+  dim_t: 1101
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 3
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  instruments:
+  - vocals
+  - other
+  target_instrument: vocals
+  use_amp: True
+
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 2
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_melbandroformer_big_beta6.yaml b/models/audio-separator-models/roformers/config_melbandroformer_big_beta6.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f45cd6af81f2a1d0eb684a280d13fac043653f8c
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_melbandroformer_big_beta6.yaml
@@ -0,0 +1,72 @@
+audio:
+  chunk_size: 529200
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 512
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - vocals
+  - other
+  lr: 1.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: true # needed when checking on the multisong dataset whether "other" is actually the instrumental
+  use_amp: true # enable or disable mixed precision (float16); should usually be true
+
+inference:
+  batch_size: 2
+  dim_t: 1201
+  num_overlap: 2
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_melbandroformer_big_beta6x.yaml b/models/audio-separator-models/roformers/config_melbandroformer_big_beta6x.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2f959f226e9e4b160b483208512bc457411ac736
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_melbandroformer_big_beta6x.yaml
@@ -0,0 +1,72 @@
+audio:
+  chunk_size: 529200
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 512
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - vocals
+  - other
+  lr: 1.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: true # needed when checking on the multisong dataset whether "other" is actually the instrumental
+  use_amp: true # enable or disable mixed precision (float16); should usually be true
+
+inference:
+  batch_size: 2
+  dim_t: 1201
+  num_overlap: 2
diff --git a/models/audio-separator-models/roformers/config_melbandroformer_inst.yaml b/models/audio-separator-models/roformers/config_melbandroformer_inst.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a236341b3f8607d534b191e9092690a52b8015af
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_melbandroformer_inst.yaml
@@ -0,0 +1,51 @@
+audio:
+  chunk_size: 485100
+  dim_f: 1024
+  dim_t: 1101
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  instruments:
+  - other
+  - vocals
+  target_instrument: other
+  use_amp: True
+
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 2
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_melbandroformer_inst_v2.yaml b/models/audio-separator-models/roformers/config_melbandroformer_inst_v2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9c13008065433e4eb4522c62c0ae0a4294f595f4
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_melbandroformer_inst_v2.yaml
@@ -0,0 +1,51 @@
+audio:
+  chunk_size: 485100
+  dim_f: 1024
+  dim_t: 1101
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 3
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  instruments:
+  - Instrumental
+  - Vocals
+  target_instrument: Instrumental
+  use_amp: True
+
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 2
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_melbandroformer_instvoc_duality.yaml b/models/audio-separator-models/roformers/config_melbandroformer_instvoc_duality.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e30df21030b0986fe33c66a29f8ee59478e49357
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_melbandroformer_instvoc_duality.yaml
@@ -0,0 +1,51 @@
+audio:
+  chunk_size: 485100
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 2
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  instruments:
+  - Vocals
+  - Instrumental
+  target_instrument: null
+  use_amp: True
+
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 2
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_vocals_mel_band_roformer_big_v1_ft.yaml b/models/audio-separator-models/roformers/config_vocals_mel_band_roformer_big_v1_ft.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f19339dd96cdbbd843f1ec3a25bd64cd5d00bbce
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_vocals_mel_band_roformer_big_v1_ft.yaml
@@ -0,0 +1,51 @@
+audio:
+  chunk_size: 485100
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 3
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  instruments:
+  - vocals
+  - other
+  target_instrument: vocals
+  use_amp: True
+
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 2
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/config_vocals_mel_band_roformer_ft.yaml b/models/audio-separator-models/roformers/config_vocals_mel_band_roformer_ft.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c4fe03d72610e09e3c1cc479c4dc8f423c601c22
--- /dev/null
+++ b/models/audio-separator-models/roformers/config_vocals_mel_band_roformer_ft.yaml
@@ -0,0 +1,72 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 8
+  grad_clip: 0
+  instruments:
+  - vocals
+  - other
+  lr: 1.0e-04
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: vocals
+  num_epochs: 1000
+  num_steps: 100
+  augmentation: true # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: true # Mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adamw8bit
+  other_fix: false # needed when checking on the multisong dataset whether "other" is actually instrumental
+  use_amp: true # enable or disable mixed precision (float16); should usually be true
+
+inference:
+  batch_size: 4
+  dim_t: 256
+  num_overlap: 2
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt b/models/audio-separator-models/roformers/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..3e232e1d5ebb6e2c74cf9803ca9fb4a3fb860d17
--- /dev/null
+++ b/models/audio-separator-models/roformers/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a25e3b233722cd81e2de7b8e798a3fef29d4b9799ccacda60b0dc958a1e2a5bb
+size 913097300
diff --git a/models/audio-separator-models/roformers/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768_config.yaml b/models/audio-separator-models/roformers/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6a20587dc9024e992ff06994aed24edec3c3efca
--- /dev/null
+++ b/models/audio-separator-models/roformers/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768_config.yaml
@@ -0,0 +1,71 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 2
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - dry
+  - other
+  lr: 1.0e-05
+  patience: 8
+  reduce_factor: 0.95
+  target_instrument: dry
+  num_epochs: 1000
+  num_steps: 4032
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed when checking on the multisong dataset whether "other" is actually instrumental
+    
+inference:
+  batch_size: 2
+  dim_t: 801
+  num_overlap: 4
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt b/models/audio-separator-models/roformers/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..8d1b634e263572a5a955577112e77036279c6287
--- /dev/null
+++ b/models/audio-separator-models/roformers/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c1c39191edc34e942ca7f2346ce6b6c0e1208a5f76349ffce6f696bd12910de
+size 913097300
diff --git a/models/audio-separator-models/roformers/denoise_mel_band_roformer_aufr33_sdr_27.9959_config.yaml b/models/audio-separator-models/roformers/denoise_mel_band_roformer_aufr33_sdr_27.9959_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6a20587dc9024e992ff06994aed24edec3c3efca
--- /dev/null
+++ b/models/audio-separator-models/roformers/denoise_mel_band_roformer_aufr33_sdr_27.9959_config.yaml
@@ -0,0 +1,71 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 2
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - dry
+  - other
+  lr: 1.0e-05
+  patience: 8
+  reduce_factor: 0.95
+  target_instrument: dry
+  num_epochs: 1000
+  num_steps: 4032
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed when checking on the multisong dataset whether "other" is actually instrumental
+    
+inference:
+  batch_size: 2
+  dim_t: 801
+  num_overlap: 4
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt b/models/audio-separator-models/roformers/dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..b2ec32944c1f4f9c802a1ad02d38682929c77a58
--- /dev/null
+++ b/models/audio-separator-models/roformers/dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd2b737a394cfb80cd48cc9fcbaf89f5f4062f6b93066c2911617a06d8b7860a
+size 835997896
diff --git a/models/audio-separator-models/roformers/dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt b/models/audio-separator-models/roformers/dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..5baa2b3d302d0ab984148ff1f42793220b29c7ca
--- /dev/null
+++ b/models/audio-separator-models/roformers/dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:396432f5af25992fe82d0286634bd879027c073721db6ab10199e75459708b9f
+size 455862568
diff --git a/models/audio-separator-models/roformers/dereverb_big_mbr_ep_362.ckpt b/models/audio-separator-models/roformers/dereverb_big_mbr_ep_362.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..814f35f8718aab66733f43e60a034fa933dd2f7f
--- /dev/null
+++ b/models/audio-separator-models/roformers/dereverb_big_mbr_ep_362.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0506455e74ffc02bbec700df9863ae243597034003815f1418227c6dee33b6ea
+size 455864012
diff --git a/models/audio-separator-models/roformers/dereverb_echo_mbr_fused.ckpt b/models/audio-separator-models/roformers/dereverb_echo_mbr_fused.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..2d495600912d8c596c4f37c1a902c9b918e934fd
--- /dev/null
+++ b/models/audio-separator-models/roformers/dereverb_echo_mbr_fused.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1596b1063238f487d54a0510a8c92cb28c000c803a271dd618ac49efc99ef3f7
+size 455776577
diff --git a/models/audio-separator-models/roformers/dereverb_mel_band_roformer_anvuew.yaml b/models/audio-separator-models/roformers/dereverb_mel_band_roformer_anvuew.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4a73f63e7daa18b57764de5d6a8144232b870b4c
--- /dev/null
+++ b/models/audio-separator-models/roformers/dereverb_mel_band_roformer_anvuew.yaml
@@ -0,0 +1,76 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 3
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - noreverb
+  - reverb
+  lr: 5.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: noreverb
+  num_epochs: 1000
+  num_steps: 4000
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adamw
+  other_fix: true # needed when checking on the multisong dataset whether "other" is actually instrumental
+  use_amp: true # enable or disable mixed precision (float16); should usually be true
+
+augmentations:
+  enable: true # enable or disable all augmentations (a quick way to turn them all off if needed)
+  loudness: true # randomly change the loudness of each stem within the range (loudness_min; loudness_max)
+  loudness_min: 0.1
+  loudness_max: 1.0
+  mixup: false # mix several stems of the same type with some probability (only works for dataset types: 1, 2, 3)
+  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+    - 0.2
+    - 0.02
+  mixup_loudness_min: 0.5
+  mixup_loudness_max: 1.5
+
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 2
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt b/models/audio-separator-models/roformers/dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..8f21995361b651a6c446cb4e1e7664d94eb42bff
--- /dev/null
+++ b/models/audio-separator-models/roformers/dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9262877b87e9ebb0fb808a456b0a411fa677f5df31c8383c1254af531c078970
+size 913107578
diff --git a/models/audio-separator-models/roformers/dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt b/models/audio-separator-models/roformers/dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..45d5062aae81caa8df3565e7d10c818f076cc588
--- /dev/null
+++ b/models/audio-separator-models/roformers/dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0db8f1b41c00cead1112e967262a12802fd32e76c0c3a8eb207e772bae25d07b
+size 913107578
diff --git a/models/audio-separator-models/roformers/dereverb_mel_band_roformer_mono_anvuew.ckpt b/models/audio-separator-models/roformers/dereverb_mel_band_roformer_mono_anvuew.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..06855114df2802baa49c15faf4b39a743b84e27b
--- /dev/null
+++ b/models/audio-separator-models/roformers/dereverb_mel_band_roformer_mono_anvuew.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f099ee717eb57fb0ad5eb0e7c9ad6787c36168140b61ce2b158b90c2c4ecee79
+size 913097978
diff --git a/models/audio-separator-models/roformers/dereverb_super_big_mbr_ep_346.ckpt b/models/audio-separator-models/roformers/dereverb_super_big_mbr_ep_346.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..bc9d947ff83f741caa2504b673eed1983e6ac008
--- /dev/null
+++ b/models/audio-separator-models/roformers/dereverb_super_big_mbr_ep_346.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26dda242bce4405555f2d6086d079fe8cc23f1f04e02e501d2689bfe3ece0489
+size 455864012
diff --git a/models/audio-separator-models/roformers/deverb_bs_roformer_8_384dim_10depth.ckpt b/models/audio-separator-models/roformers/deverb_bs_roformer_8_384dim_10depth.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..17a6af84d21d7c3cecf0c627811f42fee20b6d8e
--- /dev/null
+++ b/models/audio-separator-models/roformers/deverb_bs_roformer_8_384dim_10depth.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c38653aaa5e49f2f7b84dd3be2b6b679e0cbea23978e6b48389ee6f0a914768
+size 361499604
diff --git a/models/audio-separator-models/roformers/deverb_bs_roformer_8_384dim_10depth_config.yaml b/models/audio-separator-models/roformers/deverb_bs_roformer_8_384dim_10depth_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7aa36fa5e356bd5ece008126289ba05f8cd67bf4
--- /dev/null
+++ b/models/audio-separator-models/roformers/deverb_bs_roformer_8_384dim_10depth_config.yaml
@@ -0,0 +1,137 @@
+audio:
+  chunk_size: 352768
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 384
+  depth: 10
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - noreverb
+  - reverb
+  lr: 5.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: noreverb
+  num_epochs: 1000
+  num_steps: 1000
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed when checking on the multisong dataset whether "other" is actually instrumental
+  use_amp: true # enable or disable mixed precision (float16); should usually be true
+
+augmentations:
+  enable: true # enable or disable all augmentations (a quick way to turn them all off if needed)
+  loudness: true # randomly change the loudness of each stem within the range (loudness_min; loudness_max)
+  loudness_min: 0.5
+  loudness_max: 1.5
+  mixup: false # mix several stems of the same type with some probability (only works for dataset types: 1, 2, 3)
+  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+    - 0.2
+    - 0.02
+  mixup_loudness_min: 0.5
+  mixup_loudness_max: 1.5
+
+inference:
+  batch_size: 4
+  dim_t: 801
+  num_overlap: 4
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_bleed_suppressor_v1.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_bleed_suppressor_v1.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..4bf56278572c098907e036b63e094bd7c63c1a20
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_bleed_suppressor_v1.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9a9d10faa7f8997676a78e66d741d7acb9cc449334763f3c8f626d68ec6e575
+size 913102724
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..8d59dd9875ffadf10ca18f66aa3d20a0e289ce90
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca8799531fe51c94172cc047226209ed48bf7d8c02e04671795a15d2a1c318af
+size 913096801
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144_config.yaml b/models/audio-separator-models/roformers/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..18687f8b8bb38020fc951d3ade6a54bbf58f094e
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144_config.yaml
@@ -0,0 +1,71 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 2
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - crowd
+  - other
+  lr: 1.0e-05
+  patience: 8
+  reduce_factor: 0.95
+  target_instrument: crowd
+  num_epochs: 1000
+  num_steps: 4032
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed when checking on the multisong dataset whether "other" is actually instrumental
+
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 4
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_denoise_debleed_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_denoise_debleed_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..8b595aaf271c7d97cbca025436c370b5954e7786
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_denoise_debleed_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91aa7a546ed2e93482e4629c982d35b0d258bb3de6eeab497fd91658cc86c7fd
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_2_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_2_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..7758aa5f0d19e21b8cb82550a8eddb7a7ab10e23
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_2_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e03ca459c339f88b7521c367c897d0c3f5362b38a6cdb96cb28e625ca0f9931e
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_3_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_3_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..1596ed4c464e4346d614bc48bb64a996975bc445
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_3_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9ec9f299cf617bf6afe1c382f4b0761cd9bee78323da94889951812328e10fb
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_becruily.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_becruily.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..9aa6c9ab355e9dfc090251f9703e299b9da7893d
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_becruily.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8da6632a1c25efb1c9be783ce9ea367d226d4b918cd6c3717c8b1d7a396041d
+size 913106900
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_bleedless_v1_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_bleedless_v1_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..65745b5be537052b903ae5f13f4e26f51912f6db
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_bleedless_v1_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de972fb724601beef237abe94c8b934c73218e9baf3e344ab4c2333276e5cfe7
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_bleedless_v2_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_bleedless_v2_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..881d22e5fc7fde06ca2c5de68277c4ab8bbceee7
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_bleedless_v2_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6109687febb8f18cd5a45207fee35f18ba8b9467b18a4b2e982a3b7dc04a9d72
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_bleedless_v3_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_bleedless_v3_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..651d00bdc63039e222bd04f9e901d82044d7ff84
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_bleedless_v3_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5578545e094e584835b3184310ed1b12072f15d4b6ed8f4359ecc17358a66676
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fullness_noise_v4_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fullness_noise_v4_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..5a49522bec839a399c6545d2f812bae2ff5f32a1
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fullness_noise_v4_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f50296e913b9af3b5b3b961e92877ef0d4a74f9a433e796e89960c4c2b1abe53
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fullness_v1_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fullness_v1_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..e73ab6f21e270a489e7310d9fe81a150cdfceb13
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fullness_v1_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31140eccf271d2a9e8a538b092b1f70dfd6471aa5ad163b22bccc758b9f38b62
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fullness_v2_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fullness_v2_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..6d849f3079743541477aeb8748cd2dac3f05e0d0
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fullness_v2_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c446c34551333dd3d45b8d0708658a10f28c5e289f8ec27b5f0e22803681bef3
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fullness_v3_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fullness_v3_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..c90caf360ea211abf7467c96c8bde8bf656fb521
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fullness_v3_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbb229209a8942d34664e19d2f4862e357ea3108a4e8c04b69aa0aba523a4481
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fv7z_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fv7z_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..9cd07358dc11187373fb9052aaee3b7434ee4269
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fv7z_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef229f0dcd370c1767e4292981c59e5248814da45f32bfacebcc0f28adaa30a1
+size 913031195
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fv8_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fv8_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..b017d3834de1d9d48cafa54dc7807d19c2ea0880
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fv8_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50bfa127d21f419e0da89730867d28c7ac4484c9473e6f313d036bc8b429df80
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fvx_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fvx_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..93fa162e3077a085662452f147e5fbe0bacd108a
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_fvx_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:545ef13b0cdbac505818a38db98e09c54e7c03ea17b4e0c895a531bfa352fa59
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..6ecff8779cfc8fa3961db8589050b44a43131203
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b3ad6bd8bed3aaaa4d9320ea2ca910d140196a2302186db1754f3a8d8e16fb1
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv5_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv5_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..1a192beafebebb2f4785e076cb13aba8a074b8ce
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv5_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38e935cf1e97afcc1de84d0bdb87dd8090bad530fa0df28e707d16448e1d38e2
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv5n_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv5n_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..5cea44b8d02d4a8379a8fef7d0dc7e3a920b6294
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv5n_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:175203923fac3e52ae00e7e37d41e8a7fef5020b6ee4e4144f4786daabc54b34
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv6_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv6_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..9e4e1ed19b5579f8914a2229b05a859b8cb0bf34
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv6_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:677951b8556a27abe32e39705640638826e78101fa901a51ad73d20522be6d25
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv6n_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv6n_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..edf54dd70e2304cc057d3055a46c02fbb79ef937
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv6n_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:802f3e5d183d7c4b50dea147c320e61634f5be6ff55fa899fdebeaf0f3cf7f42
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv7_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv7_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..96b9a5ec1eb7dddb73acd155f3962118dc4980ee
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv7_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b4f12d81fd7fb02f38609216d59f0e42b3dca655fd90ca275ab5321b3e4d9aa
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv7n_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv7n_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..0bb8eb969ef04c187caa14bc813e8c690d396036
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv7n_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0ca36af5d1314be46b56c8a53b6be02f98511fa5d7e3e196fd895755e65be3c
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv8_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv8_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..9802ecab0b9fa4c33af5610bb61b822076d77dcd
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv8_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d7d97418012e1d241853260d82330869c3e945ffdef9d7841fd90f5b24f20ff
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv8n_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv8n_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..29483cf0cabdabb930587ec5c0be8070d73cc969
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_instrumental_instv8n_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25920c876e601d4ccbf1684b19ecdac4b9fcfcc7f48d2c095d81040ec3fecbea
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..977a5c60ab478ab0b78ac5bc7e5296f9185f5549
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1de20d459332fe8869aeb01327a31df0032262706e1365114e852dc271779813
+size 913096801
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956_config.yaml b/models/audio-separator-models/roformers/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..417f042961dd076caf5ba32c8bba4122748e8c9d
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956_config.yaml
@@ -0,0 +1,71 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 000
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 4
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - Vocals
+  - Instrumental
+  lr: 1.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals
+  num_epochs: 1000
+  num_steps: 2000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 4
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_karaoke_becruily.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_karaoke_becruily.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..ceaad63144a44e85b64617f842686673070bf5dc
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_karaoke_becruily.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3aa262ac01df870b9fc033e9c7b6cad33fe04fc9c148b6c40841326a515a0e0
+size 1719139254
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_karaoke_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_karaoke_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..69688626b9c8b8655e6eb549e7909f64ae862819
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_karaoke_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:303fc631e7aa587e9dc1e6ac4bb3667c6ba53aacb6b6a90abcfcf57935b92bd8
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_karaoke_gabox_v2.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_karaoke_gabox_v2.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..f1f8bd05fe7cd5bc814cbc757048f09e5df8f195
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_karaoke_gabox_v2.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec34be50327aeaf1a996c27977f5c30d1ac80c0076d69683d3e5184c31ea29d3
+size 913090472
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_kim_ft2_bleedless_unwa.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_kim_ft2_bleedless_unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..eb05d0fc17641b53deabf68c1940dd237695026c
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_kim_ft2_bleedless_unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c450bd66a98b49dd03231fc5ebb84121eef8418236b179423c2b171d62b04d9
+size 913101368
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_kim_ft2_unwa.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_kim_ft2_unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..64a9c082838de39b001e92ac622efe3de0c810da
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_kim_ft2_unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ed7b9e4c2eebbec7a7e5e8113058f7b68ba5e6048db8eaccfbbeb884c7884c0
+size 913100690
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_kim_ft3_unwa.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_kim_ft3_unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..816aa053cbceb5323a1b6eb5b5c0717b9e0b7a66
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_kim_ft3_unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b10db0095c42576a32aeb3a1c8054af9dea0333e1c38477091d78316a007e52
+size 913090472
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_kim_ft_unwa.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_kim_ft_unwa.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..2912039564c3940b1fb51563a795e3f241841226
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_kim_ft_unwa.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6bd8d333880191254a6ef6be3cb0ffa4dda9d3282e36b0cce2e88a660e00d39
+size 913100690
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_vocal_fullness_aname.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_vocal_fullness_aname.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..380b1baf015018a1c4ae18ef1ade63a6ab76e441
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_vocal_fullness_aname.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a64a27a672b457de23d9decd1fc7b58b0664a9f4f24bb43af154708e2ef07d2f
+size 913090472
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_vocals_becruily.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_vocals_becruily.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..fda579c578c11b43fc8267f25fdbe0552ec36c1b
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_vocals_becruily.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a05961310cc55fbb901290c2e8be02682942f73522b6ac76bf2ec11e347ed95a
+size 913107578
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv1_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv1_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..5b6fdc61e56b41297707de241f7c353a916c0494
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv1_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4dff354d81152d1b4321f6491f242c060919148239fbfe22a1015513de4a7fe
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv2_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv2_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..dccb5cbb2e7b8aa143e8cd8b803144646998abd5
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv2_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2888813aa5b519941fa8548efc5a4331d63c61909007eb17fe95c367be230196
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv3_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv3_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..f5b0031173c6b54221895b9d66e9553037777cd5
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv3_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49d81446b34a7848446efde7898b25bdc32fe872c2393617acb5356649f7ea93
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv4_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv4_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..16f6cdd40cf3df7b1fbbf3f3702c1f326cdd8f94
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv4_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a9657de5fd3ed87ad4fd1a9d2069743ecb33424836973ad0f3288e2a64e90bc
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv5_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv5_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..144ccb6ac58988f75e37cb1e16dd29d7ae754760
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv5_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ede0504ddc55cb44b966a8212dac75a364f8157974cc40c8e92b9f5d4f17ce2
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv6_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv6_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..09ba0100d657171cb6f17292df03f215102f5bef
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_vocals_fv6_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25033d944288cb032fc51faab044bbd7f90bb81e82cada637ecdec699c2ff773
+size 913031195
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_vocals_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_vocals_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..709869d299af3267439c266eede088a1c19b0745
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_vocals_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff802a67501fac70587c3ff4e8dbc89c2558e7d8911c92222dfea2aaac208517
+size 913026650
diff --git a/models/audio-separator-models/roformers/mel_band_roformer_vocals_v2_gabox.ckpt b/models/audio-separator-models/roformers/mel_band_roformer_vocals_v2_gabox.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..e3589e9ee8a2eba303868270993bbe81cc684625
--- /dev/null
+++ b/models/audio-separator-models/roformers/mel_band_roformer_vocals_v2_gabox.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fac81dbebc0992503df55110d64d86c4fb74a1529527a819a253f3d20ef72bc1
+size 913031195
diff --git a/models/audio-separator-models/roformers/melband_roformer_big_beta4.ckpt b/models/audio-separator-models/roformers/melband_roformer_big_beta4.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..62cb0ed8e023394924b1108a5335daa8b91d78d4
--- /dev/null
+++ b/models/audio-separator-models/roformers/melband_roformer_big_beta4.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:700a9bd3831d4f7f44cc0019b238774e31045bcbc361fbb69235535c40fc1454
+size 1574477088
diff --git a/models/audio-separator-models/roformers/melband_roformer_big_beta5e.ckpt b/models/audio-separator-models/roformers/melband_roformer_big_beta5e.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..a59473cd797f3c5fc6d6eb420d324145dc7c1104
--- /dev/null
+++ b/models/audio-separator-models/roformers/melband_roformer_big_beta5e.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32b876e1163716a9a007438b5a5107069586aa9b9ca653a5f63013b1edf6920c
+size 1479749810
diff --git a/models/audio-separator-models/roformers/melband_roformer_big_beta6.ckpt b/models/audio-separator-models/roformers/melband_roformer_big_beta6.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..3e62cb6881f880ecadb2131661a7bffc1c651889
--- /dev/null
+++ b/models/audio-separator-models/roformers/melband_roformer_big_beta6.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f51cbb94b4ed5c36cb36fd2024236a8af3ed6886567981702ad6f094b2c6c820
+size 1557078584
diff --git a/models/audio-separator-models/roformers/melband_roformer_big_beta6x.ckpt b/models/audio-separator-models/roformers/melband_roformer_big_beta6x.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..8a45e7206470ce16a36e6c5c60726331d16221db
--- /dev/null
+++ b/models/audio-separator-models/roformers/melband_roformer_big_beta6x.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e16d702f4e20f13d60b293541c1dea75cb4414a5846b36780e28ef70352a4e5c
+size 1708527586
diff --git a/models/audio-separator-models/roformers/melband_roformer_inst_v1.ckpt b/models/audio-separator-models/roformers/melband_roformer_inst_v1.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..0a5775dcf471eae22321cf27576da3155bd773e9
--- /dev/null
+++ b/models/audio-separator-models/roformers/melband_roformer_inst_v1.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f88d96958b2b7dec32286b0ced00bbcbd37e28741cad9038758b1eaf9b5c057
+size 913100690
diff --git a/models/audio-separator-models/roformers/melband_roformer_inst_v1_plus.ckpt b/models/audio-separator-models/roformers/melband_roformer_inst_v1_plus.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..ba0efaf8a870d3f5921d78c2fe7107ebab72c7e6
--- /dev/null
+++ b/models/audio-separator-models/roformers/melband_roformer_inst_v1_plus.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25fe6ab4db95a6c20468f6c082f9bfc30904b8727ed8d069110bd0960da4879a
+size 913090472
diff --git a/models/audio-separator-models/roformers/melband_roformer_inst_v1e.ckpt b/models/audio-separator-models/roformers/melband_roformer_inst_v1e.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..f8370873342b453fb65d96b2fe069cbc0f8130e9
--- /dev/null
+++ b/models/audio-separator-models/roformers/melband_roformer_inst_v1e.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df2bcdb8838b88264f5381dbb0ccd84a9926c9775cf548c34d8846f5cd20fe96
+size 913102724
diff --git a/models/audio-separator-models/roformers/melband_roformer_inst_v1e_plus.ckpt b/models/audio-separator-models/roformers/melband_roformer_inst_v1e_plus.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..d96e91089fa7a07f5c4052f55449fd6ed4047ce2
--- /dev/null
+++ b/models/audio-separator-models/roformers/melband_roformer_inst_v1e_plus.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a4ddba739f0352407fb6e18b29206b82318ec427fe37fcedb0f83241e4e15fb
+size 913090472
diff --git a/models/audio-separator-models/roformers/melband_roformer_inst_v2.ckpt b/models/audio-separator-models/roformers/melband_roformer_inst_v2.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..e59c3365c4976b3c225f0cf9ab34342531dc6955
--- /dev/null
+++ b/models/audio-separator-models/roformers/melband_roformer_inst_v2.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd19766620f7d6f58fdf7aaada7e89907fe41bc64490ce3faa9a6dab15d6e1f2
+size 1574477088
diff --git a/models/audio-separator-models/roformers/melband_roformer_instvoc_duality_v1.ckpt b/models/audio-separator-models/roformers/melband_roformer_instvoc_duality_v1.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..2b99b0c8e53949042dce6f1db2300642212b31f7
--- /dev/null
+++ b/models/audio-separator-models/roformers/melband_roformer_instvoc_duality_v1.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4640a59d454bf9f69d67460592ab71e7cdce3afa0c0a6f0cf4500bb4ac0b8381
+size 1719116358
diff --git a/models/audio-separator-models/roformers/melband_roformer_instvox_duality_v2.ckpt b/models/audio-separator-models/roformers/melband_roformer_instvox_duality_v2.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..e4042424f484dbb10d5f76f807c04f9b7f307e23
--- /dev/null
+++ b/models/audio-separator-models/roformers/melband_roformer_instvox_duality_v2.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4a69558708f2857e36ac86a0e03ed95c4e3d8b9c5b8113963987d0d7df7e20f
+size 1719116358
diff --git a/models/audio-separator-models/roformers/model_bs_roformer_ep_317_sdr_12.9755.ckpt b/models/audio-separator-models/roformers/model_bs_roformer_ep_317_sdr_12.9755.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..1d4b892da79c875b3b3028f9f4d2504ebafe72e1
--- /dev/null
+++ b/models/audio-separator-models/roformers/model_bs_roformer_ep_317_sdr_12.9755.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b84f37e8d444c8cb30c79d77f613a41c05868ff9c9ac6c7049c00aefae115aa
+size 639331213
diff --git a/models/audio-separator-models/roformers/model_bs_roformer_ep_317_sdr_12.9755.yaml b/models/audio-separator-models/roformers/model_bs_roformer_ep_317_sdr_12.9755.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..527a3ad8a10366aaa562d007714e74d271a18cbf
--- /dev/null
+++ b/models/audio-separator-models/roformers/model_bs_roformer_ep_317_sdr_12.9755.yaml
@@ -0,0 +1,133 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801 # has no effect here (use the setting under model)
+  hop_length: 441 # has no effect here (use the setting under model)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 512
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 16
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - Vocals
+  - Instrumental
+  lr: 5.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: true # Mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 4
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/model_bs_roformer_ep_368_sdr_12.9628.ckpt b/models/audio-separator-models/roformers/model_bs_roformer_ep_368_sdr_12.9628.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..929fbf0bc57f788a3d6b758ee4feedd61976298b
--- /dev/null
+++ b/models/audio-separator-models/roformers/model_bs_roformer_ep_368_sdr_12.9628.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6c94864adfb73bbb0ca58ec14d58dd0b364549e9fb61433ae51916f3e2f8d0b
+size 639317465
diff --git a/models/audio-separator-models/roformers/model_bs_roformer_ep_368_sdr_12.9628.yaml b/models/audio-separator-models/roformers/model_bs_roformer_ep_368_sdr_12.9628.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3703706964572e9f52a8d9003935eb808f094875
--- /dev/null
+++ b/models/audio-separator-models/roformers/model_bs_roformer_ep_368_sdr_12.9628.yaml
@@ -0,0 +1,133 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801 # has no effect here (use the setting under model)
+  hop_length: 441 # has no effect here (use the setting under model)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 512
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 16
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - Vocals
+  - Instrumental
+  lr: 5.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: true # Mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 4
diff --git a/models/audio-separator-models/roformers/model_bs_roformer_ep_937_sdr_10.5309.ckpt b/models/audio-separator-models/roformers/model_bs_roformer_ep_937_sdr_10.5309.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..b30e4893bf1e4b198ea8005346b01efaa135c8b7
--- /dev/null
+++ b/models/audio-separator-models/roformers/model_bs_roformer_ep_937_sdr_10.5309.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2e825a03bc908cb04dbd88eddeefbf5147dd1cf1f95cebf453d9dbfabec494b
+size 393068365
diff --git a/models/audio-separator-models/roformers/model_bs_roformer_ep_937_sdr_10.5309.yaml b/models/audio-separator-models/roformers/model_bs_roformer_ep_937_sdr_10.5309.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9ea5c4b701d730d9f752aa87f9a82827e40b7b6e
--- /dev/null
+++ b/models/audio-separator-models/roformers/model_bs_roformer_ep_937_sdr_10.5309.yaml
@@ -0,0 +1,138 @@
+audio:
+  chunk_size: 131584
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 512
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 384
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  linear_transformer_depth: 0
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 512
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 4
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - No Drum-Bass
+  - Drum-Bass
+  lr: 5.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: No Drum-Bass
+  num_epochs: 1000
+  num_steps: 1000
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+
+augmentations:
+  enable: true # enable or disable all augmentations (a quick way to turn them all off if needed)
+  loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
+  loudness_min: 0.5
+  loudness_max: 1.5
+  mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
+  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+    - 0.2
+    - 0.02
+  mixup_loudness_min: 0.5
+  mixup_loudness_max: 1.5
+
+inference:
+  batch_size: 1
+  dim_t: 512
+  num_overlap: 4
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt b/models/audio-separator-models/roformers/model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..4f9492413f4671a4c218f73cd6df2963e6b75fb8
--- /dev/null
+++ b/models/audio-separator-models/roformers/model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:123c00786bdbc6bd462dddb35cd21fd6ae99ab8319f93f63a8abc1012e593d94
+size 527121477
diff --git a/models/audio-separator-models/roformers/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt b/models/audio-separator-models/roformers/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..1cb36d54103c4fdd30f7e8a089164cba50cfb0b0
--- /dev/null
+++ b/models/audio-separator-models/roformers/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21b9d0958e35b8ebfbe2afe69bbd5444e5ffe2f5d80ae0d583b833d2f3c0d139
+size 1007816988
diff --git a/models/audio-separator-models/roformers/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml b/models/audio-separator-models/roformers/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..16802cec559bad5ad47225053feb5d0d0c3b65a8
--- /dev/null
+++ b/models/audio-separator-models/roformers/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml
@@ -0,0 +1,72 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801 # has no effect here (the model's own settings are used)
+  hop_length: 441 # has no effect here (the model's own settings are used)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 384
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  batch_size: 9
+  gradient_accumulation_steps: 8
+  grad_clip: 0
+  instruments:
+  - Vocals
+  - Instrumental
+  lr: 4.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: true # Mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed for validation on the multisong dataset when the "other" stem is actually the instrumental
+  use_amp: true # enable or disable mixed precision (float16); this should usually be true
+
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 4
\ No newline at end of file
diff --git a/models/audio-separator-models/roformers/vocals_mel_band_roformer.ckpt b/models/audio-separator-models/roformers/vocals_mel_band_roformer.ckpt
new file mode 100644
index 0000000000000000000000000000000000000000..e9269937826d8cedf1855096bc9c1d49298bb4f8
--- /dev/null
+++ b/models/audio-separator-models/roformers/vocals_mel_band_roformer.ckpt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87201f4d31afb5bc79993230fc49446918425574db48c01c405e44f365c7559e
+size 913106900
diff --git a/models/audio-separator-models/roformers/vocals_mel_band_roformer.yaml b/models/audio-separator-models/roformers/vocals_mel_band_roformer.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9cb005e7a97c66d5fb23bba8bb36bec9619cdd8f
--- /dev/null
+++ b/models/audio-separator-models/roformers/vocals_mel_band_roformer.yaml
@@ -0,0 +1,50 @@
+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+
+training:
+  instruments:
+  - vocals
+  - other
+  target_instrument: vocals
+
+inference:
+  dim_t: 1101
+  num_overlap: 1
+  chunk_size: 352800
\ No newline at end of file
diff --git a/models/audio-separator-models/source.txt b/models/audio-separator-models/source.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a031824f31440defb4bcdc3e89d6a25e31b7db7a
--- /dev/null
+++ b/models/audio-separator-models/source.txt
@@ -0,0 +1 @@
+https://huggingface.co/Eddycrack864/audio-separator-models
\ No newline at end of file