Eddycrack864 committed on Oct 16, 2025

Commit

beeb78b

verified ·

1 Parent(s): 319c4df

Upload 20 files

Browse files

Files changed (20) hide show

roformers/config_mel_band_roformer_instrumental_gabox.yaml +51 -0
roformers/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt +3 -0
roformers/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768_config.yaml +71 -0
roformers/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt +3 -0
roformers/denoise_mel_band_roformer_aufr33_sdr_27.9959_config.yaml +71 -0
roformers/deverb_bs_roformer_8_384dim_10depth.ckpt +3 -0
roformers/deverb_bs_roformer_8_384dim_10depth_config.yaml +137 -0
roformers/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt +3 -0
roformers/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144_config.yaml +71 -0
roformers/mel_band_roformer_denoise_debleed_gabox.ckpt +3 -0
roformers/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt +3 -0
roformers/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956_config.yaml +71 -0
roformers/model_bs_roformer_ep_317_sdr_12.9755.ckpt +3 -0
roformers/model_bs_roformer_ep_317_sdr_12.9755.yaml +133 -0
roformers/model_bs_roformer_ep_368_sdr_12.9628.ckpt +3 -0
roformers/model_bs_roformer_ep_368_sdr_12.9628.yaml +133 -0
roformers/model_bs_roformer_ep_937_sdr_10.5309.ckpt +3 -0
roformers/model_bs_roformer_ep_937_sdr_10.5309.yaml +138 -0
roformers/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt +3 -0
roformers/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml +72 -0

roformers/config_mel_band_roformer_instrumental_gabox.yaml ADDED Viewed

	@@ -0,0 +1,51 @@

+audio:
+  chunk_size: 485100
+  dim_f: 1024
+  dim_t: 1101
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.000
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+training:
+  instruments:
+  - Instrumental
+  - Vocals
+  target_instrument: Instrumental
+  use_amp: True
+inference:
+  batch_size: 1
+  dim_t: 1101
+  num_overlap: 2

roformers/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a25e3b233722cd81e2de7b8e798a3fef29d4b9799ccacda60b0dc958a1e2a5bb
+size 913097300

roformers/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768_config.yaml ADDED Viewed

	@@ -0,0 +1,71 @@

+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 000
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+training:
+  batch_size: 2
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - dry
+  - other
+  lr: 1.0e-05
+  patience: 8
+  reduce_factor: 0.95
+  target_instrument: dry
+  num_epochs: 1000
+  num_steps: 4032
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed for checking on the multisong dataset if 'other' is actually instrumental
+inference:
+  batch_size: 2
+  dim_t: 801
+  num_overlap: 4

roformers/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7c1c39191edc34e942ca7f2346ce6b6c0e1208a5f76349ffce6f696bd12910de
+size 913097300

roformers/denoise_mel_band_roformer_aufr33_sdr_27.9959_config.yaml ADDED Viewed

	@@ -0,0 +1,71 @@

+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 000
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+training:
+  batch_size: 2
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - dry
+  - other
+  lr: 1.0e-05
+  patience: 8
+  reduce_factor: 0.95
+  target_instrument: dry
+  num_epochs: 1000
+  num_steps: 4032
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed for checking on the multisong dataset if 'other' is actually instrumental
+inference:
+  batch_size: 2
+  dim_t: 801
+  num_overlap: 4

roformers/deverb_bs_roformer_8_384dim_10depth.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c38653aaa5e49f2f7b84dd3be2b6b679e0cbea23978e6b48389ee6f0a914768
+size 361499604

roformers/deverb_bs_roformer_8_384dim_10depth_config.yaml ADDED Viewed

	@@ -0,0 +1,137 @@

+audio:
+  chunk_size: 352768
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+model:
+  dim: 384
+  depth: 10
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+training:
+  batch_size: 1
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - noreverb
+  - reverb
+  lr: 5.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: noreverb
+  num_epochs: 1000
+  num_steps: 1000
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed for checking on the multisong dataset if 'other' is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+augmentations:
+  enable: true # enable or disable all augmentations (to quickly disable them if needed)
+  loudness: true # randomly change loudness of each stem within the range (loudness_min; loudness_max)
+  loudness_min: 0.5
+  loudness_max: 1.5
+  mixup: false # mix several stems of the same type with some probability (only works for dataset types: 1, 2, 3)
+  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+    - 0.2
+    - 0.02
+  mixup_loudness_min: 0.5
+  mixup_loudness_max: 1.5
+inference:
+  batch_size: 4
+  dim_t: 801
+  num_overlap: 4

roformers/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca8799531fe51c94172cc047226209ed48bf7d8c02e04671795a15d2a1c318af
+size 913096801

roformers/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144_config.yaml ADDED Viewed

	@@ -0,0 +1,71 @@

+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 000
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+training:
+  batch_size: 2
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - crowd
+  - other
+  lr: 1.0e-05
+  patience: 8
+  reduce_factor: 0.95
+  target_instrument: crowd
+  num_epochs: 1000
+  num_steps: 4032
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed for checking on the multisong dataset if 'other' is actually instrumental
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 4

roformers/mel_band_roformer_denoise_debleed_gabox.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91aa7a546ed2e93482e4629c982d35b0d258bb3de6eeab497fd91658cc86c7fd
+size 913026650

roformers/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1de20d459332fe8869aeb01327a31df0032262706e1365114e852dc271779813
+size 913096801

roformers/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956_config.yaml ADDED Viewed

	@@ -0,0 +1,71 @@

+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801
+  hop_length: 441
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 000
+model:
+  dim: 384
+  depth: 6
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0
+  ff_dropout: 0
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+training:
+  batch_size: 4
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - Vocals
+  - Instrumental
+  lr: 1.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals
+  num_epochs: 1000
+  num_steps: 2000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: null
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: false # Mix several stems of the same type with some probability
+  augmentation_loudness: false # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0
+  augmentation_loudness_max: 0
+  q: 0.95
+  coarse_loss_clip: false
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed for checking on the multisong dataset if 'other' is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 4

roformers/model_bs_roformer_ep_317_sdr_12.9755.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5b84f37e8d444c8cb30c79d77f613a41c05868ff9c9ac6c7049c00aefae115aa
+size 639331213

roformers/model_bs_roformer_ep_317_sdr_12.9755.yaml ADDED Viewed

	@@ -0,0 +1,133 @@

+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801 # doesn't work (use in model)
+  hop_length: 441 # doesn't work (use in model)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+model:
+  dim: 512
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+training:
+  batch_size: 16
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - Vocals
+  - Instrumental
+  lr: 5.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: true # Mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed for checking on the multisong dataset if 'other' is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 4

roformers/model_bs_roformer_ep_368_sdr_12.9628.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f6c94864adfb73bbb0ca58ec14d58dd0b364549e9fb61433ae51916f3e2f8d0b
+size 639317465

roformers/model_bs_roformer_ep_368_sdr_12.9628.yaml ADDED Viewed

	@@ -0,0 +1,133 @@

+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801 # doesn't work (use in model)
+  hop_length: 441 # doesn't work (use in model)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+model:
+  dim: 512
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+training:
+  batch_size: 16
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - Vocals
+  - Instrumental
+  lr: 5.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: true # Mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed for checking on the multisong dataset if 'other' is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 4

roformers/model_bs_roformer_ep_937_sdr_10.5309.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a2e825a03bc908cb04dbd88eddeefbf5147dd1cf1f95cebf453d9dbfabec494b
+size 393068365

roformers/model_bs_roformer_ep_937_sdr_10.5309.yaml ADDED Viewed

	@@ -0,0 +1,138 @@

+audio:
+  chunk_size: 131584
+  dim_f: 1024
+  dim_t: 256
+  hop_length: 512
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+model:
+  dim: 384
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  linear_transformer_depth: 0
+  freqs_per_bands: !!python/tuple
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 2
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 4
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 12
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 24
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 48
+    - 128
+    - 129
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: true
+  dim_freqs_in: 1025
+  stft_n_fft: 2048
+  stft_hop_length: 512
+  stft_win_length: 2048
+  stft_normalized: false
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+training:
+  batch_size: 4
+  gradient_accumulation_steps: 1
+  grad_clip: 0
+  instruments:
+  - No Drum-Bass
+  - Drum-Bass
+  lr: 5.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: No Drum-Bass
+  num_epochs: 1000
+  num_steps: 1000
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed for checking on the multisong dataset if 'other' is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+augmentations:
+  enable: true # enable or disable all augmentations (to quickly disable them if needed)
+  loudness: true # randomly change loudness of each stem within the range (loudness_min; loudness_max)
+  loudness_min: 0.5
+  loudness_max: 1.5
+  mixup: true # mix several stems of the same type with some probability (only works for dataset types: 1, 2, 3)
+  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
+    - 0.2
+    - 0.02
+  mixup_loudness_min: 0.5
+  mixup_loudness_max: 1.5
+inference:
+  batch_size: 1
+  dim_t: 512
+  num_overlap: 4

roformers/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21b9d0958e35b8ebfbe2afe69bbd5444e5ffe2f5d80ae0d583b833d2f3c0d139
+size 1007816988

roformers/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml ADDED Viewed

	@@ -0,0 +1,72 @@

+audio:
+  chunk_size: 352800
+  dim_f: 1024
+  dim_t: 801 # doesn't work (use in model)
+  hop_length: 441 # doesn't work (use in model)
+  n_fft: 2048
+  num_channels: 2
+  sample_rate: 44100
+  min_mean_abs: 0.001
+model:
+  dim: 384
+  depth: 12
+  stereo: true
+  num_stems: 1
+  time_transformer_depth: 1
+  freq_transformer_depth: 1
+  num_bands: 60
+  dim_head: 64
+  heads: 8
+  attn_dropout: 0.1
+  ff_dropout: 0.1
+  flash_attn: True
+  dim_freqs_in: 1025
+  sample_rate: 44100  # needed for mel filter bank from librosa
+  stft_n_fft: 2048
+  stft_hop_length: 441
+  stft_win_length: 2048
+  stft_normalized: False
+  mask_estimator_depth: 2
+  multi_stft_resolution_loss_weight: 1.0
+  multi_stft_resolutions_window_sizes: !!python/tuple
+  - 4096
+  - 2048
+  - 1024
+  - 512
+  - 256
+  multi_stft_hop_size: 147
+  multi_stft_normalized: False
+training:
+  batch_size: 9
+  gradient_accumulation_steps: 8
+  grad_clip: 0
+  instruments:
+  - Vocals
+  - Instrumental
+  lr: 4.0e-05
+  patience: 2
+  reduce_factor: 0.95
+  target_instrument: Vocals
+  num_epochs: 1000
+  num_steps: 1000
+  augmentation: false # enable augmentations by audiomentations and pedalboard
+  augmentation_type: simple1
+  use_mp3_compress: false # Deprecated
+  augmentation_mix: true # Mix several stems of the same type with some probability
+  augmentation_loudness: true # randomly change loudness of each stem
+  augmentation_loudness_type: 1 # Type 1 or 2
+  augmentation_loudness_min: 0.5
+  augmentation_loudness_max: 1.5
+  q: 0.95
+  coarse_loss_clip: true
+  ema_momentum: 0.999
+  optimizer: adam
+  other_fix: false # needed for checking on the multisong dataset if 'other' is actually instrumental
+  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
+inference:
+  batch_size: 1
+  dim_t: 801
+  num_overlap: 4