Politrees committed on Apr 12, 2025

Commit

c9b303e

verified ·

1 Parent(s): 1aa25f6

.

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

Roformer_models/BS_Inst_EXP_VRL.ckpt +0 -3
Roformer_models/BS_Inst_EXP_VRL.yaml +0 -124
Roformer_models/MelBandRoformerBigSYHFTV1.ckpt +0 -3
Roformer_models/MelBandRoformerSYHFT.ckpt +0 -3
Roformer_models/MelBandRoformerSYHFTV2.5.ckpt +0 -3
Roformer_models/MelBandRoformerSYHFTV2.ckpt +0 -3
Roformer_models/MelBandRoformerSYHFTV3Epsilon.ckpt +0 -3
Roformer_models/aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt +0 -3
Roformer_models/aspiration_mel_band_roformer_sdr_18.9845.ckpt +0 -3
Roformer_models/bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt +0 -3
Roformer_models/bs_roformer_voc_gabox.ckpt +0 -3
Roformer_models/config_aspiration_mel_band_roformer.yaml +0 -76
Roformer_models/config_bs_roformer_voc_gabox.yaml +0 -133
Roformer_models/config_chorus_male_female_bs_roformer.yaml +0 -125
Roformer_models/config_dereverb-echo_mel_band_roformer.yaml +0 -76
Roformer_models/config_dereverb-echo_mel_band_roformer_sdr_13.4843_v2.yaml +0 -64
Roformer_models/config_dereverb_echo_mel_band_roformer_v2.yaml +0 -64
Roformer_models/config_mel_band_roformer_bleed_suppressor_v1.yaml +0 -51
Roformer_models/config_mel_band_roformer_inst_gabox.yaml +0 -51
Roformer_models/config_mel_band_roformer_instrumental_becruily.yaml +0 -72
Roformer_models/config_mel_band_roformer_kim_ft_unwa.yaml +0 -72
Roformer_models/config_mel_band_roformer_voc_gabox.yaml +0 -51
Roformer_models/config_mel_band_roformer_vocals_becruily.yaml +0 -72
Roformer_models/config_mel_band_roformer_vocals_fullness_aname.yaml +0 -54
Roformer_models/config_melband_roformer_big_beta5e.yaml +0 -51
Roformer_models/config_melband_roformer_big_beta6.yaml +0 -72
Roformer_models/config_melband_roformer_big_beta6x.yaml +0 -72
Roformer_models/config_melband_roformer_small_by_aname.yaml +0 -52
Roformer_models/config_melbandroformer_big_beta4.yaml +0 -51
Roformer_models/config_melbandroformer_inst.yaml +0 -51
Roformer_models/config_melbandroformer_inst_v2.yaml +0 -51
Roformer_models/config_melbandroformer_instvoc_duality.yaml +0 -51
Roformer_models/config_vocals_mel_band_roformer_big_v1_ft.yaml +0 -51
Roformer_models/config_vocals_mel_band_roformer_ft.yaml +0 -72
Roformer_models/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt +0 -3
Roformer_models/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768_config.yaml +0 -71
Roformer_models/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt +0 -3
Roformer_models/denoise_mel_band_roformer_aufr33_sdr_27.9959_config.yaml +0 -71
Roformer_models/dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt +0 -3
Roformer_models/dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt +0 -3
Roformer_models/dereverb_big_mbr_ep_362.ckpt +0 -3
Roformer_models/dereverb_echo_mbr_fused.ckpt +0 -3
Roformer_models/dereverb_mel_band_roformer_anvuew.yaml +0 -76
Roformer_models/dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt +0 -3
Roformer_models/dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt +0 -3
Roformer_models/dereverb_mel_band_roformer_mono_anvuew_sdr_20.4029.ckpt +0 -3
Roformer_models/dereverb_super_big_mbr_ep_346.ckpt +0 -3
Roformer_models/deverb_bs_roformer_8_384dim_10depth.ckpt +0 -3
Roformer_models/deverb_bs_roformer_8_384dim_10depth_config.yaml +0 -137
Roformer_models/mel_band_roformer_bleed_suppressor_v1.ckpt +0 -3

Roformer_models/BS_Inst_EXP_VRL.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c035e2a102243405e45bf33faa175f62fd7118f63b62771fafdf81062b804131
-size 393351501

Roformer_models/BS_Inst_EXP_VRL.yaml DELETED Viewed

@@ -1,124 +0,0 @@
-audio:
-  chunk_size: 485100 #352800 #485100
-  dim_f: 1024
-  dim_t: 801
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 384
-  depth: 12
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  freqs_per_bands: !!python/tuple
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 128
-    - 129
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: true
-  dim_freqs_in: 1025
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: false
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 1
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - Vocals
-  - Instrumental
-  lr: 1.0e-04
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: Instrumental
-  num_epochs: 1
-  num_steps: 1000
-  q: 0.95
-  coarse_loss_clip: true
-  ema_momentum: 0.999
-  optimizer: adamw
-  other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-inference:
-  batch_size: 1
-  dim_t: 1101
-  num_overlap: 2

Roformer_models/MelBandRoformerBigSYHFTV1.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e2327e3e81f19e67c307f8c830c54267c09ecb0e9c6ad2b40a80c310899c955f
-size 1479738496

Roformer_models/MelBandRoformerSYHFT.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f319dfcde4396ea3106658f457f5eb0bc577e113491f61ae8bab216fe84b0c0c
-size 913096702

Roformer_models/MelBandRoformerSYHFTV2.5.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:916e3a2c1e63b1457bcad823b98ca705e4933deffd2a5ab3a370e10f68bf47e2
-size 913090472

Roformer_models/MelBandRoformerSYHFTV2.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2e99f8efa5315300c197295592bd7e56c21c1d77e1884c904b5128c54a2a4632
-size 913095346

Roformer_models/MelBandRoformerSYHFTV3Epsilon.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4c886092e4aae13aa089263a0d54d483643f58c16ec221aed37268e2c1031397
-size 913090472

Roformer_models/aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:83bfe991cec4fbadde9f30d1f79cd5293ad0b1f936256be327bba5cbb4883374
-size 835982664

Roformer_models/aspiration_mel_band_roformer_sdr_18.9845.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9e791258c866c6c8da66052693d8cc3b64f1f42c01e052dbdc570cd278380cc5
-size 835983746

Roformer_models/bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3cf11736d1b42a11ae55d8299316585921477dd2a671b24b663660846ca9861b
-size 527119779

Roformer_models/bs_roformer_voc_gabox.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:18d58efe5e949e70fab11b875329af6d06ef11ccc29574bfe943fb57cc827f38
-size 639254584

Roformer_models/config_aspiration_mel_band_roformer.yaml DELETED Viewed

@@ -1,76 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 801 # don't work (use in model)
-  hop_length: 441 # don't work (use in model)
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 256
-  depth: 8
-  stereo: true
-  num_stems: 2
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0.1
-  ff_dropout: 0.1
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 1
-  gradient_accumulation_steps: 8
-  grad_clip: 0
-  instruments:
-  - aspiration
-  - other
-  lr: 4.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: null
-  num_epochs: 1000
-  num_steps: 1000
-  q: 0.95
-  coarse_loss_clip: true
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-augmentations:
-  enable: true # enable or disable all augmentations (to fast disable if needed)
-  loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
-  loudness_min: 0.5
-  loudness_max: 1.5
-  mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
-  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
-    - 0.2
-    - 0.02
-  mixup_loudness_min: 0.5
-  mixup_loudness_max: 1.5
-inference:
-  batch_size: 4
-  dim_t: 801
-  num_overlap: 2

Roformer_models/config_bs_roformer_voc_gabox.yaml DELETED Viewed

@@ -1,133 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 801 # don't work (use in model)
-  hop_length: 441 # don't work (use in model)
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.001
-model:
-  dim: 512
-  depth: 12
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  freqs_per_bands: !!python/tuple
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 128
-    - 129
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0.1
-  ff_dropout: 0.1
-  flash_attn: true
-  dim_freqs_in: 1025
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: false
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 16
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - Vocals
-  - Instrumental
-  lr: 5.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: Vocals
-  num_epochs: 1000
-  num_steps: 1000
-  augmentation: false # enable augmentations by audiomentations and pedalboard
-  augmentation_type: simple1
-  use_mp3_compress: false # Deprecated
-  augmentation_mix: true # Mix several stems of the same type with some probability
-  augmentation_loudness: true # randomly change loudness of each stem
-  augmentation_loudness_type: 1 # Type 1 or 2
-  augmentation_loudness_min: 0.5
-  augmentation_loudness_max: 1.5
-  q: 0.95
-  coarse_loss_clip: true
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-inference:
-  batch_size: 1
-  dim_t: 801
-  num_overlap: 4

Roformer_models/config_chorus_male_female_bs_roformer.yaml DELETED Viewed

@@ -1,125 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 801 # don't work (use in model)
-  hop_length: 441 # don't work (use in model)
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 384
-  depth: 8
-  stereo: true
-  num_stems: 2
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  freqs_per_bands: !!python/tuple
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 128
-    - 129
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0.0
-  ff_dropout: 0.0
-  flash_attn: true
-  dim_freqs_in: 1025
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: false
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 1
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - male
-  - female
-  lr: 1.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: null
-  num_epochs: 1000
-  num_steps: 1000
-  q: 0.95
-  coarse_loss_clip: true
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-inference:
-  batch_size: 1
-  dim_t: 801
-  num_overlap: 2

Roformer_models/config_dereverb-echo_mel_band_roformer.yaml DELETED Viewed

@@ -1,76 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 801 # don't work (use in model)
-  hop_length: 441 # don't work (use in model)
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 256
-  depth: 8
-  stereo: true
-  num_stems: 2
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0.1
-  ff_dropout: 0.1
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 1
-  gradient_accumulation_steps: 8
-  grad_clip: 0
-  instruments:
-  - dry
-  - No dry
-  lr: 4.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: null
-  num_epochs: 1000
-  num_steps: 1000
-  q: 0.95
-  coarse_loss_clip: true
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-augmentations:
-  enable: true # enable or disable all augmentations (to fast disable if needed)
-  loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
-  loudness_min: 0.5
-  loudness_max: 1.5
-  mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
-  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
-    - 0.2
-    - 0.02
-  mixup_loudness_min: 0.5
-  mixup_loudness_max: 1.5
-inference:
-  batch_size: 4
-  dim_t: 801
-  num_overlap: 4

Roformer_models/config_dereverb-echo_mel_band_roformer_sdr_13.4843_v2.yaml DELETED Viewed

@@ -1,64 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 801
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 256
-  depth: 8
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0.1
-  ff_dropout: 0.1
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 1
-  gradient_accumulation_steps: 8
-  grad_clip: 0
-  instruments:
-  - dry
-  - No dry
-  lr: 1.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: dry
-  num_epochs: 1000
-  num_steps: 1000
-  q: 0.95
-  coarse_loss_clip: true
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: false
-  use_amp: true
-inference:
-  batch_size: 1
-  dim_t: 801
-  num_overlap: 4

Roformer_models/config_dereverb_echo_mel_band_roformer_v2.yaml DELETED Viewed

@@ -1,64 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 801
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 256
-  depth: 8
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0.1
-  ff_dropout: 0.1
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 1
-  gradient_accumulation_steps: 8
-  grad_clip: 0
-  instruments:
-  - dry
-  - other
-  lr: 1.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: dry
-  num_epochs: 1000
-  num_steps: 1000
-  q: 0.95
-  coarse_loss_clip: true
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: false
-  use_amp: true
-inference:
-  batch_size: 1
-  dim_t: 801
-  num_overlap: 4

Roformer_models/config_mel_band_roformer_bleed_suppressor_v1.yaml DELETED Viewed

@@ -1,51 +0,0 @@
-audio:
-  chunk_size: 485100
-  dim_f: 1024
-  dim_t: 801
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  instruments:
-  - Instrumental
-  - Bleed
-  target_instrument: Instrumental
-  use_amp: True
-inference:
-  batch_size: 1
-  dim_t: 801
-  num_overlap: 2

Roformer_models/config_mel_band_roformer_inst_gabox.yaml DELETED Viewed

@@ -1,51 +0,0 @@
-audio:
-  chunk_size: 485100
-  dim_f: 1024
-  dim_t: 1101
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  instruments:
-  - Instrumental
-  - Vocals
-  target_instrument: Instrumental
-  use_amp: True
-inference:
-  batch_size: 1
-  dim_t: 1101
-  num_overlap: 2

Roformer_models/config_mel_band_roformer_instrumental_becruily.yaml DELETED Viewed

@@ -1,72 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 256
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 1
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - Instrumental
-  - Vocals
-  lr: 0.0005
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: Instrumental
-  num_epochs: 1000
-  num_steps: 1000
-  augmentation: false # enable augmentations by audiomentations and pedalboard
-  augmentation_type: null
-  use_mp3_compress: false # Deprecated
-  augmentation_mix: false # Mix several stems of the same type with some probability
-  augmentation_loudness: false # randomly change loudness of each stem
-  augmentation_loudness_type: 1 # Type 1 or 2
-  augmentation_loudness_min: 0
-  augmentation_loudness_max: 0
-  q: 0.95
-  coarse_loss_clip: false
-  ema_momentum: 0.999
-  optimizer: adamw
-  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-inference:
-  batch_size: 1
-  dim_t: 1101
-  num_overlap: 2

Roformer_models/config_mel_band_roformer_kim_ft_unwa.yaml DELETED Viewed

@@ -1,72 +0,0 @@
-audio:
-  chunk_size: 485100
-  dim_f: 1024
-  dim_t: 801
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 1
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - vocals
-  - other
-  lr: 1.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: vocals
-  num_epochs: 1000
-  num_steps: 1000
-  augmentation: false # enable augmentations by audiomentations and pedalboard
-  augmentation_type: null
-  use_mp3_compress: false # Deprecated
-  augmentation_mix: false # Mix several stems of the same type with some probability
-  augmentation_loudness: false # randomly change loudness of each stem
-  augmentation_loudness_type: 1 # Type 1 or 2
-  augmentation_loudness_min: 0
-  augmentation_loudness_max: 0
-  q: 0.95
-  coarse_loss_clip: false
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-inference:
-  batch_size: 1
-  dim_t: 801
-  num_overlap: 8

Roformer_models/config_mel_band_roformer_voc_gabox.yaml DELETED Viewed

@@ -1,51 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 256
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.001
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  instruments:
-  - Vocals
-  - Instrumental
-  target_instrument: Vocals
-inference:
-  batch_size: 1
-  dim_t: 1101
-  num_overlap: 1
-  chunk_size: 352800

Roformer_models/config_mel_band_roformer_vocals_becruily.yaml DELETED Viewed

@@ -1,72 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 256
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 1
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - vocals
-  - other
-  lr: 0.0005
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: vocals
-  num_epochs: 1000
-  num_steps: 1000
-  augmentation: false # enable augmentations by audiomentations and pedalboard
-  augmentation_type: null
-  use_mp3_compress: false # Deprecated
-  augmentation_mix: false # Mix several stems of the same type with some probability
-  augmentation_loudness: false # randomly change loudness of each stem
-  augmentation_loudness_type: 1 # Type 1 or 2
-  augmentation_loudness_min: 0
-  augmentation_loudness_max: 0
-  q: 0.95
-  coarse_loss_clip: false
-  ema_momentum: 0.999
-  optimizer: adamw
-  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-inference:
-  batch_size: 1
-  dim_t: 1101
-  num_overlap: 2

Roformer_models/config_mel_band_roformer_vocals_fullness_aname.yaml DELETED Viewed

@@ -1,54 +0,0 @@
-audio:
-  chunk_size: 661500
-  dim_f: 1024
-  dim_t: 256
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.001
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 1
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - vocals
-  - other
-  target_instrument: vocals
-  use_amp: true
-inference:
-  batch_size: 4
-  dim_t: 1101
-  num_overlap: 4

Roformer_models/config_melband_roformer_big_beta5e.yaml DELETED Viewed

@@ -1,51 +0,0 @@
-audio:
-  chunk_size: 485100
-  dim_f: 1024
-  dim_t: 801
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 3
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  instruments:
-  - vocals
-  - other
-  target_instrument: vocals
-  use_amp: True
-inference:
-  batch_size: 1
-  dim_t: 801
-  num_overlap: 2

Roformer_models/config_melband_roformer_big_beta6.yaml DELETED Viewed

@@ -1,72 +0,0 @@
-audio:
-  chunk_size: 529200
-  dim_f: 1024
-  dim_t: 256
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 512
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 1
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - vocals
-  - other
-  lr: 1.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: vocals
-  num_epochs: 1000
-  num_steps: 1000
-  augmentation: false # enable augmentations by audiomentations and pedalboard
-  augmentation_type: null
-  use_mp3_compress: false # Deprecated
-  augmentation_mix: false # Mix several stems of the same type with some probability
-  augmentation_loudness: false # randomly change loudness of each stem
-  augmentation_loudness_type: 1 # Type 1 or 2
-  augmentation_loudness_min: 0
-  augmentation_loudness_max: 0
-  q: 0.95
-  coarse_loss_clip: false
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-inference:
-  batch_size: 2
-  dim_t: 1201
-  num_overlap: 2

Roformer_models/config_melband_roformer_big_beta6x.yaml DELETED Viewed

@@ -1,72 +0,0 @@
-audio:
-  chunk_size: 529200
-  dim_f: 1024
-  dim_t: 256
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 512
-  depth: 12
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 1
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - vocals
-  - other
-  lr: 1.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: vocals
-  num_epochs: 1000
-  num_steps: 1000
-  augmentation: false # enable augmentations by audiomentations and pedalboard
-  augmentation_type: null
-  use_mp3_compress: false # Deprecated
-  augmentation_mix: false # Mix several stems of the same type with some probability
-  augmentation_loudness: false # randomly change loudness of each stem
-  augmentation_loudness_type: 1 # Type 1 or 2
-  augmentation_loudness_min: 0
-  augmentation_loudness_max: 0
-  q: 0.95
-  coarse_loss_clip: false
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-inference:
-  batch_size: 2
-  dim_t: 1201
-  num_overlap: 2

Roformer_models/config_melband_roformer_small_by_aname.yaml DELETED Viewed

@@ -1,52 +0,0 @@
-audio:
-  chunk_size: 485100
-  dim_f: 1024
-  dim_t: 1101
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.0
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-  mlp_expansion_factor: 1
-training:
-  instruments:
-    - Instrumental
-    - Vocals
-  target_instrument: null
-  use_amp: true
-inference:
-  batch_size: 2
-  dim_t: 1101
-  num_overlap: 4

Roformer_models/config_melbandroformer_big_beta4.yaml DELETED Viewed

@@ -1,51 +0,0 @@
-audio:
-  chunk_size: 485100
-  dim_f: 1024
-  dim_t: 1101
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 384
-  depth: 12
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 3
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  instruments:
-  - vocals
-  - other
-  target_instrument: vocals
-  use_amp: True
-inference:
-  batch_size: 1
-  dim_t: 1101
-  num_overlap: 2

Roformer_models/config_melbandroformer_inst.yaml DELETED Viewed

@@ -1,51 +0,0 @@
-audio:
-  chunk_size: 485100
-  dim_f: 1024
-  dim_t: 1101
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  instruments:
-  - other
-  - vocals
-  target_instrument: other
-  use_amp: True
-inference:
-  batch_size: 1
-  dim_t: 1101
-  num_overlap: 2

Roformer_models/config_melbandroformer_inst_v2.yaml DELETED Viewed

@@ -1,51 +0,0 @@
-audio:
-  chunk_size: 485100
-  dim_f: 1024
-  dim_t: 1101
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 384
-  depth: 12
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 3
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  instruments:
-  - Instrumental
-  - Vocals
-  target_instrument: Instrumental
-  use_amp: True
-inference:
-  batch_size: 1
-  dim_t: 1101
-  num_overlap: 2

Roformer_models/config_melbandroformer_instvoc_duality.yaml DELETED Viewed

@@ -1,51 +0,0 @@
-audio:
-  chunk_size: 485100
-  dim_f: 1024
-  dim_t: 256
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 2
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  instruments:
-  - Vocals
-  - Instrumental
-  target_instrument: null
-  use_amp: True
-inference:
-  batch_size: 1
-  dim_t: 1101
-  num_overlap: 2

Roformer_models/config_vocals_mel_band_roformer_big_v1_ft.yaml DELETED Viewed

@@ -1,51 +0,0 @@
-audio:
-  chunk_size: 485100
-  dim_f: 1024
-  dim_t: 801
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 3
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  instruments:
-  - vocals
-  - other
-  target_instrument: vocals
-  use_amp: True
-inference:
-  batch_size: 1
-  dim_t: 801
-  num_overlap: 2

Roformer_models/config_vocals_mel_band_roformer_ft.yaml DELETED Viewed

@@ -1,72 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 256
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 1
-  gradient_accumulation_steps: 8
-  grad_clip: 0
-  instruments:
-  - vocals
-  - other
-  lr: 1.0e-04
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: vocals
-  num_epochs: 1000
-  num_steps: 100
-  augmentation: true # enable augmentations by audiomentations and pedalboard
-  augmentation_type: null
-  use_mp3_compress: false # Deprecated
-  augmentation_mix: true # Mix several stems of the same type with some probability
-  augmentation_loudness: true # randomly change loudness of each stem
-  augmentation_loudness_type: 1 # Type 1 or 2
-  augmentation_loudness_min: 0
-  augmentation_loudness_max: 0
-  q: 0.95
-  coarse_loss_clip: false
-  ema_momentum: 0.999
-  optimizer: adamw8bit
-  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-inference:
-  batch_size: 4
-  dim_t: 256
-  num_overlap: 2

Roformer_models/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a25e3b233722cd81e2de7b8e798a3fef29d4b9799ccacda60b0dc958a1e2a5bb
-size 913097300

Roformer_models/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768_config.yaml DELETED Viewed

@@ -1,71 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 801
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 000
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 2
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - dry
-  - other
-  lr: 1.0e-05
-  patience: 8
-  reduce_factor: 0.95
-  target_instrument: dry
-  num_epochs: 1000
-  num_steps: 4032
-  augmentation: false # enable augmentations by audiomentations and pedalboard
-  augmentation_type: null
-  use_mp3_compress: false # Deprecated
-  augmentation_mix: false # Mix several stems of the same type with some probability
-  augmentation_loudness: false # randomly change loudness of each stem
-  augmentation_loudness_type: 1 # Type 1 or 2
-  augmentation_loudness_min: 0
-  augmentation_loudness_max: 0
-  q: 0.95
-  coarse_loss_clip: false
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
-inference:
-  batch_size: 2
-  dim_t: 801
-  num_overlap: 4

Roformer_models/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7c1c39191edc34e942ca7f2346ce6b6c0e1208a5f76349ffce6f696bd12910de
-size 913097300

Roformer_models/denoise_mel_band_roformer_aufr33_sdr_27.9959_config.yaml DELETED Viewed

@@ -1,71 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 801
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 000
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 2
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - dry
-  - other
-  lr: 1.0e-05
-  patience: 8
-  reduce_factor: 0.95
-  target_instrument: dry
-  num_epochs: 1000
-  num_steps: 4032
-  augmentation: false # enable augmentations by audiomentations and pedalboard
-  augmentation_type: null
-  use_mp3_compress: false # Deprecated
-  augmentation_mix: false # Mix several stems of the same type with some probability
-  augmentation_loudness: false # randomly change loudness of each stem
-  augmentation_loudness_type: 1 # Type 1 or 2
-  augmentation_loudness_min: 0
-  augmentation_loudness_max: 0
-  q: 0.95
-  coarse_loss_clip: false
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
-inference:
-  batch_size: 2
-  dim_t: 801
-  num_overlap: 4

Roformer_models/dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cd2b737a394cfb80cd48cc9fcbaf89f5f4062f6b93066c2911617a06d8b7860a
-size 835997896

Roformer_models/dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:396432f5af25992fe82d0286634bd879027c073721db6ab10199e75459708b9f
-size 455862568

Roformer_models/dereverb_big_mbr_ep_362.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0506455e74ffc02bbec700df9863ae243597034003815f1418227c6dee33b6ea
-size 455864012

Roformer_models/dereverb_echo_mbr_fused.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1596b1063238f487d54a0510a8c92cb28c000c803a271dd618ac49efc99ef3f7
-size 455776577

Roformer_models/dereverb_mel_band_roformer_anvuew.yaml DELETED Viewed

@@ -1,76 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 801
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.000
-model:
-  dim: 384
-  depth: 6
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  num_bands: 60
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0
-  ff_dropout: 0
-  flash_attn: True
-  dim_freqs_in: 1025
-  sample_rate: 44100  # needed for mel filter bank from librosa
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: False
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 3
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - noreverb
-  - reverb
-  lr: 5.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: noreverb
-  num_epochs: 1000
-  num_steps: 4000
-  q: 0.95
-  coarse_loss_clip: false
-  ema_momentum: 0.999
-  optimizer: adamw
-  other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-augmentations:
-  enable: true # enable or disable all augmentations (to fast disable if needed)
-  loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
-  loudness_min: 0.1
-  loudness_max: 1.0
-  mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
-  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
-    - 0.2
-    - 0.02
-  mixup_loudness_min: 0.5
-  mixup_loudness_max: 1.5
-inference:
-  batch_size: 1
-  dim_t: 801
-  num_overlap: 2

Roformer_models/dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9262877b87e9ebb0fb808a456b0a411fa677f5df31c8383c1254af531c078970
-size 913107578

Roformer_models/dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0db8f1b41c00cead1112e967262a12802fd32e76c0c3a8eb207e772bae25d07b
-size 913107578

Roformer_models/dereverb_mel_band_roformer_mono_anvuew_sdr_20.4029.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f099ee717eb57fb0ad5eb0e7c9ad6787c36168140b61ce2b158b90c2c4ecee79
-size 913097978

Roformer_models/dereverb_super_big_mbr_ep_346.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:26dda242bce4405555f2d6086d079fe8cc23f1f04e02e501d2689bfe3ece0489
-size 455864012

Roformer_models/deverb_bs_roformer_8_384dim_10depth.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9c38653aaa5e49f2f7b84dd3be2b6b679e0cbea23978e6b48389ee6f0a914768
-size 361499604

Roformer_models/deverb_bs_roformer_8_384dim_10depth_config.yaml DELETED Viewed

@@ -1,137 +0,0 @@
-audio:
-  chunk_size: 352768
-  dim_f: 1024
-  dim_t: 801
-  hop_length: 441
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.001
-model:
-  dim: 384
-  depth: 10
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  freqs_per_bands: !!python/tuple
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 128
-    - 129
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0.1
-  ff_dropout: 0.1
-  flash_attn: true
-  dim_freqs_in: 1025
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: false
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 1
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - noreverb
-  - reverb
-  lr: 5.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: noreverb
-  num_epochs: 1000
-  num_steps: 1000
-  q: 0.95
-  coarse_loss_clip: true
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-augmentations:
-  enable: true # enable or disable all augmentations (to fast disable if needed)
-  loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max)
-  loudness_min: 0.5
-  loudness_max: 1.5
-  mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
-  mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
-    - 0.2
-    - 0.02
-  mixup_loudness_min: 0.5
-  mixup_loudness_max: 1.5
-inference:
-  batch_size: 4
-  dim_t: 801
-  num_overlap: 4

Roformer_models/mel_band_roformer_bleed_suppressor_v1.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a9a9d10faa7f8997676a78e66d741d7acb9cc449334763f3c8f626d68ec6e575
-size 913102724