diff --git a/Roformer_models/BS_Inst_EXP_VRL.ckpt b/Roformer_models/BS_Inst_EXP_VRL.ckpt deleted file mode 100644 index 72934845a8e32e8a5dc85c35b531767f4b964bcb..0000000000000000000000000000000000000000 --- a/Roformer_models/BS_Inst_EXP_VRL.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c035e2a102243405e45bf33faa175f62fd7118f63b62771fafdf81062b804131 -size 393351501 diff --git a/Roformer_models/BS_Inst_EXP_VRL.yaml b/Roformer_models/BS_Inst_EXP_VRL.yaml deleted file mode 100644 index 87d83eb4c0aad761c268c65ecc796a7fbb9ca7a8..0000000000000000000000000000000000000000 --- a/Roformer_models/BS_Inst_EXP_VRL.yaml +++ /dev/null @@ -1,124 +0,0 @@ -audio: - chunk_size: 485100 #352800 #485100 - dim_f: 1024 - dim_t: 801 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 384 - depth: 12 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - freqs_per_bands: !!python/tuple - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 128 - - 129 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: true - dim_freqs_in: 1025 - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: false - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False -training: - batch_size: 1 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - Vocals - - Instrumental - lr: 1.0e-04 - patience: 2 - reduce_factor: 0.95 - target_instrument: Instrumental - num_epochs: 1 - num_steps: 1000 - q: 0.95 - coarse_loss_clip: true - ema_momentum: 0.999 - optimizer: adamw - other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -inference: - batch_size: 1 - dim_t: 1101 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/MelBandRoformerBigSYHFTV1.ckpt b/Roformer_models/MelBandRoformerBigSYHFTV1.ckpt deleted file mode 100644 index 3b07b85890712d20ad115ead701bf7799c3b9928..0000000000000000000000000000000000000000 --- a/Roformer_models/MelBandRoformerBigSYHFTV1.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e2327e3e81f19e67c307f8c830c54267c09ecb0e9c6ad2b40a80c310899c955f -size 1479738496 diff --git a/Roformer_models/MelBandRoformerSYHFT.ckpt b/Roformer_models/MelBandRoformerSYHFT.ckpt deleted file mode 100644 index fc8e114879a3761ce9e6f901cab7b0adbc7035b5..0000000000000000000000000000000000000000 --- a/Roformer_models/MelBandRoformerSYHFT.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f319dfcde4396ea3106658f457f5eb0bc577e113491f61ae8bab216fe84b0c0c -size 913096702 diff --git a/Roformer_models/MelBandRoformerSYHFTV2.5.ckpt b/Roformer_models/MelBandRoformerSYHFTV2.5.ckpt deleted file mode 100644 index 232404ba4a0d4e6d32b6f683711c58cd73b10c18..0000000000000000000000000000000000000000 --- a/Roformer_models/MelBandRoformerSYHFTV2.5.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:916e3a2c1e63b1457bcad823b98ca705e4933deffd2a5ab3a370e10f68bf47e2 -size 913090472 diff --git a/Roformer_models/MelBandRoformerSYHFTV2.ckpt b/Roformer_models/MelBandRoformerSYHFTV2.ckpt deleted file mode 100644 index 6b34c66f158e43e0f2c11b6df91a040c5a11a23c..0000000000000000000000000000000000000000 --- a/Roformer_models/MelBandRoformerSYHFTV2.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2e99f8efa5315300c197295592bd7e56c21c1d77e1884c904b5128c54a2a4632 -size 913095346 diff --git a/Roformer_models/MelBandRoformerSYHFTV3Epsilon.ckpt b/Roformer_models/MelBandRoformerSYHFTV3Epsilon.ckpt deleted file mode 100644 index 650b62bfd05945493a6529898ca0d0023ee7637d..0000000000000000000000000000000000000000 --- a/Roformer_models/MelBandRoformerSYHFTV3Epsilon.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4c886092e4aae13aa089263a0d54d483643f58c16ec221aed37268e2c1031397 -size 913090472 diff --git a/Roformer_models/aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt b/Roformer_models/aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt deleted file mode 100644 index 0dd326fa1c5c65e52583803582f154a0359a55bd..0000000000000000000000000000000000000000 --- a/Roformer_models/aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:83bfe991cec4fbadde9f30d1f79cd5293ad0b1f936256be327bba5cbb4883374 -size 835982664 diff --git a/Roformer_models/aspiration_mel_band_roformer_sdr_18.9845.ckpt b/Roformer_models/aspiration_mel_band_roformer_sdr_18.9845.ckpt deleted file mode 100644 index 1282613860a16200f301a12c7b8ec67d050c63be..0000000000000000000000000000000000000000 --- a/Roformer_models/aspiration_mel_band_roformer_sdr_18.9845.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e791258c866c6c8da66052693d8cc3b64f1f42c01e052dbdc570cd278380cc5 -size 835983746 diff --git a/Roformer_models/bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt b/Roformer_models/bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt deleted file mode 100644 index 201059f3d93490e5cc91f20ea0bb74ae7c0dd20a..0000000000000000000000000000000000000000 --- a/Roformer_models/bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3cf11736d1b42a11ae55d8299316585921477dd2a671b24b663660846ca9861b -size 527119779 diff --git a/Roformer_models/bs_roformer_voc_gabox.ckpt b/Roformer_models/bs_roformer_voc_gabox.ckpt deleted file mode 100644 index cb4a0c8ef7ec4378b27e79a01eb491a2d699a535..0000000000000000000000000000000000000000 --- a/Roformer_models/bs_roformer_voc_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:18d58efe5e949e70fab11b875329af6d06ef11ccc29574bfe943fb57cc827f38 -size 639254584 diff --git a/Roformer_models/config_aspiration_mel_band_roformer.yaml b/Roformer_models/config_aspiration_mel_band_roformer.yaml deleted file mode 100644 index 75983773005de1549919f2f50dc456f76f199b18..0000000000000000000000000000000000000000 --- a/Roformer_models/config_aspiration_mel_band_roformer.yaml +++ /dev/null @@ -1,76 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 801 # don't work (use in model) - hop_length: 441 # don't work (use in model) - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 256 - depth: 8 - stereo: true - num_stems: 2 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0.1 - ff_dropout: 0.1 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 1 - gradient_accumulation_steps: 8 - grad_clip: 0 - instruments: - - aspiration - - other - lr: 4.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: null - num_epochs: 1000 - num_steps: 1000 - q: 0.95 - coarse_loss_clip: true - ema_momentum: 0.999 - optimizer: adam - other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -augmentations: - enable: true # enable or disable all augmentations (to fast disable if needed) - loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) - loudness_min: 0.5 - loudness_max: 1.5 - mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) - mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) - - 0.2 - - 0.02 - mixup_loudness_min: 0.5 - mixup_loudness_max: 1.5 - -inference: - batch_size: 4 - dim_t: 801 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/config_bs_roformer_voc_gabox.yaml b/Roformer_models/config_bs_roformer_voc_gabox.yaml deleted file mode 100644 index c4a3d323322d75af7d981e9de2ef3fa29e786812..0000000000000000000000000000000000000000 --- a/Roformer_models/config_bs_roformer_voc_gabox.yaml +++ /dev/null @@ -1,133 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 801 # don't work (use in model) - hop_length: 441 # don't work (use in model) - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.001 - -model: - dim: 512 - depth: 12 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - freqs_per_bands: !!python/tuple - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 128 - - 129 - dim_head: 64 - heads: 8 - attn_dropout: 0.1 - ff_dropout: 0.1 - flash_attn: true - dim_freqs_in: 1025 - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: false - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 16 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - Vocals - - Instrumental - lr: 5.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: Vocals - num_epochs: 1000 - num_steps: 1000 - augmentation: false # enable augmentations by audiomentations and pedalboard - augmentation_type: simple1 - use_mp3_compress: false # Deprecated - augmentation_mix: true # Mix several stems of the same type with some probability - augmentation_loudness: true # randomly change loudness of each stem - augmentation_loudness_type: 1 # Type 1 or 2 - augmentation_loudness_min: 0.5 - augmentation_loudness_max: 1.5 - q: 0.95 - coarse_loss_clip: true - ema_momentum: 0.999 - optimizer: adam - other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -inference: - batch_size: 1 - dim_t: 801 - num_overlap: 4 \ No newline at end of file diff --git a/Roformer_models/config_chorus_male_female_bs_roformer.yaml b/Roformer_models/config_chorus_male_female_bs_roformer.yaml deleted file mode 100644 index eab8413f38d13a0ee021fd752c85fb20b65a376f..0000000000000000000000000000000000000000 --- a/Roformer_models/config_chorus_male_female_bs_roformer.yaml +++ /dev/null @@ -1,125 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 801 # don't work (use in model) - hop_length: 441 # don't work (use in model) - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 384 - depth: 8 - stereo: true - num_stems: 2 - time_transformer_depth: 1 - freq_transformer_depth: 1 - freqs_per_bands: !!python/tuple - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 128 - - 129 - dim_head: 64 - heads: 8 - attn_dropout: 0.0 - ff_dropout: 0.0 - flash_attn: true - dim_freqs_in: 1025 - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: false - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 1 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - male - - female - lr: 1.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: null - num_epochs: 1000 - num_steps: 1000 - q: 0.95 - coarse_loss_clip: true - ema_momentum: 0.999 - optimizer: adam - other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -inference: - batch_size: 1 - dim_t: 801 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/config_dereverb-echo_mel_band_roformer.yaml b/Roformer_models/config_dereverb-echo_mel_band_roformer.yaml deleted file mode 100644 index bf766a04152c42bb2f16e6b2929a1024c6d550f5..0000000000000000000000000000000000000000 --- a/Roformer_models/config_dereverb-echo_mel_band_roformer.yaml +++ /dev/null @@ -1,76 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 801 # don't work (use in model) - hop_length: 441 # don't work (use in model) - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 256 - depth: 8 - stereo: true - num_stems: 2 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0.1 - ff_dropout: 0.1 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 1 - gradient_accumulation_steps: 8 - grad_clip: 0 - instruments: - - dry - - No dry - lr: 4.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: null - num_epochs: 1000 - num_steps: 1000 - q: 0.95 - coarse_loss_clip: true - ema_momentum: 0.999 - optimizer: adam - other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -augmentations: - enable: true # enable or disable all augmentations (to fast disable if needed) - loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) - loudness_min: 0.5 - loudness_max: 1.5 - mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) - mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) - - 0.2 - - 0.02 - mixup_loudness_min: 0.5 - mixup_loudness_max: 1.5 - -inference: - batch_size: 4 - dim_t: 801 - num_overlap: 4 \ No newline at end of file diff --git a/Roformer_models/config_dereverb-echo_mel_band_roformer_sdr_13.4843_v2.yaml b/Roformer_models/config_dereverb-echo_mel_band_roformer_sdr_13.4843_v2.yaml deleted file mode 100644 index b9ed3ca2589954b800872e65c72b2eff115237ac..0000000000000000000000000000000000000000 --- a/Roformer_models/config_dereverb-echo_mel_band_roformer_sdr_13.4843_v2.yaml +++ /dev/null @@ -1,64 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 801 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 256 - depth: 8 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0.1 - ff_dropout: 0.1 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 1 - gradient_accumulation_steps: 8 - grad_clip: 0 - instruments: - - dry - - No dry - lr: 1.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: dry - num_epochs: 1000 - num_steps: 1000 - q: 0.95 - coarse_loss_clip: true - ema_momentum: 0.999 - optimizer: adam - other_fix: false - use_amp: true - -inference: - batch_size: 1 - dim_t: 801 - num_overlap: 4 \ No newline at end of file diff --git a/Roformer_models/config_dereverb_echo_mel_band_roformer_v2.yaml b/Roformer_models/config_dereverb_echo_mel_band_roformer_v2.yaml deleted file mode 100644 index 18213b12b2e4c157bca21bd5c8dc922b634e12da..0000000000000000000000000000000000000000 --- a/Roformer_models/config_dereverb_echo_mel_band_roformer_v2.yaml +++ /dev/null @@ -1,64 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 801 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 256 - depth: 8 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0.1 - ff_dropout: 0.1 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 1 - gradient_accumulation_steps: 8 - grad_clip: 0 - instruments: - - dry - - other - lr: 1.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: dry - num_epochs: 1000 - num_steps: 1000 - q: 0.95 - coarse_loss_clip: true - ema_momentum: 0.999 - optimizer: adam - other_fix: false - use_amp: true - -inference: - batch_size: 1 - dim_t: 801 - num_overlap: 4 \ No newline at end of file diff --git a/Roformer_models/config_mel_band_roformer_bleed_suppressor_v1.yaml b/Roformer_models/config_mel_band_roformer_bleed_suppressor_v1.yaml deleted file mode 100644 index 333fb15b0079c324821981b54b75945f1d8440ac..0000000000000000000000000000000000000000 --- a/Roformer_models/config_mel_band_roformer_bleed_suppressor_v1.yaml +++ /dev/null @@ -1,51 +0,0 @@ -audio: - chunk_size: 485100 - dim_f: 1024 - dim_t: 801 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - instruments: - - Instrumental - - Bleed - target_instrument: Instrumental - use_amp: True - -inference: - batch_size: 1 - dim_t: 801 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/config_mel_band_roformer_inst_gabox.yaml b/Roformer_models/config_mel_band_roformer_inst_gabox.yaml deleted file mode 100644 index b1395e978d64cb1c37d3015adc2feeb0805e3b94..0000000000000000000000000000000000000000 --- a/Roformer_models/config_mel_band_roformer_inst_gabox.yaml +++ /dev/null @@ -1,51 +0,0 @@ -audio: - chunk_size: 485100 - dim_f: 1024 - dim_t: 1101 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - instruments: - - Instrumental - - Vocals - target_instrument: Instrumental - use_amp: True - -inference: - batch_size: 1 - dim_t: 1101 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/config_mel_band_roformer_instrumental_becruily.yaml b/Roformer_models/config_mel_band_roformer_instrumental_becruily.yaml deleted file mode 100644 index 862010f34a3765fa1ac9f22c04ba74042b2fd086..0000000000000000000000000000000000000000 --- a/Roformer_models/config_mel_band_roformer_instrumental_becruily.yaml +++ /dev/null @@ -1,72 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 256 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 1 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - Instrumental - - Vocals - lr: 0.0005 - patience: 2 - reduce_factor: 0.95 - target_instrument: Instrumental - num_epochs: 1000 - num_steps: 1000 - augmentation: false # enable augmentations by audiomentations and pedalboard - augmentation_type: null - use_mp3_compress: false # Deprecated - augmentation_mix: false # Mix several stems of the same type with some probability - augmentation_loudness: false # randomly change loudness of each stem - augmentation_loudness_type: 1 # Type 1 or 2 - augmentation_loudness_min: 0 - augmentation_loudness_max: 0 - q: 0.95 - coarse_loss_clip: false - ema_momentum: 0.999 - optimizer: adamw - other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -inference: - batch_size: 1 - dim_t: 1101 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/config_mel_band_roformer_kim_ft_unwa.yaml b/Roformer_models/config_mel_band_roformer_kim_ft_unwa.yaml deleted file mode 100644 index d0527f99399af7f504ead83ce75e6715cd190e56..0000000000000000000000000000000000000000 --- a/Roformer_models/config_mel_band_roformer_kim_ft_unwa.yaml +++ /dev/null @@ -1,72 +0,0 @@ -audio: - chunk_size: 485100 - dim_f: 1024 - dim_t: 801 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 1 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - vocals - - other - lr: 1.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: vocals - num_epochs: 1000 - num_steps: 1000 - augmentation: false # enable augmentations by audiomentations and pedalboard - augmentation_type: null - use_mp3_compress: false # Deprecated - augmentation_mix: false # Mix several stems of the same type with some probability - augmentation_loudness: false # randomly change loudness of each stem - augmentation_loudness_type: 1 # Type 1 or 2 - augmentation_loudness_min: 0 - augmentation_loudness_max: 0 - q: 0.95 - coarse_loss_clip: false - ema_momentum: 0.999 - optimizer: adam - other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -inference: - batch_size: 1 - dim_t: 801 - num_overlap: 8 \ No newline at end of file diff --git a/Roformer_models/config_mel_band_roformer_voc_gabox.yaml b/Roformer_models/config_mel_band_roformer_voc_gabox.yaml deleted file mode 100644 index 8130c9958eead0d2efd27f27f4f39ea5ca051a26..0000000000000000000000000000000000000000 --- a/Roformer_models/config_mel_band_roformer_voc_gabox.yaml +++ /dev/null @@ -1,51 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 256 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.001 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - instruments: - - Vocals - - Instrumental - target_instrument: Vocals - -inference: - batch_size: 1 - dim_t: 1101 - num_overlap: 1 - chunk_size: 352800 \ No newline at end of file diff --git a/Roformer_models/config_mel_band_roformer_vocals_becruily.yaml b/Roformer_models/config_mel_band_roformer_vocals_becruily.yaml deleted file mode 100644 index 2d42333851e31c9e5747d818efec365921358a7c..0000000000000000000000000000000000000000 --- a/Roformer_models/config_mel_band_roformer_vocals_becruily.yaml +++ /dev/null @@ -1,72 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 256 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 1 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - vocals - - other - lr: 0.0005 - patience: 2 - reduce_factor: 0.95 - target_instrument: vocals - num_epochs: 1000 - num_steps: 1000 - augmentation: false # enable augmentations by audiomentations and pedalboard - augmentation_type: null - use_mp3_compress: false # Deprecated - augmentation_mix: false # Mix several stems of the same type with some probability - augmentation_loudness: false # randomly change loudness of each stem - augmentation_loudness_type: 1 # Type 1 or 2 - augmentation_loudness_min: 0 - augmentation_loudness_max: 0 - q: 0.95 - coarse_loss_clip: false - ema_momentum: 0.999 - optimizer: adamw - other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -inference: - batch_size: 1 - dim_t: 1101 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/config_mel_band_roformer_vocals_fullness_aname.yaml b/Roformer_models/config_mel_band_roformer_vocals_fullness_aname.yaml deleted file mode 100644 index 32fee9c05a82b72931cbb9a8af7d948a538532cb..0000000000000000000000000000000000000000 --- a/Roformer_models/config_mel_band_roformer_vocals_fullness_aname.yaml +++ /dev/null @@ -1,54 +0,0 @@ -audio: - chunk_size: 661500 - dim_f: 1024 - dim_t: 256 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.001 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 1 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - vocals - - other - target_instrument: vocals - use_amp: true - -inference: - batch_size: 4 - dim_t: 1101 - num_overlap: 4 \ No newline at end of file diff --git a/Roformer_models/config_melband_roformer_big_beta5e.yaml b/Roformer_models/config_melband_roformer_big_beta5e.yaml deleted file mode 100644 index 66f782d66d31113f304fe07aa1ce663952030676..0000000000000000000000000000000000000000 --- a/Roformer_models/config_melband_roformer_big_beta5e.yaml +++ /dev/null @@ -1,51 +0,0 @@ -audio: - chunk_size: 485100 - dim_f: 1024 - dim_t: 801 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 3 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - instruments: - - vocals - - other - target_instrument: vocals - use_amp: True - -inference: - batch_size: 1 - dim_t: 801 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/config_melband_roformer_big_beta6.yaml b/Roformer_models/config_melband_roformer_big_beta6.yaml deleted file mode 100644 index fb175852d74ba1b04ab1bcfd0fc88e8c5c26f206..0000000000000000000000000000000000000000 --- a/Roformer_models/config_melband_roformer_big_beta6.yaml +++ /dev/null @@ -1,72 +0,0 @@ -audio: - chunk_size: 529200 - dim_f: 1024 - dim_t: 256 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 512 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 1 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - vocals - - other - lr: 1.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: vocals - num_epochs: 1000 - num_steps: 1000 - augmentation: false # enable augmentations by audiomentations and pedalboard - augmentation_type: null - use_mp3_compress: false # Deprecated - augmentation_mix: false # Mix several stems of the same type with some probability - augmentation_loudness: false # randomly change loudness of each stem - augmentation_loudness_type: 1 # Type 1 or 2 - augmentation_loudness_min: 0 - augmentation_loudness_max: 0 - q: 0.95 - coarse_loss_clip: false - ema_momentum: 0.999 - optimizer: adam - other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -inference: - batch_size: 2 - dim_t: 1201 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/config_melband_roformer_big_beta6x.yaml b/Roformer_models/config_melband_roformer_big_beta6x.yaml deleted file mode 100644 index 7537b31741f9dec3d604f33b859ce7783bb23da6..0000000000000000000000000000000000000000 --- a/Roformer_models/config_melband_roformer_big_beta6x.yaml +++ /dev/null @@ -1,72 +0,0 @@ -audio: - chunk_size: 529200 - dim_f: 1024 - dim_t: 256 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 512 - depth: 12 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 1 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - vocals - - other - lr: 1.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: vocals - num_epochs: 1000 - num_steps: 1000 - augmentation: false # enable augmentations by audiomentations and pedalboard - augmentation_type: null - use_mp3_compress: false # Deprecated - augmentation_mix: false # Mix several stems of the same type with some probability - augmentation_loudness: false # randomly change loudness of each stem - augmentation_loudness_type: 1 # Type 1 or 2 - augmentation_loudness_min: 0 - augmentation_loudness_max: 0 - q: 0.95 - coarse_loss_clip: false - ema_momentum: 0.999 - optimizer: adam - other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -inference: - batch_size: 2 - dim_t: 1201 - num_overlap: 2 diff --git a/Roformer_models/config_melband_roformer_small_by_aname.yaml b/Roformer_models/config_melband_roformer_small_by_aname.yaml deleted file mode 100644 index 9aa577d3470dbab7d2c7aba01883f2d179521a49..0000000000000000000000000000000000000000 --- a/Roformer_models/config_melband_roformer_small_by_aname.yaml +++ /dev/null @@ -1,52 +0,0 @@ -audio: - chunk_size: 485100 - dim_f: 1024 - dim_t: 1101 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.0 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - mlp_expansion_factor: 1 - -training: - instruments: - - Instrumental - - Vocals - target_instrument: null - use_amp: true - -inference: - batch_size: 2 - dim_t: 1101 - num_overlap: 4 \ No newline at end of file diff --git a/Roformer_models/config_melbandroformer_big_beta4.yaml b/Roformer_models/config_melbandroformer_big_beta4.yaml deleted file mode 100644 index 7dba2f17a3f8b3b5e7ec94bb9c4becb1acc1724a..0000000000000000000000000000000000000000 --- a/Roformer_models/config_melbandroformer_big_beta4.yaml +++ /dev/null @@ -1,51 +0,0 @@ -audio: - chunk_size: 485100 - dim_f: 1024 - dim_t: 1101 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 384 - depth: 12 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 3 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - instruments: - - vocals - - other - target_instrument: vocals - use_amp: True - -inference: - batch_size: 1 - dim_t: 1101 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/config_melbandroformer_inst.yaml b/Roformer_models/config_melbandroformer_inst.yaml deleted file mode 100644 index d6bdca342644a1194427fe505e2044c5006a1213..0000000000000000000000000000000000000000 --- a/Roformer_models/config_melbandroformer_inst.yaml +++ /dev/null @@ -1,51 +0,0 @@ -audio: - chunk_size: 485100 - dim_f: 1024 - dim_t: 1101 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - instruments: - - other - - vocals - target_instrument: other - use_amp: True - -inference: - batch_size: 1 - dim_t: 1101 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/config_melbandroformer_inst_v2.yaml b/Roformer_models/config_melbandroformer_inst_v2.yaml deleted file mode 100644 index d4297c088f7b8bd2f28308d8a8d1e0694cdec967..0000000000000000000000000000000000000000 --- a/Roformer_models/config_melbandroformer_inst_v2.yaml +++ /dev/null @@ -1,51 +0,0 @@ -audio: - chunk_size: 485100 - dim_f: 1024 - dim_t: 1101 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 384 - depth: 12 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 3 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - instruments: - - Instrumental - - Vocals - target_instrument: Instrumental - use_amp: True - -inference: - batch_size: 1 - dim_t: 1101 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/config_melbandroformer_instvoc_duality.yaml b/Roformer_models/config_melbandroformer_instvoc_duality.yaml deleted file mode 100644 index b93e721853f4d90efa7f0bead82f6a1b791fc19f..0000000000000000000000000000000000000000 --- a/Roformer_models/config_melbandroformer_instvoc_duality.yaml +++ /dev/null @@ -1,51 +0,0 @@ -audio: - chunk_size: 485100 - dim_f: 1024 - dim_t: 256 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 2 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - instruments: - - Vocals - - Instrumental - target_instrument: null - use_amp: True - -inference: - batch_size: 1 - dim_t: 1101 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/config_vocals_mel_band_roformer_big_v1_ft.yaml b/Roformer_models/config_vocals_mel_band_roformer_big_v1_ft.yaml deleted file mode 100644 index 66f782d66d31113f304fe07aa1ce663952030676..0000000000000000000000000000000000000000 --- a/Roformer_models/config_vocals_mel_band_roformer_big_v1_ft.yaml +++ /dev/null @@ -1,51 +0,0 @@ -audio: - chunk_size: 485100 - dim_f: 1024 - dim_t: 801 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 3 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - instruments: - - vocals - - other - target_instrument: vocals - use_amp: True - -inference: - batch_size: 1 - dim_t: 801 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/config_vocals_mel_band_roformer_ft.yaml b/Roformer_models/config_vocals_mel_band_roformer_ft.yaml deleted file mode 100644 index c53d3e0912cd12d69ddd923dbb760a6c2b4d5000..0000000000000000000000000000000000000000 --- a/Roformer_models/config_vocals_mel_band_roformer_ft.yaml +++ /dev/null @@ -1,72 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 256 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 1 - gradient_accumulation_steps: 8 - grad_clip: 0 - instruments: - - vocals - - other - lr: 1.0e-04 - patience: 2 - reduce_factor: 0.95 - target_instrument: vocals - num_epochs: 1000 - num_steps: 100 - augmentation: true # enable augmentations by audiomentations and pedalboard - augmentation_type: null - use_mp3_compress: false # Deprecated - augmentation_mix: true # Mix several stems of the same type with some probability - augmentation_loudness: true # randomly change loudness of each stem - augmentation_loudness_type: 1 # Type 1 or 2 - augmentation_loudness_min: 0 - augmentation_loudness_max: 0 - q: 0.95 - coarse_loss_clip: false - ema_momentum: 0.999 - optimizer: adamw8bit - other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -inference: - batch_size: 4 - dim_t: 256 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt b/Roformer_models/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt deleted file mode 100644 index 3e232e1d5ebb6e2c74cf9803ca9fb4a3fb860d17..0000000000000000000000000000000000000000 --- a/Roformer_models/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a25e3b233722cd81e2de7b8e798a3fef29d4b9799ccacda60b0dc958a1e2a5bb -size 913097300 diff --git a/Roformer_models/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768_config.yaml b/Roformer_models/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768_config.yaml deleted file mode 100644 index 265e19c806778d7b2d5ffdaef9e3d503a6dba3f1..0000000000000000000000000000000000000000 --- a/Roformer_models/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768_config.yaml +++ /dev/null @@ -1,71 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 801 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 000 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 2 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - dry - - other - lr: 1.0e-05 - patience: 8 - reduce_factor: 0.95 - target_instrument: dry - num_epochs: 1000 - num_steps: 4032 - augmentation: false # enable augmentations by audiomentations and pedalboard - augmentation_type: null - use_mp3_compress: false # Deprecated - augmentation_mix: false # Mix several stems of the same type with some probability - augmentation_loudness: false # randomly change loudness of each stem - augmentation_loudness_type: 1 # Type 1 or 2 - augmentation_loudness_min: 0 - augmentation_loudness_max: 0 - q: 0.95 - coarse_loss_clip: false - ema_momentum: 0.999 - optimizer: adam - other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental - -inference: - batch_size: 2 - dim_t: 801 - num_overlap: 4 \ No newline at end of file diff --git a/Roformer_models/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt b/Roformer_models/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt deleted file mode 100644 index 8d1b634e263572a5a955577112e77036279c6287..0000000000000000000000000000000000000000 --- a/Roformer_models/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7c1c39191edc34e942ca7f2346ce6b6c0e1208a5f76349ffce6f696bd12910de -size 913097300 diff --git a/Roformer_models/denoise_mel_band_roformer_aufr33_sdr_27.9959_config.yaml b/Roformer_models/denoise_mel_band_roformer_aufr33_sdr_27.9959_config.yaml deleted file mode 100644 index 265e19c806778d7b2d5ffdaef9e3d503a6dba3f1..0000000000000000000000000000000000000000 --- a/Roformer_models/denoise_mel_band_roformer_aufr33_sdr_27.9959_config.yaml +++ /dev/null @@ -1,71 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 801 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 000 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 2 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - dry - - other - lr: 1.0e-05 - patience: 8 - reduce_factor: 0.95 - target_instrument: dry - num_epochs: 1000 - num_steps: 4032 - augmentation: false # enable augmentations by audiomentations and pedalboard - augmentation_type: null - use_mp3_compress: false # Deprecated - augmentation_mix: false # Mix several stems of the same type with some probability - augmentation_loudness: false # randomly change loudness of each stem - augmentation_loudness_type: 1 # Type 1 or 2 - augmentation_loudness_min: 0 - augmentation_loudness_max: 0 - q: 0.95 - coarse_loss_clip: false - ema_momentum: 0.999 - optimizer: adam - other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental - -inference: - batch_size: 2 - dim_t: 801 - num_overlap: 4 \ No newline at end of file diff --git a/Roformer_models/dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt b/Roformer_models/dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt deleted file mode 100644 index b2ec32944c1f4f9c802a1ad02d38682929c77a58..0000000000000000000000000000000000000000 --- a/Roformer_models/dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cd2b737a394cfb80cd48cc9fcbaf89f5f4062f6b93066c2911617a06d8b7860a -size 835997896 diff --git a/Roformer_models/dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt b/Roformer_models/dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt deleted file mode 100644 index 5baa2b3d302d0ab984148ff1f42793220b29c7ca..0000000000000000000000000000000000000000 --- a/Roformer_models/dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:396432f5af25992fe82d0286634bd879027c073721db6ab10199e75459708b9f -size 455862568 diff --git a/Roformer_models/dereverb_big_mbr_ep_362.ckpt b/Roformer_models/dereverb_big_mbr_ep_362.ckpt deleted file mode 100644 index 814f35f8718aab66733f43e60a034fa933dd2f7f..0000000000000000000000000000000000000000 --- a/Roformer_models/dereverb_big_mbr_ep_362.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0506455e74ffc02bbec700df9863ae243597034003815f1418227c6dee33b6ea -size 455864012 diff --git a/Roformer_models/dereverb_echo_mbr_fused.ckpt b/Roformer_models/dereverb_echo_mbr_fused.ckpt deleted file mode 100644 index 2d495600912d8c596c4f37c1a902c9b918e934fd..0000000000000000000000000000000000000000 --- a/Roformer_models/dereverb_echo_mbr_fused.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1596b1063238f487d54a0510a8c92cb28c000c803a271dd618ac49efc99ef3f7 -size 455776577 diff --git a/Roformer_models/dereverb_mel_band_roformer_anvuew.yaml b/Roformer_models/dereverb_mel_band_roformer_anvuew.yaml deleted file mode 100644 index f4be13c9f337b26a2e11f89aaf24bc873cc23ce9..0000000000000000000000000000000000000000 --- a/Roformer_models/dereverb_mel_band_roformer_anvuew.yaml +++ /dev/null @@ -1,76 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 801 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.000 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 3 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - noreverb - - reverb - lr: 5.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: noreverb - num_epochs: 1000 - num_steps: 4000 - q: 0.95 - coarse_loss_clip: false - ema_momentum: 0.999 - optimizer: adamw - other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -augmentations: - enable: true # enable or disable all augmentations (to fast disable if needed) - loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) - loudness_min: 0.1 - loudness_max: 1.0 - mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) - mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) - - 0.2 - - 0.02 - mixup_loudness_min: 0.5 - mixup_loudness_max: 1.5 - -inference: - batch_size: 1 - dim_t: 801 - num_overlap: 2 \ No newline at end of file diff --git a/Roformer_models/dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt b/Roformer_models/dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt deleted file mode 100644 index 8f21995361b651a6c446cb4e1e7664d94eb42bff..0000000000000000000000000000000000000000 --- a/Roformer_models/dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9262877b87e9ebb0fb808a456b0a411fa677f5df31c8383c1254af531c078970 -size 913107578 diff --git a/Roformer_models/dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt b/Roformer_models/dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt deleted file mode 100644 index 45d5062aae81caa8df3565e7d10c818f076cc588..0000000000000000000000000000000000000000 --- a/Roformer_models/dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0db8f1b41c00cead1112e967262a12802fd32e76c0c3a8eb207e772bae25d07b -size 913107578 diff --git a/Roformer_models/dereverb_mel_band_roformer_mono_anvuew_sdr_20.4029.ckpt b/Roformer_models/dereverb_mel_band_roformer_mono_anvuew_sdr_20.4029.ckpt deleted file mode 100644 index 06855114df2802baa49c15faf4b39a743b84e27b..0000000000000000000000000000000000000000 --- a/Roformer_models/dereverb_mel_band_roformer_mono_anvuew_sdr_20.4029.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f099ee717eb57fb0ad5eb0e7c9ad6787c36168140b61ce2b158b90c2c4ecee79 -size 913097978 diff --git a/Roformer_models/dereverb_super_big_mbr_ep_346.ckpt b/Roformer_models/dereverb_super_big_mbr_ep_346.ckpt deleted file mode 100644 index bc9d947ff83f741caa2504b673eed1983e6ac008..0000000000000000000000000000000000000000 --- a/Roformer_models/dereverb_super_big_mbr_ep_346.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:26dda242bce4405555f2d6086d079fe8cc23f1f04e02e501d2689bfe3ece0489 -size 455864012 diff --git a/Roformer_models/deverb_bs_roformer_8_384dim_10depth.ckpt b/Roformer_models/deverb_bs_roformer_8_384dim_10depth.ckpt deleted file mode 100644 index 17a6af84d21d7c3cecf0c627811f42fee20b6d8e..0000000000000000000000000000000000000000 --- a/Roformer_models/deverb_bs_roformer_8_384dim_10depth.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9c38653aaa5e49f2f7b84dd3be2b6b679e0cbea23978e6b48389ee6f0a914768 -size 361499604 diff --git a/Roformer_models/deverb_bs_roformer_8_384dim_10depth_config.yaml b/Roformer_models/deverb_bs_roformer_8_384dim_10depth_config.yaml deleted file mode 100644 index d2c7ce0c4b424baa7731495c432102672b68cfa6..0000000000000000000000000000000000000000 --- a/Roformer_models/deverb_bs_roformer_8_384dim_10depth_config.yaml +++ /dev/null @@ -1,137 +0,0 @@ -audio: - chunk_size: 352768 - dim_f: 1024 - dim_t: 801 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.001 - -model: - dim: 384 - depth: 10 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - freqs_per_bands: !!python/tuple - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 128 - - 129 - dim_head: 64 - heads: 8 - attn_dropout: 0.1 - ff_dropout: 0.1 - flash_attn: true - dim_freqs_in: 1025 - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: false - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 1 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - noreverb - - reverb - lr: 5.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: noreverb - num_epochs: 1000 - num_steps: 1000 - q: 0.95 - coarse_loss_clip: true - ema_momentum: 0.999 - optimizer: adam - other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -augmentations: - enable: true # enable or disable all augmentations (to fast disable if needed) - loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) - loudness_min: 0.5 - loudness_max: 1.5 - mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) - mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) - - 0.2 - - 0.02 - mixup_loudness_min: 0.5 - mixup_loudness_max: 1.5 - -inference: - batch_size: 4 - dim_t: 801 - num_overlap: 4 diff --git a/Roformer_models/mel_band_roformer_bleed_suppressor_v1.ckpt b/Roformer_models/mel_band_roformer_bleed_suppressor_v1.ckpt deleted file mode 100644 index 4bf56278572c098907e036b63e094bd7c63c1a20..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_bleed_suppressor_v1.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a9a9d10faa7f8997676a78e66d741d7acb9cc449334763f3c8f626d68ec6e575 -size 913102724 diff --git a/Roformer_models/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt b/Roformer_models/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt deleted file mode 100644 index 8d59dd9875ffadf10ca18f66aa3d20a0e289ce90..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ca8799531fe51c94172cc047226209ed48bf7d8c02e04671795a15d2a1c318af -size 913096801 diff --git a/Roformer_models/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144_config.yaml b/Roformer_models/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144_config.yaml deleted file mode 100644 index 7e44ef94c71082af3a619c9b439f808ae8eb3e1c..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144_config.yaml +++ /dev/null @@ -1,71 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 801 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 000 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 2 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - crowd - - other - lr: 1.0e-05 - patience: 8 - reduce_factor: 0.95 - target_instrument: crowd - num_epochs: 1000 - num_steps: 4032 - augmentation: false # enable augmentations by audiomentations and pedalboard - augmentation_type: null - use_mp3_compress: false # Deprecated - augmentation_mix: false # Mix several stems of the same type with some probability - augmentation_loudness: false # randomly change loudness of each stem - augmentation_loudness_type: 1 # Type 1 or 2 - augmentation_loudness_min: 0 - augmentation_loudness_max: 0 - q: 0.95 - coarse_loss_clip: false - ema_momentum: 0.999 - optimizer: adam - other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental - -inference: - batch_size: 1 - dim_t: 801 - num_overlap: 4 \ No newline at end of file diff --git a/Roformer_models/mel_band_roformer_inst_bleedless_v1_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_bleedless_v1_gabox.ckpt deleted file mode 100644 index 881d22e5fc7fde06ca2c5de68277c4ab8bbceee7..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_bleedless_v1_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6109687febb8f18cd5a45207fee35f18ba8b9467b18a4b2e982a3b7dc04a9d72 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_bleedless_v2_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_bleedless_v2_gabox.ckpt deleted file mode 100644 index 65745b5be537052b903ae5f13f4e26f51912f6db..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_bleedless_v2_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:de972fb724601beef237abe94c8b934c73218e9baf3e344ab4c2333276e5cfe7 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_denoise_debleed_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_denoise_debleed_gabox.ckpt deleted file mode 100644 index 8b595aaf271c7d97cbca025436c370b5954e7786..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_denoise_debleed_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:91aa7a546ed2e93482e4629c982d35b0d258bb3de6eeab497fd91658cc86c7fd -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_fullness_v1_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_fullness_v1_gabox.ckpt deleted file mode 100644 index e73ab6f21e270a489e7310d9fe81a150cdfceb13..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_fullness_v1_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:31140eccf271d2a9e8a538b092b1f70dfd6471aa5ad163b22bccc758b9f38b62 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_fullness_v2_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_fullness_v2_gabox.ckpt deleted file mode 100644 index 6d849f3079743541477aeb8748cd2dac3f05e0d0..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_fullness_v2_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c446c34551333dd3d45b8d0708658a10f28c5e289f8ec27b5f0e22803681bef3 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_fullness_v3_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_fullness_v3_gabox.ckpt deleted file mode 100644 index c90caf360ea211abf7467c96c8bde8bf656fb521..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_fullness_v3_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fbb229209a8942d34664e19d2f4862e357ea3108a4e8c04b69aa0aba523a4481 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_fullness_v4_noise_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_fullness_v4_noise_gabox.ckpt deleted file mode 100644 index 5a49522bec839a399c6545d2f812bae2ff5f32a1..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_fullness_v4_noise_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f50296e913b9af3b5b3b961e92877ef0d4a74f9a433e796e89960c4c2b1abe53 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_fullness_v5_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_fullness_v5_gabox.ckpt deleted file mode 100644 index 1a192beafebebb2f4785e076cb13aba8a074b8ce..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_fullness_v5_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38e935cf1e97afcc1de84d0bdb87dd8090bad530fa0df28e707d16448e1d38e2 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_fullness_v5_noise_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_fullness_v5_noise_gabox.ckpt deleted file mode 100644 index 5cea44b8d02d4a8379a8fef7d0dc7e3a920b6294..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_fullness_v5_noise_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:175203923fac3e52ae00e7e37d41e8a7fef5020b6ee4e4144f4786daabc54b34 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_fullness_v6_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_fullness_v6_gabox.ckpt deleted file mode 100644 index 9e4e1ed19b5579f8914a2229b05a859b8cb0bf34..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_fullness_v6_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:677951b8556a27abe32e39705640638826e78101fa901a51ad73d20522be6d25 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_fullness_v6_noise_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_fullness_v6_noise_gabox.ckpt deleted file mode 100644 index edf54dd70e2304cc057d3055a46c02fbb79ef937..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_fullness_v6_noise_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:802f3e5d183d7c4b50dea147c320e61634f5be6ff55fa899fdebeaf0f3cf7f42 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_fullness_v7_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_fullness_v7_gabox.ckpt deleted file mode 100644 index 5f5aa9428c851e83a36c0432a3baf218aa6ca261..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_fullness_v7_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e725a860176acb475d983a1ddd9c1a99a619c69cc9ceda808dd294d10db746a5 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_fullness_v7_noise_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_fullness_v7_noise_gabox.ckpt deleted file mode 100644 index 0bb8eb969ef04c187caa14bc813e8c690d396036..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_fullness_v7_noise_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b0ca36af5d1314be46b56c8a53b6be02f98511fa5d7e3e196fd895755e65be3c -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_fullness_v8_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_fullness_v8_gabox.ckpt deleted file mode 100644 index 9816b322dacc2e3b6b87954a8202b64fdb78cddb..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_fullness_v8_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ba9efd5167ca3cf1c74dfac3b545a9609fdbe5dc1f468953f0d3e624fede99a0 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_fullness_vX_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_fullness_vX_gabox.ckpt deleted file mode 100644 index 93fa162e3077a085662452f147e5fbe0bacd108a..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_fullness_vX_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:545ef13b0cdbac505818a38db98e09c54e7c03ea17b4e0c895a531bfa352fa59 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_v1_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_v1_gabox.ckpt deleted file mode 100644 index 6ecff8779cfc8fa3961db8589050b44a43131203..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_v1_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7b3ad6bd8bed3aaaa4d9320ea2ca910d140196a2302186db1754f3a8d8e16fb1 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_v2_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_v2_gabox.ckpt deleted file mode 100644 index 7758aa5f0d19e21b8cb82550a8eddb7a7ab10e23..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_v2_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e03ca459c339f88b7521c367c897d0c3f5362b38a6cdb96cb28e625ca0f9931e -size 913026650 diff --git a/Roformer_models/mel_band_roformer_inst_v3_gabox.ckpt b/Roformer_models/mel_band_roformer_inst_v3_gabox.ckpt deleted file mode 100644 index 1596ed4c464e4346d614bc48bb64a996975bc445..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_inst_v3_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f9ec9f299cf617bf6afe1c382f4b0761cd9bee78323da94889951812328e10fb -size 913026650 diff --git a/Roformer_models/mel_band_roformer_instrumental_becruily.ckpt b/Roformer_models/mel_band_roformer_instrumental_becruily.ckpt deleted file mode 100644 index 9aa6c9ab355e9dfc090251f9703e299b9da7893d..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_instrumental_becruily.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a8da6632a1c25efb1c9be783ce9ea367d226d4b918cd6c3717c8b1d7a396041d -size 913106900 diff --git a/Roformer_models/mel_band_roformer_instrumental_gabox.ckpt b/Roformer_models/mel_band_roformer_instrumental_gabox.ckpt deleted file mode 100644 index 6ecff8779cfc8fa3961db8589050b44a43131203..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_instrumental_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7b3ad6bd8bed3aaaa4d9320ea2ca910d140196a2302186db1754f3a8d8e16fb1 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt b/Roformer_models/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt deleted file mode 100644 index 977a5c60ab478ab0b78ac5bc7e5296f9185f5549..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1de20d459332fe8869aeb01327a31df0032262706e1365114e852dc271779813 -size 913096801 diff --git a/Roformer_models/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956_config.yaml b/Roformer_models/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956_config.yaml deleted file mode 100644 index b88403c926bc5957a54ba90271f0cced47c8366f..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956_config.yaml +++ /dev/null @@ -1,71 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 801 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 000 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 4 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - Vocals - - Instrumental - lr: 1.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: Vocals - num_epochs: 1000 - num_steps: 2000 - augmentation: false # enable augmentations by audiomentations and pedalboard - augmentation_type: null - use_mp3_compress: false # Deprecated - augmentation_mix: false # Mix several stems of the same type with some probability - augmentation_loudness: false # randomly change loudness of each stem - augmentation_loudness_type: 1 # Type 1 or 2 - augmentation_loudness_min: 0 - augmentation_loudness_max: 0 - q: 0.95 - coarse_loss_clip: false - ema_momentum: 0.999 - optimizer: adam - other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true -inference: - batch_size: 1 - dim_t: 801 - num_overlap: 4 \ No newline at end of file diff --git a/Roformer_models/mel_band_roformer_karaoke_gabox.ckpt b/Roformer_models/mel_band_roformer_karaoke_gabox.ckpt deleted file mode 100644 index 69688626b9c8b8655e6eb549e7909f64ae862819..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_karaoke_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:303fc631e7aa587e9dc1e6ac4bb3667c6ba53aacb6b6a90abcfcf57935b92bd8 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_kim_ft2_bleedless_unwa.ckpt b/Roformer_models/mel_band_roformer_kim_ft2_bleedless_unwa.ckpt deleted file mode 100644 index eb05d0fc17641b53deabf68c1940dd237695026c..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_kim_ft2_bleedless_unwa.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3c450bd66a98b49dd03231fc5ebb84121eef8418236b179423c2b171d62b04d9 -size 913101368 diff --git a/Roformer_models/mel_band_roformer_kim_ft2_unwa.ckpt b/Roformer_models/mel_band_roformer_kim_ft2_unwa.ckpt deleted file mode 100644 index 64a9c082838de39b001e92ac622efe3de0c810da..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_kim_ft2_unwa.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5ed7b9e4c2eebbec7a7e5e8113058f7b68ba5e6048db8eaccfbbeb884c7884c0 -size 913100690 diff --git a/Roformer_models/mel_band_roformer_kim_ft_unwa.ckpt b/Roformer_models/mel_band_roformer_kim_ft_unwa.ckpt deleted file mode 100644 index 2912039564c3940b1fb51563a795e3f241841226..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_kim_ft_unwa.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e6bd8d333880191254a6ef6be3cb0ffa4dda9d3282e36b0cce2e88a660e00d39 -size 913100690 diff --git a/Roformer_models/mel_band_roformer_voc_fullness_v1_gabox.ckpt b/Roformer_models/mel_band_roformer_voc_fullness_v1_gabox.ckpt deleted file mode 100644 index 5b6fdc61e56b41297707de241f7c353a916c0494..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_voc_fullness_v1_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c4dff354d81152d1b4321f6491f242c060919148239fbfe22a1015513de4a7fe -size 913026650 diff --git a/Roformer_models/mel_band_roformer_voc_fullness_v2_gabox.ckpt b/Roformer_models/mel_band_roformer_voc_fullness_v2_gabox.ckpt deleted file mode 100644 index dccb5cbb2e7b8aa143e8cd8b803144646998abd5..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_voc_fullness_v2_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2888813aa5b519941fa8548efc5a4331d63c61909007eb17fe95c367be230196 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_voc_fullness_v3_gabox.ckpt b/Roformer_models/mel_band_roformer_voc_fullness_v3_gabox.ckpt deleted file mode 100644 index f5b0031173c6b54221895b9d66e9553037777cd5..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_voc_fullness_v3_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:49d81446b34a7848446efde7898b25bdc32fe872c2393617acb5356649f7ea93 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_voc_fullness_v4_gabox.ckpt b/Roformer_models/mel_band_roformer_voc_fullness_v4_gabox.ckpt deleted file mode 100644 index 16f6cdd40cf3df7b1fbbf3f3702c1f326cdd8f94..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_voc_fullness_v4_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1a9657de5fd3ed87ad4fd1a9d2069743ecb33424836973ad0f3288e2a64e90bc -size 913026650 diff --git a/Roformer_models/mel_band_roformer_voc_gabox.ckpt b/Roformer_models/mel_band_roformer_voc_gabox.ckpt deleted file mode 100644 index 709869d299af3267439c266eede088a1c19b0745..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_voc_gabox.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ff802a67501fac70587c3ff4e8dbc89c2558e7d8911c92222dfea2aaac208517 -size 913026650 diff --git a/Roformer_models/mel_band_roformer_vocals_becruily.ckpt b/Roformer_models/mel_band_roformer_vocals_becruily.ckpt deleted file mode 100644 index fda579c578c11b43fc8267f25fdbe0552ec36c1b..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_vocals_becruily.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a05961310cc55fbb901290c2e8be02682942f73522b6ac76bf2ec11e347ed95a -size 913107578 diff --git a/Roformer_models/mel_band_roformer_vocals_fullness_aname.ckpt b/Roformer_models/mel_band_roformer_vocals_fullness_aname.ckpt deleted file mode 100644 index 380b1baf015018a1c4ae18ef1ade63a6ab76e441..0000000000000000000000000000000000000000 --- a/Roformer_models/mel_band_roformer_vocals_fullness_aname.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a64a27a672b457de23d9decd1fc7b58b0664a9f4f24bb43af154708e2ef07d2f -size 913090472 diff --git a/Roformer_models/melband_roformer_big_beta4.ckpt b/Roformer_models/melband_roformer_big_beta4.ckpt deleted file mode 100644 index 62cb0ed8e023394924b1108a5335daa8b91d78d4..0000000000000000000000000000000000000000 --- a/Roformer_models/melband_roformer_big_beta4.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:700a9bd3831d4f7f44cc0019b238774e31045bcbc361fbb69235535c40fc1454 -size 1574477088 diff --git a/Roformer_models/melband_roformer_big_beta5e.ckpt b/Roformer_models/melband_roformer_big_beta5e.ckpt deleted file mode 100644 index a59473cd797f3c5fc6d6eb420d324145dc7c1104..0000000000000000000000000000000000000000 --- a/Roformer_models/melband_roformer_big_beta5e.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:32b876e1163716a9a007438b5a5107069586aa9b9ca653a5f63013b1edf6920c -size 1479749810 diff --git a/Roformer_models/melband_roformer_big_beta6.ckpt b/Roformer_models/melband_roformer_big_beta6.ckpt deleted file mode 100644 index 3e62cb6881f880ecadb2131661a7bffc1c651889..0000000000000000000000000000000000000000 --- a/Roformer_models/melband_roformer_big_beta6.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f51cbb94b4ed5c36cb36fd2024236a8af3ed6886567981702ad6f094b2c6c820 -size 1557078584 diff --git a/Roformer_models/melband_roformer_big_beta6x.ckpt b/Roformer_models/melband_roformer_big_beta6x.ckpt deleted file mode 100644 index 8a45e7206470ce16a36e6c5c60726331d16221db..0000000000000000000000000000000000000000 --- a/Roformer_models/melband_roformer_big_beta6x.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e16d702f4e20f13d60b293541c1dea75cb4414a5846b36780e28ef70352a4e5c -size 1708527586 diff --git a/Roformer_models/melband_roformer_inst_v1.ckpt b/Roformer_models/melband_roformer_inst_v1.ckpt deleted file mode 100644 index 0a5775dcf471eae22321cf27576da3155bd773e9..0000000000000000000000000000000000000000 --- a/Roformer_models/melband_roformer_inst_v1.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f88d96958b2b7dec32286b0ced00bbcbd37e28741cad9038758b1eaf9b5c057 -size 913100690 diff --git a/Roformer_models/melband_roformer_inst_v1e.ckpt b/Roformer_models/melband_roformer_inst_v1e.ckpt deleted file mode 100644 index f8370873342b453fb65d96b2fe069cbc0f8130e9..0000000000000000000000000000000000000000 --- a/Roformer_models/melband_roformer_inst_v1e.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:df2bcdb8838b88264f5381dbb0ccd84a9926c9775cf548c34d8846f5cd20fe96 -size 913102724 diff --git a/Roformer_models/melband_roformer_inst_v1e_plus.ckpt b/Roformer_models/melband_roformer_inst_v1e_plus.ckpt deleted file mode 100644 index d96e91089fa7a07f5c4052f55449fd6ed4047ce2..0000000000000000000000000000000000000000 --- a/Roformer_models/melband_roformer_inst_v1e_plus.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a4ddba739f0352407fb6e18b29206b82318ec427fe37fcedb0f83241e4e15fb -size 913090472 diff --git a/Roformer_models/melband_roformer_inst_v2.ckpt b/Roformer_models/melband_roformer_inst_v2.ckpt deleted file mode 100644 index e59c3365c4976b3c225f0cf9ab34342531dc6955..0000000000000000000000000000000000000000 --- a/Roformer_models/melband_roformer_inst_v2.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bd19766620f7d6f58fdf7aaada7e89907fe41bc64490ce3faa9a6dab15d6e1f2 -size 1574477088 diff --git a/Roformer_models/melband_roformer_instvoc_duality_v1.ckpt b/Roformer_models/melband_roformer_instvoc_duality_v1.ckpt deleted file mode 100644 index 2b99b0c8e53949042dce6f1db2300642212b31f7..0000000000000000000000000000000000000000 --- a/Roformer_models/melband_roformer_instvoc_duality_v1.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4640a59d454bf9f69d67460592ab71e7cdce3afa0c0a6f0cf4500bb4ac0b8381 -size 1719116358 diff --git a/Roformer_models/melband_roformer_instvoc_duality_v2.ckpt b/Roformer_models/melband_roformer_instvoc_duality_v2.ckpt deleted file mode 100644 index e4042424f484dbb10d5f76f807c04f9b7f307e23..0000000000000000000000000000000000000000 --- a/Roformer_models/melband_roformer_instvoc_duality_v2.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b4a69558708f2857e36ac86a0e03ed95c4e3d8b9c5b8113963987d0d7df7e20f -size 1719116358 diff --git a/Roformer_models/melband_roformer_small_by_aname.ckpt b/Roformer_models/melband_roformer_small_by_aname.ckpt deleted file mode 100644 index 5db369eedfade9297c978a4242c17b1e2769a572..0000000000000000000000000000000000000000 --- a/Roformer_models/melband_roformer_small_by_aname.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:20cc592a9dff7dc34a3fb0bf399cd68c950b03ff5f334725e74feb3d0186272f -size 202573672 diff --git a/Roformer_models/model_bs_roformer_ep_317_sdr_12.9755.ckpt b/Roformer_models/model_bs_roformer_ep_317_sdr_12.9755.ckpt deleted file mode 100644 index 1d4b892da79c875b3b3028f9f4d2504ebafe72e1..0000000000000000000000000000000000000000 --- a/Roformer_models/model_bs_roformer_ep_317_sdr_12.9755.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5b84f37e8d444c8cb30c79d77f613a41c05868ff9c9ac6c7049c00aefae115aa -size 639331213 diff --git a/Roformer_models/model_bs_roformer_ep_317_sdr_12.9755.yaml b/Roformer_models/model_bs_roformer_ep_317_sdr_12.9755.yaml deleted file mode 100644 index c4a3d323322d75af7d981e9de2ef3fa29e786812..0000000000000000000000000000000000000000 --- a/Roformer_models/model_bs_roformer_ep_317_sdr_12.9755.yaml +++ /dev/null @@ -1,133 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 801 # don't work (use in model) - hop_length: 441 # don't work (use in model) - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.001 - -model: - dim: 512 - depth: 12 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - freqs_per_bands: !!python/tuple - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 128 - - 129 - dim_head: 64 - heads: 8 - attn_dropout: 0.1 - ff_dropout: 0.1 - flash_attn: true - dim_freqs_in: 1025 - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: false - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 16 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - Vocals - - Instrumental - lr: 5.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: Vocals - num_epochs: 1000 - num_steps: 1000 - augmentation: false # enable augmentations by audiomentations and pedalboard - augmentation_type: simple1 - use_mp3_compress: false # Deprecated - augmentation_mix: true # Mix several stems of the same type with some probability - augmentation_loudness: true # randomly change loudness of each stem - augmentation_loudness_type: 1 # Type 1 or 2 - augmentation_loudness_min: 0.5 - augmentation_loudness_max: 1.5 - q: 0.95 - coarse_loss_clip: true - ema_momentum: 0.999 - optimizer: adam - other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -inference: - batch_size: 1 - dim_t: 801 - num_overlap: 4 \ No newline at end of file diff --git a/Roformer_models/model_bs_roformer_ep_368_sdr_12.9628.ckpt b/Roformer_models/model_bs_roformer_ep_368_sdr_12.9628.ckpt deleted file mode 100644 index 929fbf0bc57f788a3d6b758ee4feedd61976298b..0000000000000000000000000000000000000000 --- a/Roformer_models/model_bs_roformer_ep_368_sdr_12.9628.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f6c94864adfb73bbb0ca58ec14d58dd0b364549e9fb61433ae51916f3e2f8d0b -size 639317465 diff --git a/Roformer_models/model_bs_roformer_ep_368_sdr_12.9628.yaml b/Roformer_models/model_bs_roformer_ep_368_sdr_12.9628.yaml deleted file mode 100644 index fe893b1a68b8ae8ea8bb5a7ac2b7f12e0c53a826..0000000000000000000000000000000000000000 --- a/Roformer_models/model_bs_roformer_ep_368_sdr_12.9628.yaml +++ /dev/null @@ -1,133 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 801 # don't work (use in model) - hop_length: 441 # don't work (use in model) - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.001 - -model: - dim: 512 - depth: 12 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - freqs_per_bands: !!python/tuple - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 128 - - 129 - dim_head: 64 - heads: 8 - attn_dropout: 0.1 - ff_dropout: 0.1 - flash_attn: true - dim_freqs_in: 1025 - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: false - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 16 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - Vocals - - Instrumental - lr: 5.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: Vocals - num_epochs: 1000 - num_steps: 1000 - augmentation: false # enable augmentations by audiomentations and pedalboard - augmentation_type: simple1 - use_mp3_compress: false # Deprecated - augmentation_mix: true # Mix several stems of the same type with some probability - augmentation_loudness: true # randomly change loudness of each stem - augmentation_loudness_type: 1 # Type 1 or 2 - augmentation_loudness_min: 0.5 - augmentation_loudness_max: 1.5 - q: 0.95 - coarse_loss_clip: true - ema_momentum: 0.999 - optimizer: adam - other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -inference: - batch_size: 1 - dim_t: 801 - num_overlap: 4 diff --git a/Roformer_models/model_bs_roformer_ep_937_sdr_10.5309.ckpt b/Roformer_models/model_bs_roformer_ep_937_sdr_10.5309.ckpt deleted file mode 100644 index b30e4893bf1e4b198ea8005346b01efaa135c8b7..0000000000000000000000000000000000000000 --- a/Roformer_models/model_bs_roformer_ep_937_sdr_10.5309.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a2e825a03bc908cb04dbd88eddeefbf5147dd1cf1f95cebf453d9dbfabec494b -size 393068365 diff --git a/Roformer_models/model_bs_roformer_ep_937_sdr_10.5309.yaml b/Roformer_models/model_bs_roformer_ep_937_sdr_10.5309.yaml deleted file mode 100644 index f623832cc06ebc5fa8a049fad6b1319c6038336d..0000000000000000000000000000000000000000 --- a/Roformer_models/model_bs_roformer_ep_937_sdr_10.5309.yaml +++ /dev/null @@ -1,138 +0,0 @@ -audio: - chunk_size: 131584 - dim_f: 1024 - dim_t: 256 - hop_length: 512 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.001 - -model: - dim: 384 - depth: 12 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - linear_transformer_depth: 0 - freqs_per_bands: !!python/tuple - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 2 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 4 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 12 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 24 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 48 - - 128 - - 129 - dim_head: 64 - heads: 8 - attn_dropout: 0.1 - ff_dropout: 0.1 - flash_attn: true - dim_freqs_in: 1025 - stft_n_fft: 2048 - stft_hop_length: 512 - stft_win_length: 2048 - stft_normalized: false - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 4 - gradient_accumulation_steps: 1 - grad_clip: 0 - instruments: - - No Drum-Bass - - Drum-Bass - lr: 5.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: No Drum-Bass - num_epochs: 1000 - num_steps: 1000 - q: 0.95 - coarse_loss_clip: true - ema_momentum: 0.999 - optimizer: adam - other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -augmentations: - enable: true # enable or disable all augmentations (to fast disable if needed) - loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) - loudness_min: 0.5 - loudness_max: 1.5 - mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) - mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) - - 0.2 - - 0.02 - mixup_loudness_min: 0.5 - mixup_loudness_max: 1.5 - -inference: - batch_size: 1 - dim_t: 512 - num_overlap: 4 \ No newline at end of file diff --git a/Roformer_models/model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt b/Roformer_models/model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt deleted file mode 100644 index 4f9492413f4671a4c218f73cd6df2963e6b75fb8..0000000000000000000000000000000000000000 --- a/Roformer_models/model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:123c00786bdbc6bd462dddb35cd21fd6ae99ab8319f93f63a8abc1012e593d94 -size 527121477 diff --git a/Roformer_models/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt b/Roformer_models/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt deleted file mode 100644 index 1cb36d54103c4fdd30f7e8a089164cba50cfb0b0..0000000000000000000000000000000000000000 --- a/Roformer_models/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:21b9d0958e35b8ebfbe2afe69bbd5444e5ffe2f5d80ae0d583b833d2f3c0d139 -size 1007816988 diff --git a/Roformer_models/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml b/Roformer_models/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml deleted file mode 100644 index 7c906f2931cbae3cf64551c231e285ca10097fe5..0000000000000000000000000000000000000000 --- a/Roformer_models/model_mel_band_roformer_ep_3005_sdr_11.4360.yaml +++ /dev/null @@ -1,72 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 801 # don't work (use in model) - hop_length: 441 # don't work (use in model) - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.001 - -model: - dim: 384 - depth: 12 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0.1 - ff_dropout: 0.1 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - batch_size: 9 - gradient_accumulation_steps: 8 - grad_clip: 0 - instruments: - - Vocals - - Instrumental - lr: 4.0e-05 - patience: 2 - reduce_factor: 0.95 - target_instrument: Vocals - num_epochs: 1000 - num_steps: 1000 - augmentation: false # enable augmentations by audiomentations and pedalboard - augmentation_type: simple1 - use_mp3_compress: false # Deprecated - augmentation_mix: true # Mix several stems of the same type with some probability - augmentation_loudness: true # randomly change loudness of each stem - augmentation_loudness_type: 1 # Type 1 or 2 - augmentation_loudness_min: 0.5 - augmentation_loudness_max: 1.5 - q: 0.95 - coarse_loss_clip: true - ema_momentum: 0.999 - optimizer: adam - other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental - use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true - -inference: - batch_size: 1 - dim_t: 801 - num_overlap: 4 \ No newline at end of file diff --git a/Roformer_models/vocals_mel_band_roformer.ckpt b/Roformer_models/vocals_mel_band_roformer.ckpt deleted file mode 100644 index e9269937826d8cedf1855096bc9c1d49298bb4f8..0000000000000000000000000000000000000000 --- a/Roformer_models/vocals_mel_band_roformer.ckpt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:87201f4d31afb5bc79993230fc49446918425574db48c01c405e44f365c7559e -size 913106900 diff --git a/Roformer_models/vocals_mel_band_roformer.yaml b/Roformer_models/vocals_mel_band_roformer.yaml deleted file mode 100644 index 9cb005e7a97c66d5fb23bba8bb36bec9619cdd8f..0000000000000000000000000000000000000000 --- a/Roformer_models/vocals_mel_band_roformer.yaml +++ /dev/null @@ -1,50 +0,0 @@ -audio: - chunk_size: 352800 - dim_f: 1024 - dim_t: 256 - hop_length: 441 - n_fft: 2048 - num_channels: 2 - sample_rate: 44100 - min_mean_abs: 0.001 - -model: - dim: 384 - depth: 6 - stereo: true - num_stems: 1 - time_transformer_depth: 1 - freq_transformer_depth: 1 - num_bands: 60 - dim_head: 64 - heads: 8 - attn_dropout: 0 - ff_dropout: 0 - flash_attn: True - dim_freqs_in: 1025 - sample_rate: 44100 # needed for mel filter bank from librosa - stft_n_fft: 2048 - stft_hop_length: 441 - stft_win_length: 2048 - stft_normalized: False - mask_estimator_depth: 2 - multi_stft_resolution_loss_weight: 1.0 - multi_stft_resolutions_window_sizes: !!python/tuple - - 4096 - - 2048 - - 1024 - - 512 - - 256 - multi_stft_hop_size: 147 - multi_stft_normalized: False - -training: - instruments: - - vocals - - other - target_instrument: vocals - -inference: - dim_t: 1101 - num_overlap: 1 - chunk_size: 352800 \ No newline at end of file