diff --git a/models/Roformer/BandSplit/BS_Inst_EXP_VRL.ckpt b/models/Roformer/BandSplit/BS_Inst_EXP_VRL.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..72934845a8e32e8a5dc85c35b531767f4b964bcb --- /dev/null +++ b/models/Roformer/BandSplit/BS_Inst_EXP_VRL.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c035e2a102243405e45bf33faa175f62fd7118f63b62771fafdf81062b804131 +size 393351501 diff --git a/models/Roformer/BandSplit/bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt b/models/Roformer/BandSplit/bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..201059f3d93490e5cc91f20ea0bb74ae7c0dd20a --- /dev/null +++ b/models/Roformer/BandSplit/bs_roformer_male_female_by_aufr33_sdr_7.2889.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf11736d1b42a11ae55d8299316585921477dd2a671b24b663660846ca9861b +size 527119779 diff --git a/models/Roformer/BandSplit/bs_roformer_voc_gabox.ckpt b/models/Roformer/BandSplit/bs_roformer_voc_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..cb4a0c8ef7ec4378b27e79a01eb491a2d699a535 --- /dev/null +++ b/models/Roformer/BandSplit/bs_roformer_voc_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18d58efe5e949e70fab11b875329af6d06ef11ccc29574bfe943fb57cc827f38 +size 639254584 diff --git a/models/Roformer/BandSplit/config_bs_roformer_chorus_male_female.yaml b/models/Roformer/BandSplit/config_bs_roformer_chorus_male_female.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eab8413f38d13a0ee021fd752c85fb20b65a376f --- /dev/null +++ b/models/Roformer/BandSplit/config_bs_roformer_chorus_male_female.yaml @@ -0,0 +1,125 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 # don't work (use in model) + hop_length: 441 # don't work (use in model) + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 8 + stereo: true + num_stems: 2 + time_transformer_depth: 1 + freq_transformer_depth: 1 + freqs_per_bands: !!python/tuple + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 128 + - 129 + dim_head: 64 + heads: 8 + attn_dropout: 0.0 + ff_dropout: 0.0 + flash_attn: true + dim_freqs_in: 1025 + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: false + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 1 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - male + - female + lr: 1.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/BandSplit/config_bs_roformer_deverb_8_384dim_10depth.yaml b/models/Roformer/BandSplit/config_bs_roformer_deverb_8_384dim_10depth.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2c7ce0c4b424baa7731495c432102672b68cfa6 --- /dev/null +++ b/models/Roformer/BandSplit/config_bs_roformer_deverb_8_384dim_10depth.yaml @@ -0,0 +1,137 @@ +audio: + chunk_size: 352768 + dim_f: 1024 + dim_t: 801 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + dim: 384 + depth: 10 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + freqs_per_bands: !!python/tuple + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 128 + - 129 + dim_head: 64 + heads: 8 + attn_dropout: 0.1 + ff_dropout: 0.1 + flash_attn: true + dim_freqs_in: 1025 + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: false + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 1 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - noreverb + - reverb + lr: 5.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: noreverb + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +augmentations: + enable: true # enable or disable all augmentations (to fast disable if needed) + loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) + loudness_min: 0.5 + loudness_max: 1.5 + mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) + mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + +inference: + batch_size: 4 + dim_t: 801 + num_overlap: 4 diff --git a/models/Roformer/BandSplit/config_bs_roformer_ep_317_sdr_12.9755.yaml b/models/Roformer/BandSplit/config_bs_roformer_ep_317_sdr_12.9755.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c4a3d323322d75af7d981e9de2ef3fa29e786812 --- /dev/null +++ b/models/Roformer/BandSplit/config_bs_roformer_ep_317_sdr_12.9755.yaml @@ -0,0 +1,133 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 # don't work (use in model) + hop_length: 441 # don't work (use in model) + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + dim: 512 + depth: 12 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + freqs_per_bands: !!python/tuple + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 128 + - 129 + dim_head: 64 + heads: 8 + attn_dropout: 0.1 + ff_dropout: 0.1 + flash_attn: true + dim_freqs_in: 1025 + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: false + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 16 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 5.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: Vocals + num_epochs: 1000 + num_steps: 1000 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: simple1 + use_mp3_compress: false # Deprecated + augmentation_mix: true # Mix several stems of the same type with some probability + augmentation_loudness: true # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0.5 + augmentation_loudness_max: 1.5 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 4 \ No newline at end of file diff --git a/models/Roformer/BandSplit/config_bs_roformer_ep_368_sdr_12.9628.yaml b/models/Roformer/BandSplit/config_bs_roformer_ep_368_sdr_12.9628.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe893b1a68b8ae8ea8bb5a7ac2b7f12e0c53a826 --- /dev/null +++ b/models/Roformer/BandSplit/config_bs_roformer_ep_368_sdr_12.9628.yaml @@ -0,0 +1,133 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 # don't work (use in model) + hop_length: 441 # don't work (use in model) + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + dim: 512 + depth: 12 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + freqs_per_bands: !!python/tuple + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 128 + - 129 + dim_head: 64 + heads: 8 + attn_dropout: 0.1 + ff_dropout: 0.1 + flash_attn: true + dim_freqs_in: 1025 + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: false + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 16 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 5.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: Vocals + num_epochs: 1000 + num_steps: 1000 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: simple1 + use_mp3_compress: false # Deprecated + augmentation_mix: true # Mix several stems of the same type with some probability + augmentation_loudness: true # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0.5 + augmentation_loudness_max: 1.5 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 4 diff --git a/models/Roformer/BandSplit/config_bs_roformer_ep_937_sdr_10.5309.yaml b/models/Roformer/BandSplit/config_bs_roformer_ep_937_sdr_10.5309.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f623832cc06ebc5fa8a049fad6b1319c6038336d --- /dev/null +++ b/models/Roformer/BandSplit/config_bs_roformer_ep_937_sdr_10.5309.yaml @@ -0,0 +1,138 @@ +audio: + chunk_size: 131584 + dim_f: 1024 + dim_t: 256 + hop_length: 512 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + dim: 384 + depth: 12 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + linear_transformer_depth: 0 + freqs_per_bands: !!python/tuple + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 128 + - 129 + dim_head: 64 + heads: 8 + attn_dropout: 0.1 + ff_dropout: 0.1 + flash_attn: true + dim_freqs_in: 1025 + stft_n_fft: 2048 + stft_hop_length: 512 + stft_win_length: 2048 + stft_normalized: false + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 4 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - No Drum-Bass + - Drum-Bass + lr: 5.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: No Drum-Bass + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +augmentations: + enable: true # enable or disable all augmentations (to fast disable if needed) + loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) + mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + +inference: + batch_size: 1 + dim_t: 512 + num_overlap: 4 \ No newline at end of file diff --git a/models/Roformer/BandSplit/config_bs_roformer_inst_exp_vrl.yaml b/models/Roformer/BandSplit/config_bs_roformer_inst_exp_vrl.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8dc25ddd8268a169492e100b92627e38115ff055 --- /dev/null +++ b/models/Roformer/BandSplit/config_bs_roformer_inst_exp_vrl.yaml @@ -0,0 +1,124 @@ +audio: + chunk_size: 485100 #352800 #485100 + dim_f: 1024 + dim_t: 801 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 12 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + freqs_per_bands: !!python/tuple + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 128 + - 129 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: true + dim_freqs_in: 1025 + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: false + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False +training: + batch_size: 1 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 1.0e-04 + patience: 2 + reduce_factor: 0.95 + target_instrument: Instrumental + num_epochs: 1 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adamw + other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 1 + dim_t: 1101 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/BandSplit/config_bs_roformer_voc_gabox.yaml b/models/Roformer/BandSplit/config_bs_roformer_voc_gabox.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c4a3d323322d75af7d981e9de2ef3fa29e786812 --- /dev/null +++ b/models/Roformer/BandSplit/config_bs_roformer_voc_gabox.yaml @@ -0,0 +1,133 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 # don't work (use in model) + hop_length: 441 # don't work (use in model) + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + dim: 512 + depth: 12 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + freqs_per_bands: !!python/tuple + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 2 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 4 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 12 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 24 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 48 + - 128 + - 129 + dim_head: 64 + heads: 8 + attn_dropout: 0.1 + ff_dropout: 0.1 + flash_attn: true + dim_freqs_in: 1025 + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: false + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 16 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 5.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: Vocals + num_epochs: 1000 + num_steps: 1000 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: simple1 + use_mp3_compress: false # Deprecated + augmentation_mix: true # Mix several stems of the same type with some probability + augmentation_loudness: true # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0.5 + augmentation_loudness_max: 1.5 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 4 \ No newline at end of file diff --git a/models/Roformer/BandSplit/deverb_bs_roformer_8_384dim_10depth.ckpt b/models/Roformer/BandSplit/deverb_bs_roformer_8_384dim_10depth.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..17a6af84d21d7c3cecf0c627811f42fee20b6d8e --- /dev/null +++ b/models/Roformer/BandSplit/deverb_bs_roformer_8_384dim_10depth.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c38653aaa5e49f2f7b84dd3be2b6b679e0cbea23978e6b48389ee6f0a914768 +size 361499604 diff --git a/models/Roformer/BandSplit/model_bs_roformer_ep_317_sdr_12.9755.ckpt b/models/Roformer/BandSplit/model_bs_roformer_ep_317_sdr_12.9755.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..1d4b892da79c875b3b3028f9f4d2504ebafe72e1 --- /dev/null +++ b/models/Roformer/BandSplit/model_bs_roformer_ep_317_sdr_12.9755.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b84f37e8d444c8cb30c79d77f613a41c05868ff9c9ac6c7049c00aefae115aa +size 639331213 diff --git a/models/Roformer/BandSplit/model_bs_roformer_ep_368_sdr_12.9628.ckpt b/models/Roformer/BandSplit/model_bs_roformer_ep_368_sdr_12.9628.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..929fbf0bc57f788a3d6b758ee4feedd61976298b --- /dev/null +++ b/models/Roformer/BandSplit/model_bs_roformer_ep_368_sdr_12.9628.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c94864adfb73bbb0ca58ec14d58dd0b364549e9fb61433ae51916f3e2f8d0b +size 639317465 diff --git a/models/Roformer/BandSplit/model_bs_roformer_ep_937_sdr_10.5309.ckpt b/models/Roformer/BandSplit/model_bs_roformer_ep_937_sdr_10.5309.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..b30e4893bf1e4b198ea8005346b01efaa135c8b7 --- /dev/null +++ b/models/Roformer/BandSplit/model_bs_roformer_ep_937_sdr_10.5309.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2e825a03bc908cb04dbd88eddeefbf5147dd1cf1f95cebf453d9dbfabec494b +size 393068365 diff --git a/models/Roformer/BandSplit/model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt b/models/Roformer/BandSplit/model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..4f9492413f4671a4c218f73cd6df2963e6b75fb8 --- /dev/null +++ b/models/Roformer/BandSplit/model_chorus_bs_roformer_ep_267_sdr_24.1275.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:123c00786bdbc6bd462dddb35cd21fd6ae99ab8319f93f63a8abc1012e593d94 +size 527121477 diff --git a/models/Roformer/MelBand/MelBandRoformerBigSYHFTV1.ckpt b/models/Roformer/MelBand/MelBandRoformerBigSYHFTV1.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..3b07b85890712d20ad115ead701bf7799c3b9928 --- /dev/null +++ b/models/Roformer/MelBand/MelBandRoformerBigSYHFTV1.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2327e3e81f19e67c307f8c830c54267c09ecb0e9c6ad2b40a80c310899c955f +size 1479738496 diff --git a/models/Roformer/MelBand/MelBandRoformerSYHFT.ckpt b/models/Roformer/MelBand/MelBandRoformerSYHFT.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..fc8e114879a3761ce9e6f901cab7b0adbc7035b5 --- /dev/null +++ b/models/Roformer/MelBand/MelBandRoformerSYHFT.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f319dfcde4396ea3106658f457f5eb0bc577e113491f61ae8bab216fe84b0c0c +size 913096702 diff --git a/models/Roformer/MelBand/MelBandRoformerSYHFTV2.5.ckpt b/models/Roformer/MelBand/MelBandRoformerSYHFTV2.5.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..232404ba4a0d4e6d32b6f683711c58cd73b10c18 --- /dev/null +++ b/models/Roformer/MelBand/MelBandRoformerSYHFTV2.5.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:916e3a2c1e63b1457bcad823b98ca705e4933deffd2a5ab3a370e10f68bf47e2 +size 913090472 diff --git a/models/Roformer/MelBand/MelBandRoformerSYHFTV2.ckpt b/models/Roformer/MelBand/MelBandRoformerSYHFTV2.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..6b34c66f158e43e0f2c11b6df91a040c5a11a23c --- /dev/null +++ b/models/Roformer/MelBand/MelBandRoformerSYHFTV2.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e99f8efa5315300c197295592bd7e56c21c1d77e1884c904b5128c54a2a4632 +size 913095346 diff --git a/models/Roformer/MelBand/MelBandRoformerSYHFTV3Epsilon.ckpt b/models/Roformer/MelBand/MelBandRoformerSYHFTV3Epsilon.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..650b62bfd05945493a6529898ca0d0023ee7637d --- /dev/null +++ b/models/Roformer/MelBand/MelBandRoformerSYHFTV3Epsilon.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c886092e4aae13aa089263a0d54d483643f58c16ec221aed37268e2c1031397 +size 913090472 diff --git a/models/Roformer/MelBand/aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt b/models/Roformer/MelBand/aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..0dd326fa1c5c65e52583803582f154a0359a55bd --- /dev/null +++ b/models/Roformer/MelBand/aspiration_mel_band_roformer_less_aggr_sdr_18.1201.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83bfe991cec4fbadde9f30d1f79cd5293ad0b1f936256be327bba5cbb4883374 +size 835982664 diff --git a/models/Roformer/MelBand/aspiration_mel_band_roformer_sdr_18.9845.ckpt b/models/Roformer/MelBand/aspiration_mel_band_roformer_sdr_18.9845.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..1282613860a16200f301a12c7b8ec67d050c63be --- /dev/null +++ b/models/Roformer/MelBand/aspiration_mel_band_roformer_sdr_18.9845.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e791258c866c6c8da66052693d8cc3b64f1f42c01e052dbdc570cd278380cc5 +size 835983746 diff --git a/models/Roformer/MelBand/config_melband_roformer_aspiration.yaml b/models/Roformer/MelBand/config_melband_roformer_aspiration.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75983773005de1549919f2f50dc456f76f199b18 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_aspiration.yaml @@ -0,0 +1,76 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 # don't work (use in model) + hop_length: 441 # don't work (use in model) + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 256 + depth: 8 + stereo: true + num_stems: 2 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0.1 + ff_dropout: 0.1 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 1 + gradient_accumulation_steps: 8 + grad_clip: 0 + instruments: + - aspiration + - other + lr: 4.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +augmentations: + enable: true # enable or disable all augmentations (to fast disable if needed) + loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) + loudness_min: 0.5 + loudness_max: 1.5 + mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) + mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + +inference: + batch_size: 4 + dim_t: 801 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_big_beta4.yaml b/models/Roformer/MelBand/config_melband_roformer_big_beta4.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7dba2f17a3f8b3b5e7ec94bb9c4becb1acc1724a --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_big_beta4.yaml @@ -0,0 +1,51 @@ +audio: + chunk_size: 485100 + dim_f: 1024 + dim_t: 1101 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 12 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 3 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + instruments: + - vocals + - other + target_instrument: vocals + use_amp: True + +inference: + batch_size: 1 + dim_t: 1101 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_big_beta5e.yaml b/models/Roformer/MelBand/config_melband_roformer_big_beta5e.yaml new file mode 100644 index 0000000000000000000000000000000000000000..66f782d66d31113f304fe07aa1ce663952030676 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_big_beta5e.yaml @@ -0,0 +1,51 @@ +audio: + chunk_size: 485100 + dim_f: 1024 + dim_t: 801 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 3 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + instruments: + - vocals + - other + target_instrument: vocals + use_amp: True + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_big_beta6.yaml b/models/Roformer/MelBand/config_melband_roformer_big_beta6.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb175852d74ba1b04ab1bcfd0fc88e8c5c26f206 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_big_beta6.yaml @@ -0,0 +1,72 @@ +audio: + chunk_size: 529200 + dim_f: 1024 + dim_t: 256 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 512 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 1 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - vocals + - other + lr: 1.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: vocals + num_epochs: 1000 + num_steps: 1000 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: null + use_mp3_compress: false # Deprecated + augmentation_mix: false # Mix several stems of the same type with some probability + augmentation_loudness: false # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0 + augmentation_loudness_max: 0 + q: 0.95 + coarse_loss_clip: false + ema_momentum: 0.999 + optimizer: adam + other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 2 + dim_t: 1201 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_big_beta6x.yaml b/models/Roformer/MelBand/config_melband_roformer_big_beta6x.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7537b31741f9dec3d604f33b859ce7783bb23da6 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_big_beta6x.yaml @@ -0,0 +1,72 @@ +audio: + chunk_size: 529200 + dim_f: 1024 + dim_t: 256 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 512 + depth: 12 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 1 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - vocals + - other + lr: 1.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: vocals + num_epochs: 1000 + num_steps: 1000 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: null + use_mp3_compress: false # Deprecated + augmentation_mix: false # Mix several stems of the same type with some probability + augmentation_loudness: false # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0 + augmentation_loudness_max: 0 + q: 0.95 + coarse_loss_clip: false + ema_momentum: 0.999 + optimizer: adam + other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 2 + dim_t: 1201 + num_overlap: 2 diff --git a/models/Roformer/MelBand/config_melband_roformer_bleed_suppressor_v1.yaml b/models/Roformer/MelBand/config_melband_roformer_bleed_suppressor_v1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..333fb15b0079c324821981b54b75945f1d8440ac --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_bleed_suppressor_v1.yaml @@ -0,0 +1,51 @@ +audio: + chunk_size: 485100 + dim_f: 1024 + dim_t: 801 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + instruments: + - Instrumental + - Bleed + target_instrument: Instrumental + use_amp: True + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_crowd_aufr33_viperx_sdr_8.7144.yaml b/models/Roformer/MelBand/config_melband_roformer_crowd_aufr33_viperx_sdr_8.7144.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e44ef94c71082af3a619c9b439f808ae8eb3e1c --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_crowd_aufr33_viperx_sdr_8.7144.yaml @@ -0,0 +1,71 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 2 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - crowd + - other + lr: 1.0e-05 + patience: 8 + reduce_factor: 0.95 + target_instrument: crowd + num_epochs: 1000 + num_steps: 4032 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: null + use_mp3_compress: false # Deprecated + augmentation_mix: false # Mix several stems of the same type with some probability + augmentation_loudness: false # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0 + augmentation_loudness_max: 0 + q: 0.95 + coarse_loss_clip: false + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 4 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_denoise_aufr33_aggr_sdr_27.9768.yaml b/models/Roformer/MelBand/config_melband_roformer_denoise_aufr33_aggr_sdr_27.9768.yaml new file mode 100644 index 0000000000000000000000000000000000000000..265e19c806778d7b2d5ffdaef9e3d503a6dba3f1 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_denoise_aufr33_aggr_sdr_27.9768.yaml @@ -0,0 +1,71 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 2 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - dry + - other + lr: 1.0e-05 + patience: 8 + reduce_factor: 0.95 + target_instrument: dry + num_epochs: 1000 + num_steps: 4032 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: null + use_mp3_compress: false # Deprecated + augmentation_mix: false # Mix several stems of the same type with some probability + augmentation_loudness: false # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0 + augmentation_loudness_max: 0 + q: 0.95 + coarse_loss_clip: false + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + +inference: + batch_size: 2 + dim_t: 801 + num_overlap: 4 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_denoise_aufr33_sdr_27.9959.yaml b/models/Roformer/MelBand/config_melband_roformer_denoise_aufr33_sdr_27.9959.yaml new file mode 100644 index 0000000000000000000000000000000000000000..265e19c806778d7b2d5ffdaef9e3d503a6dba3f1 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_denoise_aufr33_sdr_27.9959.yaml @@ -0,0 +1,71 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 2 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - dry + - other + lr: 1.0e-05 + patience: 8 + reduce_factor: 0.95 + target_instrument: dry + num_epochs: 1000 + num_steps: 4032 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: null + use_mp3_compress: false # Deprecated + augmentation_mix: false # Mix several stems of the same type with some probability + augmentation_loudness: false # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0 + augmentation_loudness_max: 0 + q: 0.95 + coarse_loss_clip: false + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + +inference: + batch_size: 2 + dim_t: 801 + num_overlap: 4 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_dereverb-echo.yaml b/models/Roformer/MelBand/config_melband_roformer_dereverb-echo.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bf766a04152c42bb2f16e6b2929a1024c6d550f5 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_dereverb-echo.yaml @@ -0,0 +1,76 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 # don't work (use in model) + hop_length: 441 # don't work (use in model) + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 256 + depth: 8 + stereo: true + num_stems: 2 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0.1 + ff_dropout: 0.1 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 1 + gradient_accumulation_steps: 8 + grad_clip: 0 + instruments: + - dry + - No dry + lr: 4.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +augmentations: + enable: true # enable or disable all augmentations (to fast disable if needed) + loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) + loudness_min: 0.5 + loudness_max: 1.5 + mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) + mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + +inference: + batch_size: 4 + dim_t: 801 + num_overlap: 4 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_dereverb-echo_sdr_13.4843_v2.yaml b/models/Roformer/MelBand/config_melband_roformer_dereverb-echo_sdr_13.4843_v2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9ed3ca2589954b800872e65c72b2eff115237ac --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_dereverb-echo_sdr_13.4843_v2.yaml @@ -0,0 +1,64 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 256 + depth: 8 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0.1 + ff_dropout: 0.1 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 1 + gradient_accumulation_steps: 8 + grad_clip: 0 + instruments: + - dry + - No dry + lr: 1.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: dry + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false + use_amp: true + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 4 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_dereverb_anvuew.yaml b/models/Roformer/MelBand/config_melband_roformer_dereverb_anvuew.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f4be13c9f337b26a2e11f89aaf24bc873cc23ce9 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_dereverb_anvuew.yaml @@ -0,0 +1,76 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 3 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - noreverb + - reverb + lr: 5.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: noreverb + num_epochs: 1000 + num_steps: 4000 + q: 0.95 + coarse_loss_clip: false + ema_momentum: 0.999 + optimizer: adamw + other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +augmentations: + enable: true # enable or disable all augmentations (to fast disable if needed) + loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) + loudness_min: 0.1 + loudness_max: 1.0 + mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) + mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_dereverb_echo_v2.yaml b/models/Roformer/MelBand/config_melband_roformer_dereverb_echo_v2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18213b12b2e4c157bca21bd5c8dc922b634e12da --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_dereverb_echo_v2.yaml @@ -0,0 +1,64 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 256 + depth: 8 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0.1 + ff_dropout: 0.1 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 1 + gradient_accumulation_steps: 8 + grad_clip: 0 + instruments: + - dry + - other + lr: 1.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: dry + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false + use_amp: true + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 4 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_ep_3005_sdr_11.4360.yaml b/models/Roformer/MelBand/config_melband_roformer_ep_3005_sdr_11.4360.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c906f2931cbae3cf64551c231e285ca10097fe5 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_ep_3005_sdr_11.4360.yaml @@ -0,0 +1,72 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 # don't work (use in model) + hop_length: 441 # don't work (use in model) + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + dim: 384 + depth: 12 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0.1 + ff_dropout: 0.1 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 9 + gradient_accumulation_steps: 8 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 4.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: Vocals + num_epochs: 1000 + num_steps: 1000 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: simple1 + use_mp3_compress: false # Deprecated + augmentation_mix: true # Mix several stems of the same type with some probability + augmentation_loudness: true # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0.5 + augmentation_loudness_max: 1.5 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 4 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_inst.yaml b/models/Roformer/MelBand/config_melband_roformer_inst.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d6bdca342644a1194427fe505e2044c5006a1213 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_inst.yaml @@ -0,0 +1,51 @@ +audio: + chunk_size: 485100 + dim_f: 1024 + dim_t: 1101 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + instruments: + - other + - vocals + target_instrument: other + use_amp: True + +inference: + batch_size: 1 + dim_t: 1101 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_inst_gabox.yaml b/models/Roformer/MelBand/config_melband_roformer_inst_gabox.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b1395e978d64cb1c37d3015adc2feeb0805e3b94 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_inst_gabox.yaml @@ -0,0 +1,51 @@ +audio: + chunk_size: 485100 + dim_f: 1024 + dim_t: 1101 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + instruments: + - Instrumental + - Vocals + target_instrument: Instrumental + use_amp: True + +inference: + batch_size: 1 + dim_t: 1101 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_inst_v2.yaml b/models/Roformer/MelBand/config_melband_roformer_inst_v2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4297c088f7b8bd2f28308d8a8d1e0694cdec967 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_inst_v2.yaml @@ -0,0 +1,51 @@ +audio: + chunk_size: 485100 + dim_f: 1024 + dim_t: 1101 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 12 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 3 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + instruments: + - Instrumental + - Vocals + target_instrument: Instrumental + use_amp: True + +inference: + batch_size: 1 + dim_t: 1101 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_instrumental_becruily.yaml b/models/Roformer/MelBand/config_melband_roformer_instrumental_becruily.yaml new file mode 100644 index 0000000000000000000000000000000000000000..862010f34a3765fa1ac9f22c04ba74042b2fd086 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_instrumental_becruily.yaml @@ -0,0 +1,72 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 256 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 1 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - Instrumental + - Vocals + lr: 0.0005 + patience: 2 + reduce_factor: 0.95 + target_instrument: Instrumental + num_epochs: 1000 + num_steps: 1000 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: null + use_mp3_compress: false # Deprecated + augmentation_mix: false # Mix several stems of the same type with some probability + augmentation_loudness: false # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0 + augmentation_loudness_max: 0 + q: 0.95 + coarse_loss_clip: false + ema_momentum: 0.999 + optimizer: adamw + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 1 + dim_t: 1101 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_instvoc_duality.yaml b/models/Roformer/MelBand/config_melband_roformer_instvoc_duality.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b93e721853f4d90efa7f0bead82f6a1b791fc19f --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_instvoc_duality.yaml @@ -0,0 +1,51 @@ +audio: + chunk_size: 485100 + dim_f: 1024 + dim_t: 256 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 2 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + instruments: + - Vocals + - Instrumental + target_instrument: null + use_amp: True + +inference: + batch_size: 1 + dim_t: 1101 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_karaoke_aufr33_viperx_sdr_10.1956.yaml b/models/Roformer/MelBand/config_melband_roformer_karaoke_aufr33_viperx_sdr_10.1956.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b88403c926bc5957a54ba90271f0cced47c8366f --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_karaoke_aufr33_viperx_sdr_10.1956.yaml @@ -0,0 +1,71 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 801 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 4 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 1.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: Vocals + num_epochs: 1000 + num_steps: 2000 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: null + use_mp3_compress: false # Deprecated + augmentation_mix: false # Mix several stems of the same type with some probability + augmentation_loudness: false # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0 + augmentation_loudness_max: 0 + q: 0.95 + coarse_loss_clip: false + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 4 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_kim_ft_unwa.yaml b/models/Roformer/MelBand/config_melband_roformer_kim_ft_unwa.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d0527f99399af7f504ead83ce75e6715cd190e56 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_kim_ft_unwa.yaml @@ -0,0 +1,72 @@ +audio: + chunk_size: 485100 + dim_f: 1024 + dim_t: 801 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 1 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - vocals + - other + lr: 1.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: vocals + num_epochs: 1000 + num_steps: 1000 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: null + use_mp3_compress: false # Deprecated + augmentation_mix: false # Mix several stems of the same type with some probability + augmentation_loudness: false # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0 + augmentation_loudness_max: 0 + q: 0.95 + coarse_loss_clip: false + ema_momentum: 0.999 + optimizer: adam + other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 8 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_small_by_aname.yaml b/models/Roformer/MelBand/config_melband_roformer_small_by_aname.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9aa577d3470dbab7d2c7aba01883f2d179521a49 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_small_by_aname.yaml @@ -0,0 +1,52 @@ +audio: + chunk_size: 485100 + dim_f: 1024 + dim_t: 1101 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.0 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + mlp_expansion_factor: 1 + +training: + instruments: + - Instrumental + - Vocals + target_instrument: null + use_amp: true + +inference: + batch_size: 2 + dim_t: 1101 + num_overlap: 4 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_voc_gabox.yaml b/models/Roformer/MelBand/config_melband_roformer_voc_gabox.yaml new file mode 100644 index 0000000000000000000000000000000000000000..95f50893eec05b7c612734f4445301629a026f61 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_voc_gabox.yaml @@ -0,0 +1,51 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 256 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + instruments: + - Vocals + - Instrumental + target_instrument: Vocals + +inference: + batch_size: 1 + dim_t: 1101 + num_overlap: 1 + chunk_size: 352800 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_vocals_becruily.yaml b/models/Roformer/MelBand/config_melband_roformer_vocals_becruily.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d42333851e31c9e5747d818efec365921358a7c --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_vocals_becruily.yaml @@ -0,0 +1,72 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 256 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 1 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - vocals + - other + lr: 0.0005 + patience: 2 + reduce_factor: 0.95 + target_instrument: vocals + num_epochs: 1000 + num_steps: 1000 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: null + use_mp3_compress: false # Deprecated + augmentation_mix: false # Mix several stems of the same type with some probability + augmentation_loudness: false # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0 + augmentation_loudness_max: 0 + q: 0.95 + coarse_loss_clip: false + ema_momentum: 0.999 + optimizer: adamw + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 1 + dim_t: 1101 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_vocals_big_v1_ft.yaml b/models/Roformer/MelBand/config_melband_roformer_vocals_big_v1_ft.yaml new file mode 100644 index 0000000000000000000000000000000000000000..66f782d66d31113f304fe07aa1ce663952030676 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_vocals_big_v1_ft.yaml @@ -0,0 +1,51 @@ +audio: + chunk_size: 485100 + dim_f: 1024 + dim_t: 801 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 3 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + instruments: + - vocals + - other + target_instrument: vocals + use_amp: True + +inference: + batch_size: 1 + dim_t: 801 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_vocals_ft.yaml b/models/Roformer/MelBand/config_melband_roformer_vocals_ft.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c53d3e0912cd12d69ddd923dbb760a6c2b4d5000 --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_vocals_ft.yaml @@ -0,0 +1,72 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 256 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 1 + gradient_accumulation_steps: 8 + grad_clip: 0 + instruments: + - vocals + - other + lr: 1.0e-04 + patience: 2 + reduce_factor: 0.95 + target_instrument: vocals + num_epochs: 1000 + num_steps: 100 + augmentation: true # enable augmentations by audiomentations and pedalboard + augmentation_type: null + use_mp3_compress: false # Deprecated + augmentation_mix: true # Mix several stems of the same type with some probability + augmentation_loudness: true # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0 + augmentation_loudness_max: 0 + q: 0.95 + coarse_loss_clip: false + ema_momentum: 0.999 + optimizer: adamw8bit + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 4 + dim_t: 256 + num_overlap: 2 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_vocals_fullness_aname.yaml b/models/Roformer/MelBand/config_melband_roformer_vocals_fullness_aname.yaml new file mode 100644 index 0000000000000000000000000000000000000000..32fee9c05a82b72931cbb9a8af7d948a538532cb --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_vocals_fullness_aname.yaml @@ -0,0 +1,54 @@ +audio: + chunk_size: 661500 + dim_f: 1024 + dim_t: 256 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 1 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - vocals + - other + target_instrument: vocals + use_amp: true + +inference: + batch_size: 4 + dim_t: 1101 + num_overlap: 4 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_vocals_kim.yaml b/models/Roformer/MelBand/config_melband_roformer_vocals_kim.yaml new file mode 100644 index 0000000000000000000000000000000000000000..99b1ced7032cf791eb065f1859de2e07c89a3eae --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_vocals_kim.yaml @@ -0,0 +1,50 @@ +audio: + chunk_size: 352800 + dim_f: 1024 + dim_t: 256 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + dim: 384 + depth: 6 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 8 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 2 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + instruments: + - vocals + - other + target_instrument: vocals + +inference: + dim_t: 1101 + num_overlap: 1 + chunk_size: 352800 \ No newline at end of file diff --git a/models/Roformer/MelBand/config_melband_roformer_vocals_test_by_aname.yaml b/models/Roformer/MelBand/config_melband_roformer_vocals_test_by_aname.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f83647f71017b0c290f625a7233a66ec300c3aef --- /dev/null +++ b/models/Roformer/MelBand/config_melband_roformer_vocals_test_by_aname.yaml @@ -0,0 +1,208 @@ +audio: + chunk_size: 661500 + dim_f: 1024 + dim_t: 1101 + hop_length: 441 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + dim: 512 + depth: 8 + stereo: true + num_stems: 1 + time_transformer_depth: 1 + freq_transformer_depth: 1 + num_bands: 60 + dim_head: 64 + heads: 12 + attn_dropout: 0 + ff_dropout: 0 + flash_attn: True + dim_freqs_in: 1025 + sample_rate: 44100 # needed for mel filter bank from librosa + stft_n_fft: 2048 + stft_hop_length: 441 + stft_win_length: 2048 + stft_normalized: False + mask_estimator_depth: 3 + multi_stft_resolution_loss_weight: 1.0 + multi_stft_resolutions_window_sizes: !!python/tuple + - 4096 + - 2048 + - 1024 + - 512 + - 256 + multi_stft_hop_size: 147 + multi_stft_normalized: False + +training: + batch_size: 1 + gradient_accumulation_steps: 2 + grad_clip: 0 + instruments: + - vocals + - instruments + lr: 1.0 + patience: 2 + reduce_factor: 0.9999999 + target_instrument: vocals + num_epochs: 100000 + num_steps: 10 + augmentation: false # enable augmentations by audiomentations and pedalboard + augmentation_type: true + use_mp3_compress: false # Deprecated + augmentation_mix: true # Mix several stems of the same type with some probability + augmentation_loudness: true # randomly change loudness of each stem + augmentation_loudness_type: 1 # Type 1 or 2 + augmentation_loudness_min: 0.25 + augmentation_loudness_max: 1 + q: 0.99 + coarse_loss_clip: false + ema_momentum: 0.9995 + optimizer: prodigy + other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +augmentations: + enable: true # enable or disable all augmentations (to fast disable if needed) + loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) + loudness_min: 0.01 + loudness_max: 1.5 + mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) + mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) + - 1 + - 0.1 + mixup_loudness_min: 0.25 + mixup_loudness_max: 1 + + # apply mp3 compression to mixture only (emulate downloading mp3 from internet) + mp3_compression_on_mixture: 0 + mp3_compression_on_mixture_bitrate_min: 32 + mp3_compression_on_mixture_bitrate_max: 320 + mp3_compression_on_mixture_backend: "lameenc" + + all: + channel_shuffle: 0.5 # Set 0 or lower to disable + random_inverse: 0.1 # inverse track (better lower probability) + random_polarity: 0.5 # polarity change (multiply waveform to -1) + mp3_compression: 0.01 + mp3_compression_min_bitrate: 32 + mp3_compression_max_bitrate: 320 + mp3_compression_backend: "lameenc" + + # pedalboard reverb block + pedalboard_reverb: 0.7 + pedalboard_reverb_room_size_min: 0.4 + pedalboard_reverb_room_size_max: 0.9 + pedalboard_reverb_damping_min: 0.3 + pedalboard_reverb_damping_max: 0.9 + pedalboard_reverb_wet_level_min: 0.4 + pedalboard_reverb_wet_level_max: 0.9 + pedalboard_reverb_dry_level_min: 0.1 + pedalboard_reverb_dry_level_max: 0.9 + pedalboard_reverb_width_min: 0.9 + pedalboard_reverb_width_max: 1.0 + + # pedalboard chorus block + pedalboard_chorus: 0.3 + pedalboard_chorus_rate_hz_min: 1.0 + pedalboard_chorus_rate_hz_max: 7.0 + pedalboard_chorus_depth_min: 0.25 + pedalboard_chorus_depth_max: 0.95 + pedalboard_chorus_centre_delay_ms_min: 3 + pedalboard_chorus_centre_delay_ms_max: 10 + pedalboard_chorus_feedback_min: 0.0 + pedalboard_chorus_feedback_max: 0.5 + pedalboard_chorus_mix_min: 0.1 + pedalboard_chorus_mix_max: 0.9 + + # pedalboard phazer block + pedalboard_phazer: 0.2 + pedalboard_phazer_rate_hz_min: 1.0 + pedalboard_phazer_rate_hz_max: 10.0 + pedalboard_phazer_depth_min: 0.25 + pedalboard_phazer_depth_max: 0.95 + pedalboard_phazer_centre_frequency_hz_min: 200 + pedalboard_phazer_centre_frequency_hz_max: 12000 + pedalboard_phazer_feedback_min: 0.0 + pedalboard_phazer_feedback_max: 0.5 + pedalboard_phazer_mix_min: 0.1 + pedalboard_phazer_mix_max: 0.9 + + # pedalboard distortion block + pedalboard_distortion: 0.2 + pedalboard_distortion_drive_db_min: 1.0 + pedalboard_distortion_drive_db_max: 12 + + # pedalboard pitch shift block + pedalboard_pitch_shift: 0 + pedalboard_pitch_shift_semitones_min: -7 + pedalboard_pitch_shift_semitones_max: 7 + + # pedalboard resample block + pedalboard_resample: 0.3 + pedalboard_resample_target_sample_rate_min: 8000 + pedalboard_resample_target_sample_rate_max: 44100 + + # pedalboard bitcrash block + pedalboard_bitcrash: 0.1 + pedalboard_bitcrash_bit_depth_min: 8 + pedalboard_bitcrash_bit_depth_max: 16 + + # pedalboard mp3 compressor block + pedalboard_mp3_compressor: 0 + pedalboard_mp3_compressor_pedalboard_mp3_compressor_min: 0 + pedalboard_mp3_compressor_pedalboard_mp3_compressor_max: 9.999 + + vocals: + pitch_shift: 0.7 + pitch_shift_min_semitones: -6 + pitch_shift_max_semitones: 6 + seven_band_parametric_eq: 0.45 + seven_band_parametric_eq_min_gain_db: -80 + seven_band_parametric_eq_max_gain_db: 3 + tanh_distortion: 0.3 + tanh_distortion_min: 0.01 + tanh_distortion_max: 0.7 + other: + pitch_shift: 0.5 + pitch_shift_min_semitones: -12 + pitch_shift_max_semitones: 12 + gaussian_noise: 0.5 + gaussian_noise_min_amplitude: 0.01 + gaussian_noise_max_amplitude: 0.5 + time_stretch: 0.2 + time_stretch_min_rate: 0.125 + time_stretch_max_rate: 1.0 + +loss_multistft: + fft_sizes: + - 1024 + - 2048 + - 4096 + hop_sizes: + - 512 + - 1024 + - 2048 + win_lengths: + - 1024 + - 2048 + - 4096 + window: "hann_window" + scale: "mel" + n_bins: 128 + sample_rate: 44100 + perceptual_weighting: true + w_sc: 3.0 + w_log_mag: 3.0 + w_lin_mag: 2.0 + w_phs: 3.0 + mag_distance: "L1" + +inference: + batch_size: 4 + dim_t: 1101 + num_overlap: 4 diff --git a/models/Roformer/MelBand/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt b/models/Roformer/MelBand/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..3e232e1d5ebb6e2c74cf9803ca9fb4a3fb860d17 --- /dev/null +++ b/models/Roformer/MelBand/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a25e3b233722cd81e2de7b8e798a3fef29d4b9799ccacda60b0dc958a1e2a5bb +size 913097300 diff --git a/models/Roformer/MelBand/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt b/models/Roformer/MelBand/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..8d1b634e263572a5a955577112e77036279c6287 --- /dev/null +++ b/models/Roformer/MelBand/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c1c39191edc34e942ca7f2346ce6b6c0e1208a5f76349ffce6f696bd12910de +size 913097300 diff --git a/models/Roformer/MelBand/dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt b/models/Roformer/MelBand/dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..b2ec32944c1f4f9c802a1ad02d38682929c77a58 --- /dev/null +++ b/models/Roformer/MelBand/dereverb-echo_mel_band_roformer_sdr_10.0169.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd2b737a394cfb80cd48cc9fcbaf89f5f4062f6b93066c2911617a06d8b7860a +size 835997896 diff --git a/models/Roformer/MelBand/dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt b/models/Roformer/MelBand/dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..5baa2b3d302d0ab984148ff1f42793220b29c7ca --- /dev/null +++ b/models/Roformer/MelBand/dereverb-echo_mel_band_roformer_sdr_13.4843_v2.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:396432f5af25992fe82d0286634bd879027c073721db6ab10199e75459708b9f +size 455862568 diff --git a/models/Roformer/MelBand/dereverb_big_mbr_ep_362.ckpt b/models/Roformer/MelBand/dereverb_big_mbr_ep_362.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..814f35f8718aab66733f43e60a034fa933dd2f7f --- /dev/null +++ b/models/Roformer/MelBand/dereverb_big_mbr_ep_362.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0506455e74ffc02bbec700df9863ae243597034003815f1418227c6dee33b6ea +size 455864012 diff --git a/models/Roformer/MelBand/dereverb_echo_mbr_fused.ckpt b/models/Roformer/MelBand/dereverb_echo_mbr_fused.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..2d495600912d8c596c4f37c1a902c9b918e934fd --- /dev/null +++ b/models/Roformer/MelBand/dereverb_echo_mbr_fused.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1596b1063238f487d54a0510a8c92cb28c000c803a271dd618ac49efc99ef3f7 +size 455776577 diff --git a/models/Roformer/MelBand/dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt b/models/Roformer/MelBand/dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..8f21995361b651a6c446cb4e1e7664d94eb42bff --- /dev/null +++ b/models/Roformer/MelBand/dereverb_mel_band_roformer_anvuew_sdr_19.1729.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9262877b87e9ebb0fb808a456b0a411fa677f5df31c8383c1254af531c078970 +size 913107578 diff --git a/models/Roformer/MelBand/dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt b/models/Roformer/MelBand/dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..45d5062aae81caa8df3565e7d10c818f076cc588 --- /dev/null +++ b/models/Roformer/MelBand/dereverb_mel_band_roformer_less_aggressive_anvuew_sdr_18.8050.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0db8f1b41c00cead1112e967262a12802fd32e76c0c3a8eb207e772bae25d07b +size 913107578 diff --git a/models/Roformer/MelBand/dereverb_mel_band_roformer_mono_anvuew_sdr_20.4029.ckpt b/models/Roformer/MelBand/dereverb_mel_band_roformer_mono_anvuew_sdr_20.4029.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..06855114df2802baa49c15faf4b39a743b84e27b --- /dev/null +++ b/models/Roformer/MelBand/dereverb_mel_band_roformer_mono_anvuew_sdr_20.4029.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f099ee717eb57fb0ad5eb0e7c9ad6787c36168140b61ce2b158b90c2c4ecee79 +size 913097978 diff --git a/models/Roformer/MelBand/dereverb_super_big_mbr_ep_346.ckpt b/models/Roformer/MelBand/dereverb_super_big_mbr_ep_346.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..bc9d947ff83f741caa2504b673eed1983e6ac008 --- /dev/null +++ b/models/Roformer/MelBand/dereverb_super_big_mbr_ep_346.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26dda242bce4405555f2d6086d079fe8cc23f1f04e02e501d2689bfe3ece0489 +size 455864012 diff --git a/models/Roformer/MelBand/mel_band_roformer_bleed_suppressor_v1.ckpt b/models/Roformer/MelBand/mel_band_roformer_bleed_suppressor_v1.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..4bf56278572c098907e036b63e094bd7c63c1a20 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_bleed_suppressor_v1.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a9d10faa7f8997676a78e66d741d7acb9cc449334763f3c8f626d68ec6e575 +size 913102724 diff --git a/models/Roformer/MelBand/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt b/models/Roformer/MelBand/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..8d59dd9875ffadf10ca18f66aa3d20a0e289ce90 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca8799531fe51c94172cc047226209ed48bf7d8c02e04671795a15d2a1c318af +size 913096801 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_bleedless_v1_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_bleedless_v1_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..881d22e5fc7fde06ca2c5de68277c4ab8bbceee7 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_bleedless_v1_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6109687febb8f18cd5a45207fee35f18ba8b9467b18a4b2e982a3b7dc04a9d72 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_bleedless_v2_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_bleedless_v2_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..65745b5be537052b903ae5f13f4e26f51912f6db --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_bleedless_v2_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de972fb724601beef237abe94c8b934c73218e9baf3e344ab4c2333276e5cfe7 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_denoise_debleed_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_denoise_debleed_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..8b595aaf271c7d97cbca025436c370b5954e7786 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_denoise_debleed_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91aa7a546ed2e93482e4629c982d35b0d258bb3de6eeab497fd91658cc86c7fd +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v1_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v1_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..e73ab6f21e270a489e7310d9fe81a150cdfceb13 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v1_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31140eccf271d2a9e8a538b092b1f70dfd6471aa5ad163b22bccc758b9f38b62 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v2_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v2_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..6d849f3079743541477aeb8748cd2dac3f05e0d0 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v2_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c446c34551333dd3d45b8d0708658a10f28c5e289f8ec27b5f0e22803681bef3 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v3_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v3_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..c90caf360ea211abf7467c96c8bde8bf656fb521 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v3_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbb229209a8942d34664e19d2f4862e357ea3108a4e8c04b69aa0aba523a4481 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v4_noise_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v4_noise_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..5a49522bec839a399c6545d2f812bae2ff5f32a1 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v4_noise_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f50296e913b9af3b5b3b961e92877ef0d4a74f9a433e796e89960c4c2b1abe53 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v5_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v5_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..1a192beafebebb2f4785e076cb13aba8a074b8ce --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v5_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38e935cf1e97afcc1de84d0bdb87dd8090bad530fa0df28e707d16448e1d38e2 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v5_noise_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v5_noise_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..5cea44b8d02d4a8379a8fef7d0dc7e3a920b6294 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v5_noise_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:175203923fac3e52ae00e7e37d41e8a7fef5020b6ee4e4144f4786daabc54b34 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v6_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v6_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..9e4e1ed19b5579f8914a2229b05a859b8cb0bf34 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v6_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:677951b8556a27abe32e39705640638826e78101fa901a51ad73d20522be6d25 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v6_noise_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v6_noise_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..edf54dd70e2304cc057d3055a46c02fbb79ef937 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v6_noise_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:802f3e5d183d7c4b50dea147c320e61634f5be6ff55fa899fdebeaf0f3cf7f42 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v7_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v7_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..5f5aa9428c851e83a36c0432a3baf218aa6ca261 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v7_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e725a860176acb475d983a1ddd9c1a99a619c69cc9ceda808dd294d10db746a5 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v7_noise_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v7_noise_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..0bb8eb969ef04c187caa14bc813e8c690d396036 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v7_noise_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0ca36af5d1314be46b56c8a53b6be02f98511fa5d7e3e196fd895755e65be3c +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v8_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v8_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..9816b322dacc2e3b6b87954a8202b64fdb78cddb --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_v8_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba9efd5167ca3cf1c74dfac3b545a9609fdbe5dc1f468953f0d3e624fede99a0 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_fullness_vX_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_vX_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..93fa162e3077a085662452f147e5fbe0bacd108a --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_fullness_vX_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:545ef13b0cdbac505818a38db98e09c54e7c03ea17b4e0c895a531bfa352fa59 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_v1_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_v1_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..6ecff8779cfc8fa3961db8589050b44a43131203 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_v1_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b3ad6bd8bed3aaaa4d9320ea2ca910d140196a2302186db1754f3a8d8e16fb1 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_v2_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_v2_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..7758aa5f0d19e21b8cb82550a8eddb7a7ab10e23 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_v2_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e03ca459c339f88b7521c367c897d0c3f5362b38a6cdb96cb28e625ca0f9931e +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_inst_v3_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_inst_v3_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..1596ed4c464e4346d614bc48bb64a996975bc445 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_inst_v3_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ec9f299cf617bf6afe1c382f4b0761cd9bee78323da94889951812328e10fb +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_instrumental_becruily.ckpt b/models/Roformer/MelBand/mel_band_roformer_instrumental_becruily.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..9aa6c9ab355e9dfc090251f9703e299b9da7893d --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_instrumental_becruily.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8da6632a1c25efb1c9be783ce9ea367d226d4b918cd6c3717c8b1d7a396041d +size 913106900 diff --git a/models/Roformer/MelBand/mel_band_roformer_instrumental_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_instrumental_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..6ecff8779cfc8fa3961db8589050b44a43131203 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_instrumental_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b3ad6bd8bed3aaaa4d9320ea2ca910d140196a2302186db1754f3a8d8e16fb1 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt b/models/Roformer/MelBand/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..977a5c60ab478ab0b78ac5bc7e5296f9185f5549 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1de20d459332fe8869aeb01327a31df0032262706e1365114e852dc271779813 +size 913096801 diff --git a/models/Roformer/MelBand/mel_band_roformer_karaoke_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_karaoke_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..69688626b9c8b8655e6eb549e7909f64ae862819 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_karaoke_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:303fc631e7aa587e9dc1e6ac4bb3667c6ba53aacb6b6a90abcfcf57935b92bd8 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_kim_ft2_bleedless_unwa.ckpt b/models/Roformer/MelBand/mel_band_roformer_kim_ft2_bleedless_unwa.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..eb05d0fc17641b53deabf68c1940dd237695026c --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_kim_ft2_bleedless_unwa.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c450bd66a98b49dd03231fc5ebb84121eef8418236b179423c2b171d62b04d9 +size 913101368 diff --git a/models/Roformer/MelBand/mel_band_roformer_kim_ft2_unwa.ckpt b/models/Roformer/MelBand/mel_band_roformer_kim_ft2_unwa.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..64a9c082838de39b001e92ac622efe3de0c810da --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_kim_ft2_unwa.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ed7b9e4c2eebbec7a7e5e8113058f7b68ba5e6048db8eaccfbbeb884c7884c0 +size 913100690 diff --git a/models/Roformer/MelBand/mel_band_roformer_kim_ft_unwa.ckpt b/models/Roformer/MelBand/mel_band_roformer_kim_ft_unwa.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..2912039564c3940b1fb51563a795e3f241841226 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_kim_ft_unwa.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6bd8d333880191254a6ef6be3cb0ffa4dda9d3282e36b0cce2e88a660e00d39 +size 913100690 diff --git a/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v1_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v1_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..5b6fdc61e56b41297707de241f7c353a916c0494 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v1_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4dff354d81152d1b4321f6491f242c060919148239fbfe22a1015513de4a7fe +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v2_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v2_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..dccb5cbb2e7b8aa143e8cd8b803144646998abd5 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v2_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2888813aa5b519941fa8548efc5a4331d63c61909007eb17fe95c367be230196 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v3_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v3_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..f5b0031173c6b54221895b9d66e9553037777cd5 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v3_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d81446b34a7848446efde7898b25bdc32fe872c2393617acb5356649f7ea93 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v4_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v4_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..16f6cdd40cf3df7b1fbbf3f3702c1f326cdd8f94 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_voc_fullness_v4_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a9657de5fd3ed87ad4fd1a9d2069743ecb33424836973ad0f3288e2a64e90bc +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_voc_gabox.ckpt b/models/Roformer/MelBand/mel_band_roformer_voc_gabox.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..709869d299af3267439c266eede088a1c19b0745 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_voc_gabox.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff802a67501fac70587c3ff4e8dbc89c2558e7d8911c92222dfea2aaac208517 +size 913026650 diff --git a/models/Roformer/MelBand/mel_band_roformer_vocals_becruily.ckpt b/models/Roformer/MelBand/mel_band_roformer_vocals_becruily.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..fda579c578c11b43fc8267f25fdbe0552ec36c1b --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_vocals_becruily.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a05961310cc55fbb901290c2e8be02682942f73522b6ac76bf2ec11e347ed95a +size 913107578 diff --git a/models/Roformer/MelBand/mel_band_roformer_vocals_fullness_aname.ckpt b/models/Roformer/MelBand/mel_band_roformer_vocals_fullness_aname.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..380b1baf015018a1c4ae18ef1ade63a6ab76e441 --- /dev/null +++ b/models/Roformer/MelBand/mel_band_roformer_vocals_fullness_aname.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a64a27a672b457de23d9decd1fc7b58b0664a9f4f24bb43af154708e2ef07d2f +size 913090472 diff --git a/models/Roformer/MelBand/melband_roformer_big_beta4.ckpt b/models/Roformer/MelBand/melband_roformer_big_beta4.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..62cb0ed8e023394924b1108a5335daa8b91d78d4 --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_big_beta4.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700a9bd3831d4f7f44cc0019b238774e31045bcbc361fbb69235535c40fc1454 +size 1574477088 diff --git a/models/Roformer/MelBand/melband_roformer_big_beta5e.ckpt b/models/Roformer/MelBand/melband_roformer_big_beta5e.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..a59473cd797f3c5fc6d6eb420d324145dc7c1104 --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_big_beta5e.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32b876e1163716a9a007438b5a5107069586aa9b9ca653a5f63013b1edf6920c +size 1479749810 diff --git a/models/Roformer/MelBand/melband_roformer_big_beta6.ckpt b/models/Roformer/MelBand/melband_roformer_big_beta6.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..3e62cb6881f880ecadb2131661a7bffc1c651889 --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_big_beta6.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51cbb94b4ed5c36cb36fd2024236a8af3ed6886567981702ad6f094b2c6c820 +size 1557078584 diff --git a/models/Roformer/MelBand/melband_roformer_big_beta6x.ckpt b/models/Roformer/MelBand/melband_roformer_big_beta6x.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..8a45e7206470ce16a36e6c5c60726331d16221db --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_big_beta6x.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e16d702f4e20f13d60b293541c1dea75cb4414a5846b36780e28ef70352a4e5c +size 1708527586 diff --git a/models/Roformer/MelBand/melband_roformer_inst_v1.ckpt b/models/Roformer/MelBand/melband_roformer_inst_v1.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..0a5775dcf471eae22321cf27576da3155bd773e9 --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_inst_v1.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f88d96958b2b7dec32286b0ced00bbcbd37e28741cad9038758b1eaf9b5c057 +size 913100690 diff --git a/models/Roformer/MelBand/melband_roformer_inst_v1e.ckpt b/models/Roformer/MelBand/melband_roformer_inst_v1e.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..f8370873342b453fb65d96b2fe069cbc0f8130e9 --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_inst_v1e.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df2bcdb8838b88264f5381dbb0ccd84a9926c9775cf548c34d8846f5cd20fe96 +size 913102724 diff --git a/models/Roformer/MelBand/melband_roformer_inst_v1e_plus.ckpt b/models/Roformer/MelBand/melband_roformer_inst_v1e_plus.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..d96e91089fa7a07f5c4052f55449fd6ed4047ce2 --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_inst_v1e_plus.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a4ddba739f0352407fb6e18b29206b82318ec427fe37fcedb0f83241e4e15fb +size 913090472 diff --git a/models/Roformer/MelBand/melband_roformer_inst_v2.ckpt b/models/Roformer/MelBand/melband_roformer_inst_v2.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..e59c3365c4976b3c225f0cf9ab34342531dc6955 --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_inst_v2.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd19766620f7d6f58fdf7aaada7e89907fe41bc64490ce3faa9a6dab15d6e1f2 +size 1574477088 diff --git a/models/Roformer/MelBand/melband_roformer_instvoc_duality_v1.ckpt b/models/Roformer/MelBand/melband_roformer_instvoc_duality_v1.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..2b99b0c8e53949042dce6f1db2300642212b31f7 --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_instvoc_duality_v1.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4640a59d454bf9f69d67460592ab71e7cdce3afa0c0a6f0cf4500bb4ac0b8381 +size 1719116358 diff --git a/models/Roformer/MelBand/melband_roformer_instvoc_duality_v2.ckpt b/models/Roformer/MelBand/melband_roformer_instvoc_duality_v2.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..e4042424f484dbb10d5f76f807c04f9b7f307e23 --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_instvoc_duality_v2.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4a69558708f2857e36ac86a0e03ed95c4e3d8b9c5b8113963987d0d7df7e20f +size 1719116358 diff --git a/models/Roformer/MelBand/melband_roformer_kim_vocals_fullness_v1_by_aname.ckpt b/models/Roformer/MelBand/melband_roformer_kim_vocals_fullness_v1_by_aname.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..f22a3e48fc797be7bd2ff6c480eb99e460515322 --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_kim_vocals_fullness_v1_by_aname.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d0662af510f63fe48bdc91035951c1a7ae3b41ac9ae92cf7ec88fe9d6a6cb6d +size 913090472 diff --git a/models/Roformer/MelBand/melband_roformer_kim_vocals_fullness_v2_by_aname.ckpt b/models/Roformer/MelBand/melband_roformer_kim_vocals_fullness_v2_by_aname.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..3d6a69e88c5297c4977d8c139225c8666c47ed72 --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_kim_vocals_fullness_v2_by_aname.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc4c02112d53fea925bfe362918a9cbea10b3c0893aa40506aa82874ad03138c +size 913090472 diff --git a/models/Roformer/MelBand/melband_roformer_kim_vocals_v1_by_aname.ckpt b/models/Roformer/MelBand/melband_roformer_kim_vocals_v1_by_aname.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..c5b1f32ed2a23defa050c8301a0c928e136b576b --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_kim_vocals_v1_by_aname.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:696825ec20f6bc48add7443000def04bb1736c1098784be7cdf0756a140e9621 +size 913106158 diff --git a/models/Roformer/MelBand/melband_roformer_kim_vocals_v2_by_aname.ckpt b/models/Roformer/MelBand/melband_roformer_kim_vocals_v2_by_aname.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..84f9d8e5a29e75f2ffc02392d22fba229a56f927 --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_kim_vocals_v2_by_aname.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b1a07034e74e169e1a332c15fe63fb8da117fbeac580dbab06ad99ddb27702 +size 913090472 diff --git a/models/Roformer/MelBand/melband_roformer_kim_vocals_v3_by_aname.ckpt b/models/Roformer/MelBand/melband_roformer_kim_vocals_v3_by_aname.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..076d6f84387932bdb741a916f23becd4c71c1217 --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_kim_vocals_v3_by_aname.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acf6708e4e9ead350abdeedd9f3385c5e7c08281eba24bd59c8a5fe63a446082 +size 913106158 diff --git a/models/Roformer/MelBand/melband_roformer_small_by_aname.ckpt b/models/Roformer/MelBand/melband_roformer_small_by_aname.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..5db369eedfade9297c978a4242c17b1e2769a572 --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_small_by_aname.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20cc592a9dff7dc34a3fb0bf399cd68c950b03ff5f334725e74feb3d0186272f +size 202573672 diff --git a/models/Roformer/MelBand/melband_roformer_vocals_bleedness_by_aname.ckpt b/models/Roformer/MelBand/melband_roformer_vocals_bleedness_by_aname.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..b9b07d24bf6b80f2b4b979ae3442cc2b45ecfd6b --- /dev/null +++ b/models/Roformer/MelBand/melband_roformer_vocals_bleedness_by_aname.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:614bcc380bafe7a39ad1c451103dbe6a487886102c6b34c0561b5d8a4cec1286 +size 1708534246 diff --git a/models/Roformer/MelBand/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt b/models/Roformer/MelBand/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..1cb36d54103c4fdd30f7e8a089164cba50cfb0b0 --- /dev/null +++ b/models/Roformer/MelBand/model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b9d0958e35b8ebfbe2afe69bbd5444e5ffe2f5d80ae0d583b833d2f3c0d139 +size 1007816988 diff --git a/models/Roformer/MelBand/vocals_mel_band_roformer.ckpt b/models/Roformer/MelBand/vocals_mel_band_roformer.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..e9269937826d8cedf1855096bc9c1d49298bb4f8 --- /dev/null +++ b/models/Roformer/MelBand/vocals_mel_band_roformer.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87201f4d31afb5bc79993230fc49446918425574db48c01c405e44f365c7559e +size 913106900