diff --git a/Bandit_models/checkpoint-multi_fixed.ckpt b/Bandit_models/checkpoint-multi_fixed.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..3778bca329292abcc1a3b60d30782e379bf1c19d --- /dev/null +++ b/Bandit_models/checkpoint-multi_fixed.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20bcd513dc7eb0541dd045909a4e7dff8dab474cc2efba4904101c76524aee85 +size 149133378 diff --git a/Bandit_models/config_dnr_bandit_bsrnn_multi_mus64.yaml b/Bandit_models/config_dnr_bandit_bsrnn_multi_mus64.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f67bf9d966f88a30ba82bf47e6f109497375979 --- /dev/null +++ b/Bandit_models/config_dnr_bandit_bsrnn_multi_mus64.yaml @@ -0,0 +1,78 @@ +name: "MultiMaskMultiSourceBandSplitRNN" +audio: + chunk_size: 264600 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + in_channel: 1 + stems: ['speech', 'music', 'effects'] + band_specs: "musical" + n_bands: 64 + fs: 44100 + require_no_overlap: false + require_no_gap: true + normalize_channel_independently: false + treat_channel_as_feature: true + n_sqm_modules: 8 + emb_dim: 128 + rnn_dim: 256 + bidirectional: true + rnn_type: "GRU" + mlp_dim: 512 + hidden_activation: "Tanh" + hidden_activation_kwargs: null + complex_mask: true + n_fft: 2048 + win_length: 2048 + hop_length: 512 + window_fn: "hann_window" + wkwargs: null + power: null + center: true + normalized: true + pad_mode: "constant" + onesided: true + +training: + batch_size: 4 + gradient_accumulation_steps: 4 + grad_clip: 0 + instruments: + - Speech + - Music + - Effects + lr: 9.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +augmentations: + enable: true # enable or disable all augmentations (to fast disable if needed) + loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) + mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + all: + channel_shuffle: 0.5 # Set 0 or lower to disable + random_inverse: 0.1 # inverse track (better lower probability) + random_polarity: 0.5 # polarity change (multiply waveform to -1) + +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 4 \ No newline at end of file diff --git a/Bandit_models/config_dnr_bandit_v2_mus64.yaml b/Bandit_models/config_dnr_bandit_v2_mus64.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8691ea03cd880bd4da59fae1952eb5b2cf1defab --- /dev/null +++ b/Bandit_models/config_dnr_bandit_v2_mus64.yaml @@ -0,0 +1,78 @@ +cls: Bandit + +audio: + chunk_size: 384000 + num_channels: 2 + sample_rate: 48000 + min_mean_abs: 0.000 + +kwargs: + in_channels: 1 + stems: ['speech', 'music', 'sfx'] + band_type: musical + n_bands: 64 + normalize_channel_independently: false + treat_channel_as_feature: true + n_sqm_modules: 8 + emb_dim: 128 + rnn_dim: 256 + bidirectional: true + rnn_type: "GRU" + mlp_dim: 512 + hidden_activation: "Tanh" + hidden_activation_kwargs: + complex_mask: true + use_freq_weights: true + n_fft: 2048 + win_length: 2048 + hop_length: 512 + window_fn: "hann_window" + wkwargs: + power: + center: true + normalized: true + pad_mode: "reflect" + onesided: true + +training: + batch_size: 4 + gradient_accumulation_steps: 4 + grad_clip: 0 + instruments: + - Speech + - Music + - Sfx + lr: 9.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +augmentations: + enable: true # enable or disable all augmentations (to fast disable if needed) + loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) + mixup_probs: !!python/tuple + # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + all: + channel_shuffle: 0.5 # Set 0 or lower to disable + random_inverse: 0.1 # inverse track (better lower probability) + random_polarity: 0.5 # polarity change (multiply waveform to -1) + +inference: + batch_size: 8 + dim_t: 256 + num_overlap: 4 diff --git a/Bandit_models/model_bandit_plus_dnr_sdr_11.47.ckpt b/Bandit_models/model_bandit_plus_dnr_sdr_11.47.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..f0b50711c499ac1971506ec2254f172cd5f01ca1 --- /dev/null +++ b/Bandit_models/model_bandit_plus_dnr_sdr_11.47.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c48284779f7d1258a6527d3aaa18a532d45c1f506e2dcc25d5ab179a8c5e2573 +size 148891175 diff --git a/MDX23C_models/MDX23C-8KFFT-InstVoc_HQ.ckpt b/MDX23C_models/MDX23C-8KFFT-InstVoc_HQ.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..d19b34ba845c464fd607a4ce4c9dd3f794d0bd23 --- /dev/null +++ b/MDX23C_models/MDX23C-8KFFT-InstVoc_HQ.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d51472769e34a2501cd1da782346a3212555c3a5619fc2c53507445528d816 +size 448101203 diff --git a/MDX23C_models/MDX23C-8KFFT-InstVoc_HQ_2.ckpt b/MDX23C_models/MDX23C-8KFFT-InstVoc_HQ_2.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..486c55bd1538c5471f02f9e08af5ceb8baec08e2 --- /dev/null +++ b/MDX23C_models/MDX23C-8KFFT-InstVoc_HQ_2.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d960d8e40a458120412c1bd807e013d2dbca7b959cc9da2bbcb0eb203d1daea +size 448093770 diff --git a/MDX23C_models/MDX23C-De-Reverb-aufr33-jarredou.ckpt b/MDX23C_models/MDX23C-De-Reverb-aufr33-jarredou.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..dca4c63709a57308193ba52a2d3309efed5915bc --- /dev/null +++ b/MDX23C_models/MDX23C-De-Reverb-aufr33-jarredou.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae2471b707758d74db38ac1b1d5800e12f57c4e9d1ebbb2faf004b8e086e914 +size 448098867 diff --git a/MDX23C_models/MDX23C-DrumSep-aufr33-jarredou.ckpt b/MDX23C_models/MDX23C-DrumSep-aufr33-jarredou.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..2f97198d19fcae11a05adbd6b2a746e1f466833f --- /dev/null +++ b/MDX23C_models/MDX23C-DrumSep-aufr33-jarredou.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2a4aa53eb584d21eead358a4e66d1882ad182911be018f052b5da73be9096d0 +size 437652699 diff --git a/MDX23C_models/MDX23C_D1581.ckpt b/MDX23C_models/MDX23C_D1581.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..62aeea1ec485760c60a560dbcc87e44995964d23 --- /dev/null +++ b/MDX23C_models/MDX23C_D1581.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7d4d2137f12764950205b095da20032fef1d41f077bacc8582f20ed40e8cb28 +size 183379219 diff --git a/MDX23C_models/config_dereverb_mdx23c.yaml b/MDX23C_models/config_dereverb_mdx23c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dda1844a2a7562fca0341a6c7cefc2ec11ec8cac --- /dev/null +++ b/MDX23C_models/config_dereverb_mdx23c.yaml @@ -0,0 +1,135 @@ +audio: + chunk_size: 261120 + dim_f: 4096 + dim_t: 256 + hop_length: 1024 + n_fft: 8192 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + act: gelu + bottleneck_factor: 4 + growth: 128 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 + +training: + batch_size: 2 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - dry + - No dry + lr: 1.0e-06 + patience: 4 + reduce_factor: 0.93 + target_instrument: null + num_epochs: 40 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adamw + read_metadata_procs: 8 # Number of processes to use during metadata reading for dataset. Can speed up metadata generation + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +augmentations: + enable: false # enable or disable all augmentations (to fast disable if needed) + loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) + mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + + all: + channel_shuffle: 0.5 # Set 0 or lower to disable + random_inverse: 0.05 # inverse track (better lower probability) + random_polarity: 0.5 # polarity change (multiply waveform to -1) + + # pedalboard chorus block + pedalboard_chorus: 0.001 + pedalboard_chorus_rate_hz_min: 1.0 + pedalboard_chorus_rate_hz_max: 7.0 + pedalboard_chorus_depth_min: 0.25 + pedalboard_chorus_depth_max: 0.95 + pedalboard_chorus_centre_delay_ms_min: 3 + pedalboard_chorus_centre_delay_ms_max: 10 + pedalboard_chorus_feedback_min: 0.0 + pedalboard_chorus_feedback_max: 0.01 + pedalboard_chorus_mix_min: 0.1 + pedalboard_chorus_mix_max: 0.9 + + # pedalboard phazer block + pedalboard_phazer: 0.001 + pedalboard_phazer_rate_hz_min: 1.0 + pedalboard_phazer_rate_hz_max: 10.0 + pedalboard_phazer_depth_min: 0.25 + pedalboard_phazer_depth_max: 0.95 + pedalboard_phazer_centre_frequency_hz_min: 200 + pedalboard_phazer_centre_frequency_hz_max: 12000 + pedalboard_phazer_feedback_min: 0.0 + pedalboard_phazer_feedback_max: 0.5 + pedalboard_phazer_mix_min: 0.1 + pedalboard_phazer_mix_max: 0.9 + + # pedalboard pitch shift block + pedalboard_pitch_shift: 0.01 + pedalboard_pitch_shift_semitones_min: -7 + pedalboard_pitch_shift_semitones_max: 7 + + # pedalboard resample block + pedalboard_resample: 0.001 + pedalboard_resample_target_sample_rate_min: 4000 + pedalboard_resample_target_sample_rate_max: 44100 + + mp3_compression_min_bitrate: 32 + mp3_compression_max_bitrate: 320 + mp3_compression_backend: "lameenc" + + dry: + # pedalboard distortion block + pedalboard_distortion: 0.001 + pedalboard_distortion_drive_db_min: 1.0 + pedalboard_distortion_drive_db_max: 25.0 + + tanh_distortion: 0.05 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.7 + # pedalboard bitcrash block + pedalboard_bitcrash: 0.005 + pedalboard_bitcrash_bit_depth_min: 4 + pedalboard_bitcrash_bit_depth_max: 16 + + seven_band_parametric_eq: 0.24 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 + + gaussian_noise: 0.005 + gaussian_noise_min_amplitude: 0.001 + gaussian_noise_max_amplitude: 0.01 + + time_stretch: 0.01 + time_stretch_min_rate: 0.8 + time_stretch_max_rate: 1.25 + other: + seven_band_parametric_eq: 0.24 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 + +inference: + batch_size: 2 + dim_t: 256 + num_overlap: 4 \ No newline at end of file diff --git a/MDX23C_models/config_drumsep_mdx23c.yaml b/MDX23C_models/config_drumsep_mdx23c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b06ddb77b6d2df982922cabaa9805a189f4c6c6 --- /dev/null +++ b/MDX23C_models/config_drumsep_mdx23c.yaml @@ -0,0 +1,87 @@ +audio: + chunk_size: 130560 + dim_f: 1024 + dim_t: 256 + hop_length: 512 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + act: gelu + bottleneck_factor: 4 + growth: 128 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 + +training: + batch_size: 12 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - kick + - snare + - toms + - hh + - ride + - crash + lr: 9.0e-05 + patience: 30 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1268 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +augmentations: + enable: true # enable or disable all augmentations (to fast disable if needed) + loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) + mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + + # apply mp3 compression to mixture only (emulate downloading mp3 from internet) + mp3_compression_on_mixture: 0.0 + mp3_compression_on_mixture_bitrate_min: 32 + mp3_compression_on_mixture_bitrate_max: 320 + mp3_compression_on_mixture_backend: "lameenc" + + all: + channel_shuffle: 0.5 # Set 0 or lower to disable + random_inverse: 0.01 # inverse track (better lower probability) + random_polarity: 0.5 # polarity change (multiply waveform to -1) + mp3_compression: 0.0 + mp3_compression_min_bitrate: 32 + mp3_compression_max_bitrate: 320 + mp3_compression_backend: "lameenc" + pitch_shift: 0.1 + pitch_shift_min_semitones: -3 + pitch_shift_max_semitones: 3 + seven_band_parametric_eq: 0.5 + seven_band_parametric_eq_min_gain_db: -6 + seven_band_parametric_eq_max_gain_db: 6 + tanh_distortion: 0.2 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.5 + +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 4 \ No newline at end of file diff --git a/MDX23C_models/config_mdx23c_similarity.yaml b/MDX23C_models/config_mdx23c_similarity.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ce8239fc926820db231cb1a240d20a1ff3eca0e --- /dev/null +++ b/MDX23C_models/config_mdx23c_similarity.yaml @@ -0,0 +1,47 @@ +audio: + chunk_size: 130560 + dim_f: 1024 + dim_t: 256 + hop_length: 512 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 + +model: + act: gelu + bottleneck_factor: 4 + growth: 128 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 + +training: + batch_size: 2 + gradient_accumulation_steps: 3 + grad_clip: 0 + instruments: + - Similarity + - Difference + lr: 1.0 + patience: 15 + reduce_factor: 0.95 + target_instrument: Similarity + num_epochs: 1000 + num_steps: 2235 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: prodigy + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +inference: + batch_size: 8 + dim_t: 256 + num_overlap: 8 diff --git a/MDX23C_models/model_2_stem_061321.yaml b/MDX23C_models/model_2_stem_061321.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5412e0e16ea287b59ef6c84435fd81169d81d53 --- /dev/null +++ b/MDX23C_models/model_2_stem_061321.yaml @@ -0,0 +1,36 @@ +audio: + chunk_size: 260096 + dim_f: 4096 + dim_t: 256 + hop_length: 2048 + n_fft: 12288 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 64 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 + name: epoch_10.ckpt +training: + batch_size: 16 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 5.0e-05 + target_instrument: null + num_epochs: 100 + num_steps: 1000 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 8 diff --git a/MDX23C_models/model_2_stem_full_band_8k.yaml b/MDX23C_models/model_2_stem_full_band_8k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..207aa712b561221c136576db6a6e7d6e35915ba4 --- /dev/null +++ b/MDX23C_models/model_2_stem_full_band_8k.yaml @@ -0,0 +1,43 @@ +audio: + chunk_size: 261120 + dim_f: 4096 + dim_t: 256 + hop_length: 1024 + n_fft: 8192 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 128 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 6 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 1.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + augmentation: 1 + augmentation_type: simple1 + augmentation_mix: true + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 8 \ No newline at end of file diff --git a/MDX23C_models/model_mdx23c_ep_271_l1_freq_72.2383.ckpt b/MDX23C_models/model_mdx23c_ep_271_l1_freq_72.2383.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..d0268f2ad093d5b27ca484f20df986a025794a11 --- /dev/null +++ b/MDX23C_models/model_mdx23c_ep_271_l1_freq_72.2383.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1227f8d97c7436004d03e46091427393abefebcc08ce53ef30082742c4e482f7 +size 437613512 diff --git a/MDXNet_models/Kim_Inst.onnx b/MDXNet_models/Kim_Inst.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a1cfbc17713f1caef9d11696002bb78aae781ff2 --- /dev/null +++ b/MDXNet_models/Kim_Inst.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86b1940e7122fbdd2beadc65507cbff6c352d79012a8a7e60d56db98532af5f7 +size 66759214 diff --git a/MDXNet_models/Kim_Vocal_1.onnx b/MDXNet_models/Kim_Vocal_1.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4d6e31ac4c3e53021d67bf71f7aa576bbff391b5 --- /dev/null +++ b/MDXNet_models/Kim_Vocal_1.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f313140ef8fecc3041881b60ecb993d985a0281a138b2fb634aa8901aebc38cb +size 66759214 diff --git a/MDXNet_models/Kim_Vocal_2.onnx b/MDXNet_models/Kim_Vocal_2.onnx new file mode 100644 index 0000000000000000000000000000000000000000..89d344b3926e4d0d7ee41b78e7f387558ec2acdc --- /dev/null +++ b/MDXNet_models/Kim_Vocal_2.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce74ef3b6a6024ce44211a07be9cf8bc6d87728cc852a68ab34eb8e58cde9c8b +size 66759214 diff --git a/MDXNet_models/Reverb_HQ_By_FoxJoy.onnx b/MDXNet_models/Reverb_HQ_By_FoxJoy.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0af4086572259731e9237a7d5acc2254f2d30cf5 --- /dev/null +++ b/MDXNet_models/Reverb_HQ_By_FoxJoy.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233bb5c6aaa365e568659a0a81211746fa881f8f47f82d9e864fce1f7692db80 +size 66780123 diff --git a/MDXNet_models/UVR-MDX-NET-Inst_1.onnx b/MDXNet_models/UVR-MDX-NET-Inst_1.onnx new file mode 100644 index 0000000000000000000000000000000000000000..86732ec554ea5624474d9d109f6a72b3570fdb34 --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET-Inst_1.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ca53f94b7a0cbb04fcfcc8f3ea5ec1ae22cd8ad044f5e673588859f83976f5e +size 66759214 diff --git a/MDXNet_models/UVR-MDX-NET-Inst_2.onnx b/MDXNet_models/UVR-MDX-NET-Inst_2.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3d7d5ea74c2caf49634f847217378bb626e2e77a --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET-Inst_2.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3a96a664d28b52db9def0a9cae9a16dbb524d8325bfe8f0ac64ac5d231456bc +size 66759214 diff --git a/MDXNet_models/UVR-MDX-NET-Inst_3.onnx b/MDXNet_models/UVR-MDX-NET-Inst_3.onnx new file mode 100644 index 0000000000000000000000000000000000000000..548cacd28b73e2f76dde5f4281f1e0d0dcf537e5 --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET-Inst_3.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b7834e2972158d8c9864e7376e3a7d084079c80a23f38dc31c4b0a4e901a1cb +size 66759214 diff --git a/MDXNet_models/UVR-MDX-NET-Inst_HQ_1.onnx b/MDXNet_models/UVR-MDX-NET-Inst_HQ_1.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3302ace57202067fd1e9c709d76bc8d347658f5d --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET-Inst_HQ_1.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38a045c4ded87e3bf97b609ec5be7910e8a7cecec455f507227ab12b5e29f7f9 +size 66759214 diff --git a/MDXNet_models/UVR-MDX-NET-Inst_HQ_2.onnx b/MDXNet_models/UVR-MDX-NET-Inst_HQ_2.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1340d10933420baf427d06cbf9a51ac7903b5398 --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET-Inst_HQ_2.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:197f8ab296df850f961e68c595f6649acb7d9e621b5600b460f3458967299112 +size 66759214 diff --git a/MDXNet_models/UVR-MDX-NET-Inst_HQ_3.onnx b/MDXNet_models/UVR-MDX-NET-Inst_HQ_3.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f2b6b241d9246f392067e0717c8b252857022e6c --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET-Inst_HQ_3.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:317554b07fe1ea5279a77f2b1520a41ea4b93432560c4ffd08792c30fddf9adc +size 66759214 diff --git a/MDXNet_models/UVR-MDX-NET-Inst_HQ_4.onnx b/MDXNet_models/UVR-MDX-NET-Inst_HQ_4.onnx new file mode 100644 index 0000000000000000000000000000000000000000..45b11285dea638eeecfe4487dfc39775da075b14 --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET-Inst_HQ_4.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c4b5b9b05090fdf238f38ba5046813982d50e2a652e9cb3324ea79720c3c9c8 +size 59074342 diff --git a/MDXNet_models/UVR-MDX-NET-Inst_HQ_5.onnx b/MDXNet_models/UVR-MDX-NET-Inst_HQ_5.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e674c464504ee1cddb1371129a4314eacabbca64 --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET-Inst_HQ_5.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:811cb24095d865763752310848b7ec86aeede0626cb05749ab35350e46897000 +size 59074342 diff --git a/MDXNet_models/UVR-MDX-NET-Inst_Main.onnx b/MDXNet_models/UVR-MDX-NET-Inst_Main.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3c1ab77a4f5874c013ff0269cc926fcc9a7ab466 --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET-Inst_Main.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ab401dfe4a548b87deb64f975294bd56ff946aa32903f53b4b24bb13b2cce1e +size 52786726 diff --git a/MDXNet_models/UVR-MDX-NET-Inst_full_292.onnx b/MDXNet_models/UVR-MDX-NET-Inst_full_292.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9f6c60def7873f03a26391afabb238103fc5c318 --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET-Inst_full_292.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:020f6b65fa219fb7c285e4f3fc2863bf22daf03c4c93e547b6d13d5f2757a7ec +size 66759214 diff --git a/MDXNet_models/UVR-MDX-NET-Voc_FT.onnx b/MDXNet_models/UVR-MDX-NET-Voc_FT.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3048949a6f427d212f310e9a13494306da6e00ec --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET-Voc_FT.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:534b2070fcc7df514b13ef660dc8cbb328679c2374d04354a5c42bb14ecce111 +size 66762490 diff --git a/MDXNet_models/UVR-MDX-NET_Crowd_HQ_1.onnx b/MDXNet_models/UVR-MDX-NET_Crowd_HQ_1.onnx new file mode 100644 index 0000000000000000000000000000000000000000..466c3fa69b05f5b27c19cc11eb23c99909d2a4d0 --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET_Crowd_HQ_1.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:313b7bf869c411fdafe005cf0d5a635c405cb3d0df137178a64091952d75225c +size 59074342 diff --git a/MDXNet_models/UVR-MDX-NET_Inst_187_beta.onnx b/MDXNet_models/UVR-MDX-NET_Inst_187_beta.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b2ff188d7a8ea6ed25bcf1916359853b8fd0cb8f --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET_Inst_187_beta.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c74566f3c3033cacba996328b2ee90bf77ef79ea6c35b7841df183b7906f54a5 +size 66759214 diff --git a/MDXNet_models/UVR-MDX-NET_Inst_82_beta.onnx b/MDXNet_models/UVR-MDX-NET_Inst_82_beta.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f91e15a00d9d805623d70dc7d95b69e2e8329ba2 --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET_Inst_82_beta.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6c268302f09ab53687072618e056a611272a7e2c3fd9b3b59164da152f3588e +size 66759214 diff --git a/MDXNet_models/UVR-MDX-NET_Inst_90_beta.onnx b/MDXNet_models/UVR-MDX-NET_Inst_90_beta.onnx new file mode 100644 index 0000000000000000000000000000000000000000..db0d80d538ff6ecf3a15cabaa0c84500ff0b5ccc --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET_Inst_90_beta.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d902868a46575aea6ee2335736ff3b53faf497a6bdaa1b864e0fd84eb1b42a5 +size 66759214 diff --git a/MDXNet_models/UVR-MDX-NET_Main_340.onnx b/MDXNet_models/UVR-MDX-NET_Main_340.onnx new file mode 100644 index 0000000000000000000000000000000000000000..75ef0024acd46a2900ea78e948325d044ec7580a --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET_Main_340.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78792633b4007755af12ecde20f709b4f0b99563b1d25fe0a501ed2122aff218 +size 66759214 diff --git a/MDXNet_models/UVR-MDX-NET_Main_390.onnx b/MDXNet_models/UVR-MDX-NET_Main_390.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a752391e7cfb5b134ebef388734cd1da9eb5dfb5 --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET_Main_390.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:286c4f0847ca837e2c3f4c4058f756d5f150cbf080506aa6f33a2847aba92e8c +size 66759214 diff --git a/MDXNet_models/UVR-MDX-NET_Main_406.onnx b/MDXNet_models/UVR-MDX-NET_Main_406.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c0f58423f248611332f6a3c33382d0d426c67449 --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET_Main_406.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f158816a44eef1f0ba0f48b813cbfcf460ed1c70a754af3609ade44aaf7d1b23 +size 66759214 diff --git a/MDXNet_models/UVR-MDX-NET_Main_427.onnx b/MDXNet_models/UVR-MDX-NET_Main_427.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f6731f9dcc99c5b1ad0af85dd845280e833acf46 --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET_Main_427.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95275802a27801b97e3c0552b6eaa69f9bb3bd7df53cdf0536cce0a753f702cc +size 66759214 diff --git a/MDXNet_models/UVR-MDX-NET_Main_438.onnx b/MDXNet_models/UVR-MDX-NET_Main_438.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a6a17015453da90871426317c547193196bd6f1e --- /dev/null +++ b/MDXNet_models/UVR-MDX-NET_Main_438.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e1ad93587a163a0987a0168b99a2ad875c0d9bfc3afb596b7c36b09c7f5c26 +size 66759214 diff --git a/MDXNet_models/UVR_MDXNET_1_9703.onnx b/MDXNet_models/UVR_MDXNET_1_9703.onnx new file mode 100644 index 0000000000000000000000000000000000000000..50d00b7e34e7763954283b9fc13f2d903072be03 --- /dev/null +++ b/MDXNet_models/UVR_MDXNET_1_9703.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:229ad3bb96a037e89d8ed86732d6d3675856e6a07c3e3f02896eac01ec7ee4be +size 29704436 diff --git a/MDXNet_models/UVR_MDXNET_2_9682.onnx b/MDXNet_models/UVR_MDXNET_2_9682.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1d4c430b6d6a8a7d8abea9bccac1959e4ea69ab0 --- /dev/null +++ b/MDXNet_models/UVR_MDXNET_2_9682.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1deb7295acd3206bc9582a5d92f1b0a74bf3f41c7c1fb78a0ac0123cde4372db +size 29704436 diff --git a/MDXNet_models/UVR_MDXNET_3_9662.onnx b/MDXNet_models/UVR_MDXNET_3_9662.onnx new file mode 100644 index 0000000000000000000000000000000000000000..140b8e3eb273df75384c691462998774b3928a52 --- /dev/null +++ b/MDXNet_models/UVR_MDXNET_3_9662.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e02220e80d8253f4c2209f8924298b2b686bbdf2868b788ff5500fb9bd94aadc +size 29704436 diff --git a/MDXNet_models/UVR_MDXNET_9482.onnx b/MDXNet_models/UVR_MDXNET_9482.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c40c9257378536385f625a75f7bc74cb35c1eaa5 --- /dev/null +++ b/MDXNet_models/UVR_MDXNET_9482.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4f365207c56deb115bceedff3ad8fe98a751c745f9e370cecec6226b8b47184 +size 29704436 diff --git a/MDXNet_models/UVR_MDXNET_KARA.onnx b/MDXNet_models/UVR_MDXNET_KARA.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4bfb51872cc19150179df558fb7212e5359427ea --- /dev/null +++ b/MDXNet_models/UVR_MDXNET_KARA.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3167c87333a48548413e972a286bf40bf5694001d2853861eb1435953f02d63 +size 29704436 diff --git a/MDXNet_models/UVR_MDXNET_KARA_2.onnx b/MDXNet_models/UVR_MDXNET_KARA_2.onnx new file mode 100644 index 0000000000000000000000000000000000000000..18aeb6a8d05b84fa74fdd0de7ed917f6df273ebd --- /dev/null +++ b/MDXNet_models/UVR_MDXNET_KARA_2.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf32e15105a09c0f7dddd2b67346146334d6f3ecb399ed7638eba2ab07cbf5f4 +size 52786726 diff --git a/MDXNet_models/UVR_MDXNET_Main.onnx b/MDXNet_models/UVR_MDXNET_Main.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d9645fd239fd211619aabeb431d3093a45ed8b85 --- /dev/null +++ b/MDXNet_models/UVR_MDXNET_Main.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8289784cda38543ff431add4070662813311a8cccfc0112ca82f76d9dba2b4ca +size 66759214 diff --git a/MDXNet_models/kuielab_a_bass.onnx b/MDXNet_models/kuielab_a_bass.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0fe816090f69e7198dc780025d2c37b9dc9a272f --- /dev/null +++ b/MDXNet_models/kuielab_a_bass.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c3e77b9963185b1ea6bb46a4b8924137d9370fc1ccdefec7b1b416ef550dcaa +size 29703204 diff --git a/MDXNet_models/kuielab_a_drums.onnx b/MDXNet_models/kuielab_a_drums.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e29380d6f0c92fe7a38a201709342a56931489df --- /dev/null +++ b/MDXNet_models/kuielab_a_drums.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40f586b7091934dd6f5563f0cba8f14bad57ce88440da1098bf388ea716c2901 +size 29703204 diff --git a/MDXNet_models/kuielab_a_other.onnx b/MDXNet_models/kuielab_a_other.onnx new file mode 100644 index 0000000000000000000000000000000000000000..7f2c3285075afc5e9ed9ba2fd6882d8df9921763 --- /dev/null +++ b/MDXNet_models/kuielab_a_other.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b67a1dcb5f232153528c59960b4c7bf8dc736b8114de360af0e719633f53358 +size 29703204 diff --git a/MDXNet_models/kuielab_a_vocals.onnx b/MDXNet_models/kuielab_a_vocals.onnx new file mode 100644 index 0000000000000000000000000000000000000000..8d80e6d5ea023a73a002c15005551a37c7cfa21c --- /dev/null +++ b/MDXNet_models/kuielab_a_vocals.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daba83c2ee1afee9139766ad64c9b6808d6b6f092fff04bed3338be50baac721 +size 29703204 diff --git a/MDXNet_models/kuielab_b_bass.onnx b/MDXNet_models/kuielab_b_bass.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1670d9993ab4080cc702ba397b629caae20ac699 --- /dev/null +++ b/MDXNet_models/kuielab_b_bass.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4b7080fe501d0bece62076c5d4eda4d6590c5207ed78ec84a57bac0740a061d +size 29703204 diff --git a/MDXNet_models/kuielab_b_drums.onnx b/MDXNet_models/kuielab_b_drums.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a00f9c84fd964d45c2cdd0470ff9e4b5a7f4ad27 --- /dev/null +++ b/MDXNet_models/kuielab_b_drums.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6fecee758059b33ed99f6dabba297439b3e7cacfac4b1097bd324aff8052208 +size 21930313 diff --git a/MDXNet_models/kuielab_b_other.onnx b/MDXNet_models/kuielab_b_other.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e17baa7f40b3dbd1299ee7ba7e6db461682e8320 --- /dev/null +++ b/MDXNet_models/kuielab_b_other.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d0b63950ac332333fea2d58f68c92fd3ab0aae071398c2a8beeae1ad15b655 +size 29703204 diff --git a/MDXNet_models/kuielab_b_vocals.onnx b/MDXNet_models/kuielab_b_vocals.onnx new file mode 100644 index 0000000000000000000000000000000000000000..683e555b8a91e3035cc24bee5b47b7347390eb8a --- /dev/null +++ b/MDXNet_models/kuielab_b_vocals.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b7dcb9d878acb0f3e64ff3fd27750faae96577013f6d50f5996875bf4250713 +size 29703204 diff --git a/SCnet_models/SCNet-large_starrytong_fixed.ckpt b/SCnet_models/SCNet-large_starrytong_fixed.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..ddaa50ee05e3ca837df5ec3d540f34443aa66af7 --- /dev/null +++ b/SCnet_models/SCNet-large_starrytong_fixed.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65900dfa07d6b6e5d784c0f143920200a4bd281d6e78a806c549d0b912d5885e +size 168852258 diff --git a/SCnet_models/config_musdb18_scnet.yaml b/SCnet_models/config_musdb18_scnet.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b74fc2f6f31deafc819aa680e5075f8e134d9793 --- /dev/null +++ b/SCnet_models/config_musdb18_scnet.yaml @@ -0,0 +1,83 @@ +audio: + chunk_size: 485100 # 44100 * 11 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + sources: + - drums + - bass + - other + - vocals + audio_channels: 2 + dims: + - 4 + - 32 + - 64 + - 128 + nfft: 4096 + hop_size: 1024 + win_size: 4096 + normalized: True + band_SR: + - 0.175 + - 0.392 + - 0.433 + band_stride: + - 1 + - 4 + - 16 + band_kernel: + - 3 + - 4 + - 16 + conv_depths: + - 3 + - 2 + - 1 + compress: 4 + conv_kernel: 3 + num_dplayer: 6 + expand: 1 + +training: + batch_size: 10 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - Drums + - Bass + - Other + - Vocals + lr: 5.0e-04 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +augmentations: + enable: true # enable or disable all augmentations (to fast disable if needed) + loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) + mixup_probs: + !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + +inference: + batch_size: 8 + dim_t: 256 + num_overlap: 4 + normalize: true diff --git a/SCnet_models/config_musdb18_scnet_large.yaml b/SCnet_models/config_musdb18_scnet_large.yaml new file mode 100644 index 0000000000000000000000000000000000000000..33912dfc5614881dd2b4f56318918b4ff65d58b1 --- /dev/null +++ b/SCnet_models/config_musdb18_scnet_large.yaml @@ -0,0 +1,88 @@ +audio: + chunk_size: 485100 # 44100 * 11 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + sources: + - drums + - bass + - other + - vocals + audio_channels: 2 + dims: + - 4 + - 64 + - 128 + - 256 + nfft: 4096 + hop_size: 1024 + win_size: 4096 + normalized: True + band_SR: + - 0.225 + - 0.372 + - 0.403 + band_stride: + - 1 + - 4 + - 16 + band_kernel: + - 3 + - 4 + - 16 + conv_depths: + - 3 + - 2 + - 1 + compress: 4 + conv_kernel: 3 + num_dplayer: 6 + expand: 1 + +training: + batch_size: 6 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - Drums + - Bass + - Other + - Vocals + # lr: 1.0e-04 + lr: 1.0 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: prodigy + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +augmentations: + enable: true # enable or disable all augmentations (to fast disable if needed) + loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) + mixup_probs: + !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + all: + channel_shuffle: 0.5 # Set 0 or lower to disable + random_inverse: 0.1 # inverse track (better lower probability) + random_polarity: 0.5 # polarity change (multiply waveform to -1) + +inference: + batch_size: 8 + dim_t: 256 + num_overlap: 4 + normalize: false diff --git a/SCnet_models/config_musdb18_scnet_large_starrytong.yaml b/SCnet_models/config_musdb18_scnet_large_starrytong.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7fbb6e4bcb5ddf3946833a137cd61c730bca20a8 --- /dev/null +++ b/SCnet_models/config_musdb18_scnet_large_starrytong.yaml @@ -0,0 +1,88 @@ +audio: + chunk_size: 485100 # 44100 * 11 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + sources: + - drums + - bass + - other + - vocals + audio_channels: 2 + dims: + - 4 + - 64 + - 128 + - 256 + nfft: 4096 + hop_size: 1024 + win_size: 4096 + normalized: True + band_SR: + - 0.225 + - 0.372 + - 0.403 + band_stride: + - 1 + - 4 + - 16 + band_kernel: + - 3 + - 4 + - 16 + conv_depths: + - 3 + - 2 + - 1 + compress: 4 + conv_kernel: 3 + num_dplayer: 6 + expand: 1 + +training: + batch_size: 6 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - Drums + - Bass + - Other + - Vocals + # lr: 1.0e-04 + lr: 1.0 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: prodigy + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + +augmentations: + enable: true # enable or disable all augmentations (to fast disable if needed) + loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) + mixup_probs: + !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + all: + channel_shuffle: 0.5 # Set 0 or lower to disable + random_inverse: 0.1 # inverse track (better lower probability) + random_polarity: 0.5 # polarity change (multiply waveform to -1) + +inference: + batch_size: 8 + dim_t: 256 + num_overlap: 4 + normalize: true diff --git a/SCnet_models/config_musdb18_scnet_xl.yaml b/SCnet_models/config_musdb18_scnet_xl.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a189dc899094da0df5508a1661302fc61f0bce64 --- /dev/null +++ b/SCnet_models/config_musdb18_scnet_xl.yaml @@ -0,0 +1,207 @@ +audio: + chunk_size: 485100 # 44100 * 11 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.000 + +model: + sources: + - drums + - bass + - other + - vocals + audio_channels: 2 + dims: + - 4 + - 64 + - 128 + - 256 + nfft: 4096 + hop_size: 1024 + win_size: 4096 + normalized: True + band_SR: + - 0.230 + - 0.370 + - 0.400 + band_stride: + - 1 + - 4 + - 16 + band_kernel: + - 3 + - 4 + - 16 + conv_depths: + - 3 + - 2 + - 1 + compress: 4 + conv_kernel: 3 + num_dplayer: 8 + expand: 1 + +training: + batch_size: 4 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - Drums + - Bass + - Other + - Vocals + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + # optimizer: prodigy + optimizer: adam + lr: 1.0e-05 + # lr: 1.0 + normalize: false # perform normalization on input of model (use the same for inference!) + other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental + use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true + + +augmentations: + enable: false # enable or disable all augmentations (to fast disable if needed) + loudness: true # randomly change loudness of each stem on the range (loudness_min; loudness_max) + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3) + mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02) + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + + # apply mp3 compression to mixture only (emulate downloading mp3 from internet) + mp3_compression_on_mixture: 0.01 + mp3_compression_on_mixture_bitrate_min: 32 + mp3_compression_on_mixture_bitrate_max: 320 + mp3_compression_on_mixture_backend: "lameenc" + + all: + channel_shuffle: 0.5 # Set 0 or lower to disable + random_inverse: 0.1 # inverse track (better lower probability) + random_polarity: 0.5 # polarity change (multiply waveform to -1) + + mp3_compression: 0.01 + mp3_compression_min_bitrate: 32 + mp3_compression_max_bitrate: 320 + mp3_compression_backend: "lameenc" + + # pedalboard reverb block + pedalboard_reverb: 0.01 + pedalboard_reverb_room_size_min: 0.1 + pedalboard_reverb_room_size_max: 0.9 + pedalboard_reverb_damping_min: 0.1 + pedalboard_reverb_damping_max: 0.9 + pedalboard_reverb_wet_level_min: 0.1 + pedalboard_reverb_wet_level_max: 0.9 + pedalboard_reverb_dry_level_min: 0.1 + pedalboard_reverb_dry_level_max: 0.9 + pedalboard_reverb_width_min: 0.9 + pedalboard_reverb_width_max: 1.0 + + # pedalboard chorus block + pedalboard_chorus: 0.01 + pedalboard_chorus_rate_hz_min: 1.0 + pedalboard_chorus_rate_hz_max: 7.0 + pedalboard_chorus_depth_min: 0.25 + pedalboard_chorus_depth_max: 0.95 + pedalboard_chorus_centre_delay_ms_min: 3 + pedalboard_chorus_centre_delay_ms_max: 10 + pedalboard_chorus_feedback_min: 0.0 + pedalboard_chorus_feedback_max: 0.5 + pedalboard_chorus_mix_min: 0.1 + pedalboard_chorus_mix_max: 0.9 + + # pedalboard phazer block + pedalboard_phazer: 0.01 + pedalboard_phazer_rate_hz_min: 1.0 + pedalboard_phazer_rate_hz_max: 10.0 + pedalboard_phazer_depth_min: 0.25 + pedalboard_phazer_depth_max: 0.95 + pedalboard_phazer_centre_frequency_hz_min: 200 + pedalboard_phazer_centre_frequency_hz_max: 12000 + pedalboard_phazer_feedback_min: 0.0 + pedalboard_phazer_feedback_max: 0.5 + pedalboard_phazer_mix_min: 0.1 + pedalboard_phazer_mix_max: 0.9 + + # pedalboard distortion block + pedalboard_distortion: 0.01 + pedalboard_distortion_drive_db_min: 1.0 + pedalboard_distortion_drive_db_max: 25.0 + + # pedalboard pitch shift block + pedalboard_pitch_shift: 0.01 + pedalboard_pitch_shift_semitones_min: -7 + pedalboard_pitch_shift_semitones_max: 7 + + # pedalboard resample block + pedalboard_resample: 0.01 + pedalboard_resample_target_sample_rate_min: 4000 + pedalboard_resample_target_sample_rate_max: 44100 + + # pedalboard bitcrash block + pedalboard_bitcrash: 0.01 + pedalboard_bitcrash_bit_depth_min: 4 + pedalboard_bitcrash_bit_depth_max: 16 + + # pedalboard mp3 compressor block + pedalboard_mp3_compressor: 0.01 + pedalboard_mp3_compressor_pedalboard_mp3_compressor_min: 0 + pedalboard_mp3_compressor_pedalboard_mp3_compressor_max: 9.999 + + vocals: + pitch_shift: 0.1 + pitch_shift_min_semitones: -5 + pitch_shift_max_semitones: 5 + seven_band_parametric_eq: 0.25 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 + tanh_distortion: 0.1 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.7 + bass: + pitch_shift: 0.1 + pitch_shift_min_semitones: -2 + pitch_shift_max_semitones: 2 + seven_band_parametric_eq: 0.25 + seven_band_parametric_eq_min_gain_db: -3 + seven_band_parametric_eq_max_gain_db: 6 + tanh_distortion: 0.2 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.5 + drums: + pitch_shift: 0.33 + pitch_shift_min_semitones: -5 + pitch_shift_max_semitones: 5 + seven_band_parametric_eq: 0.25 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 + tanh_distortion: 0.33 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.6 + other: + pitch_shift: 0.1 + pitch_shift_min_semitones: -4 + pitch_shift_max_semitones: 4 + gaussian_noise: 0.1 + gaussian_noise_min_amplitude: 0.001 + gaussian_noise_max_amplitude: 0.015 + time_stretch: 0.01 + time_stretch_min_rate: 0.8 + time_stretch_max_rate: 1.25 + +inference: + batch_size: 4 + dim_t: 256 + num_overlap: 4 + normalize: false diff --git a/SCnet_models/model_scnet_ep_54_sdr_9.8051.ckpt b/SCnet_models/model_scnet_ep_54_sdr_9.8051.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..e876a2ed0ea8262bd2bae422e8ab02b13558cba6 --- /dev/null +++ b/SCnet_models/model_scnet_ep_54_sdr_9.8051.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd889cc1d97619ccac59280ecc859c190cd3cc1b1557fbe3a19b1610bb67e410 +size 216189106 diff --git a/SCnet_models/model_scnet_sdr_9.3244.ckpt b/SCnet_models/model_scnet_sdr_9.3244.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..b72d6b1472f10f043fa266b4f3d6507ba55e4cc5 --- /dev/null +++ b/SCnet_models/model_scnet_sdr_9.3244.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe550315a76e8f4aed8475d7d5952137504a3b6c63b3adcef2443bfe73aac540 +size 168868194 diff --git a/SCnet_models/scnet_checkpoint_musdb18.ckpt b/SCnet_models/scnet_checkpoint_musdb18.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..671b56f708c41055e2fd1ad71391254a8f097aac --- /dev/null +++ b/SCnet_models/scnet_checkpoint_musdb18.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bc0d1abb20bfdf966dcd07637bafd03e4bc13653d09ef18bc9b3e342eafe2aa +size 42434986