diff --git a/bandit/bandit_plus_config.yaml b/bandit/bandit_plus_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1c248dafcf20591ae8fe798badade36d7cc02ad9 --- /dev/null +++ b/bandit/bandit_plus_config.yaml @@ -0,0 +1,70 @@ +name: MultiMaskMultiSourceBandSplitRNN +audio: + chunk_size: 264600 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + in_channel: 1 + stems: + - speech + - music + - effects + band_specs: musical + n_bands: 64 + fs: 44100 + require_no_overlap: false + require_no_gap: true + normalize_channel_independently: false + treat_channel_as_feature: true + n_sqm_modules: 8 + emb_dim: 128 + rnn_dim: 256 + bidirectional: true + rnn_type: GRU + mlp_dim: 512 + hidden_activation: Tanh + hidden_activation_kwargs: null + complex_mask: true + n_fft: 2048 + win_length: 2048 + hop_length: 512 + window_fn: hann_window + wkwargs: null + power: null + center: true + normalized: true + pad_mode: constant + onesided: true +training: + batch_size: 4 + gradient_accumulation_steps: 4 + grad_clip: 0 + instruments: + - speech + - music + - effects + lr: 9.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + augmentation: false + augmentation_type: simple1 + use_mp3_compress: false + augmentation_mix: true + augmentation_loudness: true + augmentation_loudness_type: 1 + augmentation_loudness_min: 0.5 + augmentation_loudness_max: 1.5 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: true + use_amp: true +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 diff --git a/bandit_v2/bandit_v2_multi_config.yaml b/bandit_v2/bandit_v2_multi_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ccc74aa663abf6b09121b41fcbbd8a5d2b4f7fb4 --- /dev/null +++ b/bandit_v2/bandit_v2_multi_config.yaml @@ -0,0 +1,75 @@ +cls: Bandit +audio: + chunk_size: 384000 + num_channels: 2 + sample_rate: 48000 + min_mean_abs: 0.0 +kwargs: + in_channels: 1 + stems: + - speech + - music + - sfx + band_type: musical + n_bands: 64 + normalize_channel_independently: false + treat_channel_as_feature: true + n_sqm_modules: 8 + emb_dim: 128 + rnn_dim: 256 + bidirectional: true + rnn_type: GRU + mlp_dim: 512 + hidden_activation: Tanh + hidden_activation_kwargs: null + complex_mask: true + use_freq_weights: true + n_fft: 2048 + win_length: 2048 + hop_length: 512 + window_fn: hann_window + wkwargs: null + power: null + center: true + normalized: true + pad_mode: reflect + onesided: true +training: + batch_size: 4 + gradient_accumulation_steps: 4 + grad_clip: 0 + instruments: + - speech + - music + - sfx + lr: 9.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: true + use_amp: true +augmentations: + enable: true + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true + mixup_probs: !!python/tuple + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + all: + channel_shuffle: 0.5 + random_inverse: 0.1 + random_polarity: 0.5 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 diff --git a/htdemucs/demucs3_mmi.ckpt b/htdemucs/demucs3_mmi.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..7401e71bfbf0266d8004647514c8019101a4590b --- /dev/null +++ b/htdemucs/demucs3_mmi.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1941ce654b11df4132b9f4eae408556b4c83fad6fe26b4bc0dbcb36b975befb3 +size 167407275 diff --git a/htdemucs/demucs4_4stem.ckpt b/htdemucs/demucs4_4stem.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..94c5ce7eda92bb105307197a7be1d9635b417a1f --- /dev/null +++ b/htdemucs/demucs4_4stem.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8726e21a993978c7ba086d3872e7608d7d5bfca646ca4aca459ffda844faa8b4 +size 84141911 diff --git a/htdemucs/demucs4_6stem.ckpt b/htdemucs/demucs4_6stem.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..43e9de1c47734b05e2e2d9e8f2a70a548970326a --- /dev/null +++ b/htdemucs/demucs4_6stem.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34c22ccb381c6f9fdbf324f04e1e2fe21aaaf293f5ded163a162697ff9a02ddd +size 54996327 diff --git a/htdemucs/demucs4_choirsep.ckpt b/htdemucs/demucs4_choirsep.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..926612c4c022ea3659e39672fdb6bd74a0cea5b5 --- /dev/null +++ b/htdemucs/demucs4_choirsep.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd18ca3a8345c02b9656a0852193ddcbb1796188884e1b7119ce5758250bbcd +size 109841648 diff --git a/htdemucs/demucs4_ft_bass.ckpt b/htdemucs/demucs4_ft_bass.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..b2516f0ba249a05c119eba7e3c52addc11785d41 --- /dev/null +++ b/htdemucs/demucs4_ft_bass.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57c48e6b0e38af4f7118d7bd08c49f0a0c0edf7d09143bdd902ea0d237303e6 +size 84141271 diff --git a/htdemucs/demucs4_ft_drums.ckpt b/htdemucs/demucs4_ft_drums.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..1d2f2cebac207dbb924b9011860f22621184d741 --- /dev/null +++ b/htdemucs/demucs4_ft_drums.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3fe64ae8ef66ac9a4857222ce48efbdc5eb3ad375cb79dd13debee5aaa4066 +size 84141271 diff --git a/htdemucs/demucs4_ft_other.ckpt b/htdemucs/demucs4_ft_other.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..dc5aeb5ce23edf05faa98f84e7f8df6e3d121f1f --- /dev/null +++ b/htdemucs/demucs4_ft_other.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef3bcb9c8b40d14ae5d51b6db2587339cc12c6b77c0be151ce6d69002e087bf2 +size 84141271 diff --git a/htdemucs/demucs4_ft_vocals.ckpt b/htdemucs/demucs4_ft_vocals.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..87e7befdc8b254d6cd7bfc8c2739e405783b7c3b --- /dev/null +++ b/htdemucs/demucs4_ft_vocals.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3cf25b222c4eed7cd49dd8b2c9597d50c18bd154090f7b919cfa5f93cf22c49 +size 84141271 diff --git a/htdemucs/demucs4_mvsep_vocals.ckpt b/htdemucs/demucs4_mvsep_vocals.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..b4055a4b1a95d052eb780e85ebca885ce82c4de4 --- /dev/null +++ b/htdemucs/demucs4_mvsep_vocals.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ea6e9495685045e6b4e66174131be5d19808bb0d6d1a1ba717d238f9380e8d0 +size 168124336 diff --git a/htdemucs/demucs_mid_side_wesleyr36.ckpt b/htdemucs/demucs_mid_side_wesleyr36.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..752ecd142025ffe7d1f8fc5a21f8b46dcded0005 --- /dev/null +++ b/htdemucs/demucs_mid_side_wesleyr36.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9970dca36a15c0d0cf3338d24592aa35469697dc3be4b07e9056f5d54b82185 +size 168122809 diff --git a/mdx23c/mdx23c_4stem_zfturbo_config.yaml b/mdx23c/mdx23c_4stem_zfturbo_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4cceb835bd7a84b13d7524e7d98bf49f71b982db --- /dev/null +++ b/mdx23c/mdx23c_4stem_zfturbo_config.yaml @@ -0,0 +1,54 @@ +audio: + chunk_size: 261120 + dim_f: 4096 + dim_t: 256 + hop_length: 1024 + n_fft: 8192 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 128 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 6 + gradient_accumulation_steps: 6 + grad_clip: 0 + instruments: + - vocals + - bass + - drums + - other + lr: 5.0e-05 + patience: 3 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + augmentation: false + augmentation_type: simple1 + use_mp3_compress: false + augmentation_mix: true + augmentation_loudness: true + augmentation_loudness_type: 2 + augmentation_loudness_min: 0.8 + augmentation_loudness_max: 1.2 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false + use_amp: true +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 diff --git a/mdx23c/mdx23c_d1581_config.yaml b/mdx23c/mdx23c_d1581_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..259827c82f6303559f6bf8079e926e66de9d4f03 --- /dev/null +++ b/mdx23c/mdx23c_d1581_config.yaml @@ -0,0 +1,37 @@ +audio: + chunk_size: 260096 + dim_f: 4096 + dim_t: 256 + hop_length: 2048 + n_fft: 12288 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 64 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 + name: epoch_10.ckpt +training: + batch_size: 16 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 5.0e-05 + target_instrument: null + num_epochs: 100 + num_steps: 1000 + use_amp: true +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 diff --git a/mdx23c/mdx23c_dereverb_aufr33_jarredou_config.yaml b/mdx23c/mdx23c_dereverb_aufr33_jarredou_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ddf60a75a7c603ec71713f2ef89db0ccbb685b88 --- /dev/null +++ b/mdx23c/mdx23c_dereverb_aufr33_jarredou_config.yaml @@ -0,0 +1,114 @@ +audio: + chunk_size: 261120 + dim_f: 4096 + dim_t: 256 + hop_length: 1024 + n_fft: 8192 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 128 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 2 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - dry + - other + lr: 1.0e-06 + patience: 4 + reduce_factor: 0.93 + target_instrument: null + num_epochs: 40 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adamw + read_metadata_procs: 8 + other_fix: false + use_amp: true +augmentations: + enable: false + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true + mixup_probs: !!python/tuple + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + all: + channel_shuffle: 0.5 + random_inverse: 0.05 + random_polarity: 0.5 + pedalboard_chorus: 0.001 + pedalboard_chorus_rate_hz_min: 1.0 + pedalboard_chorus_rate_hz_max: 7.0 + pedalboard_chorus_depth_min: 0.25 + pedalboard_chorus_depth_max: 0.95 + pedalboard_chorus_centre_delay_ms_min: 3 + pedalboard_chorus_centre_delay_ms_max: 10 + pedalboard_chorus_feedback_min: 0.0 + pedalboard_chorus_feedback_max: 0.01 + pedalboard_chorus_mix_min: 0.1 + pedalboard_chorus_mix_max: 0.9 + pedalboard_phazer: 0.001 + pedalboard_phazer_rate_hz_min: 1.0 + pedalboard_phazer_rate_hz_max: 10.0 + pedalboard_phazer_depth_min: 0.25 + pedalboard_phazer_depth_max: 0.95 + pedalboard_phazer_centre_frequency_hz_min: 200 + pedalboard_phazer_centre_frequency_hz_max: 12000 + pedalboard_phazer_feedback_min: 0.0 + pedalboard_phazer_feedback_max: 0.5 + pedalboard_phazer_mix_min: 0.1 + pedalboard_phazer_mix_max: 0.9 + pedalboard_pitch_shift: 0.01 + pedalboard_pitch_shift_semitones_min: -7 + pedalboard_pitch_shift_semitones_max: 7 + pedalboard_resample: 0.001 + pedalboard_resample_target_sample_rate_min: 4000 + pedalboard_resample_target_sample_rate_max: 44100 + mp3_compression_min_bitrate: 32 + mp3_compression_max_bitrate: 320 + mp3_compression_backend: lameenc + dry: + pedalboard_distortion: 0.001 + pedalboard_distortion_drive_db_min: 1.0 + pedalboard_distortion_drive_db_max: 25.0 + tanh_distortion: 0.05 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.7 + pedalboard_bitcrash: 0.005 + pedalboard_bitcrash_bit_depth_min: 4 + pedalboard_bitcrash_bit_depth_max: 16 + seven_band_parametric_eq: 0.24 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 + gaussian_noise: 0.005 + gaussian_noise_min_amplitude: 0.001 + gaussian_noise_max_amplitude: 0.01 + time_stretch: 0.01 + time_stretch_min_rate: 0.8 + time_stretch_max_rate: 1.25 + other: + seven_band_parametric_eq: 0.24 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 diff --git a/mdx23c/mdx23c_drumsep_6stem_aufr33_jarredou_config.yaml b/mdx23c/mdx23c_drumsep_6stem_aufr33_jarredou_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67263559f6e7f6838ae66c9b516ffafa5c05ee8c --- /dev/null +++ b/mdx23c/mdx23c_drumsep_6stem_aufr33_jarredou_config.yaml @@ -0,0 +1,80 @@ +audio: + chunk_size: 130560 + dim_f: 1024 + dim_t: 256 + hop_length: 512 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 128 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 12 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - kick + - snare + - toms + - hh + - ride + - crash + lr: 9.0e-05 + patience: 30 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1268 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false + use_amp: true +augmentations: + enable: true + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true + mixup_probs: !!python/tuple + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + mp3_compression_on_mixture: 0.0 + mp3_compression_on_mixture_bitrate_min: 32 + mp3_compression_on_mixture_bitrate_max: 320 + mp3_compression_on_mixture_backend: lameenc + all: + channel_shuffle: 0.5 + random_inverse: 0.01 + random_polarity: 0.5 + mp3_compression: 0.0 + mp3_compression_min_bitrate: 32 + mp3_compression_max_bitrate: 320 + mp3_compression_backend: lameenc + pitch_shift: 0.1 + pitch_shift_min_semitones: -3 + pitch_shift_max_semitones: 3 + seven_band_parametric_eq: 0.5 + seven_band_parametric_eq_min_gain_db: -6 + seven_band_parametric_eq_max_gain_db: 6 + tanh_distortion: 0.2 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.5 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 diff --git a/mdx23c/mdx23c_instvoc_hq1_config.yaml b/mdx23c/mdx23c_instvoc_hq1_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..320df3d422b48db1d3145fcd1cefd2c95b7cd3d6 --- /dev/null +++ b/mdx23c/mdx23c_instvoc_hq1_config.yaml @@ -0,0 +1,44 @@ +audio: + chunk_size: 261120 + dim_f: 4096 + dim_t: 256 + hop_length: 1024 + n_fft: 8192 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 128 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 6 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 1.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + augmentation: 1 + augmentation_type: simple1 + augmentation_mix: true + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + use_amp: true +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 diff --git a/mdx23c/mdx23c_instvoc_hq2_config.yaml b/mdx23c/mdx23c_instvoc_hq2_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..320df3d422b48db1d3145fcd1cefd2c95b7cd3d6 --- /dev/null +++ b/mdx23c/mdx23c_instvoc_hq2_config.yaml @@ -0,0 +1,44 @@ +audio: + chunk_size: 261120 + dim_f: 4096 + dim_t: 256 + hop_length: 1024 + n_fft: 8192 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 128 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 6 + grad_clip: 0 + instruments: + - Vocals + - Instrumental + lr: 1.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + augmentation: 1 + augmentation_type: simple1 + augmentation_mix: true + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + use_amp: true +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 diff --git a/mdx23c/mdx23c_instvoc_zfturbo_config.yaml b/mdx23c/mdx23c_instvoc_zfturbo_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..330ffa7067aa440f26520cf3bad453c4f2ba7c99 --- /dev/null +++ b/mdx23c/mdx23c_instvoc_zfturbo_config.yaml @@ -0,0 +1,88 @@ +audio: + chunk_size: 261120 + dim_f: 4096 + dim_t: 256 + hop_length: 1024 + n_fft: 8192 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 128 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 6 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - vocals + - other + lr: 9.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + read_metadata_procs: 8 + other_fix: true + use_amp: true +augmentations: + enable: true + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true + mixup_probs: !!python/tuple + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + mp3_compression_on_mixture: 0.01 + mp3_compression_on_mixture_bitrate_min: 32 + mp3_compression_on_mixture_bitrate_max: 320 + mp3_compression_on_mixture_backend: lameenc + all: + channel_shuffle: 0.5 + random_inverse: 0.1 + random_polarity: 0.5 + mp3_compression: 0.01 + mp3_compression_min_bitrate: 32 + mp3_compression_max_bitrate: 320 + mp3_compression_backend: lameenc + vocals: + pitch_shift: 0.1 + pitch_shift_min_semitones: -5 + pitch_shift_max_semitones: 5 + seven_band_parametric_eq: 0.25 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 + tanh_distortion: 0.1 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.7 + other: + pitch_shift: 0.1 + pitch_shift_min_semitones: -4 + pitch_shift_max_semitones: 4 + gaussian_noise: 0.1 + gaussian_noise_min_amplitude: 0.001 + gaussian_noise_max_amplitude: 0.015 + time_stretch: 0.01 + time_stretch_min_rate: 0.8 + time_stretch_max_rate: 1.25 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 diff --git a/mdx23c/mdx23c_mid_side_wesleyr36_config.yaml b/mdx23c/mdx23c_mid_side_wesleyr36_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e95d1ed2ca02c6e5263cbb1bf8662e3547c2301 --- /dev/null +++ b/mdx23c/mdx23c_mid_side_wesleyr36_config.yaml @@ -0,0 +1,82 @@ +audio: + chunk_size: 130560 + dim_f: 1024 + dim_t: 256 + hop_length: 512 + n_fft: 2048 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 128 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 2 + gradient_accumulation_steps: 3 + grad_clip: 0 + instruments: + - similarity + - difference + lr: 1.0 + patience: 15 + reduce_factor: 0.95 + target_instrument: similarity + num_epochs: 1000 + num_steps: 2235 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: prodigy + other_fix: false + use_amp: true +augmentations: + enable: true + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + all: + channel_shuffle: 0.5 + random_inverse: 0.01 + random_polarity: 0.5 + mp3_compression: 0.0 + mp3_compression_min_bitrate: 32 + mp3_compression_max_bitrate: 320 + mp3_compression_backend: lameenc + pitch_shift: 0.1 + pitch_shift_min_semitones: -3 + pitch_shift_max_semitones: 3 + seven_band_parametric_eq: 0.5 + seven_band_parametric_eq_min_gain_db: -6 + seven_band_parametric_eq_max_gain_db: 6 + tanh_distortion: 0.2 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.5 +similarity: + gaussian_noise: 0.1 + gaussian_noise_min_amplitude: 0.001 + gaussian_noise_max_amplitude: 0.015 +difference: + pedalboard_reverb: 0.01 + pedalboard_reverb_room_size_min: 0.1 + pedalboard_reverb_room_size_max: 0.9 + pedalboard_reverb_damping_min: 0.1 + pedalboard_reverb_damping_max: 0.9 + pedalboard_reverb_wet_level_min: 0.1 + pedalboard_reverb_wet_level_max: 0.5 + pedalboard_reverb_dry_level_min: 0.5 + pedalboard_reverb_dry_level_max: 0.9 + pedalboard_reverb_width_min: 0.3 + pedalboard_reverb_width_max: 1.0 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 diff --git a/mdx23c/mdx23c_orch_verosment_config.yaml b/mdx23c/mdx23c_orch_verosment_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d931f17fb220d9d63bab3c949624eff05bc7353 --- /dev/null +++ b/mdx23c/mdx23c_orch_verosment_config.yaml @@ -0,0 +1,93 @@ +audio: + chunk_size: 261120 + dim_f: 4096 + dim_t: 256 + hop_length: 1024 + n_fft: 8192 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.001 +model: + act: gelu + bottleneck_factor: 4 + growth: 128 + norm: InstanceNorm + num_blocks_per_scale: 2 + num_channels: 128 + num_scales: 5 + num_subbands: 4 + scale: + - 2 + - 2 +training: + batch_size: 1 + gradient_accumulation_steps: 3 + grad_clip: 0 + instruments: + - inst + - orch + lr: 9.0e-05 + patience: 6 + reduce_factor: 0.95 + target_instrument: orch + num_epochs: 1000 + num_steps: 750 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: true + use_amp: true +augmentations: + enable: true + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true + mixup_probs: + - 0.4 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + mp3_compression_on_mixture: 0.01 + mp3_compression_on_mixture_bitrate_min: 32 + mp3_compression_on_mixture_bitrate_max: 320 + mp3_compression_on_mixture_backend: lameenc + all: + channel_shuffle: 0.5 + random_inverse: 0.1 + random_polarity: 0.5 + mp3_compression: 0.01 + mp3_compression_min_bitrate: 32 + mp3_compression_max_bitrate: 320 + mp3_compression_backend: lameenc + orch: + pitch_shift: 0.25 + pitch_shift_min_semitones: -5 + pitch_shift_max_semitones: 5 + seven_band_parametric_eq: 1 + seven_band_parametric_eq_min_gain_db: -6 + seven_band_parametric_eq_max_gain_db: 6 + tanh_distortion: 0.5 + tanh_distortion_min: 0.05 + tanh_distortion_max: 0.6 + inst: + pitch_shift: 0.25 + pitch_shift_min_semitones: -5 + pitch_shift_max_semitones: 5 + gaussian_noise: 0.1 + gaussian_noise_min_amplitude: 0.001 + gaussian_noise_max_amplitude: 0.015 + time_stretch: 0.01 + time_stretch_min_rate: 0.8 + time_stretch_max_rate: 1.25 + seven_band_parametric_eq: 1 + seven_band_parametric_eq_min_gain_db: -6 + seven_band_parametric_eq_max_gain_db: 6 + tanh_distortion: 0.5 + tanh_distortion_min: 0.05 + tanh_distortion_max: 0.6 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 diff --git a/mdxnet/mdx_inst_hq1.onnx b/mdxnet/mdx_inst_hq1.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3302ace57202067fd1e9c709d76bc8d347658f5d --- /dev/null +++ b/mdxnet/mdx_inst_hq1.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38a045c4ded87e3bf97b609ec5be7910e8a7cecec455f507227ab12b5e29f7f9 +size 66759214 diff --git a/mdxnet/mdx_inst_hq5.onnx b/mdxnet/mdx_inst_hq5.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e674c464504ee1cddb1371129a4314eacabbca64 --- /dev/null +++ b/mdxnet/mdx_inst_hq5.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:811cb24095d865763752310848b7ec86aeede0626cb05749ab35350e46897000 +size 59074342 diff --git a/mdxnet/mdx_kim_inst.onnx b/mdxnet/mdx_kim_inst.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a1cfbc17713f1caef9d11696002bb78aae781ff2 --- /dev/null +++ b/mdxnet/mdx_kim_inst.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86b1940e7122fbdd2beadc65507cbff6c352d79012a8a7e60d56db98532af5f7 +size 66759214 diff --git a/mdxnet/mdx_kuielab_a_drums.onnx b/mdxnet/mdx_kuielab_a_drums.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e29380d6f0c92fe7a38a201709342a56931489df --- /dev/null +++ b/mdxnet/mdx_kuielab_a_drums.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40f586b7091934dd6f5563f0cba8f14bad57ce88440da1098bf388ea716c2901 +size 29703204 diff --git a/mdxnet/mdx_kuielab_a_other.onnx b/mdxnet/mdx_kuielab_a_other.onnx new file mode 100644 index 0000000000000000000000000000000000000000..7f2c3285075afc5e9ed9ba2fd6882d8df9921763 --- /dev/null +++ b/mdxnet/mdx_kuielab_a_other.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b67a1dcb5f232153528c59960b4c7bf8dc736b8114de360af0e719633f53358 +size 29703204 diff --git a/mdxnet/mdx_kuielab_a_vocals.onnx b/mdxnet/mdx_kuielab_a_vocals.onnx new file mode 100644 index 0000000000000000000000000000000000000000..8d80e6d5ea023a73a002c15005551a37c7cfa21c --- /dev/null +++ b/mdxnet/mdx_kuielab_a_vocals.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daba83c2ee1afee9139766ad64c9b6808d6b6f092fff04bed3338be50baac721 +size 29703204 diff --git a/mdxnet/mdx_main_390.onnx b/mdxnet/mdx_main_390.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a752391e7cfb5b134ebef388734cd1da9eb5dfb5 --- /dev/null +++ b/mdxnet/mdx_main_390.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:286c4f0847ca837e2c3f4c4058f756d5f150cbf080506aa6f33a2847aba92e8c +size 66759214 diff --git a/mdxnet/mdx_main_406.onnx b/mdxnet/mdx_main_406.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c0f58423f248611332f6a3c33382d0d426c67449 --- /dev/null +++ b/mdxnet/mdx_main_406.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f158816a44eef1f0ba0f48b813cbfcf460ed1c70a754af3609ade44aaf7d1b23 +size 66759214 diff --git a/scnet/scnet_4stem_zfturbo_config.yaml b/scnet/scnet_4stem_zfturbo_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ba2d1674b9b5f8cf8e5545b329afbf727890c96 --- /dev/null +++ b/scnet/scnet_4stem_zfturbo_config.yaml @@ -0,0 +1,82 @@ +audio: + chunk_size: 485100 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.0 +model: + sources: + - drums + - bass + - other + - vocals + audio_channels: 2 + dims: + - 4 + - 64 + - 128 + - 256 + nfft: 4096 + hop_size: 1024 + win_size: 4096 + normalized: true + band_SR: + - 0.225 + - 0.372 + - 0.403 + band_stride: + - 1 + - 4 + - 16 + band_kernel: + - 3 + - 4 + - 16 + conv_depths: + - 3 + - 2 + - 1 + compress: 4 + conv_kernel: 3 + num_dplayer: 6 + expand: 1 +training: + batch_size: 6 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - drums + - bass + - other + - vocals + lr: 1.0 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: prodigy + other_fix: false + use_amp: true +augmentations: + enable: true + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true + mixup_probs: !!python/tuple + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + all: + channel_shuffle: 0.5 + random_inverse: 0.1 + random_polarity: 0.5 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 + normalize: true diff --git a/scnet/scnet_choirsep_exp_config.yaml b/scnet/scnet_choirsep_exp_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..33e0f7604acb7c17dc55028b5dd2b3e7e796245c --- /dev/null +++ b/scnet/scnet_choirsep_exp_config.yaml @@ -0,0 +1,101 @@ +audio: + chunk_size: 131072 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.0 +model: + sources: + - alto + - bass + - soprano + - tenor + audio_channels: 2 + dims: + - 4 + - 32 + - 64 + - 128 + nfft: 4096 + hop_size: 1024 + win_size: 4096 + normalized: true + band_SR: + - 0.175 + - 0.392 + - 0.433 + band_stride: + - 1 + - 4 + - 16 + band_kernel: + - 3 + - 4 + - 16 + conv_depths: + - 3 + - 2 + - 1 + compress: 4 + conv_kernel: 3 + num_dplayer: 6 + expand: 1 +training: + batch_size: 9 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - alto + - bass + - soprano + - tenor + lr: 0.0005 + patience: 6 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adamw8bit + other_fix: false + use_amp: true +loss_multistft: + fft_sizes: + - 1024 + - 2048 + - 4096 + hop_sizes: + - 512 + - 1024 + - 2048 + win_lengths: + - 1024 + - 2048 + - 4096 + window: hann_window + scale: mel + n_bins: 128 + sample_rate: 44100 + perceptual_weighting: true + w_sc: 1.0 + w_log_mag: 1.0 + w_lin_mag: 0.0 + w_phs: 0.0 + mag_distance: L1 +augmentations: + enable: false + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + mixup: false + mixup_probs: !!python/tuple + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 + normalize: false diff --git a/scnet/scnet_huge_4stem_aname_config.yaml b/scnet/scnet_huge_4stem_aname_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f7a351499f425196ce3e723a5f89f39d7104e83d --- /dev/null +++ b/scnet/scnet_huge_4stem_aname_config.yaml @@ -0,0 +1,67 @@ +audio: + chunk_size: 661500 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.0 +model: + sources: + - drums + - bass + - other + - vocals + audio_channels: 2 + dims: + - 4 + - 64 + - 128 + - 256 + nfft: 4096 + hop_size: 1024 + win_size: 4096 + normalized: true + band_SR: + - 0.23 + - 0.37 + - 0.4 + band_stride: + - 1 + - 4 + - 4 + band_kernel: + - 3 + - 4 + - 4 + conv_depths: + - 3 + - 3 + - 3 + compress: 4 + conv_kernel: 3 + num_dplayer: 10 + expand: 1 +training: + batch_size: 1 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - drums + - bass + - other + - vocals + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + lr: 1.0 + normalize: false + other_fix: false + use_amp: true +inference: + batch_size: 1 + dim_t: 1101 + num_overlap: 2 diff --git a/scnet/scnet_jazz_4stem_jorisvaneyghen_config.yaml b/scnet/scnet_jazz_4stem_jorisvaneyghen_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f7407083f604bb4e653365382c4f094c2c90d0e8 --- /dev/null +++ b/scnet/scnet_jazz_4stem_jorisvaneyghen_config.yaml @@ -0,0 +1,82 @@ +audio: + chunk_size: 485100 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.0 +model: + sources: + - drums + - bass + - piano + - other + audio_channels: 2 + dims: + - 4 + - 64 + - 128 + - 256 + nfft: 4096 + hop_size: 1024 + win_size: 4096 + normalized: true + band_SR: + - 0.225 + - 0.372 + - 0.403 + band_stride: + - 1 + - 4 + - 16 + band_kernel: + - 3 + - 4 + - 16 + conv_depths: + - 3 + - 2 + - 1 + compress: 4 + conv_kernel: 3 + num_dplayer: 6 + expand: 1 +training: + batch_size: 1 + gradient_accumulation_steps: 3 + grad_clip: 0 + instruments: + - drums + - bass + - piano + - other + lr: 5.0e-05 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 500 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false + use_amp: true +augmentations: + enable: true + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true + mixup_probs: !!python/tuple + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + all: + channel_shuffle: 0.5 + random_inverse: 0.1 + random_polarity: 0.5 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 + normalize: false diff --git a/scnet/scnet_xl_4stem_starrytong_config.yaml b/scnet/scnet_xl_4stem_starrytong_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3dd6e7417f5207cc4d414b313e8189826e18938d --- /dev/null +++ b/scnet/scnet_xl_4stem_starrytong_config.yaml @@ -0,0 +1,179 @@ +audio: + chunk_size: 485100 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.0 +model: + sources: + - drums + - bass + - other + - vocals + audio_channels: 2 + dims: + - 4 + - 64 + - 128 + - 256 + nfft: 4096 + hop_size: 1024 + win_size: 4096 + normalized: true + band_SR: + - 0.23 + - 0.37 + - 0.4 + band_stride: + - 1 + - 4 + - 16 + band_kernel: + - 3 + - 4 + - 16 + conv_depths: + - 3 + - 2 + - 1 + compress: 4 + conv_kernel: 3 + num_dplayer: 8 + expand: 1 +training: + batch_size: 4 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - drums + - bass + - other + - vocals + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + lr: 1.0e-05 + normalize: false + other_fix: false + use_amp: true +augmentations: + enable: false + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true + mixup_probs: !!python/tuple + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + mp3_compression_on_mixture: 0.01 + mp3_compression_on_mixture_bitrate_min: 32 + mp3_compression_on_mixture_bitrate_max: 320 + mp3_compression_on_mixture_backend: lameenc + all: + channel_shuffle: 0.5 + random_inverse: 0.1 + random_polarity: 0.5 + mp3_compression: 0.01 + mp3_compression_min_bitrate: 32 + mp3_compression_max_bitrate: 320 + mp3_compression_backend: lameenc + pedalboard_reverb: 0.01 + pedalboard_reverb_room_size_min: 0.1 + pedalboard_reverb_room_size_max: 0.9 + pedalboard_reverb_damping_min: 0.1 + pedalboard_reverb_damping_max: 0.9 + pedalboard_reverb_wet_level_min: 0.1 + pedalboard_reverb_wet_level_max: 0.9 + pedalboard_reverb_dry_level_min: 0.1 + pedalboard_reverb_dry_level_max: 0.9 + pedalboard_reverb_width_min: 0.9 + pedalboard_reverb_width_max: 1.0 + pedalboard_chorus: 0.01 + pedalboard_chorus_rate_hz_min: 1.0 + pedalboard_chorus_rate_hz_max: 7.0 + pedalboard_chorus_depth_min: 0.25 + pedalboard_chorus_depth_max: 0.95 + pedalboard_chorus_centre_delay_ms_min: 3 + pedalboard_chorus_centre_delay_ms_max: 10 + pedalboard_chorus_feedback_min: 0.0 + pedalboard_chorus_feedback_max: 0.5 + pedalboard_chorus_mix_min: 0.1 + pedalboard_chorus_mix_max: 0.9 + pedalboard_phazer: 0.01 + pedalboard_phazer_rate_hz_min: 1.0 + pedalboard_phazer_rate_hz_max: 10.0 + pedalboard_phazer_depth_min: 0.25 + pedalboard_phazer_depth_max: 0.95 + pedalboard_phazer_centre_frequency_hz_min: 200 + pedalboard_phazer_centre_frequency_hz_max: 12000 + pedalboard_phazer_feedback_min: 0.0 + pedalboard_phazer_feedback_max: 0.5 + pedalboard_phazer_mix_min: 0.1 + pedalboard_phazer_mix_max: 0.9 + pedalboard_distortion: 0.01 + pedalboard_distortion_drive_db_min: 1.0 + pedalboard_distortion_drive_db_max: 25.0 + pedalboard_pitch_shift: 0.01 + pedalboard_pitch_shift_semitones_min: -7 + pedalboard_pitch_shift_semitones_max: 7 + pedalboard_resample: 0.01 + pedalboard_resample_target_sample_rate_min: 4000 + pedalboard_resample_target_sample_rate_max: 44100 + pedalboard_bitcrash: 0.01 + pedalboard_bitcrash_bit_depth_min: 4 + pedalboard_bitcrash_bit_depth_max: 16 + pedalboard_mp3_compressor: 0.01 + pedalboard_mp3_compressor_pedalboard_mp3_compressor_min: 0 + pedalboard_mp3_compressor_pedalboard_mp3_compressor_max: 9.999 + vocals: + pitch_shift: 0.1 + pitch_shift_min_semitones: -5 + pitch_shift_max_semitones: 5 + seven_band_parametric_eq: 0.25 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 + tanh_distortion: 0.1 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.7 + bass: + pitch_shift: 0.1 + pitch_shift_min_semitones: -2 + pitch_shift_max_semitones: 2 + seven_band_parametric_eq: 0.25 + seven_band_parametric_eq_min_gain_db: -3 + seven_band_parametric_eq_max_gain_db: 6 + tanh_distortion: 0.2 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.5 + drums: + pitch_shift: 0.33 + pitch_shift_min_semitones: -5 + pitch_shift_max_semitones: 5 + seven_band_parametric_eq: 0.25 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 + tanh_distortion: 0.33 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.6 + other: + pitch_shift: 0.1 + pitch_shift_min_semitones: -4 + pitch_shift_max_semitones: 4 + gaussian_noise: 0.1 + gaussian_noise_min_amplitude: 0.001 + gaussian_noise_max_amplitude: 0.015 + time_stretch: 0.01 + time_stretch_min_rate: 0.8 + time_stretch_max_rate: 1.25 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 + normalize: false diff --git a/scnet/scnet_xl_4stem_zftrubo_config.yaml b/scnet/scnet_xl_4stem_zftrubo_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c12ed6d70edefbebf0964a4f5694eedfeb6acb8a --- /dev/null +++ b/scnet/scnet_xl_4stem_zftrubo_config.yaml @@ -0,0 +1,78 @@ +audio: + chunk_size: 485100 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.0 +model: + sources: + - drums + - bass + - other + - vocals + audio_channels: 2 + dims: + - 4 + - 32 + - 64 + - 128 + nfft: 4096 + hop_size: 1024 + win_size: 4096 + normalized: true + band_SR: + - 0.175 + - 0.392 + - 0.433 + band_stride: + - 1 + - 4 + - 16 + band_kernel: + - 3 + - 4 + - 16 + conv_depths: + - 3 + - 2 + - 1 + compress: 4 + conv_kernel: 3 + num_dplayer: 6 + expand: 1 +training: + batch_size: 10 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - drums + - bass + - other + - vocals + lr: 0.0005 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + other_fix: false + use_amp: true +augmentations: + enable: true + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true + mixup_probs: !!python/tuple + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 + normalize: true diff --git a/scnet/scnet_xl_ihf_4stem_zfturbo_config.yaml b/scnet/scnet_xl_ihf_4stem_zfturbo_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ff219515c5d286db5cbd70ef1d5f0cd87cb4fec --- /dev/null +++ b/scnet/scnet_xl_ihf_4stem_zfturbo_config.yaml @@ -0,0 +1,180 @@ +audio: + chunk_size: 485100 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.0 +model: + sources: + - drums + - bass + - other + - vocals + audio_channels: 2 + dims: + - 4 + - 64 + - 128 + - 256 + nfft: 4096 + hop_size: 1024 + win_size: 4096 + normalized: true + band_SR: + - 0.23 + - 0.37 + - 0.4 + band_stride: + - 1 + - 4 + - 4 + band_kernel: + - 3 + - 4 + - 4 + conv_depths: + - 3 + - 3 + - 3 + compress: 4 + conv_kernel: 3 + num_dplayer: 8 + expand: 1 +training: + batch_size: 3 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - drums + - bass + - other + - vocals + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + lr: 5.0e-05 + normalize: false + other_fix: false + use_amp: true +augmentations: + enable: true + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true + mixup_probs: !!python/tuple + - 0.2 + - 0.02 + - 0.002 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + mp3_compression_on_mixture: 0.01 + mp3_compression_on_mixture_bitrate_min: 32 + mp3_compression_on_mixture_bitrate_max: 320 + mp3_compression_on_mixture_backend: lameenc + all: + channel_shuffle: 0.5 + random_inverse: 0.1 + random_polarity: 0.5 + mp3_compression: 0.01 + mp3_compression_min_bitrate: 32 + mp3_compression_max_bitrate: 320 + mp3_compression_backend: lameenc + pedalboard_reverb: 0.01 + pedalboard_reverb_room_size_min: 0.1 + pedalboard_reverb_room_size_max: 0.9 + pedalboard_reverb_damping_min: 0.1 + pedalboard_reverb_damping_max: 0.9 + pedalboard_reverb_wet_level_min: 0.1 + pedalboard_reverb_wet_level_max: 0.9 + pedalboard_reverb_dry_level_min: 0.1 + pedalboard_reverb_dry_level_max: 0.9 + pedalboard_reverb_width_min: 0.9 + pedalboard_reverb_width_max: 1.0 + pedalboard_chorus: 0.01 + pedalboard_chorus_rate_hz_min: 1.0 + pedalboard_chorus_rate_hz_max: 7.0 + pedalboard_chorus_depth_min: 0.25 + pedalboard_chorus_depth_max: 0.95 + pedalboard_chorus_centre_delay_ms_min: 3 + pedalboard_chorus_centre_delay_ms_max: 10 + pedalboard_chorus_feedback_min: 0.0 + pedalboard_chorus_feedback_max: 0.5 + pedalboard_chorus_mix_min: 0.1 + pedalboard_chorus_mix_max: 0.9 + pedalboard_phazer: 0.01 + pedalboard_phazer_rate_hz_min: 1.0 + pedalboard_phazer_rate_hz_max: 10.0 + pedalboard_phazer_depth_min: 0.25 + pedalboard_phazer_depth_max: 0.95 + pedalboard_phazer_centre_frequency_hz_min: 200 + pedalboard_phazer_centre_frequency_hz_max: 12000 + pedalboard_phazer_feedback_min: 0.0 + pedalboard_phazer_feedback_max: 0.5 + pedalboard_phazer_mix_min: 0.1 + pedalboard_phazer_mix_max: 0.9 + pedalboard_distortion: 0.01 + pedalboard_distortion_drive_db_min: 1.0 + pedalboard_distortion_drive_db_max: 25.0 + pedalboard_pitch_shift: 0.01 + pedalboard_pitch_shift_semitones_min: -7 + pedalboard_pitch_shift_semitones_max: 7 + pedalboard_resample: 0.01 + pedalboard_resample_target_sample_rate_min: 4000 + pedalboard_resample_target_sample_rate_max: 44100 + pedalboard_bitcrash: 0.01 + pedalboard_bitcrash_bit_depth_min: 4 + pedalboard_bitcrash_bit_depth_max: 16 + pedalboard_mp3_compressor: 0.01 + pedalboard_mp3_compressor_pedalboard_mp3_compressor_min: 0 + pedalboard_mp3_compressor_pedalboard_mp3_compressor_max: 9.999 + vocals: + pitch_shift: 0.1 + pitch_shift_min_semitones: -5 + pitch_shift_max_semitones: 5 + seven_band_parametric_eq: 0.25 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 + tanh_distortion: 0.1 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.7 + bass: + pitch_shift: 0.1 + pitch_shift_min_semitones: -2 + pitch_shift_max_semitones: 2 + seven_band_parametric_eq: 0.25 + seven_band_parametric_eq_min_gain_db: -3 + seven_band_parametric_eq_max_gain_db: 6 + tanh_distortion: 0.2 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.5 + drums: + pitch_shift: 0.33 + pitch_shift_min_semitones: -5 + pitch_shift_max_semitones: 5 + seven_band_parametric_eq: 0.25 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 + tanh_distortion: 0.33 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.6 + other: + pitch_shift: 0.1 + pitch_shift_min_semitones: -4 + pitch_shift_max_semitones: 4 + gaussian_noise: 0.1 + gaussian_noise_min_amplitude: 0.001 + gaussian_noise_max_amplitude: 0.015 + time_stretch: 0.01 + time_stretch_min_rate: 0.8 + time_stretch_max_rate: 1.25 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 + normalize: false diff --git a/scnet/scnet_xl_jazz_4stem_jorisvaneyghen_config.yaml b/scnet/scnet_xl_jazz_4stem_jorisvaneyghen_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c76ec1baaea00f3a617a30714976bd6ccf233a7 --- /dev/null +++ b/scnet/scnet_xl_jazz_4stem_jorisvaneyghen_config.yaml @@ -0,0 +1,83 @@ +audio: + chunk_size: 485100 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.0 +model: + sources: + - drums + - bass + - piano + - other + audio_channels: 2 + dims: + - 4 + - 64 + - 128 + - 256 + nfft: 4096 + hop_size: 1024 + win_size: 4096 + normalized: true + band_SR: + - 0.23 + - 0.37 + - 0.4 + band_stride: + - 1 + - 4 + - 16 + band_kernel: + - 3 + - 4 + - 16 + conv_depths: + - 3 + - 2 + - 1 + compress: 4 + conv_kernel: 3 + num_dplayer: 8 + expand: 1 +training: + batch_size: 1 + gradient_accumulation_steps: 4 + grad_clip: 0 + instruments: + - drums + - bass + - piano + - other + lr: 0.0001 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 250 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + normalize: false + other_fix: false + use_amp: true +augmentations: + enable: true + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true + mixup_probs: !!python/tuple + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + all: + stereo_to_mono: 0.1 + channel_shuffle: 0.5 + random_polarity: 0.5 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 + normalize: false diff --git a/scnet_masked/scnet_masked_small_4stem_zftrubo_config.yaml b/scnet_masked/scnet_masked_small_4stem_zftrubo_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..92db121ca593c2f8a86a73c9994623beaef0f7a8 --- /dev/null +++ b/scnet_masked/scnet_masked_small_4stem_zftrubo_config.yaml @@ -0,0 +1,159 @@ +audio: + chunk_size: 485100 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.0 +model: + sources: + - drums + - bass + - other + - vocals + audio_channels: 2 + dims: + - 4 + - 32 + - 64 + - 128 + nfft: 4096 + hop_size: 1024 + win_size: 4096 + normalized: true + band_SR: + - 0.175 + - 0.392 + - 0.433 + band_stride: + - 1 + - 4 + - 16 + band_kernel: + - 3 + - 4 + - 16 + conv_depths: + - 3 + - 2 + - 1 + compress: 4 + conv_kernel: 3 + num_dplayer: 6 + expand: 1 +training: + batch_size: 10 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - drums + - bass + - other + - vocals + lr: 0.0005 + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + normalize: false + other_fix: false + use_amp: true +augmentations: + enable: true + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true + mixup_probs: !!python/tuple + - 0.2 + - 0.02 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + mp3_compression_on_mixture: 0.01 + mp3_compression_on_mixture_bitrate_min: 32 + mp3_compression_on_mixture_bitrate_max: 320 + mp3_compression_on_mixture_backend: lameenc + all: + channel_shuffle: 0.5 + random_inverse: 0.1 + random_polarity: 0.5 + mp3_compression: 0.01 + mp3_compression_min_bitrate: 32 + mp3_compression_max_bitrate: 320 + mp3_compression_backend: lameenc + pedalboard_reverb: 0.01 + pedalboard_reverb_room_size_min: 0.1 + pedalboard_reverb_room_size_max: 0.9 + pedalboard_reverb_damping_min: 0.1 + pedalboard_reverb_damping_max: 0.9 + pedalboard_reverb_wet_level_min: 0.1 + pedalboard_reverb_wet_level_max: 0.9 + pedalboard_reverb_dry_level_min: 0.1 + pedalboard_reverb_dry_level_max: 0.9 + pedalboard_reverb_width_min: 0.9 + pedalboard_reverb_width_max: 1.0 + pedalboard_chorus: 0.01 + pedalboard_chorus_rate_hz_min: 1.0 + pedalboard_chorus_rate_hz_max: 7.0 + pedalboard_chorus_depth_min: 0.25 + pedalboard_chorus_depth_max: 0.95 + pedalboard_chorus_centre_delay_ms_min: 3 + pedalboard_chorus_centre_delay_ms_max: 10 + pedalboard_chorus_feedback_min: 0.0 + pedalboard_chorus_feedback_max: 0.5 + pedalboard_chorus_mix_min: 0.1 + pedalboard_chorus_mix_max: 0.9 + pedalboard_phazer: 0.01 + pedalboard_phazer_rate_hz_min: 1.0 + pedalboard_phazer_rate_hz_max: 10.0 + pedalboard_phazer_depth_min: 0.25 + pedalboard_phazer_depth_max: 0.95 + pedalboard_phazer_centre_frequency_hz_min: 200 + pedalboard_phazer_centre_frequency_hz_max: 12000 + pedalboard_phazer_feedback_min: 0.0 + pedalboard_phazer_feedback_max: 0.5 + pedalboard_phazer_mix_min: 0.1 + pedalboard_phazer_mix_max: 0.9 + pedalboard_distortion: 0.01 + pedalboard_distortion_drive_db_min: 1.0 + pedalboard_distortion_drive_db_max: 25.0 + pedalboard_pitch_shift: 0.01 + pedalboard_pitch_shift_semitones_min: -7 + pedalboard_pitch_shift_semitones_max: 7 + pedalboard_resample: 0.01 + pedalboard_resample_target_sample_rate_min: 4000 + pedalboard_resample_target_sample_rate_max: 44100 + pedalboard_bitcrash: 0.01 + pedalboard_bitcrash_bit_depth_min: 4 + pedalboard_bitcrash_bit_depth_max: 16 + pedalboard_mp3_compressor: 0.01 + pedalboard_mp3_compressor_pedalboard_mp3_compressor_min: 0 + pedalboard_mp3_compressor_pedalboard_mp3_compressor_max: 9.999 + vocals: + pitch_shift: 0.1 + pitch_shift_min_semitones: -5 + pitch_shift_max_semitones: 5 + seven_band_parametric_eq: 0.1 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 + tanh_distortion: 0.1 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.7 + other: + pitch_shift: 0.1 + pitch_shift_min_semitones: -4 + pitch_shift_max_semitones: 4 + gaussian_noise: 0.1 + gaussian_noise_min_amplitude: 0.001 + gaussian_noise_max_amplitude: 0.015 + time_stretch: 0.01 + time_stretch_min_rate: 0.8 + time_stretch_max_rate: 1.25 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 + normalize: false diff --git a/scnet_masked/scnet_masked_xl_ihf_4stem_zftrubo_config.yaml b/scnet_masked/scnet_masked_xl_ihf_4stem_zftrubo_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd51180f15e014a06e6530038ff101a6a2a8415b --- /dev/null +++ b/scnet_masked/scnet_masked_xl_ihf_4stem_zftrubo_config.yaml @@ -0,0 +1,180 @@ +audio: + chunk_size: 485100 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.0 +model: + sources: + - drums + - bass + - other + - vocals + audio_channels: 2 + dims: + - 4 + - 64 + - 128 + - 256 + nfft: 4096 + hop_size: 1024 + win_size: 4096 + normalized: true + band_SR: + - 0.23 + - 0.37 + - 0.4 + band_stride: + - 1 + - 4 + - 4 + band_kernel: + - 3 + - 4 + - 4 + conv_depths: + - 3 + - 3 + - 3 + compress: 4 + conv_kernel: 3 + num_dplayer: 8 + expand: 1 +training: + batch_size: 2 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - drums + - bass + - other + - vocals + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + lr: 5.0e-05 + normalize: false + other_fix: false + use_amp: true +augmentations: + enable: true + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true + mixup_probs: !!python/tuple + - 0.2 + - 0.02 + - 0.002 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + mp3_compression_on_mixture: 0.01 + mp3_compression_on_mixture_bitrate_min: 32 + mp3_compression_on_mixture_bitrate_max: 320 + mp3_compression_on_mixture_backend: lameenc + all: + channel_shuffle: 0.5 + random_inverse: 0.1 + random_polarity: 0.5 + mp3_compression: 0.01 + mp3_compression_min_bitrate: 32 + mp3_compression_max_bitrate: 320 + mp3_compression_backend: lameenc + pedalboard_reverb: 0.01 + pedalboard_reverb_room_size_min: 0.1 + pedalboard_reverb_room_size_max: 0.9 + pedalboard_reverb_damping_min: 0.1 + pedalboard_reverb_damping_max: 0.9 + pedalboard_reverb_wet_level_min: 0.1 + pedalboard_reverb_wet_level_max: 0.9 + pedalboard_reverb_dry_level_min: 0.1 + pedalboard_reverb_dry_level_max: 0.9 + pedalboard_reverb_width_min: 0.9 + pedalboard_reverb_width_max: 1.0 + pedalboard_chorus: 0.01 + pedalboard_chorus_rate_hz_min: 1.0 + pedalboard_chorus_rate_hz_max: 7.0 + pedalboard_chorus_depth_min: 0.25 + pedalboard_chorus_depth_max: 0.95 + pedalboard_chorus_centre_delay_ms_min: 3 + pedalboard_chorus_centre_delay_ms_max: 10 + pedalboard_chorus_feedback_min: 0.0 + pedalboard_chorus_feedback_max: 0.5 + pedalboard_chorus_mix_min: 0.1 + pedalboard_chorus_mix_max: 0.9 + pedalboard_phazer: 0.01 + pedalboard_phazer_rate_hz_min: 1.0 + pedalboard_phazer_rate_hz_max: 10.0 + pedalboard_phazer_depth_min: 0.25 + pedalboard_phazer_depth_max: 0.95 + pedalboard_phazer_centre_frequency_hz_min: 200 + pedalboard_phazer_centre_frequency_hz_max: 12000 + pedalboard_phazer_feedback_min: 0.0 + pedalboard_phazer_feedback_max: 0.5 + pedalboard_phazer_mix_min: 0.1 + pedalboard_phazer_mix_max: 0.9 + pedalboard_distortion: 0.01 + pedalboard_distortion_drive_db_min: 1.0 + pedalboard_distortion_drive_db_max: 25.0 + pedalboard_pitch_shift: 0.01 + pedalboard_pitch_shift_semitones_min: -7 + pedalboard_pitch_shift_semitones_max: 7 + pedalboard_resample: 0.01 + pedalboard_resample_target_sample_rate_min: 4000 + pedalboard_resample_target_sample_rate_max: 44100 + pedalboard_bitcrash: 0.01 + pedalboard_bitcrash_bit_depth_min: 4 + pedalboard_bitcrash_bit_depth_max: 16 + pedalboard_mp3_compressor: 0.01 + pedalboard_mp3_compressor_pedalboard_mp3_compressor_min: 0 + pedalboard_mp3_compressor_pedalboard_mp3_compressor_max: 9.999 + vocals: + pitch_shift: 0.1 + pitch_shift_min_semitones: -5 + pitch_shift_max_semitones: 5 + seven_band_parametric_eq: 0.1 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 + tanh_distortion: 0.1 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.7 + bass: + pitch_shift: 0.1 + pitch_shift_min_semitones: -2 + pitch_shift_max_semitones: 2 + seven_band_parametric_eq: 0.1 + seven_band_parametric_eq_min_gain_db: -3 + seven_band_parametric_eq_max_gain_db: 6 + tanh_distortion: 0.1 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.5 + drums: + pitch_shift: 0.1 + pitch_shift_min_semitones: -5 + pitch_shift_max_semitones: 5 + seven_band_parametric_eq: 0.1 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 + tanh_distortion: 0.1 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.6 + other: + pitch_shift: 0.1 + pitch_shift_min_semitones: -4 + pitch_shift_max_semitones: 4 + gaussian_noise: 0.1 + gaussian_noise_min_amplitude: 0.001 + gaussian_noise_max_amplitude: 0.015 + time_stretch: 0.01 + time_stretch_min_rate: 0.8 + time_stretch_max_rate: 1.25 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 + normalize: false diff --git a/scnet_tran/scnet_tran_4stem_zftrubo_config.yaml b/scnet_tran/scnet_tran_4stem_zftrubo_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea5f8893e52f5de5bbe34761d62bf321fc9e2efe --- /dev/null +++ b/scnet_tran/scnet_tran_4stem_zftrubo_config.yaml @@ -0,0 +1,167 @@ +audio: + chunk_size: 485100 + num_channels: 2 + sample_rate: 44100 + min_mean_abs: 0.0 +model: + sources: + - drums + - bass + - other + - vocals + audio_channels: 2 + dims: + - 4 + - 32 + - 64 + - 128 + nfft: 4096 + hop_size: 1024 + win_size: 4096 + normalized: true + band_SR: + - 0.175 + - 0.392 + - 0.433 + band_stride: + - 1 + - 4 + - 16 + band_kernel: + - 3 + - 4 + - 16 + conv_depths: + - 3 + - 2 + - 1 + compress: 4 + conv_kernel: 3 + num_dplayer: 6 + expand: 1 + tran_rotary_embedding_dim: 64 + tran_depth: 1 + tran_heads: 8 + tran_dim_head: 64 + tran_attn_dropout: 0.0 + tran_ff_dropout: 0.0 + tran_flash_attn: false +training: + batch_size: 5 + gradient_accumulation_steps: 1 + grad_clip: 0 + instruments: + - drums + - bass + - other + - vocals + patience: 2 + reduce_factor: 0.95 + target_instrument: null + num_epochs: 1000 + num_steps: 1000 + q: 0.95 + coarse_loss_clip: true + ema_momentum: 0.999 + optimizer: adam + lr: 5.0e-05 + normalize: false + other_fix: false + use_amp: true +augmentations: + enable: true + loudness: true + loudness_min: 0.5 + loudness_max: 1.5 + mixup: true + mixup_probs: !!python/tuple + - 0.2 + - 0.02 + - 0.002 + mixup_loudness_min: 0.5 + mixup_loudness_max: 1.5 + mp3_compression_on_mixture: 0.01 + mp3_compression_on_mixture_bitrate_min: 32 + mp3_compression_on_mixture_bitrate_max: 320 + mp3_compression_on_mixture_backend: lameenc + all: + channel_shuffle: 0.5 + random_inverse: 0.01 + random_polarity: 0.5 + vocals: + pitch_shift: 0.1 + pitch_shift_min_semitones: -5 + pitch_shift_max_semitones: 5 + seven_band_parametric_eq: 0.1 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 + tanh_distortion: 0.1 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.7 + time_stretch: 0.01 + time_stretch_min_rate: 0.8 + time_stretch_max_rate: 1.25 + bass: + pitch_shift: 0.01 + pitch_shift_min_semitones: -2 + pitch_shift_max_semitones: 2 + seven_band_parametric_eq: 0.01 + seven_band_parametric_eq_min_gain_db: -3 + seven_band_parametric_eq_max_gain_db: 6 + tanh_distortion: 0.01 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.5 + time_stretch: 0.1 + time_stretch_min_rate: 0.9 + time_stretch_max_rate: 1.1 + drums: + pitch_shift: 0.1 + pitch_shift_min_semitones: -5 + pitch_shift_max_semitones: 5 + seven_band_parametric_eq: 0.1 + seven_band_parametric_eq_min_gain_db: -9 + seven_band_parametric_eq_max_gain_db: 9 + tanh_distortion: 0.1 + tanh_distortion_min: 0.1 + tanh_distortion_max: 0.6 + time_stretch: 0.01 + time_stretch_min_rate: 0.8 + time_stretch_max_rate: 1.25 + other: + pitch_shift: 0.1 + pitch_shift_min_semitones: -4 + pitch_shift_max_semitones: 4 + gaussian_noise: 0.1 + gaussian_noise_min_amplitude: 0.001 + gaussian_noise_max_amplitude: 0.015 + time_stretch: 0.01 + time_stretch_min_rate: 0.8 + time_stretch_max_rate: 1.25 +inference: + batch_size: 1 + dim_t: 256 + num_overlap: 2 + normalize: false +loss_multistft: + fft_sizes: + - 1024 + - 2048 + - 4096 + hop_sizes: + - 147 + - 256 + - 512 + win_lengths: + - 1024 + - 2048 + - 4096 + window: hann_window + scale: mel + n_bins: 128 + sample_rate: 44100 + perceptual_weighting: true + w_sc: 1.0 + w_log_mag: 1.0 + w_lin_mag: 0.0 + w_phs: 0.0 + mag_distance: L1 diff --git a/vr/10_sp-uvr-2b-32000-1_config.yaml b/vr/10_sp-uvr-2b-32000-1_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6819610a1f8553b4cea87b0994fed31815789b32 --- /dev/null +++ b/vr/10_sp-uvr-2b-32000-1_config.yaml @@ -0,0 +1,49 @@ +model: + model_params: + bins: 768 + unstable_bins: 7 + reduction_bins: 705 + band: + 1: + sr: 6000 + hl: 66 + n_fft: 512 + crop_start: 0 + crop_stop: 240 + lpf_start: 60 + lpf_stop: 118 + res_type: sinc_fastest + 2: + sr: 32000 + hl: 352 + n_fft: 1024 + crop_start: 22 + crop_stop: 505 + hpf_start: 44 + hpf_stop: 23 + res_type: sinc_medium + sr: 32000 + pre_filter_start: 710 + pre_filter_stop: 731 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/11_sp-uvr-2b-32000-2_config.yaml b/vr/11_sp-uvr-2b-32000-2_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6819610a1f8553b4cea87b0994fed31815789b32 --- /dev/null +++ b/vr/11_sp-uvr-2b-32000-2_config.yaml @@ -0,0 +1,49 @@ +model: + model_params: + bins: 768 + unstable_bins: 7 + reduction_bins: 705 + band: + 1: + sr: 6000 + hl: 66 + n_fft: 512 + crop_start: 0 + crop_stop: 240 + lpf_start: 60 + lpf_stop: 118 + res_type: sinc_fastest + 2: + sr: 32000 + hl: 352 + n_fft: 1024 + crop_start: 22 + crop_stop: 505 + hpf_start: 44 + hpf_stop: 23 + res_type: sinc_medium + sr: 32000 + pre_filter_start: 710 + pre_filter_stop: 731 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/12_sp-uvr-3b-44100_config.yaml b/vr/12_sp-uvr-3b-44100_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e6c09ba596e97627f0d6c79ebf62f74ed559b06 --- /dev/null +++ b/vr/12_sp-uvr-3b-44100_config.yaml @@ -0,0 +1,60 @@ +model: + model_params: + bins: 768 + unstable_bins: 5 + reduction_bins: 733 + band: + 1: + sr: 11025 + hl: 128 + n_fft: 768 + crop_start: 0 + crop_stop: 278 + lpf_start: 28 + lpf_stop: 140 + res_type: polyphase + 2: + sr: 22050 + hl: 256 + n_fft: 768 + crop_start: 14 + crop_stop: 322 + hpf_start: 70 + hpf_stop: 14 + lpf_start: 283 + lpf_stop: 314 + res_type: polyphase + 3: + sr: 44100 + hl: 512 + n_fft: 768 + crop_start: 131 + crop_stop: 313 + hpf_start: 154 + hpf_stop: 141 + res_type: sinc_medium + sr: 44100 + pre_filter_start: 757 + pre_filter_stop: 768 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/13_sp-uvr-4b-44100-1_config.yaml b/vr/13_sp-uvr-4b-44100-1_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3e69ee96652d8e0c5c85ea11ffa9ee1436788a2 --- /dev/null +++ b/vr/13_sp-uvr-4b-44100-1_config.yaml @@ -0,0 +1,71 @@ +model: + model_params: + bins: 768 + unstable_bins: 7 + reduction_bins: 668 + band: + 1: + sr: 11025 + hl: 128 + n_fft: 1024 + crop_start: 0 + crop_stop: 186 + lpf_start: 37 + lpf_stop: 73 + res_type: polyphase + 2: + sr: 11025 + hl: 128 + n_fft: 512 + crop_start: 4 + crop_stop: 185 + hpf_start: 36 + hpf_stop: 18 + lpf_start: 93 + lpf_stop: 185 + res_type: polyphase + 3: + sr: 22050 + hl: 256 + n_fft: 512 + crop_start: 46 + crop_stop: 186 + hpf_start: 93 + hpf_stop: 46 + lpf_start: 164 + lpf_stop: 186 + res_type: polyphase + 4: + sr: 44100 + hl: 512 + n_fft: 768 + crop_start: 121 + crop_stop: 382 + hpf_start: 138 + hpf_stop: 123 + res_type: sinc_medium + sr: 44100 + pre_filter_start: 740 + pre_filter_stop: 768 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/14_sp-uvr-4b-44100-2_config.yaml b/vr/14_sp-uvr-4b-44100-2_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3e69ee96652d8e0c5c85ea11ffa9ee1436788a2 --- /dev/null +++ b/vr/14_sp-uvr-4b-44100-2_config.yaml @@ -0,0 +1,71 @@ +model: + model_params: + bins: 768 + unstable_bins: 7 + reduction_bins: 668 + band: + 1: + sr: 11025 + hl: 128 + n_fft: 1024 + crop_start: 0 + crop_stop: 186 + lpf_start: 37 + lpf_stop: 73 + res_type: polyphase + 2: + sr: 11025 + hl: 128 + n_fft: 512 + crop_start: 4 + crop_stop: 185 + hpf_start: 36 + hpf_stop: 18 + lpf_start: 93 + lpf_stop: 185 + res_type: polyphase + 3: + sr: 22050 + hl: 256 + n_fft: 512 + crop_start: 46 + crop_stop: 186 + hpf_start: 93 + hpf_stop: 46 + lpf_start: 164 + lpf_stop: 186 + res_type: polyphase + 4: + sr: 44100 + hl: 512 + n_fft: 768 + crop_start: 121 + crop_stop: 382 + hpf_start: 138 + hpf_stop: 123 + res_type: sinc_medium + sr: 44100 + pre_filter_start: 740 + pre_filter_stop: 768 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/15_sp-uvr-mid-44100-1_config.yaml b/vr/15_sp-uvr-mid-44100-1_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a40bbb21db86eb59fe196cae5e53ad600b5c4d9 --- /dev/null +++ b/vr/15_sp-uvr-mid-44100-1_config.yaml @@ -0,0 +1,60 @@ +model: + model_params: + mid_side: true + bins: 768 + unstable_bins: 5 + reduction_bins: 733 + band: + 1: + sr: 11025 + hl: 128 + n_fft: 768 + crop_start: 0 + crop_stop: 278 + lpf_start: 28 + lpf_stop: 140 + res_type: polyphase + 2: + sr: 22050 + hl: 256 + n_fft: 768 + crop_start: 14 + crop_stop: 322 + hpf_start: 70 + hpf_stop: 14 + lpf_start: 283 + lpf_stop: 314 + res_type: polyphase + 3: + sr: 44100 + hl: 512 + n_fft: 768 + crop_start: 131 + crop_stop: 313 + hpf_start: 154 + hpf_stop: 141 + res_type: sinc_medium + sr: 44100 + pre_filter_start: 757 + pre_filter_stop: 768 + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/16_sp-uvr-mid-44100-2_config.yaml b/vr/16_sp-uvr-mid-44100-2_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a40bbb21db86eb59fe196cae5e53ad600b5c4d9 --- /dev/null +++ b/vr/16_sp-uvr-mid-44100-2_config.yaml @@ -0,0 +1,60 @@ +model: + model_params: + mid_side: true + bins: 768 + unstable_bins: 5 + reduction_bins: 733 + band: + 1: + sr: 11025 + hl: 128 + n_fft: 768 + crop_start: 0 + crop_stop: 278 + lpf_start: 28 + lpf_stop: 140 + res_type: polyphase + 2: + sr: 22050 + hl: 256 + n_fft: 768 + crop_start: 14 + crop_stop: 322 + hpf_start: 70 + hpf_stop: 14 + lpf_start: 283 + lpf_stop: 314 + res_type: polyphase + 3: + sr: 44100 + hl: 512 + n_fft: 768 + crop_start: 131 + crop_stop: 313 + hpf_start: 154 + hpf_stop: 141 + res_type: sinc_medium + sr: 44100 + pre_filter_start: 757 + pre_filter_stop: 768 + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/17_hp-wind_inst-uvr_config.yaml b/vr/17_hp-wind_inst-uvr_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3ec9f4626f29e3e9306f41af018124823a0e114 --- /dev/null +++ b/vr/17_hp-wind_inst-uvr_config.yaml @@ -0,0 +1,71 @@ +model: + model_params: + bins: 672 + unstable_bins: 8 + reduction_bins: 530 + band: + 1: + sr: 7350 + hl: 80 + n_fft: 640 + crop_start: 0 + crop_stop: 85 + lpf_start: 25 + lpf_stop: 53 + res_type: polyphase + 2: + sr: 7350 + hl: 80 + n_fft: 320 + crop_start: 4 + crop_stop: 87 + hpf_start: 25 + hpf_stop: 12 + lpf_start: 31 + lpf_stop: 62 + res_type: polyphase + 3: + sr: 14700 + hl: 160 + n_fft: 512 + crop_start: 17 + crop_stop: 216 + hpf_start: 48 + hpf_stop: 24 + lpf_start: 139 + lpf_stop: 210 + res_type: polyphase + 4: + sr: 44100 + hl: 480 + n_fft: 960 + crop_start: 78 + crop_stop: 383 + hpf_start: 130 + hpf_stop: 86 + res_type: kaiser_fast + sr: 44100 + pre_filter_start: 668 + pre_filter_stop: 672 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - No Woodwinds + - Woodwinds + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/1_hp-uvr_config.yaml b/vr/1_hp-uvr_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3e69ee96652d8e0c5c85ea11ffa9ee1436788a2 --- /dev/null +++ b/vr/1_hp-uvr_config.yaml @@ -0,0 +1,71 @@ +model: + model_params: + bins: 768 + unstable_bins: 7 + reduction_bins: 668 + band: + 1: + sr: 11025 + hl: 128 + n_fft: 1024 + crop_start: 0 + crop_stop: 186 + lpf_start: 37 + lpf_stop: 73 + res_type: polyphase + 2: + sr: 11025 + hl: 128 + n_fft: 512 + crop_start: 4 + crop_stop: 185 + hpf_start: 36 + hpf_stop: 18 + lpf_start: 93 + lpf_stop: 185 + res_type: polyphase + 3: + sr: 22050 + hl: 256 + n_fft: 512 + crop_start: 46 + crop_stop: 186 + hpf_start: 93 + hpf_stop: 46 + lpf_start: 164 + lpf_stop: 186 + res_type: polyphase + 4: + sr: 44100 + hl: 512 + n_fft: 768 + crop_start: 121 + crop_stop: 382 + hpf_start: 138 + hpf_stop: 123 + res_type: sinc_medium + sr: 44100 + pre_filter_start: 740 + pre_filter_stop: 768 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/2_hp-uvr_config.yaml b/vr/2_hp-uvr_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9eb4e037a89672cb85b1f78bc4b9689f258f237f --- /dev/null +++ b/vr/2_hp-uvr_config.yaml @@ -0,0 +1,71 @@ +model: + model_params: + bins: 672 + unstable_bins: 8 + reduction_bins: 637 + band: + 1: + sr: 7350 + hl: 80 + n_fft: 640 + crop_start: 0 + crop_stop: 85 + lpf_start: 25 + lpf_stop: 53 + res_type: polyphase + 2: + sr: 7350 + hl: 80 + n_fft: 320 + crop_start: 4 + crop_stop: 87 + hpf_start: 25 + hpf_stop: 12 + lpf_start: 31 + lpf_stop: 62 + res_type: polyphase + 3: + sr: 14700 + hl: 160 + n_fft: 512 + crop_start: 17 + crop_stop: 216 + hpf_start: 48 + hpf_stop: 24 + lpf_start: 139 + lpf_stop: 210 + res_type: polyphase + 4: + sr: 44100 + hl: 480 + n_fft: 960 + crop_start: 78 + crop_stop: 383 + hpf_start: 130 + hpf_stop: 86 + res_type: kaiser_fast + sr: 44100 + pre_filter_start: 668 + pre_filter_stop: 672 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/3_hp-vocal-uvr.ckpt b/vr/3_hp-vocal-uvr.ckpt new file mode 100644 index 0000000000000000000000000000000000000000..3a0e7a3081a92f32895d6478390e544488268f13 --- /dev/null +++ b/vr/3_hp-vocal-uvr.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d3f9be265b2f2c3d684f23a9905674627c2552e0f6b5f36325cef8dc1f0ff6b +size 126792647 diff --git a/vr/3_hp-vocal-uvr_config.yaml b/vr/3_hp-vocal-uvr_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b4529eb264737e968959159af752b1087cd0e99 --- /dev/null +++ b/vr/3_hp-vocal-uvr_config.yaml @@ -0,0 +1,71 @@ +model: + model_params: + bins: 768 + unstable_bins: 7 + reduction_bins: 668 + band: + 1: + sr: 11025 + hl: 128 + n_fft: 1024 + crop_start: 0 + crop_stop: 186 + lpf_start: 37 + lpf_stop: 73 + res_type: polyphase + 2: + sr: 11025 + hl: 128 + n_fft: 512 + crop_start: 4 + crop_stop: 185 + hpf_start: 36 + hpf_stop: 18 + lpf_start: 93 + lpf_stop: 185 + res_type: polyphase + 3: + sr: 22050 + hl: 256 + n_fft: 512 + crop_start: 46 + crop_stop: 186 + hpf_start: 93 + hpf_stop: 46 + lpf_start: 164 + lpf_stop: 186 + res_type: polyphase + 4: + sr: 44100 + hl: 512 + n_fft: 768 + crop_start: 121 + crop_stop: 382 + hpf_start: 138 + hpf_stop: 123 + res_type: sinc_medium + sr: 44100 + pre_filter_start: 740 + pre_filter_stop: 768 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Vocals + - Instrumental + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/4_hp-vocal-uvr_config.yaml b/vr/4_hp-vocal-uvr_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b4529eb264737e968959159af752b1087cd0e99 --- /dev/null +++ b/vr/4_hp-vocal-uvr_config.yaml @@ -0,0 +1,71 @@ +model: + model_params: + bins: 768 + unstable_bins: 7 + reduction_bins: 668 + band: + 1: + sr: 11025 + hl: 128 + n_fft: 1024 + crop_start: 0 + crop_stop: 186 + lpf_start: 37 + lpf_stop: 73 + res_type: polyphase + 2: + sr: 11025 + hl: 128 + n_fft: 512 + crop_start: 4 + crop_stop: 185 + hpf_start: 36 + hpf_stop: 18 + lpf_start: 93 + lpf_stop: 185 + res_type: polyphase + 3: + sr: 22050 + hl: 256 + n_fft: 512 + crop_start: 46 + crop_stop: 186 + hpf_start: 93 + hpf_stop: 46 + lpf_start: 164 + lpf_stop: 186 + res_type: polyphase + 4: + sr: 44100 + hl: 512 + n_fft: 768 + crop_start: 121 + crop_stop: 382 + hpf_start: 138 + hpf_stop: 123 + res_type: sinc_medium + sr: 44100 + pre_filter_start: 740 + pre_filter_stop: 768 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Vocals + - Instrumental + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/5_hp-karaoke-uvr_config.yaml b/vr/5_hp-karaoke-uvr_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2563b016903f652efb345e5b03956ecc27c17607 --- /dev/null +++ b/vr/5_hp-karaoke-uvr_config.yaml @@ -0,0 +1,72 @@ +model: + model_params: + bins: 672 + unstable_bins: 8 + reduction_bins: 637 + band: + 1: + sr: 7350 + hl: 80 + n_fft: 640 + crop_start: 0 + crop_stop: 85 + lpf_start: 25 + lpf_stop: 53 + res_type: polyphase + 2: + sr: 7350 + hl: 80 + n_fft: 320 + crop_start: 4 + crop_stop: 87 + hpf_start: 25 + hpf_stop: 12 + lpf_start: 31 + lpf_stop: 62 + res_type: polyphase + 3: + sr: 14700 + hl: 160 + n_fft: 512 + crop_start: 17 + crop_stop: 216 + hpf_start: 48 + hpf_stop: 24 + lpf_start: 139 + lpf_stop: 210 + res_type: polyphase + 4: + sr: 44100 + hl: 480 + n_fft: 960 + crop_start: 78 + crop_stop: 383 + hpf_start: 130 + hpf_stop: 86 + convert_channels: stereo_n + res_type: kaiser_fast + sr: 44100 + pre_filter_start: 668 + pre_filter_stop: 672 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/6_hp-karaoke-uvr_config.yaml b/vr/6_hp-karaoke-uvr_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..24da44f56c32f7bf435a438452f488be01493f9e --- /dev/null +++ b/vr/6_hp-karaoke-uvr_config.yaml @@ -0,0 +1,60 @@ +model: + model_params: + mid_side_b2: true + bins: 640 + unstable_bins: 7 + reduction_bins: 565 + band: + 1: + sr: 11025 + hl: 108 + n_fft: 1024 + crop_start: 0 + crop_stop: 187 + lpf_start: 92 + lpf_stop: 186 + res_type: polyphase + 2: + sr: 22050 + hl: 216 + n_fft: 768 + crop_start: 0 + crop_stop: 212 + hpf_start: 68 + hpf_stop: 34 + lpf_start: 174 + lpf_stop: 209 + res_type: polyphase + 3: + sr: 44100 + hl: 432 + n_fft: 640 + crop_start: 66 + crop_stop: 307 + hpf_start: 86 + hpf_stop: 72 + res_type: kaiser_fast + sr: 44100 + pre_filter_start: 639 + pre_filter_stop: 640 + mid_side: false + mid_side_b: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/7_hp2-uvr_config.yaml b/vr/7_hp2-uvr_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..24da44f56c32f7bf435a438452f488be01493f9e --- /dev/null +++ b/vr/7_hp2-uvr_config.yaml @@ -0,0 +1,60 @@ +model: + model_params: + mid_side_b2: true + bins: 640 + unstable_bins: 7 + reduction_bins: 565 + band: + 1: + sr: 11025 + hl: 108 + n_fft: 1024 + crop_start: 0 + crop_stop: 187 + lpf_start: 92 + lpf_stop: 186 + res_type: polyphase + 2: + sr: 22050 + hl: 216 + n_fft: 768 + crop_start: 0 + crop_stop: 212 + hpf_start: 68 + hpf_stop: 34 + lpf_start: 174 + lpf_stop: 209 + res_type: polyphase + 3: + sr: 44100 + hl: 432 + n_fft: 640 + crop_start: 66 + crop_stop: 307 + hpf_start: 86 + hpf_stop: 72 + res_type: kaiser_fast + sr: 44100 + pre_filter_start: 639 + pre_filter_stop: 640 + mid_side: false + mid_side_b: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/8_hp2-uvr_config.yaml b/vr/8_hp2-uvr_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3e69ee96652d8e0c5c85ea11ffa9ee1436788a2 --- /dev/null +++ b/vr/8_hp2-uvr_config.yaml @@ -0,0 +1,71 @@ +model: + model_params: + bins: 768 + unstable_bins: 7 + reduction_bins: 668 + band: + 1: + sr: 11025 + hl: 128 + n_fft: 1024 + crop_start: 0 + crop_stop: 186 + lpf_start: 37 + lpf_stop: 73 + res_type: polyphase + 2: + sr: 11025 + hl: 128 + n_fft: 512 + crop_start: 4 + crop_stop: 185 + hpf_start: 36 + hpf_stop: 18 + lpf_start: 93 + lpf_stop: 185 + res_type: polyphase + 3: + sr: 22050 + hl: 256 + n_fft: 512 + crop_start: 46 + crop_stop: 186 + hpf_start: 93 + hpf_stop: 46 + lpf_start: 164 + lpf_stop: 186 + res_type: polyphase + 4: + sr: 44100 + hl: 512 + n_fft: 768 + crop_start: 121 + crop_stop: 382 + hpf_start: 138 + hpf_stop: 123 + res_type: sinc_medium + sr: 44100 + pre_filter_start: 740 + pre_filter_stop: 768 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/9_hp2-uvr_config.yaml b/vr/9_hp2-uvr_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3e69ee96652d8e0c5c85ea11ffa9ee1436788a2 --- /dev/null +++ b/vr/9_hp2-uvr_config.yaml @@ -0,0 +1,71 @@ +model: + model_params: + bins: 768 + unstable_bins: 7 + reduction_bins: 668 + band: + 1: + sr: 11025 + hl: 128 + n_fft: 1024 + crop_start: 0 + crop_stop: 186 + lpf_start: 37 + lpf_stop: 73 + res_type: polyphase + 2: + sr: 11025 + hl: 128 + n_fft: 512 + crop_start: 4 + crop_stop: 185 + hpf_start: 36 + hpf_stop: 18 + lpf_start: 93 + lpf_stop: 185 + res_type: polyphase + 3: + sr: 22050 + hl: 256 + n_fft: 512 + crop_start: 46 + crop_stop: 186 + hpf_start: 93 + hpf_stop: 46 + lpf_start: 164 + lpf_stop: 186 + res_type: polyphase + 4: + sr: 44100 + hl: 512 + n_fft: 768 + crop_start: 121 + crop_stop: 382 + hpf_start: 138 + hpf_stop: 123 + res_type: sinc_medium + sr: 44100 + pre_filter_start: 740 + pre_filter_stop: 768 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/mgm-v5-karokee-32000-beta1_config.yaml b/vr/mgm-v5-karokee-32000-beta1_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6819610a1f8553b4cea87b0994fed31815789b32 --- /dev/null +++ b/vr/mgm-v5-karokee-32000-beta1_config.yaml @@ -0,0 +1,49 @@ +model: + model_params: + bins: 768 + unstable_bins: 7 + reduction_bins: 705 + band: + 1: + sr: 6000 + hl: 66 + n_fft: 512 + crop_start: 0 + crop_stop: 240 + lpf_start: 60 + lpf_stop: 118 + res_type: sinc_fastest + 2: + sr: 32000 + hl: 352 + n_fft: 1024 + crop_start: 22 + crop_stop: 505 + hpf_start: 44 + hpf_stop: 23 + res_type: sinc_medium + sr: 32000 + pre_filter_start: 710 + pre_filter_stop: 731 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/mgm-v5-karokee-32000-beta2-agr_config.yaml b/vr/mgm-v5-karokee-32000-beta2-agr_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8981c7ad5b9ab7ba069dfb4f6c4ce0c094ddb736 --- /dev/null +++ b/vr/mgm-v5-karokee-32000-beta2-agr_config.yaml @@ -0,0 +1,49 @@ +model: + model_params: + bins: 768 + unstable_bins: 7 + reduction_bins: 705 + band: + 1: + sr: 6000 + hl: 66 + n_fft: 512 + crop_start: 0 + crop_stop: 240 + lpf_start: 60 + lpf_stop: 240 + res_type: sinc_fastest + 2: + sr: 32000 + hl: 352 + n_fft: 1024 + crop_start: 22 + crop_stop: 505 + hpf_start: 82 + hpf_stop: 22 + res_type: polyphase + sr: 32000 + pre_filter_start: 710 + pre_filter_stop: 731 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/mgm_highend_v4_config.yaml b/vr/mgm_highend_v4_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa31e1c19bbde0b1df21741d9636ac8733e6f426 --- /dev/null +++ b/vr/mgm_highend_v4_config.yaml @@ -0,0 +1,39 @@ +model: + model_params: + bins: 1024 + unstable_bins: 0 + reduction_bins: 0 + band: + 1: + sr: 44100 + hl: 1024 + n_fft: 2048 + crop_start: 0 + crop_stop: 1024 + hpf_start: -1 + res_type: sinc_best + sr: 44100 + pre_filter_start: 1023 + pre_filter_stop: 1024 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/mgm_lowend_a_v4_config.yaml b/vr/mgm_lowend_a_v4_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29bc40e21b1ab51523e193a6d6ff5443e8355093 --- /dev/null +++ b/vr/mgm_lowend_a_v4_config.yaml @@ -0,0 +1,39 @@ +model: + model_params: + bins: 1024 + unstable_bins: 0 + reduction_bins: 0 + band: + 1: + sr: 32000 + hl: 512 + n_fft: 2048 + crop_start: 0 + crop_stop: 1024 + hpf_start: -1 + res_type: kaiser_fast + sr: 32000 + pre_filter_start: 1000 + pre_filter_stop: 1021 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/mgm_lowend_b_v4_config.yaml b/vr/mgm_lowend_b_v4_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1116ae0edb00d5c8cb964a70e6c351a33f62d9c1 --- /dev/null +++ b/vr/mgm_lowend_b_v4_config.yaml @@ -0,0 +1,39 @@ +model: + model_params: + bins: 1024 + unstable_bins: 0 + reduction_bins: 0 + band: + 1: + sr: 33075 + hl: 384 + n_fft: 2048 + crop_start: 0 + crop_stop: 1024 + hpf_start: -1 + res_type: sinc_best + sr: 33075 + pre_filter_start: 1000 + pre_filter_stop: 1021 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/mgm_main_v4_config.yaml b/vr/mgm_main_v4_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..38bb04098ecbfacb4f3e59f023bb6dad7b69c595 --- /dev/null +++ b/vr/mgm_main_v4_config.yaml @@ -0,0 +1,39 @@ +model: + model_params: + bins: 1024 + unstable_bins: 0 + reduction_bins: 0 + band: + 1: + sr: 44100 + hl: 512 + n_fft: 2048 + crop_start: 0 + crop_stop: 1024 + hpf_start: -1 + res_type: sinc_best + sr: 44100 + pre_filter_start: 1023 + pre_filter_stop: 1024 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Instrumental + - Vocals + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/uvr-bve-4b_sn-44100-1_config.yaml b/vr/uvr-bve-4b_sn-44100-1_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..807ed2d2ae64532200c88bf4ec2428a05e2b3301 --- /dev/null +++ b/vr/uvr-bve-4b_sn-44100-1_config.yaml @@ -0,0 +1,73 @@ +model: + model_params: + n_bins: 672 + unstable_bins: 8 + stable_bins: 530 + band: + 1: + sr: 7350 + hl: 80 + n_fft: 640 + crop_start: 0 + crop_stop: 85 + lpf_start: 25 + lpf_stop: 53 + res_type: polyphase + 2: + sr: 7350 + hl: 80 + n_fft: 320 + crop_start: 4 + crop_stop: 87 + hpf_start: 25 + hpf_stop: 12 + lpf_start: 31 + lpf_stop: 62 + res_type: polyphase + 3: + sr: 14700 + hl: 160 + n_fft: 512 + crop_start: 17 + crop_stop: 216 + hpf_start: 48 + hpf_stop: 24 + lpf_start: 139 + lpf_stop: 210 + res_type: polyphase + 4: + sr: 44100 + hl: 480 + n_fft: 960 + crop_start: 78 + crop_stop: 383 + hpf_start: 130 + hpf_stop: 86 + convert_channels: stereo_n + res_type: kaiser_fast + sr: 44100 + pre_filter_start: 668 + pre_filter_stop: 672 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + bins: 672 + nout: 64 + nout_lstm: 128 +training: + target_instrument: null + instruments: + - Vocals + - Instrumental + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/uvr-bve-v2-4b-sn-44100_config.yaml b/vr/uvr-bve-v2-4b-sn-44100_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a032f319ad4d77cf62efa2a7c4803758c653faf2 --- /dev/null +++ b/vr/uvr-bve-v2-4b-sn-44100_config.yaml @@ -0,0 +1,76 @@ +model: + model_params: + n_bins: 896 + unstable_bins: 9 + stable_bins: 530 + band: + 1: + sr: 7350 + hl: 96 + n_fft: 768 + crop_start: 0 + crop_stop: 102 + lpf_start: 30 + lpf_stop: 62 + res_type: polyphase + convert_channels: mid_side + 2: + sr: 7350 + hl: 96 + n_fft: 384 + crop_start: 5 + crop_stop: 104 + hpf_start: 30 + hpf_stop: 14 + lpf_start: 37 + lpf_stop: 73 + res_type: polyphase + convert_channels: mid_side + 3: + sr: 14700 + hl: 192 + n_fft: 640 + crop_start: 20 + crop_stop: 259 + hpf_start: 58 + hpf_stop: 29 + lpf_start: 191 + lpf_stop: 262 + res_type: polyphase + convert_channels: mid_side + 4: + sr: 44100 + hl: 576 + n_fft: 1152 + crop_start: 119 + crop_stop: 575 + hpf_start: 157 + hpf_stop: 110 + res_type: kaiser_fast + convert_channels: mid_side + sr: 44100 + pre_filter_start: -1 + pre_filter_stop: -1 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + bins: 896 + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Vocals + - Instrumental + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/uvr-de-breath-sucial-v1_config.yaml b/vr/uvr-de-breath-sucial-v1_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67f4e85ad08dacfd53e82c71c5eb8a5371c059e4 --- /dev/null +++ b/vr/uvr-de-breath-sucial-v1_config.yaml @@ -0,0 +1,39 @@ +model: + model_params: + bins: 1024 + unstable_bins: 0 + reduction_bins: 0 + band: + 1: + sr: 44100 + hl: 1024 + n_fft: 2048 + crop_start: 0 + crop_stop: 1024 + hpf_start: -1 + res_type: sinc_best + sr: 44100 + pre_filter_start: 1023 + pre_filter_stop: 1024 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Breath + - No Breath + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/uvr-de-breath-sucial-v2_config.yaml b/vr/uvr-de-breath-sucial-v2_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67f4e85ad08dacfd53e82c71c5eb8a5371c059e4 --- /dev/null +++ b/vr/uvr-de-breath-sucial-v2_config.yaml @@ -0,0 +1,39 @@ +model: + model_params: + bins: 1024 + unstable_bins: 0 + reduction_bins: 0 + band: + 1: + sr: 44100 + hl: 1024 + n_fft: 2048 + crop_start: 0 + crop_stop: 1024 + hpf_start: -1 + res_type: sinc_best + sr: 44100 + pre_filter_start: 1023 + pre_filter_stop: 1024 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Breath + - No Breath + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/uvr-de-echo-aggressive_config.yaml b/vr/uvr-de-echo-aggressive_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc03245b6399e83674a230155a720e9679e70f08 --- /dev/null +++ b/vr/uvr-de-echo-aggressive_config.yaml @@ -0,0 +1,71 @@ +model: + model_params: + bins: 672 + unstable_bins: 8 + reduction_bins: 530 + band: + 1: + sr: 7350 + hl: 80 + n_fft: 640 + crop_start: 0 + crop_stop: 85 + lpf_start: 25 + lpf_stop: 53 + res_type: polyphase + 2: + sr: 7350 + hl: 80 + n_fft: 320 + crop_start: 4 + crop_stop: 87 + hpf_start: 25 + hpf_stop: 12 + lpf_start: 31 + lpf_stop: 62 + res_type: polyphase + 3: + sr: 14700 + hl: 160 + n_fft: 512 + crop_start: 17 + crop_stop: 216 + hpf_start: 48 + hpf_stop: 24 + lpf_start: 139 + lpf_stop: 210 + res_type: polyphase + 4: + sr: 44100 + hl: 480 + n_fft: 960 + crop_start: 78 + crop_stop: 383 + hpf_start: 130 + hpf_stop: 86 + res_type: kaiser_fast + sr: 44100 + pre_filter_start: 668 + pre_filter_stop: 672 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: 48 + nout_lstm: 128 +training: + target_instrument: null + instruments: + - No Echo + - Echo + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/uvr-de-echo-normal_config.yaml b/vr/uvr-de-echo-normal_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc03245b6399e83674a230155a720e9679e70f08 --- /dev/null +++ b/vr/uvr-de-echo-normal_config.yaml @@ -0,0 +1,71 @@ +model: + model_params: + bins: 672 + unstable_bins: 8 + reduction_bins: 530 + band: + 1: + sr: 7350 + hl: 80 + n_fft: 640 + crop_start: 0 + crop_stop: 85 + lpf_start: 25 + lpf_stop: 53 + res_type: polyphase + 2: + sr: 7350 + hl: 80 + n_fft: 320 + crop_start: 4 + crop_stop: 87 + hpf_start: 25 + hpf_stop: 12 + lpf_start: 31 + lpf_stop: 62 + res_type: polyphase + 3: + sr: 14700 + hl: 160 + n_fft: 512 + crop_start: 17 + crop_stop: 216 + hpf_start: 48 + hpf_stop: 24 + lpf_start: 139 + lpf_stop: 210 + res_type: polyphase + 4: + sr: 44100 + hl: 480 + n_fft: 960 + crop_start: 78 + crop_stop: 383 + hpf_start: 130 + hpf_stop: 86 + res_type: kaiser_fast + sr: 44100 + pre_filter_start: 668 + pre_filter_stop: 672 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: 48 + nout_lstm: 128 +training: + target_instrument: null + instruments: + - No Echo + - Echo + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/uvr-de-reverb-aufr33-jarredou_config.yaml b/vr/uvr-de-reverb-aufr33-jarredou_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..268d81eeb359989e885df4219ec6319e2028c298 --- /dev/null +++ b/vr/uvr-de-reverb-aufr33-jarredou_config.yaml @@ -0,0 +1,76 @@ +model: + model_params: + n_bins: 896 + unstable_bins: 9 + stable_bins: 530 + band: + 1: + sr: 7350 + hl: 96 + n_fft: 768 + crop_start: 0 + crop_stop: 102 + lpf_start: 30 + lpf_stop: 62 + res_type: polyphase + convert_channels: mid_side + 2: + sr: 7350 + hl: 96 + n_fft: 384 + crop_start: 5 + crop_stop: 104 + hpf_start: 30 + hpf_stop: 14 + lpf_start: 37 + lpf_stop: 73 + res_type: polyphase + convert_channels: mid_side + 3: + sr: 14700 + hl: 192 + n_fft: 640 + crop_start: 20 + crop_stop: 259 + hpf_start: 58 + hpf_stop: 29 + lpf_start: 191 + lpf_stop: 262 + res_type: polyphase + convert_channels: mid_side + 4: + sr: 44100 + hl: 576 + n_fft: 1152 + crop_start: 119 + crop_stop: 575 + hpf_start: 157 + hpf_stop: 110 + res_type: kaiser_fast + convert_channels: mid_side + sr: 44100 + pre_filter_start: -1 + pre_filter_stop: -1 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + bins: 896 + nout: 32 + nout_lstm: 128 +training: + target_instrument: null + instruments: + - Dry + - No Dry + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/uvr-deecho-dereverb_config.yaml b/vr/uvr-deecho-dereverb_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a67d9df587280889618900fcaab069e6cce0a8bf --- /dev/null +++ b/vr/uvr-deecho-dereverb_config.yaml @@ -0,0 +1,71 @@ +model: + model_params: + bins: 672 + unstable_bins: 8 + reduction_bins: 530 + band: + 1: + sr: 7350 + hl: 80 + n_fft: 640 + crop_start: 0 + crop_stop: 85 + lpf_start: 25 + lpf_stop: 53 + res_type: polyphase + 2: + sr: 7350 + hl: 80 + n_fft: 320 + crop_start: 4 + crop_stop: 87 + hpf_start: 25 + hpf_stop: 12 + lpf_start: 31 + lpf_stop: 62 + res_type: polyphase + 3: + sr: 14700 + hl: 160 + n_fft: 512 + crop_start: 17 + crop_stop: 216 + hpf_start: 48 + hpf_stop: 24 + lpf_start: 139 + lpf_stop: 210 + res_type: polyphase + 4: + sr: 44100 + hl: 480 + n_fft: 960 + crop_start: 78 + crop_stop: 383 + hpf_start: 130 + hpf_stop: 86 + res_type: kaiser_fast + sr: 44100 + pre_filter_start: 668 + pre_filter_stop: 672 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - No Reverb + - Reverb + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/uvr-denoise-lite_config.yaml b/vr/uvr-denoise-lite_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f2c53693afb3ff97d2f807d818f8273d6b8043a --- /dev/null +++ b/vr/uvr-denoise-lite_config.yaml @@ -0,0 +1,39 @@ +model: + model_params: + bins: 1024 + unstable_bins: 0 + reduction_bins: 0 + band: + 1: + sr: 44100 + hl: 1024 + n_fft: 2048 + crop_start: 0 + crop_stop: 1024 + hpf_start: -1 + res_type: sinc_best + sr: 44100 + pre_filter_start: 1023 + pre_filter_stop: 1024 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: 16 + nout_lstm: 128 +training: + target_instrument: null + instruments: + - Noise + - No Noise + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/uvr-denoise_config.yaml b/vr/uvr-denoise_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..16ece17ce271b7eced4ab6c29f1329c9fd3ab5ec --- /dev/null +++ b/vr/uvr-denoise_config.yaml @@ -0,0 +1,71 @@ +model: + model_params: + bins: 672 + unstable_bins: 8 + reduction_bins: 530 + band: + 1: + sr: 7350 + hl: 80 + n_fft: 640 + crop_start: 0 + crop_stop: 85 + lpf_start: 25 + lpf_stop: 53 + res_type: polyphase + 2: + sr: 7350 + hl: 80 + n_fft: 320 + crop_start: 4 + crop_stop: 87 + hpf_start: 25 + hpf_stop: 12 + lpf_start: 31 + lpf_stop: 62 + res_type: polyphase + 3: + sr: 14700 + hl: 160 + n_fft: 512 + crop_start: 17 + crop_stop: 216 + hpf_start: 48 + hpf_stop: 24 + lpf_start: 139 + lpf_stop: 210 + res_type: polyphase + 4: + sr: 44100 + hl: 480 + n_fft: 960 + crop_start: 78 + crop_stop: 383 + hpf_start: 130 + hpf_stop: 86 + res_type: kaiser_fast + sr: 44100 + pre_filter_start: 668 + pre_filter_stop: 672 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: 48 + nout_lstm: 128 +training: + target_instrument: null + instruments: + - Noise + - No Noise + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100 diff --git a/vr/vr_harmonic_noise_sep_config.yaml b/vr/vr_harmonic_noise_sep_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..22295d1c803c5248f73b51c27dc1f073264abba7 --- /dev/null +++ b/vr/vr_harmonic_noise_sep_config.yaml @@ -0,0 +1,39 @@ +model: + model_params: + bins: 1024 + unstable_bins: 0 + reduction_bins: 0 + band: + 1: + sr: 44100 + hl: 1024 + n_fft: 2048 + crop_start: 0 + crop_stop: 1024 + hpf_start: -1 + res_type: sinc_best + sr: 44100 + pre_filter_start: 1023 + pre_filter_stop: 1024 + mid_side: false + mid_side_b: false + mid_side_b2: false + stereo_w: false + stereo_n: false + reverse: false + nout: null + nout_lstm: null +training: + target_instrument: null + instruments: + - Noise + - No Noise + use_amp: true +inference: + batch_size: 1 + aggression: 5 + high_end_process: false + post_process_threshold: 0.2 + window_size: 512 +audio: + sample_rate: 44100