noblebarkrr commited on
Commit
cb837ba
·
verified ·
1 Parent(s): 4da5c91

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. bandit/bandit_plus_config.yaml +70 -0
  2. bandit_v2/bandit_v2_multi_config.yaml +75 -0
  3. htdemucs/demucs3_mmi.ckpt +3 -0
  4. htdemucs/demucs4_4stem.ckpt +3 -0
  5. htdemucs/demucs4_6stem.ckpt +3 -0
  6. htdemucs/demucs4_choirsep.ckpt +3 -0
  7. htdemucs/demucs4_ft_bass.ckpt +3 -0
  8. htdemucs/demucs4_ft_drums.ckpt +3 -0
  9. htdemucs/demucs4_ft_other.ckpt +3 -0
  10. htdemucs/demucs4_ft_vocals.ckpt +3 -0
  11. htdemucs/demucs4_mvsep_vocals.ckpt +3 -0
  12. htdemucs/demucs_mid_side_wesleyr36.ckpt +3 -0
  13. mdx23c/mdx23c_4stem_zfturbo_config.yaml +54 -0
  14. mdx23c/mdx23c_d1581_config.yaml +37 -0
  15. mdx23c/mdx23c_dereverb_aufr33_jarredou_config.yaml +114 -0
  16. mdx23c/mdx23c_drumsep_6stem_aufr33_jarredou_config.yaml +80 -0
  17. mdx23c/mdx23c_instvoc_hq1_config.yaml +44 -0
  18. mdx23c/mdx23c_instvoc_hq2_config.yaml +44 -0
  19. mdx23c/mdx23c_instvoc_zfturbo_config.yaml +88 -0
  20. mdx23c/mdx23c_mid_side_wesleyr36_config.yaml +82 -0
  21. mdx23c/mdx23c_orch_verosment_config.yaml +93 -0
  22. mdxnet/mdx_inst_hq1.onnx +3 -0
  23. mdxnet/mdx_inst_hq5.onnx +3 -0
  24. mdxnet/mdx_kim_inst.onnx +3 -0
  25. mdxnet/mdx_kuielab_a_drums.onnx +3 -0
  26. mdxnet/mdx_kuielab_a_other.onnx +3 -0
  27. mdxnet/mdx_kuielab_a_vocals.onnx +3 -0
  28. mdxnet/mdx_main_390.onnx +3 -0
  29. mdxnet/mdx_main_406.onnx +3 -0
  30. scnet/scnet_4stem_zfturbo_config.yaml +82 -0
  31. scnet/scnet_choirsep_exp_config.yaml +101 -0
  32. scnet/scnet_huge_4stem_aname_config.yaml +67 -0
  33. scnet/scnet_jazz_4stem_jorisvaneyghen_config.yaml +82 -0
  34. scnet/scnet_xl_4stem_starrytong_config.yaml +179 -0
  35. scnet/scnet_xl_4stem_zftrubo_config.yaml +78 -0
  36. scnet/scnet_xl_ihf_4stem_zfturbo_config.yaml +180 -0
  37. scnet/scnet_xl_jazz_4stem_jorisvaneyghen_config.yaml +83 -0
  38. scnet_masked/scnet_masked_small_4stem_zftrubo_config.yaml +159 -0
  39. scnet_masked/scnet_masked_xl_ihf_4stem_zftrubo_config.yaml +180 -0
  40. scnet_tran/scnet_tran_4stem_zftrubo_config.yaml +167 -0
  41. vr/10_sp-uvr-2b-32000-1_config.yaml +49 -0
  42. vr/11_sp-uvr-2b-32000-2_config.yaml +49 -0
  43. vr/12_sp-uvr-3b-44100_config.yaml +60 -0
  44. vr/13_sp-uvr-4b-44100-1_config.yaml +71 -0
  45. vr/14_sp-uvr-4b-44100-2_config.yaml +71 -0
  46. vr/15_sp-uvr-mid-44100-1_config.yaml +60 -0
  47. vr/16_sp-uvr-mid-44100-2_config.yaml +60 -0
  48. vr/17_hp-wind_inst-uvr_config.yaml +71 -0
  49. vr/1_hp-uvr_config.yaml +71 -0
  50. vr/2_hp-uvr_config.yaml +71 -0
bandit/bandit_plus_config.yaml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: MultiMaskMultiSourceBandSplitRNN
2
+ audio:
3
+ chunk_size: 264600
4
+ num_channels: 2
5
+ sample_rate: 44100
6
+ min_mean_abs: 0.001
7
+ model:
8
+ in_channel: 1
9
+ stems:
10
+ - speech
11
+ - music
12
+ - effects
13
+ band_specs: musical
14
+ n_bands: 64
15
+ fs: 44100
16
+ require_no_overlap: false
17
+ require_no_gap: true
18
+ normalize_channel_independently: false
19
+ treat_channel_as_feature: true
20
+ n_sqm_modules: 8
21
+ emb_dim: 128
22
+ rnn_dim: 256
23
+ bidirectional: true
24
+ rnn_type: GRU
25
+ mlp_dim: 512
26
+ hidden_activation: Tanh
27
+ hidden_activation_kwargs: null
28
+ complex_mask: true
29
+ n_fft: 2048
30
+ win_length: 2048
31
+ hop_length: 512
32
+ window_fn: hann_window
33
+ wkwargs: null
34
+ power: null
35
+ center: true
36
+ normalized: true
37
+ pad_mode: constant
38
+ onesided: true
39
+ training:
40
+ batch_size: 4
41
+ gradient_accumulation_steps: 4
42
+ grad_clip: 0
43
+ instruments:
44
+ - speech
45
+ - music
46
+ - effects
47
+ lr: 9.0e-05
48
+ patience: 2
49
+ reduce_factor: 0.95
50
+ target_instrument: null
51
+ num_epochs: 1000
52
+ num_steps: 1000
53
+ augmentation: false
54
+ augmentation_type: simple1
55
+ use_mp3_compress: false
56
+ augmentation_mix: true
57
+ augmentation_loudness: true
58
+ augmentation_loudness_type: 1
59
+ augmentation_loudness_min: 0.5
60
+ augmentation_loudness_max: 1.5
61
+ q: 0.95
62
+ coarse_loss_clip: true
63
+ ema_momentum: 0.999
64
+ optimizer: adam
65
+ other_fix: true
66
+ use_amp: true
67
+ inference:
68
+ batch_size: 1
69
+ dim_t: 256
70
+ num_overlap: 2
bandit_v2/bandit_v2_multi_config.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cls: Bandit
2
+ audio:
3
+ chunk_size: 384000
4
+ num_channels: 2
5
+ sample_rate: 48000
6
+ min_mean_abs: 0.0
7
+ kwargs:
8
+ in_channels: 1
9
+ stems:
10
+ - speech
11
+ - music
12
+ - sfx
13
+ band_type: musical
14
+ n_bands: 64
15
+ normalize_channel_independently: false
16
+ treat_channel_as_feature: true
17
+ n_sqm_modules: 8
18
+ emb_dim: 128
19
+ rnn_dim: 256
20
+ bidirectional: true
21
+ rnn_type: GRU
22
+ mlp_dim: 512
23
+ hidden_activation: Tanh
24
+ hidden_activation_kwargs: null
25
+ complex_mask: true
26
+ use_freq_weights: true
27
+ n_fft: 2048
28
+ win_length: 2048
29
+ hop_length: 512
30
+ window_fn: hann_window
31
+ wkwargs: null
32
+ power: null
33
+ center: true
34
+ normalized: true
35
+ pad_mode: reflect
36
+ onesided: true
37
+ training:
38
+ batch_size: 4
39
+ gradient_accumulation_steps: 4
40
+ grad_clip: 0
41
+ instruments:
42
+ - speech
43
+ - music
44
+ - sfx
45
+ lr: 9.0e-05
46
+ patience: 2
47
+ reduce_factor: 0.95
48
+ target_instrument: null
49
+ num_epochs: 1000
50
+ num_steps: 1000
51
+ q: 0.95
52
+ coarse_loss_clip: true
53
+ ema_momentum: 0.999
54
+ optimizer: adam
55
+ other_fix: true
56
+ use_amp: true
57
+ augmentations:
58
+ enable: true
59
+ loudness: true
60
+ loudness_min: 0.5
61
+ loudness_max: 1.5
62
+ mixup: true
63
+ mixup_probs: !!python/tuple
64
+ - 0.2
65
+ - 0.02
66
+ mixup_loudness_min: 0.5
67
+ mixup_loudness_max: 1.5
68
+ all:
69
+ channel_shuffle: 0.5
70
+ random_inverse: 0.1
71
+ random_polarity: 0.5
72
+ inference:
73
+ batch_size: 1
74
+ dim_t: 256
75
+ num_overlap: 2
htdemucs/demucs3_mmi.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1941ce654b11df4132b9f4eae408556b4c83fad6fe26b4bc0dbcb36b975befb3
3
+ size 167407275
htdemucs/demucs4_4stem.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8726e21a993978c7ba086d3872e7608d7d5bfca646ca4aca459ffda844faa8b4
3
+ size 84141911
htdemucs/demucs4_6stem.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34c22ccb381c6f9fdbf324f04e1e2fe21aaaf293f5ded163a162697ff9a02ddd
3
+ size 54996327
htdemucs/demucs4_choirsep.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fd18ca3a8345c02b9656a0852193ddcbb1796188884e1b7119ce5758250bbcd
3
+ size 109841648
htdemucs/demucs4_ft_bass.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e57c48e6b0e38af4f7118d7bd08c49f0a0c0edf7d09143bdd902ea0d237303e6
3
+ size 84141271
htdemucs/demucs4_ft_drums.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba3fe64ae8ef66ac9a4857222ce48efbdc5eb3ad375cb79dd13debee5aaa4066
3
+ size 84141271
htdemucs/demucs4_ft_other.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef3bcb9c8b40d14ae5d51b6db2587339cc12c6b77c0be151ce6d69002e087bf2
3
+ size 84141271
htdemucs/demucs4_ft_vocals.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3cf25b222c4eed7cd49dd8b2c9597d50c18bd154090f7b919cfa5f93cf22c49
3
+ size 84141271
htdemucs/demucs4_mvsep_vocals.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ea6e9495685045e6b4e66174131be5d19808bb0d6d1a1ba717d238f9380e8d0
3
+ size 168124336
htdemucs/demucs_mid_side_wesleyr36.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9970dca36a15c0d0cf3338d24592aa35469697dc3be4b07e9056f5d54b82185
3
+ size 168122809
mdx23c/mdx23c_4stem_zfturbo_config.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 261120
3
+ dim_f: 4096
4
+ dim_t: 256
5
+ hop_length: 1024
6
+ n_fft: 8192
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 128
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 128
17
+ num_scales: 5
18
+ num_subbands: 4
19
+ scale:
20
+ - 2
21
+ - 2
22
+ training:
23
+ batch_size: 6
24
+ gradient_accumulation_steps: 6
25
+ grad_clip: 0
26
+ instruments:
27
+ - vocals
28
+ - bass
29
+ - drums
30
+ - other
31
+ lr: 5.0e-05
32
+ patience: 3
33
+ reduce_factor: 0.95
34
+ target_instrument: null
35
+ num_epochs: 1000
36
+ num_steps: 1000
37
+ augmentation: false
38
+ augmentation_type: simple1
39
+ use_mp3_compress: false
40
+ augmentation_mix: true
41
+ augmentation_loudness: true
42
+ augmentation_loudness_type: 2
43
+ augmentation_loudness_min: 0.8
44
+ augmentation_loudness_max: 1.2
45
+ q: 0.95
46
+ coarse_loss_clip: true
47
+ ema_momentum: 0.999
48
+ optimizer: adam
49
+ other_fix: false
50
+ use_amp: true
51
+ inference:
52
+ batch_size: 1
53
+ dim_t: 256
54
+ num_overlap: 2
mdx23c/mdx23c_d1581_config.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 260096
3
+ dim_f: 4096
4
+ dim_t: 256
5
+ hop_length: 2048
6
+ n_fft: 12288
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 64
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 128
17
+ num_scales: 5
18
+ num_subbands: 4
19
+ scale:
20
+ - 2
21
+ - 2
22
+ name: epoch_10.ckpt
23
+ training:
24
+ batch_size: 16
25
+ grad_clip: 0
26
+ instruments:
27
+ - Vocals
28
+ - Instrumental
29
+ lr: 5.0e-05
30
+ target_instrument: null
31
+ num_epochs: 100
32
+ num_steps: 1000
33
+ use_amp: true
34
+ inference:
35
+ batch_size: 1
36
+ dim_t: 256
37
+ num_overlap: 2
mdx23c/mdx23c_dereverb_aufr33_jarredou_config.yaml ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 261120
3
+ dim_f: 4096
4
+ dim_t: 256
5
+ hop_length: 1024
6
+ n_fft: 8192
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 128
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 128
17
+ num_scales: 5
18
+ num_subbands: 4
19
+ scale:
20
+ - 2
21
+ - 2
22
+ training:
23
+ batch_size: 2
24
+ gradient_accumulation_steps: 1
25
+ grad_clip: 0
26
+ instruments:
27
+ - dry
28
+ - other
29
+ lr: 1.0e-06
30
+ patience: 4
31
+ reduce_factor: 0.93
32
+ target_instrument: null
33
+ num_epochs: 40
34
+ num_steps: 1000
35
+ q: 0.95
36
+ coarse_loss_clip: true
37
+ ema_momentum: 0.999
38
+ optimizer: adamw
39
+ read_metadata_procs: 8
40
+ other_fix: false
41
+ use_amp: true
42
+ augmentations:
43
+ enable: false
44
+ loudness: true
45
+ loudness_min: 0.5
46
+ loudness_max: 1.5
47
+ mixup: true
48
+ mixup_probs: !!python/tuple
49
+ - 0.2
50
+ - 0.02
51
+ mixup_loudness_min: 0.5
52
+ mixup_loudness_max: 1.5
53
+ all:
54
+ channel_shuffle: 0.5
55
+ random_inverse: 0.05
56
+ random_polarity: 0.5
57
+ pedalboard_chorus: 0.001
58
+ pedalboard_chorus_rate_hz_min: 1.0
59
+ pedalboard_chorus_rate_hz_max: 7.0
60
+ pedalboard_chorus_depth_min: 0.25
61
+ pedalboard_chorus_depth_max: 0.95
62
+ pedalboard_chorus_centre_delay_ms_min: 3
63
+ pedalboard_chorus_centre_delay_ms_max: 10
64
+ pedalboard_chorus_feedback_min: 0.0
65
+ pedalboard_chorus_feedback_max: 0.01
66
+ pedalboard_chorus_mix_min: 0.1
67
+ pedalboard_chorus_mix_max: 0.9
68
+ pedalboard_phazer: 0.001
69
+ pedalboard_phazer_rate_hz_min: 1.0
70
+ pedalboard_phazer_rate_hz_max: 10.0
71
+ pedalboard_phazer_depth_min: 0.25
72
+ pedalboard_phazer_depth_max: 0.95
73
+ pedalboard_phazer_centre_frequency_hz_min: 200
74
+ pedalboard_phazer_centre_frequency_hz_max: 12000
75
+ pedalboard_phazer_feedback_min: 0.0
76
+ pedalboard_phazer_feedback_max: 0.5
77
+ pedalboard_phazer_mix_min: 0.1
78
+ pedalboard_phazer_mix_max: 0.9
79
+ pedalboard_pitch_shift: 0.01
80
+ pedalboard_pitch_shift_semitones_min: -7
81
+ pedalboard_pitch_shift_semitones_max: 7
82
+ pedalboard_resample: 0.001
83
+ pedalboard_resample_target_sample_rate_min: 4000
84
+ pedalboard_resample_target_sample_rate_max: 44100
85
+ mp3_compression_min_bitrate: 32
86
+ mp3_compression_max_bitrate: 320
87
+ mp3_compression_backend: lameenc
88
+ dry:
89
+ pedalboard_distortion: 0.001
90
+ pedalboard_distortion_drive_db_min: 1.0
91
+ pedalboard_distortion_drive_db_max: 25.0
92
+ tanh_distortion: 0.05
93
+ tanh_distortion_min: 0.1
94
+ tanh_distortion_max: 0.7
95
+ pedalboard_bitcrash: 0.005
96
+ pedalboard_bitcrash_bit_depth_min: 4
97
+ pedalboard_bitcrash_bit_depth_max: 16
98
+ seven_band_parametric_eq: 0.24
99
+ seven_band_parametric_eq_min_gain_db: -9
100
+ seven_band_parametric_eq_max_gain_db: 9
101
+ gaussian_noise: 0.005
102
+ gaussian_noise_min_amplitude: 0.001
103
+ gaussian_noise_max_amplitude: 0.01
104
+ time_stretch: 0.01
105
+ time_stretch_min_rate: 0.8
106
+ time_stretch_max_rate: 1.25
107
+ other:
108
+ seven_band_parametric_eq: 0.24
109
+ seven_band_parametric_eq_min_gain_db: -9
110
+ seven_band_parametric_eq_max_gain_db: 9
111
+ inference:
112
+ batch_size: 1
113
+ dim_t: 256
114
+ num_overlap: 2
mdx23c/mdx23c_drumsep_6stem_aufr33_jarredou_config.yaml ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 130560
3
+ dim_f: 1024
4
+ dim_t: 256
5
+ hop_length: 512
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 128
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 128
17
+ num_scales: 5
18
+ num_subbands: 4
19
+ scale:
20
+ - 2
21
+ - 2
22
+ training:
23
+ batch_size: 12
24
+ gradient_accumulation_steps: 1
25
+ grad_clip: 0
26
+ instruments:
27
+ - kick
28
+ - snare
29
+ - toms
30
+ - hh
31
+ - ride
32
+ - crash
33
+ lr: 9.0e-05
34
+ patience: 30
35
+ reduce_factor: 0.95
36
+ target_instrument: null
37
+ num_epochs: 1000
38
+ num_steps: 1268
39
+ q: 0.95
40
+ coarse_loss_clip: true
41
+ ema_momentum: 0.999
42
+ optimizer: adam
43
+ other_fix: false
44
+ use_amp: true
45
+ augmentations:
46
+ enable: true
47
+ loudness: true
48
+ loudness_min: 0.5
49
+ loudness_max: 1.5
50
+ mixup: true
51
+ mixup_probs: !!python/tuple
52
+ - 0.2
53
+ - 0.02
54
+ mixup_loudness_min: 0.5
55
+ mixup_loudness_max: 1.5
56
+ mp3_compression_on_mixture: 0.0
57
+ mp3_compression_on_mixture_bitrate_min: 32
58
+ mp3_compression_on_mixture_bitrate_max: 320
59
+ mp3_compression_on_mixture_backend: lameenc
60
+ all:
61
+ channel_shuffle: 0.5
62
+ random_inverse: 0.01
63
+ random_polarity: 0.5
64
+ mp3_compression: 0.0
65
+ mp3_compression_min_bitrate: 32
66
+ mp3_compression_max_bitrate: 320
67
+ mp3_compression_backend: lameenc
68
+ pitch_shift: 0.1
69
+ pitch_shift_min_semitones: -3
70
+ pitch_shift_max_semitones: 3
71
+ seven_band_parametric_eq: 0.5
72
+ seven_band_parametric_eq_min_gain_db: -6
73
+ seven_band_parametric_eq_max_gain_db: 6
74
+ tanh_distortion: 0.2
75
+ tanh_distortion_min: 0.1
76
+ tanh_distortion_max: 0.5
77
+ inference:
78
+ batch_size: 1
79
+ dim_t: 256
80
+ num_overlap: 2
mdx23c/mdx23c_instvoc_hq1_config.yaml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 261120
3
+ dim_f: 4096
4
+ dim_t: 256
5
+ hop_length: 1024
6
+ n_fft: 8192
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 128
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 128
17
+ num_scales: 5
18
+ num_subbands: 4
19
+ scale:
20
+ - 2
21
+ - 2
22
+ training:
23
+ batch_size: 6
24
+ grad_clip: 0
25
+ instruments:
26
+ - Vocals
27
+ - Instrumental
28
+ lr: 1.0e-05
29
+ patience: 2
30
+ reduce_factor: 0.95
31
+ target_instrument: null
32
+ num_epochs: 1000
33
+ num_steps: 1000
34
+ augmentation: 1
35
+ augmentation_type: simple1
36
+ augmentation_mix: true
37
+ q: 0.95
38
+ coarse_loss_clip: true
39
+ ema_momentum: 0.999
40
+ use_amp: true
41
+ inference:
42
+ batch_size: 1
43
+ dim_t: 256
44
+ num_overlap: 2
mdx23c/mdx23c_instvoc_hq2_config.yaml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 261120
3
+ dim_f: 4096
4
+ dim_t: 256
5
+ hop_length: 1024
6
+ n_fft: 8192
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 128
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 128
17
+ num_scales: 5
18
+ num_subbands: 4
19
+ scale:
20
+ - 2
21
+ - 2
22
+ training:
23
+ batch_size: 6
24
+ grad_clip: 0
25
+ instruments:
26
+ - Vocals
27
+ - Instrumental
28
+ lr: 1.0e-05
29
+ patience: 2
30
+ reduce_factor: 0.95
31
+ target_instrument: null
32
+ num_epochs: 1000
33
+ num_steps: 1000
34
+ augmentation: 1
35
+ augmentation_type: simple1
36
+ augmentation_mix: true
37
+ q: 0.95
38
+ coarse_loss_clip: true
39
+ ema_momentum: 0.999
40
+ use_amp: true
41
+ inference:
42
+ batch_size: 1
43
+ dim_t: 256
44
+ num_overlap: 2
mdx23c/mdx23c_instvoc_zfturbo_config.yaml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 261120
3
+ dim_f: 4096
4
+ dim_t: 256
5
+ hop_length: 1024
6
+ n_fft: 8192
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 128
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 128
17
+ num_scales: 5
18
+ num_subbands: 4
19
+ scale:
20
+ - 2
21
+ - 2
22
+ training:
23
+ batch_size: 6
24
+ gradient_accumulation_steps: 1
25
+ grad_clip: 0
26
+ instruments:
27
+ - vocals
28
+ - other
29
+ lr: 9.0e-05
30
+ patience: 2
31
+ reduce_factor: 0.95
32
+ target_instrument: null
33
+ num_epochs: 1000
34
+ num_steps: 1000
35
+ q: 0.95
36
+ coarse_loss_clip: true
37
+ ema_momentum: 0.999
38
+ optimizer: adam
39
+ read_metadata_procs: 8
40
+ other_fix: true
41
+ use_amp: true
42
+ augmentations:
43
+ enable: true
44
+ loudness: true
45
+ loudness_min: 0.5
46
+ loudness_max: 1.5
47
+ mixup: true
48
+ mixup_probs: !!python/tuple
49
+ - 0.2
50
+ - 0.02
51
+ mixup_loudness_min: 0.5
52
+ mixup_loudness_max: 1.5
53
+ mp3_compression_on_mixture: 0.01
54
+ mp3_compression_on_mixture_bitrate_min: 32
55
+ mp3_compression_on_mixture_bitrate_max: 320
56
+ mp3_compression_on_mixture_backend: lameenc
57
+ all:
58
+ channel_shuffle: 0.5
59
+ random_inverse: 0.1
60
+ random_polarity: 0.5
61
+ mp3_compression: 0.01
62
+ mp3_compression_min_bitrate: 32
63
+ mp3_compression_max_bitrate: 320
64
+ mp3_compression_backend: lameenc
65
+ vocals:
66
+ pitch_shift: 0.1
67
+ pitch_shift_min_semitones: -5
68
+ pitch_shift_max_semitones: 5
69
+ seven_band_parametric_eq: 0.25
70
+ seven_band_parametric_eq_min_gain_db: -9
71
+ seven_band_parametric_eq_max_gain_db: 9
72
+ tanh_distortion: 0.1
73
+ tanh_distortion_min: 0.1
74
+ tanh_distortion_max: 0.7
75
+ other:
76
+ pitch_shift: 0.1
77
+ pitch_shift_min_semitones: -4
78
+ pitch_shift_max_semitones: 4
79
+ gaussian_noise: 0.1
80
+ gaussian_noise_min_amplitude: 0.001
81
+ gaussian_noise_max_amplitude: 0.015
82
+ time_stretch: 0.01
83
+ time_stretch_min_rate: 0.8
84
+ time_stretch_max_rate: 1.25
85
+ inference:
86
+ batch_size: 1
87
+ dim_t: 256
88
+ num_overlap: 2
mdx23c/mdx23c_mid_side_wesleyr36_config.yaml ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 130560
3
+ dim_f: 1024
4
+ dim_t: 256
5
+ hop_length: 512
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 128
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 128
17
+ num_scales: 5
18
+ num_subbands: 4
19
+ scale:
20
+ - 2
21
+ - 2
22
+ training:
23
+ batch_size: 2
24
+ gradient_accumulation_steps: 3
25
+ grad_clip: 0
26
+ instruments:
27
+ - similarity
28
+ - difference
29
+ lr: 1.0
30
+ patience: 15
31
+ reduce_factor: 0.95
32
+ target_instrument: similarity
33
+ num_epochs: 1000
34
+ num_steps: 2235
35
+ q: 0.95
36
+ coarse_loss_clip: true
37
+ ema_momentum: 0.999
38
+ optimizer: prodigy
39
+ other_fix: false
40
+ use_amp: true
41
+ augmentations:
42
+ enable: true
43
+ loudness: true
44
+ loudness_min: 0.5
45
+ loudness_max: 1.5
46
+ all:
47
+ channel_shuffle: 0.5
48
+ random_inverse: 0.01
49
+ random_polarity: 0.5
50
+ mp3_compression: 0.0
51
+ mp3_compression_min_bitrate: 32
52
+ mp3_compression_max_bitrate: 320
53
+ mp3_compression_backend: lameenc
54
+ pitch_shift: 0.1
55
+ pitch_shift_min_semitones: -3
56
+ pitch_shift_max_semitones: 3
57
+ seven_band_parametric_eq: 0.5
58
+ seven_band_parametric_eq_min_gain_db: -6
59
+ seven_band_parametric_eq_max_gain_db: 6
60
+ tanh_distortion: 0.2
61
+ tanh_distortion_min: 0.1
62
+ tanh_distortion_max: 0.5
63
+ similarity:
64
+ gaussian_noise: 0.1
65
+ gaussian_noise_min_amplitude: 0.001
66
+ gaussian_noise_max_amplitude: 0.015
67
+ difference:
68
+ pedalboard_reverb: 0.01
69
+ pedalboard_reverb_room_size_min: 0.1
70
+ pedalboard_reverb_room_size_max: 0.9
71
+ pedalboard_reverb_damping_min: 0.1
72
+ pedalboard_reverb_damping_max: 0.9
73
+ pedalboard_reverb_wet_level_min: 0.1
74
+ pedalboard_reverb_wet_level_max: 0.5
75
+ pedalboard_reverb_dry_level_min: 0.5
76
+ pedalboard_reverb_dry_level_max: 0.9
77
+ pedalboard_reverb_width_min: 0.3
78
+ pedalboard_reverb_width_max: 1.0
79
+ inference:
80
+ batch_size: 1
81
+ dim_t: 256
82
+ num_overlap: 2
mdx23c/mdx23c_orch_verosment_config.yaml ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 261120
3
+ dim_f: 4096
4
+ dim_t: 256
5
+ hop_length: 1024
6
+ n_fft: 8192
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 128
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 128
17
+ num_scales: 5
18
+ num_subbands: 4
19
+ scale:
20
+ - 2
21
+ - 2
22
+ training:
23
+ batch_size: 1
24
+ gradient_accumulation_steps: 3
25
+ grad_clip: 0
26
+ instruments:
27
+ - inst
28
+ - orch
29
+ lr: 9.0e-05
30
+ patience: 6
31
+ reduce_factor: 0.95
32
+ target_instrument: orch
33
+ num_epochs: 1000
34
+ num_steps: 750
35
+ q: 0.95
36
+ coarse_loss_clip: true
37
+ ema_momentum: 0.999
38
+ optimizer: adam
39
+ other_fix: true
40
+ use_amp: true
41
+ augmentations:
42
+ enable: true
43
+ loudness: true
44
+ loudness_min: 0.5
45
+ loudness_max: 1.5
46
+ mixup: true
47
+ mixup_probs:
48
+ - 0.4
49
+ - 0.02
50
+ mixup_loudness_min: 0.5
51
+ mixup_loudness_max: 1.5
52
+ mp3_compression_on_mixture: 0.01
53
+ mp3_compression_on_mixture_bitrate_min: 32
54
+ mp3_compression_on_mixture_bitrate_max: 320
55
+ mp3_compression_on_mixture_backend: lameenc
56
+ all:
57
+ channel_shuffle: 0.5
58
+ random_inverse: 0.1
59
+ random_polarity: 0.5
60
+ mp3_compression: 0.01
61
+ mp3_compression_min_bitrate: 32
62
+ mp3_compression_max_bitrate: 320
63
+ mp3_compression_backend: lameenc
64
+ orch:
65
+ pitch_shift: 0.25
66
+ pitch_shift_min_semitones: -5
67
+ pitch_shift_max_semitones: 5
68
+ seven_band_parametric_eq: 1
69
+ seven_band_parametric_eq_min_gain_db: -6
70
+ seven_band_parametric_eq_max_gain_db: 6
71
+ tanh_distortion: 0.5
72
+ tanh_distortion_min: 0.05
73
+ tanh_distortion_max: 0.6
74
+ inst:
75
+ pitch_shift: 0.25
76
+ pitch_shift_min_semitones: -5
77
+ pitch_shift_max_semitones: 5
78
+ gaussian_noise: 0.1
79
+ gaussian_noise_min_amplitude: 0.001
80
+ gaussian_noise_max_amplitude: 0.015
81
+ time_stretch: 0.01
82
+ time_stretch_min_rate: 0.8
83
+ time_stretch_max_rate: 1.25
84
+ seven_band_parametric_eq: 1
85
+ seven_band_parametric_eq_min_gain_db: -6
86
+ seven_band_parametric_eq_max_gain_db: 6
87
+ tanh_distortion: 0.5
88
+ tanh_distortion_min: 0.05
89
+ tanh_distortion_max: 0.6
90
+ inference:
91
+ batch_size: 1
92
+ dim_t: 256
93
+ num_overlap: 2
mdxnet/mdx_inst_hq1.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38a045c4ded87e3bf97b609ec5be7910e8a7cecec455f507227ab12b5e29f7f9
3
+ size 66759214
mdxnet/mdx_inst_hq5.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:811cb24095d865763752310848b7ec86aeede0626cb05749ab35350e46897000
3
+ size 59074342
mdxnet/mdx_kim_inst.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86b1940e7122fbdd2beadc65507cbff6c352d79012a8a7e60d56db98532af5f7
3
+ size 66759214
mdxnet/mdx_kuielab_a_drums.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40f586b7091934dd6f5563f0cba8f14bad57ce88440da1098bf388ea716c2901
3
+ size 29703204
mdxnet/mdx_kuielab_a_other.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b67a1dcb5f232153528c59960b4c7bf8dc736b8114de360af0e719633f53358
3
+ size 29703204
mdxnet/mdx_kuielab_a_vocals.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daba83c2ee1afee9139766ad64c9b6808d6b6f092fff04bed3338be50baac721
3
+ size 29703204
mdxnet/mdx_main_390.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:286c4f0847ca837e2c3f4c4058f756d5f150cbf080506aa6f33a2847aba92e8c
3
+ size 66759214
mdxnet/mdx_main_406.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f158816a44eef1f0ba0f48b813cbfcf460ed1c70a754af3609ade44aaf7d1b23
3
+ size 66759214
scnet/scnet_4stem_zfturbo_config.yaml ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100
3
+ num_channels: 2
4
+ sample_rate: 44100
5
+ min_mean_abs: 0.0
6
+ model:
7
+ sources:
8
+ - drums
9
+ - bass
10
+ - other
11
+ - vocals
12
+ audio_channels: 2
13
+ dims:
14
+ - 4
15
+ - 64
16
+ - 128
17
+ - 256
18
+ nfft: 4096
19
+ hop_size: 1024
20
+ win_size: 4096
21
+ normalized: true
22
+ band_SR:
23
+ - 0.225
24
+ - 0.372
25
+ - 0.403
26
+ band_stride:
27
+ - 1
28
+ - 4
29
+ - 16
30
+ band_kernel:
31
+ - 3
32
+ - 4
33
+ - 16
34
+ conv_depths:
35
+ - 3
36
+ - 2
37
+ - 1
38
+ compress: 4
39
+ conv_kernel: 3
40
+ num_dplayer: 6
41
+ expand: 1
42
+ training:
43
+ batch_size: 6
44
+ gradient_accumulation_steps: 1
45
+ grad_clip: 0
46
+ instruments:
47
+ - drums
48
+ - bass
49
+ - other
50
+ - vocals
51
+ lr: 1.0
52
+ patience: 2
53
+ reduce_factor: 0.95
54
+ target_instrument: null
55
+ num_epochs: 1000
56
+ num_steps: 1000
57
+ q: 0.95
58
+ coarse_loss_clip: true
59
+ ema_momentum: 0.999
60
+ optimizer: prodigy
61
+ other_fix: false
62
+ use_amp: true
63
+ augmentations:
64
+ enable: true
65
+ loudness: true
66
+ loudness_min: 0.5
67
+ loudness_max: 1.5
68
+ mixup: true
69
+ mixup_probs: !!python/tuple
70
+ - 0.2
71
+ - 0.02
72
+ mixup_loudness_min: 0.5
73
+ mixup_loudness_max: 1.5
74
+ all:
75
+ channel_shuffle: 0.5
76
+ random_inverse: 0.1
77
+ random_polarity: 0.5
78
+ inference:
79
+ batch_size: 1
80
+ dim_t: 256
81
+ num_overlap: 2
82
+ normalize: true
scnet/scnet_choirsep_exp_config.yaml ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 131072
3
+ num_channels: 2
4
+ sample_rate: 44100
5
+ min_mean_abs: 0.0
6
+ model:
7
+ sources:
8
+ - alto
9
+ - bass
10
+ - soprano
11
+ - tenor
12
+ audio_channels: 2
13
+ dims:
14
+ - 4
15
+ - 32
16
+ - 64
17
+ - 128
18
+ nfft: 4096
19
+ hop_size: 1024
20
+ win_size: 4096
21
+ normalized: true
22
+ band_SR:
23
+ - 0.175
24
+ - 0.392
25
+ - 0.433
26
+ band_stride:
27
+ - 1
28
+ - 4
29
+ - 16
30
+ band_kernel:
31
+ - 3
32
+ - 4
33
+ - 16
34
+ conv_depths:
35
+ - 3
36
+ - 2
37
+ - 1
38
+ compress: 4
39
+ conv_kernel: 3
40
+ num_dplayer: 6
41
+ expand: 1
42
+ training:
43
+ batch_size: 9
44
+ gradient_accumulation_steps: 1
45
+ grad_clip: 0
46
+ instruments:
47
+ - alto
48
+ - bass
49
+ - soprano
50
+ - tenor
51
+ lr: 0.0005
52
+ patience: 6
53
+ reduce_factor: 0.95
54
+ target_instrument: null
55
+ num_epochs: 1000
56
+ num_steps: 1000
57
+ q: 0.95
58
+ coarse_loss_clip: true
59
+ ema_momentum: 0.999
60
+ optimizer: adamw8bit
61
+ other_fix: false
62
+ use_amp: true
63
+ loss_multistft:
64
+ fft_sizes:
65
+ - 1024
66
+ - 2048
67
+ - 4096
68
+ hop_sizes:
69
+ - 512
70
+ - 1024
71
+ - 2048
72
+ win_lengths:
73
+ - 1024
74
+ - 2048
75
+ - 4096
76
+ window: hann_window
77
+ scale: mel
78
+ n_bins: 128
79
+ sample_rate: 44100
80
+ perceptual_weighting: true
81
+ w_sc: 1.0
82
+ w_log_mag: 1.0
83
+ w_lin_mag: 0.0
84
+ w_phs: 0.0
85
+ mag_distance: L1
86
+ augmentations:
87
+ enable: false
88
+ loudness: true
89
+ loudness_min: 0.5
90
+ loudness_max: 1.5
91
+ mixup: false
92
+ mixup_probs: !!python/tuple
93
+ - 0.2
94
+ - 0.02
95
+ mixup_loudness_min: 0.5
96
+ mixup_loudness_max: 1.5
97
+ inference:
98
+ batch_size: 1
99
+ dim_t: 256
100
+ num_overlap: 2
101
+ normalize: false
scnet/scnet_huge_4stem_aname_config.yaml ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 661500
3
+ num_channels: 2
4
+ sample_rate: 44100
5
+ min_mean_abs: 0.0
6
+ model:
7
+ sources:
8
+ - drums
9
+ - bass
10
+ - other
11
+ - vocals
12
+ audio_channels: 2
13
+ dims:
14
+ - 4
15
+ - 64
16
+ - 128
17
+ - 256
18
+ nfft: 4096
19
+ hop_size: 1024
20
+ win_size: 4096
21
+ normalized: true
22
+ band_SR:
23
+ - 0.23
24
+ - 0.37
25
+ - 0.4
26
+ band_stride:
27
+ - 1
28
+ - 4
29
+ - 4
30
+ band_kernel:
31
+ - 3
32
+ - 4
33
+ - 4
34
+ conv_depths:
35
+ - 3
36
+ - 3
37
+ - 3
38
+ compress: 4
39
+ conv_kernel: 3
40
+ num_dplayer: 10
41
+ expand: 1
42
+ training:
43
+ batch_size: 1
44
+ gradient_accumulation_steps: 1
45
+ grad_clip: 0
46
+ instruments:
47
+ - drums
48
+ - bass
49
+ - other
50
+ - vocals
51
+ patience: 2
52
+ reduce_factor: 0.95
53
+ target_instrument: null
54
+ num_epochs: 1000
55
+ num_steps: 1000
56
+ q: 0.95
57
+ coarse_loss_clip: true
58
+ ema_momentum: 0.999
59
+ optimizer: adam
60
+ lr: 1.0
61
+ normalize: false
62
+ other_fix: false
63
+ use_amp: true
64
+ inference:
65
+ batch_size: 1
66
+ dim_t: 1101
67
+ num_overlap: 2
scnet/scnet_jazz_4stem_jorisvaneyghen_config.yaml ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100
3
+ num_channels: 2
4
+ sample_rate: 44100
5
+ min_mean_abs: 0.0
6
+ model:
7
+ sources:
8
+ - drums
9
+ - bass
10
+ - piano
11
+ - other
12
+ audio_channels: 2
13
+ dims:
14
+ - 4
15
+ - 64
16
+ - 128
17
+ - 256
18
+ nfft: 4096
19
+ hop_size: 1024
20
+ win_size: 4096
21
+ normalized: true
22
+ band_SR:
23
+ - 0.225
24
+ - 0.372
25
+ - 0.403
26
+ band_stride:
27
+ - 1
28
+ - 4
29
+ - 16
30
+ band_kernel:
31
+ - 3
32
+ - 4
33
+ - 16
34
+ conv_depths:
35
+ - 3
36
+ - 2
37
+ - 1
38
+ compress: 4
39
+ conv_kernel: 3
40
+ num_dplayer: 6
41
+ expand: 1
42
+ training:
43
+ batch_size: 1
44
+ gradient_accumulation_steps: 3
45
+ grad_clip: 0
46
+ instruments:
47
+ - drums
48
+ - bass
49
+ - piano
50
+ - other
51
+ lr: 5.0e-05
52
+ patience: 2
53
+ reduce_factor: 0.95
54
+ target_instrument: null
55
+ num_epochs: 1000
56
+ num_steps: 500
57
+ q: 0.95
58
+ coarse_loss_clip: true
59
+ ema_momentum: 0.999
60
+ optimizer: adam
61
+ other_fix: false
62
+ use_amp: true
63
+ augmentations:
64
+ enable: true
65
+ loudness: true
66
+ loudness_min: 0.5
67
+ loudness_max: 1.5
68
+ mixup: true
69
+ mixup_probs: !!python/tuple
70
+ - 0.2
71
+ - 0.02
72
+ mixup_loudness_min: 0.5
73
+ mixup_loudness_max: 1.5
74
+ all:
75
+ channel_shuffle: 0.5
76
+ random_inverse: 0.1
77
+ random_polarity: 0.5
78
+ inference:
79
+ batch_size: 1
80
+ dim_t: 256
81
+ num_overlap: 2
82
+ normalize: false
scnet/scnet_xl_4stem_starrytong_config.yaml ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100
3
+ num_channels: 2
4
+ sample_rate: 44100
5
+ min_mean_abs: 0.0
6
+ model:
7
+ sources:
8
+ - drums
9
+ - bass
10
+ - other
11
+ - vocals
12
+ audio_channels: 2
13
+ dims:
14
+ - 4
15
+ - 64
16
+ - 128
17
+ - 256
18
+ nfft: 4096
19
+ hop_size: 1024
20
+ win_size: 4096
21
+ normalized: true
22
+ band_SR:
23
+ - 0.23
24
+ - 0.37
25
+ - 0.4
26
+ band_stride:
27
+ - 1
28
+ - 4
29
+ - 16
30
+ band_kernel:
31
+ - 3
32
+ - 4
33
+ - 16
34
+ conv_depths:
35
+ - 3
36
+ - 2
37
+ - 1
38
+ compress: 4
39
+ conv_kernel: 3
40
+ num_dplayer: 8
41
+ expand: 1
42
+ training:
43
+ batch_size: 4
44
+ gradient_accumulation_steps: 1
45
+ grad_clip: 0
46
+ instruments:
47
+ - drums
48
+ - bass
49
+ - other
50
+ - vocals
51
+ patience: 2
52
+ reduce_factor: 0.95
53
+ target_instrument: null
54
+ num_epochs: 1000
55
+ num_steps: 1000
56
+ q: 0.95
57
+ coarse_loss_clip: true
58
+ ema_momentum: 0.999
59
+ optimizer: adam
60
+ lr: 1.0e-05
61
+ normalize: false
62
+ other_fix: false
63
+ use_amp: true
64
+ augmentations:
65
+ enable: false
66
+ loudness: true
67
+ loudness_min: 0.5
68
+ loudness_max: 1.5
69
+ mixup: true
70
+ mixup_probs: !!python/tuple
71
+ - 0.2
72
+ - 0.02
73
+ mixup_loudness_min: 0.5
74
+ mixup_loudness_max: 1.5
75
+ mp3_compression_on_mixture: 0.01
76
+ mp3_compression_on_mixture_bitrate_min: 32
77
+ mp3_compression_on_mixture_bitrate_max: 320
78
+ mp3_compression_on_mixture_backend: lameenc
79
+ all:
80
+ channel_shuffle: 0.5
81
+ random_inverse: 0.1
82
+ random_polarity: 0.5
83
+ mp3_compression: 0.01
84
+ mp3_compression_min_bitrate: 32
85
+ mp3_compression_max_bitrate: 320
86
+ mp3_compression_backend: lameenc
87
+ pedalboard_reverb: 0.01
88
+ pedalboard_reverb_room_size_min: 0.1
89
+ pedalboard_reverb_room_size_max: 0.9
90
+ pedalboard_reverb_damping_min: 0.1
91
+ pedalboard_reverb_damping_max: 0.9
92
+ pedalboard_reverb_wet_level_min: 0.1
93
+ pedalboard_reverb_wet_level_max: 0.9
94
+ pedalboard_reverb_dry_level_min: 0.1
95
+ pedalboard_reverb_dry_level_max: 0.9
96
+ pedalboard_reverb_width_min: 0.9
97
+ pedalboard_reverb_width_max: 1.0
98
+ pedalboard_chorus: 0.01
99
+ pedalboard_chorus_rate_hz_min: 1.0
100
+ pedalboard_chorus_rate_hz_max: 7.0
101
+ pedalboard_chorus_depth_min: 0.25
102
+ pedalboard_chorus_depth_max: 0.95
103
+ pedalboard_chorus_centre_delay_ms_min: 3
104
+ pedalboard_chorus_centre_delay_ms_max: 10
105
+ pedalboard_chorus_feedback_min: 0.0
106
+ pedalboard_chorus_feedback_max: 0.5
107
+ pedalboard_chorus_mix_min: 0.1
108
+ pedalboard_chorus_mix_max: 0.9
109
+ pedalboard_phazer: 0.01
110
+ pedalboard_phazer_rate_hz_min: 1.0
111
+ pedalboard_phazer_rate_hz_max: 10.0
112
+ pedalboard_phazer_depth_min: 0.25
113
+ pedalboard_phazer_depth_max: 0.95
114
+ pedalboard_phazer_centre_frequency_hz_min: 200
115
+ pedalboard_phazer_centre_frequency_hz_max: 12000
116
+ pedalboard_phazer_feedback_min: 0.0
117
+ pedalboard_phazer_feedback_max: 0.5
118
+ pedalboard_phazer_mix_min: 0.1
119
+ pedalboard_phazer_mix_max: 0.9
120
+ pedalboard_distortion: 0.01
121
+ pedalboard_distortion_drive_db_min: 1.0
122
+ pedalboard_distortion_drive_db_max: 25.0
123
+ pedalboard_pitch_shift: 0.01
124
+ pedalboard_pitch_shift_semitones_min: -7
125
+ pedalboard_pitch_shift_semitones_max: 7
126
+ pedalboard_resample: 0.01
127
+ pedalboard_resample_target_sample_rate_min: 4000
128
+ pedalboard_resample_target_sample_rate_max: 44100
129
+ pedalboard_bitcrash: 0.01
130
+ pedalboard_bitcrash_bit_depth_min: 4
131
+ pedalboard_bitcrash_bit_depth_max: 16
132
+ pedalboard_mp3_compressor: 0.01
133
+ pedalboard_mp3_compressor_pedalboard_mp3_compressor_min: 0
134
+ pedalboard_mp3_compressor_pedalboard_mp3_compressor_max: 9.999
135
+ vocals:
136
+ pitch_shift: 0.1
137
+ pitch_shift_min_semitones: -5
138
+ pitch_shift_max_semitones: 5
139
+ seven_band_parametric_eq: 0.25
140
+ seven_band_parametric_eq_min_gain_db: -9
141
+ seven_band_parametric_eq_max_gain_db: 9
142
+ tanh_distortion: 0.1
143
+ tanh_distortion_min: 0.1
144
+ tanh_distortion_max: 0.7
145
+ bass:
146
+ pitch_shift: 0.1
147
+ pitch_shift_min_semitones: -2
148
+ pitch_shift_max_semitones: 2
149
+ seven_band_parametric_eq: 0.25
150
+ seven_band_parametric_eq_min_gain_db: -3
151
+ seven_band_parametric_eq_max_gain_db: 6
152
+ tanh_distortion: 0.2
153
+ tanh_distortion_min: 0.1
154
+ tanh_distortion_max: 0.5
155
+ drums:
156
+ pitch_shift: 0.33
157
+ pitch_shift_min_semitones: -5
158
+ pitch_shift_max_semitones: 5
159
+ seven_band_parametric_eq: 0.25
160
+ seven_band_parametric_eq_min_gain_db: -9
161
+ seven_band_parametric_eq_max_gain_db: 9
162
+ tanh_distortion: 0.33
163
+ tanh_distortion_min: 0.1
164
+ tanh_distortion_max: 0.6
165
+ other:
166
+ pitch_shift: 0.1
167
+ pitch_shift_min_semitones: -4
168
+ pitch_shift_max_semitones: 4
169
+ gaussian_noise: 0.1
170
+ gaussian_noise_min_amplitude: 0.001
171
+ gaussian_noise_max_amplitude: 0.015
172
+ time_stretch: 0.01
173
+ time_stretch_min_rate: 0.8
174
+ time_stretch_max_rate: 1.25
175
+ inference:
176
+ batch_size: 1
177
+ dim_t: 256
178
+ num_overlap: 2
179
+ normalize: false
scnet/scnet_xl_4stem_zftrubo_config.yaml ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100
3
+ num_channels: 2
4
+ sample_rate: 44100
5
+ min_mean_abs: 0.0
6
+ model:
7
+ sources:
8
+ - drums
9
+ - bass
10
+ - other
11
+ - vocals
12
+ audio_channels: 2
13
+ dims:
14
+ - 4
15
+ - 32
16
+ - 64
17
+ - 128
18
+ nfft: 4096
19
+ hop_size: 1024
20
+ win_size: 4096
21
+ normalized: true
22
+ band_SR:
23
+ - 0.175
24
+ - 0.392
25
+ - 0.433
26
+ band_stride:
27
+ - 1
28
+ - 4
29
+ - 16
30
+ band_kernel:
31
+ - 3
32
+ - 4
33
+ - 16
34
+ conv_depths:
35
+ - 3
36
+ - 2
37
+ - 1
38
+ compress: 4
39
+ conv_kernel: 3
40
+ num_dplayer: 6
41
+ expand: 1
42
+ training:
43
+ batch_size: 10
44
+ gradient_accumulation_steps: 1
45
+ grad_clip: 0
46
+ instruments:
47
+ - drums
48
+ - bass
49
+ - other
50
+ - vocals
51
+ lr: 0.0005
52
+ patience: 2
53
+ reduce_factor: 0.95
54
+ target_instrument: null
55
+ num_epochs: 1000
56
+ num_steps: 1000
57
+ q: 0.95
58
+ coarse_loss_clip: true
59
+ ema_momentum: 0.999
60
+ optimizer: adam
61
+ other_fix: false
62
+ use_amp: true
63
+ augmentations:
64
+ enable: true
65
+ loudness: true
66
+ loudness_min: 0.5
67
+ loudness_max: 1.5
68
+ mixup: true
69
+ mixup_probs: !!python/tuple
70
+ - 0.2
71
+ - 0.02
72
+ mixup_loudness_min: 0.5
73
+ mixup_loudness_max: 1.5
74
+ inference:
75
+ batch_size: 1
76
+ dim_t: 256
77
+ num_overlap: 2
78
+ normalize: true
scnet/scnet_xl_ihf_4stem_zfturbo_config.yaml ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100
3
+ num_channels: 2
4
+ sample_rate: 44100
5
+ min_mean_abs: 0.0
6
+ model:
7
+ sources:
8
+ - drums
9
+ - bass
10
+ - other
11
+ - vocals
12
+ audio_channels: 2
13
+ dims:
14
+ - 4
15
+ - 64
16
+ - 128
17
+ - 256
18
+ nfft: 4096
19
+ hop_size: 1024
20
+ win_size: 4096
21
+ normalized: true
22
+ band_SR:
23
+ - 0.23
24
+ - 0.37
25
+ - 0.4
26
+ band_stride:
27
+ - 1
28
+ - 4
29
+ - 4
30
+ band_kernel:
31
+ - 3
32
+ - 4
33
+ - 4
34
+ conv_depths:
35
+ - 3
36
+ - 3
37
+ - 3
38
+ compress: 4
39
+ conv_kernel: 3
40
+ num_dplayer: 8
41
+ expand: 1
42
+ training:
43
+ batch_size: 3
44
+ gradient_accumulation_steps: 1
45
+ grad_clip: 0
46
+ instruments:
47
+ - drums
48
+ - bass
49
+ - other
50
+ - vocals
51
+ patience: 2
52
+ reduce_factor: 0.95
53
+ target_instrument: null
54
+ num_epochs: 1000
55
+ num_steps: 1000
56
+ q: 0.95
57
+ coarse_loss_clip: true
58
+ ema_momentum: 0.999
59
+ optimizer: adam
60
+ lr: 5.0e-05
61
+ normalize: false
62
+ other_fix: false
63
+ use_amp: true
64
+ augmentations:
65
+ enable: true
66
+ loudness: true
67
+ loudness_min: 0.5
68
+ loudness_max: 1.5
69
+ mixup: true
70
+ mixup_probs: !!python/tuple
71
+ - 0.2
72
+ - 0.02
73
+ - 0.002
74
+ mixup_loudness_min: 0.5
75
+ mixup_loudness_max: 1.5
76
+ mp3_compression_on_mixture: 0.01
77
+ mp3_compression_on_mixture_bitrate_min: 32
78
+ mp3_compression_on_mixture_bitrate_max: 320
79
+ mp3_compression_on_mixture_backend: lameenc
80
+ all:
81
+ channel_shuffle: 0.5
82
+ random_inverse: 0.1
83
+ random_polarity: 0.5
84
+ mp3_compression: 0.01
85
+ mp3_compression_min_bitrate: 32
86
+ mp3_compression_max_bitrate: 320
87
+ mp3_compression_backend: lameenc
88
+ pedalboard_reverb: 0.01
89
+ pedalboard_reverb_room_size_min: 0.1
90
+ pedalboard_reverb_room_size_max: 0.9
91
+ pedalboard_reverb_damping_min: 0.1
92
+ pedalboard_reverb_damping_max: 0.9
93
+ pedalboard_reverb_wet_level_min: 0.1
94
+ pedalboard_reverb_wet_level_max: 0.9
95
+ pedalboard_reverb_dry_level_min: 0.1
96
+ pedalboard_reverb_dry_level_max: 0.9
97
+ pedalboard_reverb_width_min: 0.9
98
+ pedalboard_reverb_width_max: 1.0
99
+ pedalboard_chorus: 0.01
100
+ pedalboard_chorus_rate_hz_min: 1.0
101
+ pedalboard_chorus_rate_hz_max: 7.0
102
+ pedalboard_chorus_depth_min: 0.25
103
+ pedalboard_chorus_depth_max: 0.95
104
+ pedalboard_chorus_centre_delay_ms_min: 3
105
+ pedalboard_chorus_centre_delay_ms_max: 10
106
+ pedalboard_chorus_feedback_min: 0.0
107
+ pedalboard_chorus_feedback_max: 0.5
108
+ pedalboard_chorus_mix_min: 0.1
109
+ pedalboard_chorus_mix_max: 0.9
110
+ pedalboard_phazer: 0.01
111
+ pedalboard_phazer_rate_hz_min: 1.0
112
+ pedalboard_phazer_rate_hz_max: 10.0
113
+ pedalboard_phazer_depth_min: 0.25
114
+ pedalboard_phazer_depth_max: 0.95
115
+ pedalboard_phazer_centre_frequency_hz_min: 200
116
+ pedalboard_phazer_centre_frequency_hz_max: 12000
117
+ pedalboard_phazer_feedback_min: 0.0
118
+ pedalboard_phazer_feedback_max: 0.5
119
+ pedalboard_phazer_mix_min: 0.1
120
+ pedalboard_phazer_mix_max: 0.9
121
+ pedalboard_distortion: 0.01
122
+ pedalboard_distortion_drive_db_min: 1.0
123
+ pedalboard_distortion_drive_db_max: 25.0
124
+ pedalboard_pitch_shift: 0.01
125
+ pedalboard_pitch_shift_semitones_min: -7
126
+ pedalboard_pitch_shift_semitones_max: 7
127
+ pedalboard_resample: 0.01
128
+ pedalboard_resample_target_sample_rate_min: 4000
129
+ pedalboard_resample_target_sample_rate_max: 44100
130
+ pedalboard_bitcrash: 0.01
131
+ pedalboard_bitcrash_bit_depth_min: 4
132
+ pedalboard_bitcrash_bit_depth_max: 16
133
+ pedalboard_mp3_compressor: 0.01
134
+ pedalboard_mp3_compressor_pedalboard_mp3_compressor_min: 0
135
+ pedalboard_mp3_compressor_pedalboard_mp3_compressor_max: 9.999
136
+ vocals:
137
+ pitch_shift: 0.1
138
+ pitch_shift_min_semitones: -5
139
+ pitch_shift_max_semitones: 5
140
+ seven_band_parametric_eq: 0.25
141
+ seven_band_parametric_eq_min_gain_db: -9
142
+ seven_band_parametric_eq_max_gain_db: 9
143
+ tanh_distortion: 0.1
144
+ tanh_distortion_min: 0.1
145
+ tanh_distortion_max: 0.7
146
+ bass:
147
+ pitch_shift: 0.1
148
+ pitch_shift_min_semitones: -2
149
+ pitch_shift_max_semitones: 2
150
+ seven_band_parametric_eq: 0.25
151
+ seven_band_parametric_eq_min_gain_db: -3
152
+ seven_band_parametric_eq_max_gain_db: 6
153
+ tanh_distortion: 0.2
154
+ tanh_distortion_min: 0.1
155
+ tanh_distortion_max: 0.5
156
+ drums:
157
+ pitch_shift: 0.33
158
+ pitch_shift_min_semitones: -5
159
+ pitch_shift_max_semitones: 5
160
+ seven_band_parametric_eq: 0.25
161
+ seven_band_parametric_eq_min_gain_db: -9
162
+ seven_band_parametric_eq_max_gain_db: 9
163
+ tanh_distortion: 0.33
164
+ tanh_distortion_min: 0.1
165
+ tanh_distortion_max: 0.6
166
+ other:
167
+ pitch_shift: 0.1
168
+ pitch_shift_min_semitones: -4
169
+ pitch_shift_max_semitones: 4
170
+ gaussian_noise: 0.1
171
+ gaussian_noise_min_amplitude: 0.001
172
+ gaussian_noise_max_amplitude: 0.015
173
+ time_stretch: 0.01
174
+ time_stretch_min_rate: 0.8
175
+ time_stretch_max_rate: 1.25
176
+ inference:
177
+ batch_size: 1
178
+ dim_t: 256
179
+ num_overlap: 2
180
+ normalize: false
scnet/scnet_xl_jazz_4stem_jorisvaneyghen_config.yaml ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100
3
+ num_channels: 2
4
+ sample_rate: 44100
5
+ min_mean_abs: 0.0
6
+ model:
7
+ sources:
8
+ - drums
9
+ - bass
10
+ - piano
11
+ - other
12
+ audio_channels: 2
13
+ dims:
14
+ - 4
15
+ - 64
16
+ - 128
17
+ - 256
18
+ nfft: 4096
19
+ hop_size: 1024
20
+ win_size: 4096
21
+ normalized: true
22
+ band_SR:
23
+ - 0.23
24
+ - 0.37
25
+ - 0.4
26
+ band_stride:
27
+ - 1
28
+ - 4
29
+ - 16
30
+ band_kernel:
31
+ - 3
32
+ - 4
33
+ - 16
34
+ conv_depths:
35
+ - 3
36
+ - 2
37
+ - 1
38
+ compress: 4
39
+ conv_kernel: 3
40
+ num_dplayer: 8
41
+ expand: 1
42
+ training:
43
+ batch_size: 1
44
+ gradient_accumulation_steps: 4
45
+ grad_clip: 0
46
+ instruments:
47
+ - drums
48
+ - bass
49
+ - piano
50
+ - other
51
+ lr: 0.0001
52
+ patience: 2
53
+ reduce_factor: 0.95
54
+ target_instrument: null
55
+ num_epochs: 250
56
+ num_steps: 1000
57
+ q: 0.95
58
+ coarse_loss_clip: true
59
+ ema_momentum: 0.999
60
+ optimizer: adam
61
+ normalize: false
62
+ other_fix: false
63
+ use_amp: true
64
+ augmentations:
65
+ enable: true
66
+ loudness: true
67
+ loudness_min: 0.5
68
+ loudness_max: 1.5
69
+ mixup: true
70
+ mixup_probs: !!python/tuple
71
+ - 0.2
72
+ - 0.02
73
+ mixup_loudness_min: 0.5
74
+ mixup_loudness_max: 1.5
75
+ all:
76
+ stereo_to_mono: 0.1
77
+ channel_shuffle: 0.5
78
+ random_polarity: 0.5
79
+ inference:
80
+ batch_size: 1
81
+ dim_t: 256
82
+ num_overlap: 2
83
+ normalize: false
scnet_masked/scnet_masked_small_4stem_zftrubo_config.yaml ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100
3
+ num_channels: 2
4
+ sample_rate: 44100
5
+ min_mean_abs: 0.0
6
+ model:
7
+ sources:
8
+ - drums
9
+ - bass
10
+ - other
11
+ - vocals
12
+ audio_channels: 2
13
+ dims:
14
+ - 4
15
+ - 32
16
+ - 64
17
+ - 128
18
+ nfft: 4096
19
+ hop_size: 1024
20
+ win_size: 4096
21
+ normalized: true
22
+ band_SR:
23
+ - 0.175
24
+ - 0.392
25
+ - 0.433
26
+ band_stride:
27
+ - 1
28
+ - 4
29
+ - 16
30
+ band_kernel:
31
+ - 3
32
+ - 4
33
+ - 16
34
+ conv_depths:
35
+ - 3
36
+ - 2
37
+ - 1
38
+ compress: 4
39
+ conv_kernel: 3
40
+ num_dplayer: 6
41
+ expand: 1
42
+ training:
43
+ batch_size: 10
44
+ gradient_accumulation_steps: 1
45
+ grad_clip: 0
46
+ instruments:
47
+ - drums
48
+ - bass
49
+ - other
50
+ - vocals
51
+ lr: 0.0005
52
+ patience: 2
53
+ reduce_factor: 0.95
54
+ target_instrument: null
55
+ num_epochs: 1000
56
+ num_steps: 1000
57
+ q: 0.95
58
+ coarse_loss_clip: true
59
+ ema_momentum: 0.999
60
+ optimizer: adam
61
+ normalize: false
62
+ other_fix: false
63
+ use_amp: true
64
+ augmentations:
65
+ enable: true
66
+ loudness: true
67
+ loudness_min: 0.5
68
+ loudness_max: 1.5
69
+ mixup: true
70
+ mixup_probs: !!python/tuple
71
+ - 0.2
72
+ - 0.02
73
+ mixup_loudness_min: 0.5
74
+ mixup_loudness_max: 1.5
75
+ mp3_compression_on_mixture: 0.01
76
+ mp3_compression_on_mixture_bitrate_min: 32
77
+ mp3_compression_on_mixture_bitrate_max: 320
78
+ mp3_compression_on_mixture_backend: lameenc
79
+ all:
80
+ channel_shuffle: 0.5
81
+ random_inverse: 0.1
82
+ random_polarity: 0.5
83
+ mp3_compression: 0.01
84
+ mp3_compression_min_bitrate: 32
85
+ mp3_compression_max_bitrate: 320
86
+ mp3_compression_backend: lameenc
87
+ pedalboard_reverb: 0.01
88
+ pedalboard_reverb_room_size_min: 0.1
89
+ pedalboard_reverb_room_size_max: 0.9
90
+ pedalboard_reverb_damping_min: 0.1
91
+ pedalboard_reverb_damping_max: 0.9
92
+ pedalboard_reverb_wet_level_min: 0.1
93
+ pedalboard_reverb_wet_level_max: 0.9
94
+ pedalboard_reverb_dry_level_min: 0.1
95
+ pedalboard_reverb_dry_level_max: 0.9
96
+ pedalboard_reverb_width_min: 0.9
97
+ pedalboard_reverb_width_max: 1.0
98
+ pedalboard_chorus: 0.01
99
+ pedalboard_chorus_rate_hz_min: 1.0
100
+ pedalboard_chorus_rate_hz_max: 7.0
101
+ pedalboard_chorus_depth_min: 0.25
102
+ pedalboard_chorus_depth_max: 0.95
103
+ pedalboard_chorus_centre_delay_ms_min: 3
104
+ pedalboard_chorus_centre_delay_ms_max: 10
105
+ pedalboard_chorus_feedback_min: 0.0
106
+ pedalboard_chorus_feedback_max: 0.5
107
+ pedalboard_chorus_mix_min: 0.1
108
+ pedalboard_chorus_mix_max: 0.9
109
+ pedalboard_phazer: 0.01
110
+ pedalboard_phazer_rate_hz_min: 1.0
111
+ pedalboard_phazer_rate_hz_max: 10.0
112
+ pedalboard_phazer_depth_min: 0.25
113
+ pedalboard_phazer_depth_max: 0.95
114
+ pedalboard_phazer_centre_frequency_hz_min: 200
115
+ pedalboard_phazer_centre_frequency_hz_max: 12000
116
+ pedalboard_phazer_feedback_min: 0.0
117
+ pedalboard_phazer_feedback_max: 0.5
118
+ pedalboard_phazer_mix_min: 0.1
119
+ pedalboard_phazer_mix_max: 0.9
120
+ pedalboard_distortion: 0.01
121
+ pedalboard_distortion_drive_db_min: 1.0
122
+ pedalboard_distortion_drive_db_max: 25.0
123
+ pedalboard_pitch_shift: 0.01
124
+ pedalboard_pitch_shift_semitones_min: -7
125
+ pedalboard_pitch_shift_semitones_max: 7
126
+ pedalboard_resample: 0.01
127
+ pedalboard_resample_target_sample_rate_min: 4000
128
+ pedalboard_resample_target_sample_rate_max: 44100
129
+ pedalboard_bitcrash: 0.01
130
+ pedalboard_bitcrash_bit_depth_min: 4
131
+ pedalboard_bitcrash_bit_depth_max: 16
132
+ pedalboard_mp3_compressor: 0.01
133
+ pedalboard_mp3_compressor_pedalboard_mp3_compressor_min: 0
134
+ pedalboard_mp3_compressor_pedalboard_mp3_compressor_max: 9.999
135
+ vocals:
136
+ pitch_shift: 0.1
137
+ pitch_shift_min_semitones: -5
138
+ pitch_shift_max_semitones: 5
139
+ seven_band_parametric_eq: 0.1
140
+ seven_band_parametric_eq_min_gain_db: -9
141
+ seven_band_parametric_eq_max_gain_db: 9
142
+ tanh_distortion: 0.1
143
+ tanh_distortion_min: 0.1
144
+ tanh_distortion_max: 0.7
145
+ other:
146
+ pitch_shift: 0.1
147
+ pitch_shift_min_semitones: -4
148
+ pitch_shift_max_semitones: 4
149
+ gaussian_noise: 0.1
150
+ gaussian_noise_min_amplitude: 0.001
151
+ gaussian_noise_max_amplitude: 0.015
152
+ time_stretch: 0.01
153
+ time_stretch_min_rate: 0.8
154
+ time_stretch_max_rate: 1.25
155
+ inference:
156
+ batch_size: 1
157
+ dim_t: 256
158
+ num_overlap: 2
159
+ normalize: false
scnet_masked/scnet_masked_xl_ihf_4stem_zftrubo_config.yaml ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100
3
+ num_channels: 2
4
+ sample_rate: 44100
5
+ min_mean_abs: 0.0
6
+ model:
7
+ sources:
8
+ - drums
9
+ - bass
10
+ - other
11
+ - vocals
12
+ audio_channels: 2
13
+ dims:
14
+ - 4
15
+ - 64
16
+ - 128
17
+ - 256
18
+ nfft: 4096
19
+ hop_size: 1024
20
+ win_size: 4096
21
+ normalized: true
22
+ band_SR:
23
+ - 0.23
24
+ - 0.37
25
+ - 0.4
26
+ band_stride:
27
+ - 1
28
+ - 4
29
+ - 4
30
+ band_kernel:
31
+ - 3
32
+ - 4
33
+ - 4
34
+ conv_depths:
35
+ - 3
36
+ - 3
37
+ - 3
38
+ compress: 4
39
+ conv_kernel: 3
40
+ num_dplayer: 8
41
+ expand: 1
42
+ training:
43
+ batch_size: 2
44
+ gradient_accumulation_steps: 1
45
+ grad_clip: 0
46
+ instruments:
47
+ - drums
48
+ - bass
49
+ - other
50
+ - vocals
51
+ patience: 2
52
+ reduce_factor: 0.95
53
+ target_instrument: null
54
+ num_epochs: 1000
55
+ num_steps: 1000
56
+ q: 0.95
57
+ coarse_loss_clip: true
58
+ ema_momentum: 0.999
59
+ optimizer: adam
60
+ lr: 5.0e-05
61
+ normalize: false
62
+ other_fix: false
63
+ use_amp: true
64
+ augmentations:
65
+ enable: true
66
+ loudness: true
67
+ loudness_min: 0.5
68
+ loudness_max: 1.5
69
+ mixup: true
70
+ mixup_probs: !!python/tuple
71
+ - 0.2
72
+ - 0.02
73
+ - 0.002
74
+ mixup_loudness_min: 0.5
75
+ mixup_loudness_max: 1.5
76
+ mp3_compression_on_mixture: 0.01
77
+ mp3_compression_on_mixture_bitrate_min: 32
78
+ mp3_compression_on_mixture_bitrate_max: 320
79
+ mp3_compression_on_mixture_backend: lameenc
80
+ all:
81
+ channel_shuffle: 0.5
82
+ random_inverse: 0.1
83
+ random_polarity: 0.5
84
+ mp3_compression: 0.01
85
+ mp3_compression_min_bitrate: 32
86
+ mp3_compression_max_bitrate: 320
87
+ mp3_compression_backend: lameenc
88
+ pedalboard_reverb: 0.01
89
+ pedalboard_reverb_room_size_min: 0.1
90
+ pedalboard_reverb_room_size_max: 0.9
91
+ pedalboard_reverb_damping_min: 0.1
92
+ pedalboard_reverb_damping_max: 0.9
93
+ pedalboard_reverb_wet_level_min: 0.1
94
+ pedalboard_reverb_wet_level_max: 0.9
95
+ pedalboard_reverb_dry_level_min: 0.1
96
+ pedalboard_reverb_dry_level_max: 0.9
97
+ pedalboard_reverb_width_min: 0.9
98
+ pedalboard_reverb_width_max: 1.0
99
+ pedalboard_chorus: 0.01
100
+ pedalboard_chorus_rate_hz_min: 1.0
101
+ pedalboard_chorus_rate_hz_max: 7.0
102
+ pedalboard_chorus_depth_min: 0.25
103
+ pedalboard_chorus_depth_max: 0.95
104
+ pedalboard_chorus_centre_delay_ms_min: 3
105
+ pedalboard_chorus_centre_delay_ms_max: 10
106
+ pedalboard_chorus_feedback_min: 0.0
107
+ pedalboard_chorus_feedback_max: 0.5
108
+ pedalboard_chorus_mix_min: 0.1
109
+ pedalboard_chorus_mix_max: 0.9
110
+ pedalboard_phazer: 0.01
111
+ pedalboard_phazer_rate_hz_min: 1.0
112
+ pedalboard_phazer_rate_hz_max: 10.0
113
+ pedalboard_phazer_depth_min: 0.25
114
+ pedalboard_phazer_depth_max: 0.95
115
+ pedalboard_phazer_centre_frequency_hz_min: 200
116
+ pedalboard_phazer_centre_frequency_hz_max: 12000
117
+ pedalboard_phazer_feedback_min: 0.0
118
+ pedalboard_phazer_feedback_max: 0.5
119
+ pedalboard_phazer_mix_min: 0.1
120
+ pedalboard_phazer_mix_max: 0.9
121
+ pedalboard_distortion: 0.01
122
+ pedalboard_distortion_drive_db_min: 1.0
123
+ pedalboard_distortion_drive_db_max: 25.0
124
+ pedalboard_pitch_shift: 0.01
125
+ pedalboard_pitch_shift_semitones_min: -7
126
+ pedalboard_pitch_shift_semitones_max: 7
127
+ pedalboard_resample: 0.01
128
+ pedalboard_resample_target_sample_rate_min: 4000
129
+ pedalboard_resample_target_sample_rate_max: 44100
130
+ pedalboard_bitcrash: 0.01
131
+ pedalboard_bitcrash_bit_depth_min: 4
132
+ pedalboard_bitcrash_bit_depth_max: 16
133
+ pedalboard_mp3_compressor: 0.01
134
+ pedalboard_mp3_compressor_pedalboard_mp3_compressor_min: 0
135
+ pedalboard_mp3_compressor_pedalboard_mp3_compressor_max: 9.999
136
+ vocals:
137
+ pitch_shift: 0.1
138
+ pitch_shift_min_semitones: -5
139
+ pitch_shift_max_semitones: 5
140
+ seven_band_parametric_eq: 0.1
141
+ seven_band_parametric_eq_min_gain_db: -9
142
+ seven_band_parametric_eq_max_gain_db: 9
143
+ tanh_distortion: 0.1
144
+ tanh_distortion_min: 0.1
145
+ tanh_distortion_max: 0.7
146
+ bass:
147
+ pitch_shift: 0.1
148
+ pitch_shift_min_semitones: -2
149
+ pitch_shift_max_semitones: 2
150
+ seven_band_parametric_eq: 0.1
151
+ seven_band_parametric_eq_min_gain_db: -3
152
+ seven_band_parametric_eq_max_gain_db: 6
153
+ tanh_distortion: 0.1
154
+ tanh_distortion_min: 0.1
155
+ tanh_distortion_max: 0.5
156
+ drums:
157
+ pitch_shift: 0.1
158
+ pitch_shift_min_semitones: -5
159
+ pitch_shift_max_semitones: 5
160
+ seven_band_parametric_eq: 0.1
161
+ seven_band_parametric_eq_min_gain_db: -9
162
+ seven_band_parametric_eq_max_gain_db: 9
163
+ tanh_distortion: 0.1
164
+ tanh_distortion_min: 0.1
165
+ tanh_distortion_max: 0.6
166
+ other:
167
+ pitch_shift: 0.1
168
+ pitch_shift_min_semitones: -4
169
+ pitch_shift_max_semitones: 4
170
+ gaussian_noise: 0.1
171
+ gaussian_noise_min_amplitude: 0.001
172
+ gaussian_noise_max_amplitude: 0.015
173
+ time_stretch: 0.01
174
+ time_stretch_min_rate: 0.8
175
+ time_stretch_max_rate: 1.25
176
+ inference:
177
+ batch_size: 1
178
+ dim_t: 256
179
+ num_overlap: 2
180
+ normalize: false
scnet_tran/scnet_tran_4stem_zftrubo_config.yaml ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 485100
3
+ num_channels: 2
4
+ sample_rate: 44100
5
+ min_mean_abs: 0.0
6
+ model:
7
+ sources:
8
+ - drums
9
+ - bass
10
+ - other
11
+ - vocals
12
+ audio_channels: 2
13
+ dims:
14
+ - 4
15
+ - 32
16
+ - 64
17
+ - 128
18
+ nfft: 4096
19
+ hop_size: 1024
20
+ win_size: 4096
21
+ normalized: true
22
+ band_SR:
23
+ - 0.175
24
+ - 0.392
25
+ - 0.433
26
+ band_stride:
27
+ - 1
28
+ - 4
29
+ - 16
30
+ band_kernel:
31
+ - 3
32
+ - 4
33
+ - 16
34
+ conv_depths:
35
+ - 3
36
+ - 2
37
+ - 1
38
+ compress: 4
39
+ conv_kernel: 3
40
+ num_dplayer: 6
41
+ expand: 1
42
+ tran_rotary_embedding_dim: 64
43
+ tran_depth: 1
44
+ tran_heads: 8
45
+ tran_dim_head: 64
46
+ tran_attn_dropout: 0.0
47
+ tran_ff_dropout: 0.0
48
+ tran_flash_attn: false
49
+ training:
50
+ batch_size: 5
51
+ gradient_accumulation_steps: 1
52
+ grad_clip: 0
53
+ instruments:
54
+ - drums
55
+ - bass
56
+ - other
57
+ - vocals
58
+ patience: 2
59
+ reduce_factor: 0.95
60
+ target_instrument: null
61
+ num_epochs: 1000
62
+ num_steps: 1000
63
+ q: 0.95
64
+ coarse_loss_clip: true
65
+ ema_momentum: 0.999
66
+ optimizer: adam
67
+ lr: 5.0e-05
68
+ normalize: false
69
+ other_fix: false
70
+ use_amp: true
71
+ augmentations:
72
+ enable: true
73
+ loudness: true
74
+ loudness_min: 0.5
75
+ loudness_max: 1.5
76
+ mixup: true
77
+ mixup_probs: !!python/tuple
78
+ - 0.2
79
+ - 0.02
80
+ - 0.002
81
+ mixup_loudness_min: 0.5
82
+ mixup_loudness_max: 1.5
83
+ mp3_compression_on_mixture: 0.01
84
+ mp3_compression_on_mixture_bitrate_min: 32
85
+ mp3_compression_on_mixture_bitrate_max: 320
86
+ mp3_compression_on_mixture_backend: lameenc
87
+ all:
88
+ channel_shuffle: 0.5
89
+ random_inverse: 0.01
90
+ random_polarity: 0.5
91
+ vocals:
92
+ pitch_shift: 0.1
93
+ pitch_shift_min_semitones: -5
94
+ pitch_shift_max_semitones: 5
95
+ seven_band_parametric_eq: 0.1
96
+ seven_band_parametric_eq_min_gain_db: -9
97
+ seven_band_parametric_eq_max_gain_db: 9
98
+ tanh_distortion: 0.1
99
+ tanh_distortion_min: 0.1
100
+ tanh_distortion_max: 0.7
101
+ time_stretch: 0.01
102
+ time_stretch_min_rate: 0.8
103
+ time_stretch_max_rate: 1.25
104
+ bass:
105
+ pitch_shift: 0.01
106
+ pitch_shift_min_semitones: -2
107
+ pitch_shift_max_semitones: 2
108
+ seven_band_parametric_eq: 0.01
109
+ seven_band_parametric_eq_min_gain_db: -3
110
+ seven_band_parametric_eq_max_gain_db: 6
111
+ tanh_distortion: 0.01
112
+ tanh_distortion_min: 0.1
113
+ tanh_distortion_max: 0.5
114
+ time_stretch: 0.1
115
+ time_stretch_min_rate: 0.9
116
+ time_stretch_max_rate: 1.1
117
+ drums:
118
+ pitch_shift: 0.1
119
+ pitch_shift_min_semitones: -5
120
+ pitch_shift_max_semitones: 5
121
+ seven_band_parametric_eq: 0.1
122
+ seven_band_parametric_eq_min_gain_db: -9
123
+ seven_band_parametric_eq_max_gain_db: 9
124
+ tanh_distortion: 0.1
125
+ tanh_distortion_min: 0.1
126
+ tanh_distortion_max: 0.6
127
+ time_stretch: 0.01
128
+ time_stretch_min_rate: 0.8
129
+ time_stretch_max_rate: 1.25
130
+ other:
131
+ pitch_shift: 0.1
132
+ pitch_shift_min_semitones: -4
133
+ pitch_shift_max_semitones: 4
134
+ gaussian_noise: 0.1
135
+ gaussian_noise_min_amplitude: 0.001
136
+ gaussian_noise_max_amplitude: 0.015
137
+ time_stretch: 0.01
138
+ time_stretch_min_rate: 0.8
139
+ time_stretch_max_rate: 1.25
140
+ inference:
141
+ batch_size: 1
142
+ dim_t: 256
143
+ num_overlap: 2
144
+ normalize: false
145
+ loss_multistft:
146
+ fft_sizes:
147
+ - 1024
148
+ - 2048
149
+ - 4096
150
+ hop_sizes:
151
+ - 147
152
+ - 256
153
+ - 512
154
+ win_lengths:
155
+ - 1024
156
+ - 2048
157
+ - 4096
158
+ window: hann_window
159
+ scale: mel
160
+ n_bins: 128
161
+ sample_rate: 44100
162
+ perceptual_weighting: true
163
+ w_sc: 1.0
164
+ w_log_mag: 1.0
165
+ w_lin_mag: 0.0
166
+ w_phs: 0.0
167
+ mag_distance: L1
vr/10_sp-uvr-2b-32000-1_config.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ model_params:
3
+ bins: 768
4
+ unstable_bins: 7
5
+ reduction_bins: 705
6
+ band:
7
+ 1:
8
+ sr: 6000
9
+ hl: 66
10
+ n_fft: 512
11
+ crop_start: 0
12
+ crop_stop: 240
13
+ lpf_start: 60
14
+ lpf_stop: 118
15
+ res_type: sinc_fastest
16
+ 2:
17
+ sr: 32000
18
+ hl: 352
19
+ n_fft: 1024
20
+ crop_start: 22
21
+ crop_stop: 505
22
+ hpf_start: 44
23
+ hpf_stop: 23
24
+ res_type: sinc_medium
25
+ sr: 32000
26
+ pre_filter_start: 710
27
+ pre_filter_stop: 731
28
+ mid_side: false
29
+ mid_side_b: false
30
+ mid_side_b2: false
31
+ stereo_w: false
32
+ stereo_n: false
33
+ reverse: false
34
+ nout: null
35
+ nout_lstm: null
36
+ training:
37
+ target_instrument: null
38
+ instruments:
39
+ - Instrumental
40
+ - Vocals
41
+ use_amp: true
42
+ inference:
43
+ batch_size: 1
44
+ aggression: 5
45
+ high_end_process: false
46
+ post_process_threshold: 0.2
47
+ window_size: 512
48
+ audio:
49
+ sample_rate: 44100
vr/11_sp-uvr-2b-32000-2_config.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ model_params:
3
+ bins: 768
4
+ unstable_bins: 7
5
+ reduction_bins: 705
6
+ band:
7
+ 1:
8
+ sr: 6000
9
+ hl: 66
10
+ n_fft: 512
11
+ crop_start: 0
12
+ crop_stop: 240
13
+ lpf_start: 60
14
+ lpf_stop: 118
15
+ res_type: sinc_fastest
16
+ 2:
17
+ sr: 32000
18
+ hl: 352
19
+ n_fft: 1024
20
+ crop_start: 22
21
+ crop_stop: 505
22
+ hpf_start: 44
23
+ hpf_stop: 23
24
+ res_type: sinc_medium
25
+ sr: 32000
26
+ pre_filter_start: 710
27
+ pre_filter_stop: 731
28
+ mid_side: false
29
+ mid_side_b: false
30
+ mid_side_b2: false
31
+ stereo_w: false
32
+ stereo_n: false
33
+ reverse: false
34
+ nout: null
35
+ nout_lstm: null
36
+ training:
37
+ target_instrument: null
38
+ instruments:
39
+ - Instrumental
40
+ - Vocals
41
+ use_amp: true
42
+ inference:
43
+ batch_size: 1
44
+ aggression: 5
45
+ high_end_process: false
46
+ post_process_threshold: 0.2
47
+ window_size: 512
48
+ audio:
49
+ sample_rate: 44100
vr/12_sp-uvr-3b-44100_config.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ model_params:
3
+ bins: 768
4
+ unstable_bins: 5
5
+ reduction_bins: 733
6
+ band:
7
+ 1:
8
+ sr: 11025
9
+ hl: 128
10
+ n_fft: 768
11
+ crop_start: 0
12
+ crop_stop: 278
13
+ lpf_start: 28
14
+ lpf_stop: 140
15
+ res_type: polyphase
16
+ 2:
17
+ sr: 22050
18
+ hl: 256
19
+ n_fft: 768
20
+ crop_start: 14
21
+ crop_stop: 322
22
+ hpf_start: 70
23
+ hpf_stop: 14
24
+ lpf_start: 283
25
+ lpf_stop: 314
26
+ res_type: polyphase
27
+ 3:
28
+ sr: 44100
29
+ hl: 512
30
+ n_fft: 768
31
+ crop_start: 131
32
+ crop_stop: 313
33
+ hpf_start: 154
34
+ hpf_stop: 141
35
+ res_type: sinc_medium
36
+ sr: 44100
37
+ pre_filter_start: 757
38
+ pre_filter_stop: 768
39
+ mid_side: false
40
+ mid_side_b: false
41
+ mid_side_b2: false
42
+ stereo_w: false
43
+ stereo_n: false
44
+ reverse: false
45
+ nout: null
46
+ nout_lstm: null
47
+ training:
48
+ target_instrument: null
49
+ instruments:
50
+ - Instrumental
51
+ - Vocals
52
+ use_amp: true
53
+ inference:
54
+ batch_size: 1
55
+ aggression: 5
56
+ high_end_process: false
57
+ post_process_threshold: 0.2
58
+ window_size: 512
59
+ audio:
60
+ sample_rate: 44100
vr/13_sp-uvr-4b-44100-1_config.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ model_params:
3
+ bins: 768
4
+ unstable_bins: 7
5
+ reduction_bins: 668
6
+ band:
7
+ 1:
8
+ sr: 11025
9
+ hl: 128
10
+ n_fft: 1024
11
+ crop_start: 0
12
+ crop_stop: 186
13
+ lpf_start: 37
14
+ lpf_stop: 73
15
+ res_type: polyphase
16
+ 2:
17
+ sr: 11025
18
+ hl: 128
19
+ n_fft: 512
20
+ crop_start: 4
21
+ crop_stop: 185
22
+ hpf_start: 36
23
+ hpf_stop: 18
24
+ lpf_start: 93
25
+ lpf_stop: 185
26
+ res_type: polyphase
27
+ 3:
28
+ sr: 22050
29
+ hl: 256
30
+ n_fft: 512
31
+ crop_start: 46
32
+ crop_stop: 186
33
+ hpf_start: 93
34
+ hpf_stop: 46
35
+ lpf_start: 164
36
+ lpf_stop: 186
37
+ res_type: polyphase
38
+ 4:
39
+ sr: 44100
40
+ hl: 512
41
+ n_fft: 768
42
+ crop_start: 121
43
+ crop_stop: 382
44
+ hpf_start: 138
45
+ hpf_stop: 123
46
+ res_type: sinc_medium
47
+ sr: 44100
48
+ pre_filter_start: 740
49
+ pre_filter_stop: 768
50
+ mid_side: false
51
+ mid_side_b: false
52
+ mid_side_b2: false
53
+ stereo_w: false
54
+ stereo_n: false
55
+ reverse: false
56
+ nout: null
57
+ nout_lstm: null
58
+ training:
59
+ target_instrument: null
60
+ instruments:
61
+ - Instrumental
62
+ - Vocals
63
+ use_amp: true
64
+ inference:
65
+ batch_size: 1
66
+ aggression: 5
67
+ high_end_process: false
68
+ post_process_threshold: 0.2
69
+ window_size: 512
70
+ audio:
71
+ sample_rate: 44100
vr/14_sp-uvr-4b-44100-2_config.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ model_params:
3
+ bins: 768
4
+ unstable_bins: 7
5
+ reduction_bins: 668
6
+ band:
7
+ 1:
8
+ sr: 11025
9
+ hl: 128
10
+ n_fft: 1024
11
+ crop_start: 0
12
+ crop_stop: 186
13
+ lpf_start: 37
14
+ lpf_stop: 73
15
+ res_type: polyphase
16
+ 2:
17
+ sr: 11025
18
+ hl: 128
19
+ n_fft: 512
20
+ crop_start: 4
21
+ crop_stop: 185
22
+ hpf_start: 36
23
+ hpf_stop: 18
24
+ lpf_start: 93
25
+ lpf_stop: 185
26
+ res_type: polyphase
27
+ 3:
28
+ sr: 22050
29
+ hl: 256
30
+ n_fft: 512
31
+ crop_start: 46
32
+ crop_stop: 186
33
+ hpf_start: 93
34
+ hpf_stop: 46
35
+ lpf_start: 164
36
+ lpf_stop: 186
37
+ res_type: polyphase
38
+ 4:
39
+ sr: 44100
40
+ hl: 512
41
+ n_fft: 768
42
+ crop_start: 121
43
+ crop_stop: 382
44
+ hpf_start: 138
45
+ hpf_stop: 123
46
+ res_type: sinc_medium
47
+ sr: 44100
48
+ pre_filter_start: 740
49
+ pre_filter_stop: 768
50
+ mid_side: false
51
+ mid_side_b: false
52
+ mid_side_b2: false
53
+ stereo_w: false
54
+ stereo_n: false
55
+ reverse: false
56
+ nout: null
57
+ nout_lstm: null
58
+ training:
59
+ target_instrument: null
60
+ instruments:
61
+ - Instrumental
62
+ - Vocals
63
+ use_amp: true
64
+ inference:
65
+ batch_size: 1
66
+ aggression: 5
67
+ high_end_process: false
68
+ post_process_threshold: 0.2
69
+ window_size: 512
70
+ audio:
71
+ sample_rate: 44100
vr/15_sp-uvr-mid-44100-1_config.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ model_params:
3
+ mid_side: true
4
+ bins: 768
5
+ unstable_bins: 5
6
+ reduction_bins: 733
7
+ band:
8
+ 1:
9
+ sr: 11025
10
+ hl: 128
11
+ n_fft: 768
12
+ crop_start: 0
13
+ crop_stop: 278
14
+ lpf_start: 28
15
+ lpf_stop: 140
16
+ res_type: polyphase
17
+ 2:
18
+ sr: 22050
19
+ hl: 256
20
+ n_fft: 768
21
+ crop_start: 14
22
+ crop_stop: 322
23
+ hpf_start: 70
24
+ hpf_stop: 14
25
+ lpf_start: 283
26
+ lpf_stop: 314
27
+ res_type: polyphase
28
+ 3:
29
+ sr: 44100
30
+ hl: 512
31
+ n_fft: 768
32
+ crop_start: 131
33
+ crop_stop: 313
34
+ hpf_start: 154
35
+ hpf_stop: 141
36
+ res_type: sinc_medium
37
+ sr: 44100
38
+ pre_filter_start: 757
39
+ pre_filter_stop: 768
40
+ mid_side_b: false
41
+ mid_side_b2: false
42
+ stereo_w: false
43
+ stereo_n: false
44
+ reverse: false
45
+ nout: null
46
+ nout_lstm: null
47
+ training:
48
+ target_instrument: null
49
+ instruments:
50
+ - Instrumental
51
+ - Vocals
52
+ use_amp: true
53
+ inference:
54
+ batch_size: 1
55
+ aggression: 5
56
+ high_end_process: false
57
+ post_process_threshold: 0.2
58
+ window_size: 512
59
+ audio:
60
+ sample_rate: 44100
vr/16_sp-uvr-mid-44100-2_config.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ model_params:
3
+ mid_side: true
4
+ bins: 768
5
+ unstable_bins: 5
6
+ reduction_bins: 733
7
+ band:
8
+ 1:
9
+ sr: 11025
10
+ hl: 128
11
+ n_fft: 768
12
+ crop_start: 0
13
+ crop_stop: 278
14
+ lpf_start: 28
15
+ lpf_stop: 140
16
+ res_type: polyphase
17
+ 2:
18
+ sr: 22050
19
+ hl: 256
20
+ n_fft: 768
21
+ crop_start: 14
22
+ crop_stop: 322
23
+ hpf_start: 70
24
+ hpf_stop: 14
25
+ lpf_start: 283
26
+ lpf_stop: 314
27
+ res_type: polyphase
28
+ 3:
29
+ sr: 44100
30
+ hl: 512
31
+ n_fft: 768
32
+ crop_start: 131
33
+ crop_stop: 313
34
+ hpf_start: 154
35
+ hpf_stop: 141
36
+ res_type: sinc_medium
37
+ sr: 44100
38
+ pre_filter_start: 757
39
+ pre_filter_stop: 768
40
+ mid_side_b: false
41
+ mid_side_b2: false
42
+ stereo_w: false
43
+ stereo_n: false
44
+ reverse: false
45
+ nout: null
46
+ nout_lstm: null
47
+ training:
48
+ target_instrument: null
49
+ instruments:
50
+ - Instrumental
51
+ - Vocals
52
+ use_amp: true
53
+ inference:
54
+ batch_size: 1
55
+ aggression: 5
56
+ high_end_process: false
57
+ post_process_threshold: 0.2
58
+ window_size: 512
59
+ audio:
60
+ sample_rate: 44100
vr/17_hp-wind_inst-uvr_config.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ model_params:
3
+ bins: 672
4
+ unstable_bins: 8
5
+ reduction_bins: 530
6
+ band:
7
+ 1:
8
+ sr: 7350
9
+ hl: 80
10
+ n_fft: 640
11
+ crop_start: 0
12
+ crop_stop: 85
13
+ lpf_start: 25
14
+ lpf_stop: 53
15
+ res_type: polyphase
16
+ 2:
17
+ sr: 7350
18
+ hl: 80
19
+ n_fft: 320
20
+ crop_start: 4
21
+ crop_stop: 87
22
+ hpf_start: 25
23
+ hpf_stop: 12
24
+ lpf_start: 31
25
+ lpf_stop: 62
26
+ res_type: polyphase
27
+ 3:
28
+ sr: 14700
29
+ hl: 160
30
+ n_fft: 512
31
+ crop_start: 17
32
+ crop_stop: 216
33
+ hpf_start: 48
34
+ hpf_stop: 24
35
+ lpf_start: 139
36
+ lpf_stop: 210
37
+ res_type: polyphase
38
+ 4:
39
+ sr: 44100
40
+ hl: 480
41
+ n_fft: 960
42
+ crop_start: 78
43
+ crop_stop: 383
44
+ hpf_start: 130
45
+ hpf_stop: 86
46
+ res_type: kaiser_fast
47
+ sr: 44100
48
+ pre_filter_start: 668
49
+ pre_filter_stop: 672
50
+ mid_side: false
51
+ mid_side_b: false
52
+ mid_side_b2: false
53
+ stereo_w: false
54
+ stereo_n: false
55
+ reverse: false
56
+ nout: null
57
+ nout_lstm: null
58
+ training:
59
+ target_instrument: null
60
+ instruments:
61
+ - No Woodwinds
62
+ - Woodwinds
63
+ use_amp: true
64
+ inference:
65
+ batch_size: 1
66
+ aggression: 5
67
+ high_end_process: false
68
+ post_process_threshold: 0.2
69
+ window_size: 512
70
+ audio:
71
+ sample_rate: 44100
vr/1_hp-uvr_config.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ model_params:
3
+ bins: 768
4
+ unstable_bins: 7
5
+ reduction_bins: 668
6
+ band:
7
+ 1:
8
+ sr: 11025
9
+ hl: 128
10
+ n_fft: 1024
11
+ crop_start: 0
12
+ crop_stop: 186
13
+ lpf_start: 37
14
+ lpf_stop: 73
15
+ res_type: polyphase
16
+ 2:
17
+ sr: 11025
18
+ hl: 128
19
+ n_fft: 512
20
+ crop_start: 4
21
+ crop_stop: 185
22
+ hpf_start: 36
23
+ hpf_stop: 18
24
+ lpf_start: 93
25
+ lpf_stop: 185
26
+ res_type: polyphase
27
+ 3:
28
+ sr: 22050
29
+ hl: 256
30
+ n_fft: 512
31
+ crop_start: 46
32
+ crop_stop: 186
33
+ hpf_start: 93
34
+ hpf_stop: 46
35
+ lpf_start: 164
36
+ lpf_stop: 186
37
+ res_type: polyphase
38
+ 4:
39
+ sr: 44100
40
+ hl: 512
41
+ n_fft: 768
42
+ crop_start: 121
43
+ crop_stop: 382
44
+ hpf_start: 138
45
+ hpf_stop: 123
46
+ res_type: sinc_medium
47
+ sr: 44100
48
+ pre_filter_start: 740
49
+ pre_filter_stop: 768
50
+ mid_side: false
51
+ mid_side_b: false
52
+ mid_side_b2: false
53
+ stereo_w: false
54
+ stereo_n: false
55
+ reverse: false
56
+ nout: null
57
+ nout_lstm: null
58
+ training:
59
+ target_instrument: null
60
+ instruments:
61
+ - Instrumental
62
+ - Vocals
63
+ use_amp: true
64
+ inference:
65
+ batch_size: 1
66
+ aggression: 5
67
+ high_end_process: false
68
+ post_process_threshold: 0.2
69
+ window_size: 512
70
+ audio:
71
+ sample_rate: 44100
vr/2_hp-uvr_config.yaml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ model_params:
3
+ bins: 672
4
+ unstable_bins: 8
5
+ reduction_bins: 637
6
+ band:
7
+ 1:
8
+ sr: 7350
9
+ hl: 80
10
+ n_fft: 640
11
+ crop_start: 0
12
+ crop_stop: 85
13
+ lpf_start: 25
14
+ lpf_stop: 53
15
+ res_type: polyphase
16
+ 2:
17
+ sr: 7350
18
+ hl: 80
19
+ n_fft: 320
20
+ crop_start: 4
21
+ crop_stop: 87
22
+ hpf_start: 25
23
+ hpf_stop: 12
24
+ lpf_start: 31
25
+ lpf_stop: 62
26
+ res_type: polyphase
27
+ 3:
28
+ sr: 14700
29
+ hl: 160
30
+ n_fft: 512
31
+ crop_start: 17
32
+ crop_stop: 216
33
+ hpf_start: 48
34
+ hpf_stop: 24
35
+ lpf_start: 139
36
+ lpf_stop: 210
37
+ res_type: polyphase
38
+ 4:
39
+ sr: 44100
40
+ hl: 480
41
+ n_fft: 960
42
+ crop_start: 78
43
+ crop_stop: 383
44
+ hpf_start: 130
45
+ hpf_stop: 86
46
+ res_type: kaiser_fast
47
+ sr: 44100
48
+ pre_filter_start: 668
49
+ pre_filter_stop: 672
50
+ mid_side: false
51
+ mid_side_b: false
52
+ mid_side_b2: false
53
+ stereo_w: false
54
+ stereo_n: false
55
+ reverse: false
56
+ nout: null
57
+ nout_lstm: null
58
+ training:
59
+ target_instrument: null
60
+ instruments:
61
+ - Instrumental
62
+ - Vocals
63
+ use_amp: true
64
+ inference:
65
+ batch_size: 1
66
+ aggression: 5
67
+ high_end_process: false
68
+ post_process_threshold: 0.2
69
+ window_size: 512
70
+ audio:
71
+ sample_rate: 44100