listra92 committed on
Commit
3e6ec55
·
verified ·
1 Parent(s): 9327b37

Delete _mdx_c_configs

Browse files
_mdx_c_configs/config_aspiration_mel_band_roformer.yaml DELETED
@@ -1,77 +0,0 @@
1
- audio:
2
- chunk_size: 352800
3
- dim_f: 1024
4
- dim_t: 801 # doesn't work (use in model)
5
- hop_length: 441 # doesn't work (use in model)
6
- n_fft: 2048
7
- num_channels: 2
8
- sample_rate: 44100
9
- min_mean_abs: 0.000
10
-
11
- model:
12
- dim: 256
13
- depth: 8
14
- stereo: true
15
- num_stems: 2
16
- time_transformer_depth: 1
17
- freq_transformer_depth: 1
18
- linear_transformer_depth: 0
19
- num_bands: 60
20
- dim_head: 64
21
- heads: 8
22
- attn_dropout: 0.1
23
- ff_dropout: 0.1
24
- flash_attn: True
25
- dim_freqs_in: 1025
26
- sample_rate: 44100 # needed for mel filter bank from librosa
27
- stft_n_fft: 2048
28
- stft_hop_length: 441
29
- stft_win_length: 2048
30
- stft_normalized: False
31
- mask_estimator_depth: 2
32
- multi_stft_resolution_loss_weight: 1.0
33
- multi_stft_resolutions_window_sizes: !!python/tuple
34
- - 4096
35
- - 2048
36
- - 1024
37
- - 512
38
- - 256
39
- multi_stft_hop_size: 147
40
- multi_stft_normalized: False
41
-
42
- training:
43
- batch_size: 1
44
- gradient_accumulation_steps: 8
45
- grad_clip: 0
46
- instruments:
47
- - aspiration
48
- - other
49
- lr: 4.0e-05
50
- patience: 2
51
- reduce_factor: 0.95
52
- target_instrument: null
53
- num_epochs: 1000
54
- num_steps: 1000
55
- q: 0.95
56
- coarse_loss_clip: true
57
- ema_momentum: 0.999
58
- optimizer: adam
59
- other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
60
- use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
61
-
62
- augmentations:
63
- enable: true # enable or disable all augmentations (to quickly disable them if needed)
64
- loudness: true # randomly change loudness of each stem in the range (loudness_min; loudness_max)
65
- loudness_min: 0.5
66
- loudness_max: 1.5
67
- mixup: false # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
68
- mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
69
- - 0.2
70
- - 0.02
71
- mixup_loudness_min: 0.5
72
- mixup_loudness_max: 1.5
73
-
74
- inference:
75
- batch_size: 4
76
- dim_t: 1101
77
- num_overlap: 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
_mdx_c_configs/config_mel_band_roformer_karaoke.yaml DELETED
@@ -1,71 +0,0 @@
1
- audio:
2
- chunk_size: 352800
3
- dim_f: 1024
4
- dim_t: 256
5
- hop_length: 441
6
- n_fft: 2048
7
- num_channels: 2
8
- sample_rate: 44100
9
- min_mean_abs: 000
10
-
11
- model:
12
- dim: 384
13
- depth: 6
14
- stereo: true
15
- num_stems: 1
16
- time_transformer_depth: 1
17
- freq_transformer_depth: 1
18
- num_bands: 60
19
- dim_head: 64
20
- heads: 8
21
- attn_dropout: 0
22
- ff_dropout: 0
23
- flash_attn: True
24
- dim_freqs_in: 1025
25
- sample_rate: 44100 # needed for mel filter bank from librosa
26
- stft_n_fft: 2048
27
- stft_hop_length: 441
28
- stft_win_length: 2048
29
- stft_normalized: False
30
- mask_estimator_depth: 2
31
- multi_stft_resolution_loss_weight: 1.0
32
- multi_stft_resolutions_window_sizes: !!python/tuple
33
- - 4096
34
- - 2048
35
- - 1024
36
- - 512
37
- - 256
38
- multi_stft_hop_size: 147
39
- multi_stft_normalized: False
40
-
41
- training:
42
- batch_size: 4
43
- gradient_accumulation_steps: 1
44
- grad_clip: 0
45
- instruments:
46
- - karaoke
47
- - other
48
- lr: 1.0e-05
49
- patience: 2
50
- reduce_factor: 0.95
51
- target_instrument: karaoke
52
- num_epochs: 1000
53
- num_steps: 2000
54
- augmentation: false # enable augmentations by audiomentations and pedalboard
55
- augmentation_type: null
56
- use_mp3_compress: false # Deprecated
57
- augmentation_mix: false # Mix several stems of the same type with some probability
58
- augmentation_loudness: false # randomly change loudness of each stem
59
- augmentation_loudness_type: 1 # Type 1 or 2
60
- augmentation_loudness_min: 0
61
- augmentation_loudness_max: 0
62
- q: 0.95
63
- coarse_loss_clip: false
64
- ema_momentum: 0.999
65
- optimizer: adam
66
- other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
67
-
68
- inference:
69
- batch_size: 1
70
- dim_t: 1101
71
- num_overlap: 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
_mdx_c_configs/config_melbandroformer_big.yaml DELETED
@@ -1,48 +0,0 @@
1
- audio:
2
- chunk_size: 352800
3
- dim_f: 1024
4
- dim_t: 1101
5
- hop_length: 441
6
- n_fft: 2048
7
- num_channels: 2
8
- sample_rate: 44100
9
- min_mean_abs: 0.0
10
- model:
11
- dim: 384
12
- depth: 6
13
- stereo: true
14
- num_stems: 1
15
- time_transformer_depth: 1
16
- freq_transformer_depth: 1
17
- num_bands: 60
18
- dim_head: 64
19
- heads: 8
20
- attn_dropout: 0
21
- ff_dropout: 0
22
- flash_attn: true
23
- dim_freqs_in: 1025
24
- sample_rate: 44100
25
- stft_n_fft: 2048
26
- stft_hop_length: 441
27
- stft_win_length: 2048
28
- stft_normalized: false
29
- mask_estimator_depth: 3
30
- multi_stft_resolution_loss_weight: 1.0
31
- multi_stft_resolutions_window_sizes: !!python/tuple
32
- - 4096
33
- - 2048
34
- - 1024
35
- - 512
36
- - 256
37
- multi_stft_hop_size: 147
38
- multi_stft_normalized: false
39
- training:
40
- instruments:
41
- - vocals
42
- - other
43
- target_instrument: vocals
44
- use_amp: true
45
- inference:
46
- batch_size: 1
47
- dim_t: 1101
48
- num_overlap: 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
_mdx_c_configs/config_vocals_mdx23c.yaml DELETED
@@ -1,96 +0,0 @@
1
- audio:
2
- chunk_size: 261120
3
- dim_f: 4096
4
- dim_t: 256
5
- hop_length: 1024
6
- n_fft: 8192
7
- num_channels: 2
8
- sample_rate: 44100
9
- min_mean_abs: 0.001
10
-
11
- model:
12
- act: gelu
13
- bottleneck_factor: 4
14
- growth: 128
15
- norm: InstanceNorm
16
- num_blocks_per_scale: 2
17
- num_channels: 128
18
- num_scales: 5
19
- num_subbands: 4
20
- scale:
21
- - 2
22
- - 2
23
-
24
- training:
25
- batch_size: 6
26
- gradient_accumulation_steps: 1
27
- grad_clip: 0
28
- instruments:
29
- - vocals
30
- - other
31
- lr: 9.0e-05
32
- patience: 2
33
- reduce_factor: 0.95
34
- target_instrument: null
35
- num_epochs: 1000
36
- num_steps: 1000
37
- q: 0.95
38
- coarse_loss_clip: true
39
- ema_momentum: 0.999
40
- optimizer: adam
41
- read_metadata_procs: 8 # Number of processes to use during metadata reading for dataset. Can speed up metadata generation
42
- other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
43
- use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
44
-
45
- augmentations:
46
- enable: true # enable or disable all augmentations (to quickly disable them if needed)
47
- loudness: true # randomly change loudness of each stem in the range (loudness_min; loudness_max)
48
- loudness_min: 0.5
49
- loudness_max: 1.5
50
- mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
51
- mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
52
- - 0.2
53
- - 0.02
54
- mixup_loudness_min: 0.5
55
- mixup_loudness_max: 1.5
56
-
57
- # apply mp3 compression to mixture only (emulate downloading mp3 from internet)
58
- mp3_compression_on_mixture: 0.01
59
- mp3_compression_on_mixture_bitrate_min: 32
60
- mp3_compression_on_mixture_bitrate_max: 320
61
- mp3_compression_on_mixture_backend: "lameenc"
62
-
63
- all:
64
- channel_shuffle: 0.5 # Set 0 or lower to disable
65
- random_inverse: 0.1 # invert track (a lower probability is better)
66
- random_polarity: 0.5 # polarity change (multiply waveform by -1)
67
- mp3_compression: 0.01
68
- mp3_compression_min_bitrate: 32
69
- mp3_compression_max_bitrate: 320
70
- mp3_compression_backend: "lameenc"
71
-
72
- vocals:
73
- pitch_shift: 0.1
74
- pitch_shift_min_semitones: -5
75
- pitch_shift_max_semitones: 5
76
- seven_band_parametric_eq: 0.25
77
- seven_band_parametric_eq_min_gain_db: -9
78
- seven_band_parametric_eq_max_gain_db: 9
79
- tanh_distortion: 0.1
80
- tanh_distortion_min: 0.1
81
- tanh_distortion_max: 0.7
82
- other:
83
- pitch_shift: 0.1
84
- pitch_shift_min_semitones: -4
85
- pitch_shift_max_semitones: 4
86
- gaussian_noise: 0.1
87
- gaussian_noise_min_amplitude: 0.001
88
- gaussian_noise_max_amplitude: 0.015
89
- time_stretch: 0.01
90
- time_stretch_min_rate: 0.8
91
- time_stretch_max_rate: 1.25
92
-
93
- inference:
94
- batch_size: 1
95
- dim_t: 512
96
- num_overlap: 8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
_mdx_c_configs/config_vocals_mel_band_roformer_kj.yaml DELETED
@@ -1,69 +0,0 @@
1
- audio:
2
- chunk_size: 352800
3
- dim_f: 1024
4
- dim_t: 256
5
- hop_length: 441
6
- n_fft: 2048
7
- num_channels: 2
8
- sample_rate: 44100
9
- min_mean_abs: 0.0
10
- model:
11
- dim: 384
12
- depth: 6
13
- stereo: true
14
- num_stems: 1
15
- time_transformer_depth: 1
16
- freq_transformer_depth: 1
17
- num_bands: 60
18
- dim_head: 64
19
- heads: 8
20
- attn_dropout: 0
21
- ff_dropout: 0
22
- flash_attn: true
23
- dim_freqs_in: 1025
24
- sample_rate: 44100
25
- stft_n_fft: 2048
26
- stft_hop_length: 441
27
- stft_win_length: 2048
28
- stft_normalized: false
29
- mask_estimator_depth: 2
30
- multi_stft_resolution_loss_weight: 1.0
31
- multi_stft_resolutions_window_sizes: !!python/tuple
32
- - 4096
33
- - 2048
34
- - 1024
35
- - 512
36
- - 256
37
- multi_stft_hop_size: 147
38
- multi_stft_normalized: false
39
- training:
40
- batch_size: 4
41
- gradient_accumulation_steps: 1
42
- grad_clip: 0
43
- instruments:
44
- - vocals
45
- - other
46
- lr: 1.0e-05
47
- patience: 2
48
- reduce_factor: 0.95
49
- target_instrument: vocals
50
- num_epochs: 1000
51
- num_steps: 1000
52
- augmentation: false
53
- augmentation_type: null
54
- use_mp3_compress: false
55
- augmentation_mix: false
56
- augmentation_loudness: false
57
- augmentation_loudness_type: 1
58
- augmentation_loudness_min: 0
59
- augmentation_loudness_max: 0
60
- q: 0.95
61
- coarse_loss_clip: false
62
- ema_momentum: 0.999
63
- optimizer: adam
64
- other_fix: true
65
- use_amp: true
66
- inference:
67
- batch_size: 4
68
- dim_t: 1101
69
- num_overlap: 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
_mdx_c_configs/model_mel_band_roformer_denoise.yaml DELETED
@@ -1,71 +0,0 @@
1
- audio:
2
- chunk_size: 352800
3
- dim_f: 1024
4
- dim_t: 256
5
- hop_length: 441
6
- n_fft: 2048
7
- num_channels: 2
8
- sample_rate: 44100
9
- min_mean_abs: 000
10
-
11
- model:
12
- dim: 384
13
- depth: 6
14
- stereo: true
15
- num_stems: 1
16
- time_transformer_depth: 1
17
- freq_transformer_depth: 1
18
- num_bands: 60
19
- dim_head: 64
20
- heads: 8
21
- attn_dropout: 0
22
- ff_dropout: 0
23
- flash_attn: True
24
- dim_freqs_in: 1025
25
- sample_rate: 44100 # needed for mel filter bank from librosa
26
- stft_n_fft: 2048
27
- stft_hop_length: 441
28
- stft_win_length: 2048
29
- stft_normalized: False
30
- mask_estimator_depth: 2
31
- multi_stft_resolution_loss_weight: 1.0
32
- multi_stft_resolutions_window_sizes: !!python/tuple
33
- - 4096
34
- - 2048
35
- - 1024
36
- - 512
37
- - 256
38
- multi_stft_hop_size: 147
39
- multi_stft_normalized: False
40
-
41
- training:
42
- batch_size: 2
43
- gradient_accumulation_steps: 1
44
- grad_clip: 0
45
- instruments:
46
- - dry
47
- - other
48
- lr: 1.0e-05
49
- patience: 8
50
- reduce_factor: 0.95
51
- target_instrument: dry
52
- num_epochs: 1000
53
- num_steps: 4032
54
- augmentation: false # enable augmentations by audiomentations and pedalboard
55
- augmentation_type: null
56
- use_mp3_compress: false # Deprecated
57
- augmentation_mix: false # Mix several stems of the same type with some probability
58
- augmentation_loudness: false # randomly change loudness of each stem
59
- augmentation_loudness_type: 1 # Type 1 or 2
60
- augmentation_loudness_min: 0
61
- augmentation_loudness_max: 0
62
- q: 0.95
63
- coarse_loss_clip: false
64
- ema_momentum: 0.999
65
- optimizer: adam
66
- other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
67
-
68
- inference:
69
- batch_size: 2
70
- dim_t: 1101
71
- num_overlap: 4