File size: 3,410 Bytes
f73ae00
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# Audio framing settings. The consuming code is not visible here, so the
# meanings noted below are inferred from key names — confirm against the loader.
audio:
  # 588800 = 1150 * 512 (512 is model.stft_hop_length). Presumably a length in
  # samples, i.e. about 13.35 s at sample_rate 44100 — TODO confirm units.
  chunk_size: 588800
  dim_f: 1024
  # NOTE(review): inference.dim_t is 1101, not 801 — confirm which value the
  # consumer actually reads.
  dim_t: 801
  # NOTE(review): differs from model.stft_hop_length (512) — confirm the two
  # hop sizes are intentionally independent.
  hop_length: 441
  min_mean_abs: 0.0
  # Same value as model.stft_n_fft.
  n_fft: 2048
  num_channels: 2
  sample_rate: 44100
# Augmentation settings. Per-instrument sub-sections exist for bass, drums,
# other and vocals only; training.instruments also lists guitar and piano,
# which have no sub-section here.
# Bare effect keys (pitch_shift, tanh_distortion, ...) look like probabilities
# and the *_min* / *_max* keys like parameter ranges — inferred from the names;
# verify against the consumer.
augmentations:
  # Presumably applied to every stem regardless of instrument — TODO confirm.
  all:
    channel_shuffle: 0.5
    random_inverse: 0.1
    random_polarity: 0.5
  bass:
    pitch_shift: 0.1
    pitch_shift_max_semitones: 2
    pitch_shift_min_semitones: -2
    seven_band_parametric_eq: 0.1
    # Asymmetric gain range (-3 .. +6), unlike drums/vocals (-9 .. +9).
    seven_band_parametric_eq_max_gain_db: 6
    seven_band_parametric_eq_min_gain_db: -3
    tanh_distortion: 0.1
    tanh_distortion_max: 0.5
    tanh_distortion_min: 0.1
  drums:
    pitch_shift: 0.1
    pitch_shift_max_semitones: 5
    pitch_shift_min_semitones: -5
    seven_band_parametric_eq: 0.1
    seven_band_parametric_eq_max_gain_db: 9
    seven_band_parametric_eq_min_gain_db: -9
    tanh_distortion: 0.1
    tanh_distortion_max: 0.6
    tanh_distortion_min: 0.1
  # NOTE(review): training.augmentation is false while this flag is true —
  # confirm which of the two the consumer honours.
  enable: true
  loudness: true
  loudness_max: 1.5
  loudness_min: 0.5
  mixup: true
  mixup_loudness_max: 1.5
  mixup_loudness_min: 0.5
  # NOTE(review): `!!python/tuple` is a PyYAML-specific tag. yaml.safe_load
  # rejects it, so this file must be read with a loader that supports Python
  # tags (e.g. yaml.FullLoader). Left unchanged because the consumer may rely
  # on receiving a tuple rather than a list.
  mixup_probs: !!python/tuple
  - 0.2
  - 0.02
  other:
    gaussian_noise: 0.1
    gaussian_noise_max_amplitude: 0.015
    gaussian_noise_min_amplitude: 0.001
    pitch_shift: 0.1
    pitch_shift_max_semitones: 4
    pitch_shift_min_semitones: -4
    time_stretch: 0.1
    time_stretch_max_rate: 1.25
    time_stretch_min_rate: 0.8
  vocals:
    pitch_shift: 0.1
    pitch_shift_max_semitones: 5
    pitch_shift_min_semitones: -5
    seven_band_parametric_eq: 0.1
    seven_band_parametric_eq_max_gain_db: 9
    seven_band_parametric_eq_min_gain_db: -9
    tanh_distortion: 0.1
    tanh_distortion_max: 0.7
    tanh_distortion_min: 0.1
# Inference-time settings. The consumer is not visible here; meanings are
# inferred from key names — confirm against the loader.
inference:
  batch_size: 1
  # NOTE(review): audio.dim_t is 801, not 1101 — confirm which value the
  # consumer actually reads.
  dim_t: 1101
  normalize: false
  num_overlap: 2
# Network hyper-parameters. The model class that consumes them is not visible
# here; notes below state only arithmetic relations between values in this
# file, plus hedged observations.
model:
  attn_dropout: 0.1
  depth: 12
  dim: 256
  # 1025 = stft_n_fft / 2 + 1, and equals the sum of freqs_per_bands below.
  dim_freqs_in: 1025
  dim_head: 64
  ff_dropout: 0.1
  flash_attn: false
  freq_transformer_depth: 1
  # 62 band widths in their original order, one row group per width:
  #   24 x 2, 12 x 4, 8 x 12, 8 x 24, 8 x 48, then 128 and 129.
  #   48 + 48 + 96 + 192 + 384 + 257 = 1025 (= dim_freqs_in).
  freqs_per_bands:
    [
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
      12, 12, 12, 12, 12, 12, 12, 12,
      24, 24, 24, 24, 24, 24, 24, 24,
      48, 48, 48, 48, 48, 48, 48, 48,
      128, 129
    ]
  heads: 8
  kan_grid_size: 8
  linear_transformer_depth: 0
  mask_estimator_depth: 2
  mlp_expansion_factor: 4
  multi_stft_hop_size: 147
  multi_stft_normalized: false
  multi_stft_resolution_loss_weight: 1.0
  multi_stft_resolutions_window_sizes: [4096, 2048, 1024, 512, 256]
  # Matches the six entries of training.instruments.
  num_stems: 6
  sage_attention: false
  skip_connection: false
  stereo: true
  # NOTE(review): audio.hop_length is 441, not 512 — confirm the two hop
  # sizes are intentionally independent.
  stft_hop_length: 512
  # Same value as audio.n_fft.
  stft_n_fft: 2048
  stft_normalized: false
  stft_win_length: 2048
  time_transformer_depth: 1
  use_kan: true
  use_torch_checkpoint: false
# Training-loop settings. The trainer is not visible here; meanings are
# inferred from key names unless stated as a relation between values in this
# file.
training:
  # NOTE(review): augmentations.enable is true while this flag is false —
  # confirm which of the two the consumer honours.
  augmentation: false
  augmentation_loudness: true
  augmentation_loudness_max: 1.5
  augmentation_loudness_min: 0.5
  augmentation_loudness_type: 1
  augmentation_mix: true
  augmentation_type: simple1
  batch_size: 2
  coarse_loss_clip: true
  ema_momentum: 0.999
  # Presumably 0 disables gradient clipping — TODO confirm.
  grad_clip: 0
  gradient_accumulation_steps: 1
  # Six names, matching model.num_stems. The order is preserved from the
  # original file; presumably it fixes the stem order — verify before
  # reordering.
  instruments: [bass, drums, other, vocals, guitar, piano]
  lr: 1.0e-05
  num_epochs: 1000
  num_steps: 1000
  optimizer: adam
  other_fix: false
  # patience / reduce_factor look like learning-rate scheduler settings —
  # inferred from names; confirm against the trainer.
  patience: 3
  q: 0.95
  reduce_factor: 0.95
  target_instrument: null
  use_amp: true
  use_mp3_compress: false