---
# Configuration with five top-level sections: audio, augmentations,
# inference, model, training.
# NOTE(review): the keys suggest a multi-stem audio source-separation
# training setup - confirm against the code that consumes this file.

audio:
  chunk_size: 588800
  dim_f: 1024
  dim_t: 801
  # NOTE(review): differs from model.stft_hop_length (512) - confirm which
  # of the two the consumer actually uses.
  hop_length: 441
  min_mean_abs: 0.0
  n_fft: 2048
  num_channels: 2
  sample_rate: 44100

augmentations:
  # NOTE(review): the bare 0.x values next to *_min/*_max pairs are
  # presumably per-effect application probabilities - verify in the consumer.
  all:
    channel_shuffle: 0.5
    random_inverse: 0.1
    random_polarity: 0.5
  bass:
    pitch_shift: 0.1
    pitch_shift_max_semitones: 2
    pitch_shift_min_semitones: -2
    seven_band_parametric_eq: 0.1
    seven_band_parametric_eq_max_gain_db: 6
    seven_band_parametric_eq_min_gain_db: -3
    tanh_distortion: 0.1
    tanh_distortion_max: 0.5
    tanh_distortion_min: 0.1
  drums:
    pitch_shift: 0.1
    pitch_shift_max_semitones: 5
    pitch_shift_min_semitones: -5
    seven_band_parametric_eq: 0.1
    seven_band_parametric_eq_max_gain_db: 9
    seven_band_parametric_eq_min_gain_db: -9
    tanh_distortion: 0.1
    tanh_distortion_max: 0.6
    tanh_distortion_min: 0.1
  enable: true
  loudness: true
  loudness_max: 1.5
  loudness_min: 0.5
  mixup: true
  mixup_loudness_max: 1.5
  mixup_loudness_min: 0.5
  # NOTE(review): `!!python/tuple` is a Python-specific tag; safe loaders
  # (e.g. yaml.safe_load) reject it. Kept as-is because the consumer may
  # depend on receiving a tuple - consider a plain sequence if it does not.
  mixup_probs: !!python/tuple
    - 0.2
    - 0.02
  other:
    gaussian_noise: 0.1
    gaussian_noise_max_amplitude: 0.015
    gaussian_noise_min_amplitude: 0.001
    pitch_shift: 0.1
    pitch_shift_max_semitones: 4
    pitch_shift_min_semitones: -4
    time_stretch: 0.1
    time_stretch_max_rate: 1.25
    time_stretch_min_rate: 0.8
  vocals:
    pitch_shift: 0.1
    pitch_shift_max_semitones: 5
    pitch_shift_min_semitones: -5
    seven_band_parametric_eq: 0.1
    seven_band_parametric_eq_max_gain_db: 9
    seven_band_parametric_eq_min_gain_db: -9
    tanh_distortion: 0.1
    tanh_distortion_max: 0.7
    tanh_distortion_min: 0.1
  # NOTE(review): training.instruments also lists guitar and piano, which
  # have no per-stem section here - confirm that is intended.

inference:
  batch_size: 1
  dim_t: 1101
  normalize: false
  num_overlap: 2

model:
  attn_dropout: 0.1
  depth: 12
  dim: 256
  # Equals stft_n_fft / 2 + 1 (2048 / 2 + 1 = 1025) and the sum of
  # freqs_per_bands below.
  dim_freqs_in: 1025
  dim_head: 64
  ff_dropout: 0.1
  flash_attn: false
  freq_transformer_depth: 1
  # 62 entries summing to 1025 (= dim_freqs_in).
  # NOTE(review): presumably the number of frequency bins assigned to each
  # band - confirm against the model code.
  freqs_per_bands:
    # 24 entries of 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    - 2
    # 12 entries of 4
    - 4
    - 4
    - 4
    - 4
    - 4
    - 4
    - 4
    - 4
    - 4
    - 4
    - 4
    - 4
    # 8 entries of 12
    - 12
    - 12
    - 12
    - 12
    - 12
    - 12
    - 12
    - 12
    # 8 entries of 24
    - 24
    - 24
    - 24
    - 24
    - 24
    - 24
    - 24
    - 24
    # 8 entries of 48
    - 48
    - 48
    - 48
    - 48
    - 48
    - 48
    - 48
    - 48
    # final two entries
    - 128
    - 129
  heads: 8
  kan_grid_size: 8
  linear_transformer_depth: 0
  mask_estimator_depth: 2
  mlp_expansion_factor: 4
  multi_stft_hop_size: 147
  multi_stft_normalized: false
  multi_stft_resolution_loss_weight: 1.0
  multi_stft_resolutions_window_sizes:
    - 4096
    - 2048
    - 1024
    - 512
    - 256
  # Matches the six entries in training.instruments.
  num_stems: 6
  sage_attention: false
  skip_connection: false
  stereo: true
  stft_hop_length: 512
  stft_n_fft: 2048
  stft_normalized: false
  stft_win_length: 2048
  time_transformer_depth: 1
  use_kan: true
  use_torch_checkpoint: false

training:
  # NOTE(review): false here while augmentations.enable is true - confirm
  # which flag the consumer honours.
  augmentation: false
  augmentation_loudness: true
  augmentation_loudness_max: 1.5
  augmentation_loudness_min: 0.5
  augmentation_loudness_type: 1
  augmentation_mix: true
  augmentation_type: simple1
  batch_size: 2
  coarse_loss_clip: true
  ema_momentum: 0.999
  grad_clip: 0
  gradient_accumulation_steps: 1
  instruments:
    - bass
    - drums
    - other
    - vocals
    - guitar
    - piano
  lr: 1.0e-05
  num_epochs: 1000
  num_steps: 1000
  optimizer: adam
  other_fix: false
  patience: 3
  q: 0.95
  reduce_factor: 0.95
  target_instrument: null
  use_amp: true
  use_mp3_compress: false