# Model configuration, grouped into four sections: "audio", "inference",
# "model" and "training". Key order, value types (int vs. float, tuple vs.
# list) and every key string are kept exactly as consumers may depend on them.
# NOTE(review): the key names suggest a band-split transformer source-separation
# model — confirm against the code that consumes this mapping.
modelConfiguration = {
    "audio": {
        # 485100 == 1100 * hop_length (441).
        "chunk_size": 485100,
        "dim_f": 1024,
        # NOTE(review): 1101 == chunk_size / hop_length + 1; presumably the
        # number of STFT frames per chunk — confirm.
        "dim_t": 1101,
        "hop_length": 441,
        "min_mean_abs": 0.0,
        "n_fft": 2048,
        "num_channels": 2,
        "sample_rate": 44100,
    },
    "inference": {
        "batch_size": 4,
        # Same value as audio["dim_t"]; keep the two in sync when editing.
        "dim_t": 1101,
        "num_overlap": 2,
    },
    "model": {
        "attn_dropout": 0.0,
        "depth": 12,
        "dim": 512,
        # 1025 == stft_n_fft // 2 + 1, and also the sum of freqs_per_bands.
        "dim_freqs_in": 1025,
        "dim_head": 64,
        "ff_dropout": 0.0,
        "flash_attn": True,
        "freq_transformer_depth": 1,
        # 62 band widths in total, written as runs of equal widths. The
        # entries sum to 1025, matching dim_freqs_in above.
        "freqs_per_bands": (
            (2,) * 24
            + (4,) * 12
            + (12,) * 8
            + (24,) * 8
            + (48,) * 8
            + (128, 129)
        ),
        "heads": 8,
        "linear_transformer_depth": 0,
        "mask_estimator_depth": 1,
        "multi_stft_hop_size": 147,
        "multi_stft_normalized": False,
        "multi_stft_resolution_loss_weight": 1.0,
        "multi_stft_resolutions_window_sizes": (4096, 2048, 1024, 512, 256),
        "num_stems": 1,
        "stereo": True,
        # Same values as audio["hop_length"] and audio["n_fft"].
        "stft_hop_length": 441,
        "stft_n_fft": 2048,
        "stft_normalized": False,
        "stft_win_length": 882,
        "time_transformer_depth": 1,
    },
    "training": {
        "augmentation": False,
        "augmentation_loudness": False,
        "augmentation_loudness_max": 0,
        "augmentation_loudness_min": 0,
        "augmentation_loudness_type": 1,
        "augmentation_mix": False,
        "augmentation_type": None,
        "batch_size": 1,
        "coarse_loss_clip": False,
        "ema_momentum": 0.999,
        "grad_clip": 0,
        "gradient_accumulation_steps": 1,
        "instruments": ["vocals", "other"],
        "lr": 1e-05,
        "num_epochs": 1000,
        "num_steps": 1000,
        "optimizer": "adam",
        "other_fix": True,
        "patience": 2,
        "q": 0.95,
        "reduce_factor": 0.95,
        # One of the names listed under "instruments".
        "target_instrument": "vocals",
        "use_amp": True,
        "use_mp3_compress": False,
    },
}