{
  "model_dim": 128,
  "model_depth": 4,
  "audio_channels": 2,
  "num_stems": 1,
  "time_transformer_depth": 1,
  "freq_transformer_depth": 1,
  "dim_head": 32,
  "heads": 4,
  "ff_mult": 4,
  "stft_n_fft": 2048,
  "stft_hop_length": 441,
  "stft_win_length": 2048,
  "stft_normalized": false,
  "mask_estimator_depth": 2,
  "mask_mlp_expansion_factor": 4,
  "freq_range": [2, 734],
  "freqs_per_bands": [
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    12, 12, 12, 12, 12, 12, 12, 12,
    24, 24, 24, 24, 24, 24, 24, 24,
    48, 48, 48, 48, 48, 48,
    62
  ]
}