{
  "model_dim": 128,
  "model_depth": 4,
  "audio_channels": 2,
  "num_stems": 1,
  "time_transformer_depth": 1,
  "freq_transformer_depth": 1,
  "dim_head": 32,
  "heads": 4,
  "ff_mult": 4,
  "stft_n_fft": 2048,
  "stft_hop_length": 441,
  "stft_win_length": 2048,
  "stft_normalized": false,
  "mask_estimator_depth": 2,
  "mask_mlp_expansion_factor": 4,
  "freq_range": [2, 734],
  "freqs_per_bands": [
    2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    12, 12, 12, 12, 12, 12, 12, 12,
    24, 24, 24, 24, 24, 24, 24, 24,
    48, 48, 48, 48, 48, 48, 62
  ]
}