| { | |
| "model_name": "mdx", | |
| "model_class": "BagOfModelsMLX", | |
| "sub_model_class": "DemucsMLX", | |
| "num_models": 4, | |
| "weights": [ | |
| [ | |
| 1.0, | |
| 1.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| [ | |
| 0.0, | |
| 1.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| [ | |
| 1.0, | |
| 0.0, | |
| 1.0, | |
| 1.0 | |
| ], | |
| [ | |
| 1.0, | |
| 0.0, | |
| 1.0, | |
| 1.0 | |
| ] | |
| ], | |
| "args": [], | |
| "kwargs": { | |
| "sources": [ | |
| "drums", | |
| "bass", | |
| "other", | |
| "vocals" | |
| ], | |
| "audio_channels": 2, | |
| "samplerate": 44100, | |
| "segment": 44, | |
| "channels": 64, | |
| "growth": 2, | |
| "depth": 6, | |
| "rewrite": false, | |
| "lstm_layers": 0, | |
| "kernel_size": 8, | |
| "stride": 4, | |
| "context": 1, | |
| "gelu": true, | |
| "glu": true, | |
| "norm_groups": 4, | |
| "norm_starts": 4, | |
| "dconv_depth": 2, | |
| "dconv_mode": 1, | |
| "dconv_comp": 4, | |
| "dconv_attn": 4, | |
| "dconv_lstm": 4, | |
| "dconv_init": 0.0001, | |
| "resample": true, | |
| "normalize": true, | |
| "rescale": 0.1, | |
| "gelu_act": true, | |
| "glu_act": true | |
| }, | |
| "mlx_version": "0.30.3", | |
| "tensor_count": 1298, | |
| "sub_model_classes": [ | |
| "DemucsMLX", | |
| "DemucsMLX", | |
| "HDemucsMLX", | |
| "HDemucsMLX" | |
| ], | |
| "model_configs": [ | |
| { | |
| "model_class": "DemucsMLX", | |
| "kwargs": { | |
| "sources": [ | |
| "drums", | |
| "bass", | |
| "other", | |
| "vocals" | |
| ], | |
| "audio_channels": 2, | |
| "samplerate": 44100, | |
| "segment": 44, | |
| "channels": 64, | |
| "growth": 2, | |
| "depth": 6, | |
| "rewrite": false, | |
| "lstm_layers": 0, | |
| "kernel_size": 8, | |
| "stride": 4, | |
| "context": 1, | |
| "gelu": true, | |
| "glu": true, | |
| "norm_groups": 4, | |
| "norm_starts": 4, | |
| "dconv_depth": 2, | |
| "dconv_mode": 1, | |
| "dconv_comp": 4, | |
| "dconv_attn": 4, | |
| "dconv_lstm": 4, | |
| "dconv_init": 0.0001, | |
| "resample": true, | |
| "normalize": true, | |
| "rescale": 0.1, | |
| "gelu_act": true, | |
| "glu_act": true | |
| } | |
| }, | |
| { | |
| "model_class": "DemucsMLX", | |
| "kwargs": { | |
| "sources": [ | |
| "drums", | |
| "bass", | |
| "other", | |
| "vocals" | |
| ], | |
| "audio_channels": 2, | |
| "samplerate": 44100, | |
| "segment": 44, | |
| "channels": 64, | |
| "growth": 2, | |
| "depth": 6, | |
| "rewrite": false, | |
| "lstm_layers": 0, | |
| "kernel_size": 8, | |
| "stride": 4, | |
| "context": 1, | |
| "gelu": true, | |
| "glu": true, | |
| "norm_groups": 4, | |
| "norm_starts": 4, | |
| "dconv_depth": 2, | |
| "dconv_mode": 1, | |
| "dconv_comp": 4, | |
| "dconv_attn": 4, | |
| "dconv_lstm": 4, | |
| "dconv_init": 0.0001, | |
| "resample": true, | |
| "normalize": true, | |
| "rescale": 0.1, | |
| "gelu_act": true, | |
| "glu_act": true | |
| } | |
| }, | |
| { | |
| "model_class": "HDemucsMLX", | |
| "kwargs": { | |
| "sources": [ | |
| "drums", | |
| "bass", | |
| "other", | |
| "vocals" | |
| ], | |
| "audio_channels": 2, | |
| "samplerate": 44100, | |
| "segment": 44, | |
| "channels": 48, | |
| "channels_time": null, | |
| "growth": 2, | |
| "nfft": 4096, | |
| "wiener_iters": 0, | |
| "end_iters": 0, | |
| "wiener_residual": false, | |
| "cac": false, | |
| "depth": 6, | |
| "rewrite": true, | |
| "hybrid": true, | |
| "hybrid_old": true, | |
| "multi_freqs": [], | |
| "multi_freqs_depth": 3, | |
| "freq_emb": 0.2, | |
| "emb_scale": 10, | |
| "emb_smooth": true, | |
| "kernel_size": 8, | |
| "stride": 4, | |
| "time_stride": 2, | |
| "context": 1, | |
| "context_enc": 0, | |
| "norm_starts": 999, | |
| "norm_groups": 4, | |
| "dconv_mode": 1, | |
| "dconv_depth": 2, | |
| "dconv_comp": 4, | |
| "dconv_attn": 4, | |
| "dconv_lstm": 4, | |
| "dconv_init": 0.0001, | |
| "rescale": 0.1 | |
| } | |
| }, | |
| { | |
| "model_class": "HDemucsMLX", | |
| "kwargs": { | |
| "sources": [ | |
| "drums", | |
| "bass", | |
| "other", | |
| "vocals" | |
| ], | |
| "audio_channels": 2, | |
| "samplerate": 44100, | |
| "segment": 44, | |
| "channels": 48, | |
| "channels_time": null, | |
| "growth": 2, | |
| "nfft": 4096, | |
| "wiener_iters": 0, | |
| "end_iters": 0, | |
| "wiener_residual": false, | |
| "cac": true, | |
| "depth": 6, | |
| "rewrite": true, | |
| "hybrid": true, | |
| "hybrid_old": false, | |
| "multi_freqs": [ | |
| 0.1, | |
| 0.3 | |
| ], | |
| "multi_freqs_depth": 2, | |
| "freq_emb": 0.2, | |
| "emb_scale": 10, | |
| "emb_smooth": true, | |
| "kernel_size": 8, | |
| "stride": 4, | |
| "time_stride": 2, | |
| "context": 1, | |
| "context_enc": 0, | |
| "norm_starts": 999, | |
| "norm_groups": 4, | |
| "dconv_mode": 1, | |
| "dconv_depth": 2, | |
| "dconv_comp": 4, | |
| "dconv_attn": 4, | |
| "dconv_lstm": 4, | |
| "dconv_init": 0.0001, | |
| "rescale": 0.1 | |
| } | |
| } | |
| ] | |
| } |