{
  "model_type": "spatial_dit",
  "architectures": [
    "SpatialDiT"
  ],
  "sample_rate": 48000,
  "latent_fps": 50,
  "channel_layout": "7.1.4",
  "channel_order": [
    "FL",
    "FR",
    "FC",
    "LFE",
    "BL",
    "BR",
    "SL",
    "SR",
    "TFL",
    "TFR",
    "TBL",
    "TBR"
  ],
  "target_channels": 12,
  "cond_channels": 1,
  "latent_dim": 64,
  "hidden_dim": 1024,
  "num_layers": 12,
  "num_heads": 16,
  "mlp_ratio": 4.0,
  "timestep_embed_dim": 1024,
  "timestep_scale": 1000.0,
  "max_period": 10000.0,
  "num_memory_tokens": 32
}