{ "model_type": "spatial_dit", "architectures": [ "SpatialDiT" ], "sample_rate": 48000, "latent_fps": 50, "channel_layout": "7.1.4", "channel_order": [ "FL", "FR", "FC", "LFE", "BL", "BR", "SL", "SR", "TFL", "TFR", "TBL", "TBR" ], "target_channels": 12, "cond_channels": 1, "latent_dim": 64, "hidden_dim": 1024, "num_layers": 12, "num_heads": 16, "mlp_ratio": 4.0, "timestep_embed_dim": 1024, "timestep_scale": 1000.0, "max_period": 10000.0, "num_memory_tokens": 32 }