{ "model_type": "autoencoder_kl_prism_2d", "base_dim": 96, "decoder_base_dim": null, "z_dim": 16, "dim_mult": [ 1, 2, 4, 4 ], "num_res_blocks": 2, "attn_scales": [], "temporal_downsample": [ false, true, true ], "dropout": 0.0, "is_residual": false, "in_channels": 6, "out_channels": 6, "scale_factor_temporal": 4, "latents_mean": [ 0.0092, -0.0013, -0.0052, 0.0025, -0.0012, 0.0024, 0.0021, -0.0004, -0.0015, 0.0013, -0.0002, 0.0014, 0.0018, -0.0001, -0.0008, 0.0009 ], "latents_std": [ 0.993707, 1.020968, 0.996201, 1.025335, 0.997547, 1.035847, 1.008814, 0.999811, 0.980396, 1.000318, 1.033794, 0.993485, 0.998681, 1.038657, 1.001396, 0.997597 ], "use_static": false, "use_rollout_trans": true }