{ "in_channels": 3, "patch_size": 16, "model_dim": 896, "encoder_depth": 6, "decoder_depth": 8, "decoder_start_blocks": 2, "decoder_end_blocks": 2, "bottleneck_dim": 128, "mlp_ratio": 4.0, "encoder_mlp_type": "gelu", "depthwise_kernel_size": 7, "adaln_low_rank_rank": 128, "bottleneck_posterior_kind": "diagonal_gaussian", "bottleneck_norm_mode": "disabled", "logsnr_min": -10.0, "logsnr_max": 10.0, "pixel_noise_std": 0.558, "latent_running_stats_eps": 0.0001, "class_head_feature_dim": 768, "class_head_model_dim": 768, "class_head_head_dim": 64, "class_head_mlp_ratio": 4.0, "class_head_mlp_type": "gelu", "class_head_register_token_count": 4 }