| { |
| "in_channels": 3, |
| "patch_size": 16, |
| "model_dim": 896, |
| "encoder_depth": 6, |
| "decoder_depth": 8, |
| "decoder_start_blocks": 2, |
| "decoder_end_blocks": 2, |
| "bottleneck_dim": 128, |
| "mlp_ratio": 4.0, |
| "encoder_mlp_type": "gelu", |
| "depthwise_kernel_size": 7, |
| "adaln_low_rank_rank": 128, |
| "bottleneck_posterior_kind": "diagonal_gaussian", |
| "bottleneck_norm_mode": "disabled", |
| "logsnr_min": -10.0, |
| "logsnr_max": 10.0, |
| "pixel_noise_std": 0.558, |
| "latent_running_stats_eps": 0.0001, |
| "class_head_feature_dim": 768, |
| "class_head_model_dim": 768, |
| "class_head_head_dim": 64, |
| "class_head_mlp_ratio": 4.0, |
| "class_head_mlp_type": "gelu", |
| "class_head_register_token_count": 4 |
| } |
|
|