{
  "_class_name": "VisionTransformer",
  "_diffusers_version": "0.35.2",
  "axes_dim": [
    8,
    36,
    36
  ],
  "context_in_dim": 1536,
  "depth": 0,
  "depth_single_blocks": 35,
  "guidance_embed": false,
  "hidden_size": 1920,
  "in_channels": 16,
  "mlp_ratio": 4.0,
  "num_heads": 24,
  "qkv_bias": true,
  "theta": 10000,
  "use_patchify": false,
  "vec_in_dim": 1536
}