{ "add_pooling_layer": true, "architectures": [ "ViTNepaForPreTraining" ], "attention_probs_dropout_prob": 0.0, "drop_path_prob": 0.0, "dtype": "float32", "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "image_size": 224, "initializer_range": 0.02, "intermediate_size": 3072, "is_causal": true, "layer_norm_eps": 1e-12, "layerscale_value": 1e-05, "model_type": "vit_nepa", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_prompt_token": 0, "patch_size": 14, "pos_embed_jitter": null, "pos_embed_rescale": 2.0, "pos_embed_shift": null, "qk_norm": true, "qk_norm_affine": false, "qk_norm_bias": false, "qkv_bias": true, "rope_theta": 100.0, "transformers_version": "4.56.2", "use_gated_mlp": false }