{
  "architectures": [
    "AttAlexNetForClassification"
  ],
  "attention_type": "sdpa",
  "classifier_dim": 4096,
  "classifier_dropout": 0.1,
  "dual_obj": false,
  "hidden_act": "silu",
  "hidden_size": 64,
  "img_size": 1024,
  "in_channels": 3,
  "intermediate_size": 1024,
  "is_causal": false,
  "max_position_embeddings": 4096,
  "model_type": "att_alexnet",
  "moe": false,
  "n_filts": 4,
  "num_attention_heads": 8,
  "num_classes": 3,
  "num_experts": 8,
  "num_hidden_layers": 6,
  "num_layers": 2,
  "output_router_logits": true,
  "patch_size": 16,
  "problem_type": "single_label_classification",
  "router_aux_loss_coef": 0.01,
  "topk": 2,
  "torch_dtype": "float32",
  "transformers_version": "4.41.2"
}