{ "_attn_implementation_autoset": false, "architectures": [ "MllamaVisionModel" ], "attention_heads": 16, "dtype": "float32", "hidden_act": "gelu", "hidden_size": 1280, "image_size": 448, "initializer_range": 0.02, "intermediate_layers_indices": [ 3, 7, 15, 23, 30 ], "intermediate_size": 5120, "max_num_tiles": 4, "model_type": "mllama_vision_model", "norm_eps": 1e-05, "num_channels": 3, "num_global_layers": 8, "num_hidden_layers": 32, "patch_size": 14, "supported_aspect_ratios": [ [ 1, 1 ], [ 1, 2 ], [ 1, 3 ], [ 1, 4 ], [ 2, 1 ], [ 2, 2 ], [ 3, 1 ], [ 4, 1 ] ], "transformers_version": "4.57.6", "vision_output_dim": 7680 }