{
"architectures": [
"Gemma3ForConditionalGeneration"
],
"boi_token_index": 255999,
"dtype": "bfloat16",
"eoi_token_index": 256000,
"eos_token_id": [
1,
106
],
"image_token_index": 262144,
"initializer_range": 0.02,
"mm_tokens_per_image": 256,
"model_type": "gemma3",
"text_config": {
"_sliding_window_pattern": 6,
"attention_bias": false,
"attention_dropout": 0.0,
"attn_logit_softcapping": null,
"final_logit_softcapping": null,
"head_dim": 256,
"hidden_activation": "gelu_pytorch_tanh",
"hidden_size": 16,
"initializer_range": 0.02,
"intermediate_size": 10240,
"layer_types": [
"sliding_attention",
"sliding_attention"
],
"max_position_embeddings": 131072,
"model_type": "gemma3_text",
"num_attention_heads": 4,
"num_hidden_layers": 2,
"num_key_value_heads": 2,
"query_pre_attn_scalar": 256,
"rms_norm_eps": 1e-06,
"rope_local_base_freq": 10000.0,
"rope_scaling": {
"factor": 8.0,
"rope_type": "linear"
},
"rope_theta": 1000000.0,
"sliding_window": 1024,
"use_bidirectional_attention": false,
"use_cache": true,
"vocab_size": 262208
},
"transformers_version": "4.57.0.dev0",
"vision_config": {
"attention_dropout": 0.0,
"embed_dim": 64,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 16,
"image_size": 896,
"intermediate_size": 4304,
"layer_norm_eps": 1e-06,
"model_type": "siglip_vision_model",
"num_attention_heads": 4,
"num_channels": 3,
"num_hidden_layers": 2,
"num_key_value_heads": 2,
"patch_size": 14,
"vision_use_head": false
}
}