{
  "architectures": [
    "Gemma3ForConditionalGeneration"
  ],
  "boi_token_index": 255999,
  "dtype": "bfloat16",
  "eoi_token_index": 256000,
  "eos_token_id": [
    1,
    106
  ],
  "image_token_index": 262144,
  "initializer_range": 0.02,
  "mm_tokens_per_image": 256,
  "model_type": "gemma3",
  "text_config": {
    "_sliding_window_pattern": 6,
    "attention_bias": false,
    "attention_dropout": 0.0,
    "attn_logit_softcapping": null,
    "final_logit_softcapping": null,
    "head_dim": 256,
    "hidden_activation": "gelu_pytorch_tanh",
    "hidden_size": 8,
    "initializer_range": 0.02,
    "intermediate_size": 10240,
    "layer_types": [
      "sliding_attention",
      "sliding_attention"
    ],
    "max_position_embeddings": 131072,
    "model_type": "gemma3_text",
    "num_attention_heads": 4,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "query_pre_attn_scalar": 256,
    "rms_norm_eps": 1e-06,
    "rope_local_base_freq": 10000.0,
    "rope_scaling": {
      "factor": 8.0,
      "rope_type": "linear"
    },
    "rope_theta": 1000000.0,
    "sliding_window": 1024,
    "use_bidirectional_attention": false,
    "use_cache": true,
    "vocab_size": 262208
  },
  "transformers_version": "4.57.0.dev0",
  "vision_config": {
    "attention_dropout": 0.0,
    "embed_dim": 64,
    "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 8,
    "image_size": 896,
    "intermediate_size": 4304,
    "layer_norm_eps": 1e-06,
    "model_type": "siglip_vision_model",
    "num_attention_heads": 4,
    "num_channels": 3,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "patch_size": 14,
    "vision_use_head": false
  }
}
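
A minimal usage sketch, not part of the original file: assuming transformers >= 4.50 (which introduced Gemma 3 support) and this file saved locally as config.json, the config can be parsed and a randomly initialized model built from it. The very small dimensions (hidden_size 8, 2 layers) suggest this is a test-sized configuration rather than a trained checkpoint.

from transformers import Gemma3Config, Gemma3ForConditionalGeneration

# Parse the JSON above into a structured config object;
# nested text_config / vision_config dicts are converted automatically.
config = Gemma3Config.from_json_file("config.json")
print(config.text_config.hidden_size)    # 8   (tiny, test-sized width)
print(config.vision_config.image_size)   # 896 (SigLIP vision tower input)

# Instantiate an untrained model with this architecture;
# no weights are downloaded, parameters are randomly initialized.
model = Gemma3ForConditionalGeneration(config)
print(sum(p.numel() for p in model.parameters()))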