{
  "architectures": [
    "Gemma3ForConditionalGeneration"
  ],
  "boi_token_index": 255999,
  "eoi_token_index": 256000,
  "eos_token_id": [
    1,
    106
  ],
  "image_token_index": 262144,
  "initializer_range": 0.02,
  "mm_tokens_per_image": 256,
  "model_type": "gemma3",
  "text_config": {
    "_sliding_window_pattern": 6,
    "attention_bias": false,
    "attention_dropout": 0.0,
    "attn_logit_softcapping": null,
    "final_logit_softcapping": null,
    "head_dim": 256,
    "hidden_activation": "gelu_pytorch_tanh",
    "hidden_size": 16,
    "initializer_range": 0.02,
    "intermediate_size": 10240,
    "layer_types": [
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "full_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention",
      "sliding_attention"
    ],
    "max_position_embeddings": 131072,
    "model_type": "gemma3_text",
    "num_attention_heads": 4,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "query_pre_attn_scalar": 256,
    "rms_norm_eps": 1e-06,
    "rope_local_base_freq": 10000.0,
    "rope_scaling": {
      "factor": 8.0,
      "rope_type": "linear"
    },
    "rope_theta": 1000000.0,
    "sliding_window": 1024,
    "use_cache": true,
    "vocab_size": 262208
  },
  "torch_dtype": "bfloat16",
  "transformers_version": "4.56.0.dev0",
  "vision_config": {
    "attention_dropout": 0.0,
    "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 16,
    "image_size": 896,
    "intermediate_size": 4304,
    "layer_norm_eps": 1e-06,
    "model_type": "siglip_vision_model",
    "num_attention_heads": 4,
    "num_channels": 3,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "patch_size": 14,
    "vision_use_head": false
  }
}