ariG23498 HF Staff committed on
Commit
a9ea01e
·
verified ·
1 Parent(s): 40667dd

Upload Gemma4ForConditionalGeneration

Browse files
Files changed (2) hide show
  1. config.json +7 -7
  2. model.safetensors +2 -2
config.json CHANGED
@@ -15,7 +15,7 @@
15
  "dtype": "bfloat16",
16
  "gradient_clipping": 10000000000.0,
17
  "hidden_act": "silu",
18
- "hidden_size": 1024,
19
  "id2label": {
20
  "0": "LABEL_0",
21
  "1": "LABEL_1"
@@ -27,7 +27,7 @@
27
  "LABEL_1": 1
28
  },
29
  "model_type": "gemma4_audio",
30
- "num_attention_heads": 8,
31
  "num_hidden_layers": 1,
32
  "output_attentions": false,
33
  "output_hidden_states": false,
@@ -65,7 +65,7 @@
65
  "global_head_dim": 512,
66
  "head_dim": 256,
67
  "hidden_activation": "gelu_pytorch_tanh",
68
- "hidden_size": 1536,
69
  "hidden_size_per_layer_input": 256,
70
  "initializer_range": 0.02,
71
  "intermediate_size": 6144,
@@ -109,10 +109,10 @@
109
  "max_position_embeddings": 131072,
110
  "model_type": "gemma4_text",
111
  "moe_intermediate_size": null,
112
- "num_attention_heads": 8,
113
  "num_experts": null,
114
  "num_global_key_value_heads": null,
115
- "num_hidden_layers": 1,
116
  "num_key_value_heads": 1,
117
  "num_kv_shared_layers": 20,
118
  "pad_token_id": 0,
@@ -151,7 +151,7 @@
151
  "global_head_dim": 64,
152
  "head_dim": 64,
153
  "hidden_activation": "gelu_pytorch_tanh",
154
- "hidden_size": 768,
155
  "id2label": {
156
  "0": "LABEL_0",
157
  "1": "LABEL_1"
@@ -165,7 +165,7 @@
165
  },
166
  "max_position_embeddings": 131072,
167
  "model_type": "gemma4_vision",
168
- "num_attention_heads": 12,
169
  "num_hidden_layers": 1,
170
  "num_key_value_heads": 12,
171
  "output_attentions": false,
 
15
  "dtype": "bfloat16",
16
  "gradient_clipping": 10000000000.0,
17
  "hidden_act": "silu",
18
+ "hidden_size": 8,
19
  "id2label": {
20
  "0": "LABEL_0",
21
  "1": "LABEL_1"
 
27
  "LABEL_1": 1
28
  },
29
  "model_type": "gemma4_audio",
30
+ "num_attention_heads": 1,
31
  "num_hidden_layers": 1,
32
  "output_attentions": false,
33
  "output_hidden_states": false,
 
65
  "global_head_dim": 512,
66
  "head_dim": 256,
67
  "hidden_activation": "gelu_pytorch_tanh",
68
+ "hidden_size": 8,
69
  "hidden_size_per_layer_input": 256,
70
  "initializer_range": 0.02,
71
  "intermediate_size": 6144,
 
109
  "max_position_embeddings": 131072,
110
  "model_type": "gemma4_text",
111
  "moe_intermediate_size": null,
112
+ "num_attention_heads": 1,
113
  "num_experts": null,
114
  "num_global_key_value_heads": null,
115
+ "num_hidden_layers": 35,
116
  "num_key_value_heads": 1,
117
  "num_kv_shared_layers": 20,
118
  "pad_token_id": 0,
 
151
  "global_head_dim": 64,
152
  "head_dim": 64,
153
  "hidden_activation": "gelu_pytorch_tanh",
154
+ "hidden_size": 8,
155
  "id2label": {
156
  "0": "LABEL_0",
157
  "1": "LABEL_1"
 
165
  },
166
  "max_position_embeddings": 131072,
167
  "model_type": "gemma4_vision",
168
+ "num_attention_heads": 1,
169
  "num_hidden_layers": 1,
170
  "num_key_value_heads": 12,
171
  "output_attentions": false,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af3c84640ecf79b16b380640c814508a88793bf5d54efb97823e82f2708f56ea
3
- size 1126978354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca8f8870c7dc56af8f1d333201bfa9585a107eab0f1b662edac405ea5a267507
3
+ size 4719736990