yujiepan committed (verified)
Commit 984a8fe · 1 Parent(s): 28af482

Upload folder using huggingface_hub

Files changed (6):
  1. .meta.json +2 -2
  2. README.md +52 -52
  3. chat_template.jinja +17 -10
  4. config.json +1 -1
  5. generation_config.json +1 -1
  6. tokenizer_config.json +1 -0
.meta.json CHANGED
@@ -1,4 +1,4 @@
 {
-    "torch": "2.11.0",
-    "transformers": "5.5.0"
+    "torch": "2.11.0+cu129",
+    "transformers": "5.7.0"
 }
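For context, `.meta.json` just records the exact library versions the checkpoint was regenerated with. A minimal sketch of how such a file can be produced (the file name and key layout here simply mirror this repo's `.meta.json`):

```python
import json

import torch
import transformers

# torch.__version__ carries the local build tag (e.g. "2.11.0+cu129"),
# which is why the CUDA suffix now appears in the pinned version.
meta = {
    "torch": torch.__version__,
    "transformers": transformers.__version__,
}

with open(".meta.json", "w") as f:
    json.dump(meta, f, indent=4)
```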
README.md CHANGED
@@ -170,16 +170,62 @@ model.save_pretrained(save_folder)
 ```text
 Gemma4ForConditionalGeneration(
   (model): Gemma4Model(
+    (vision_tower): Gemma4VisionModel(
+      (patch_embedder): Gemma4VisionPatchEmbedder(
+        (input_proj): Linear(in_features=768, out_features=8, bias=False)
+      )
+      (encoder): Gemma4VisionEncoder(
+        (rotary_emb): Gemma4VisionRotaryEmbedding()
+        (layers): ModuleList(
+          (0-1): 2 x Gemma4VisionEncoderLayer(
+            (self_attn): Gemma4VisionAttention(
+              (q_proj): Gemma4ClippableLinear(
+                (linear): Linear(in_features=8, out_features=128, bias=False)
+              )
+              (k_proj): Gemma4ClippableLinear(
+                (linear): Linear(in_features=8, out_features=128, bias=False)
+              )
+              (v_proj): Gemma4ClippableLinear(
+                (linear): Linear(in_features=8, out_features=128, bias=False)
+              )
+              (o_proj): Gemma4ClippableLinear(
+                (linear): Linear(in_features=128, out_features=8, bias=False)
+              )
+              (q_norm): Gemma4RMSNorm()
+              (k_norm): Gemma4RMSNorm()
+              (v_norm): Gemma4RMSNorm()
+            )
+            (mlp): Gemma4VisionMLP(
+              (gate_proj): Gemma4ClippableLinear(
+                (linear): Linear(in_features=8, out_features=64, bias=False)
+              )
+              (up_proj): Gemma4ClippableLinear(
+                (linear): Linear(in_features=8, out_features=64, bias=False)
+              )
+              (down_proj): Gemma4ClippableLinear(
+                (linear): Linear(in_features=64, out_features=8, bias=False)
+              )
+              (act_fn): GELUTanh()
+            )
+            (input_layernorm): Gemma4RMSNorm()
+            (post_attention_layernorm): Gemma4RMSNorm()
+            (pre_feedforward_layernorm): Gemma4RMSNorm()
+            (post_feedforward_layernorm): Gemma4RMSNorm()
+          )
+        )
+      )
+      (pooler): Gemma4VisionPooler()
+    )
     (language_model): Gemma4TextModel(
       (embed_tokens): Gemma4TextScaledWordEmbedding(262144, 8, padding_idx=0)
       (layers): ModuleList(
         (0): Gemma4TextDecoderLayer(
           (self_attn): Gemma4TextAttention(
+            (q_proj): Linear(in_features=8, out_features=256, bias=False)
             (q_norm): Gemma4RMSNorm()
             (k_norm): Gemma4RMSNorm()
             (v_norm): Gemma4RMSNorm()
             (k_proj): Linear(in_features=8, out_features=128, bias=False)
-            (q_proj): Linear(in_features=8, out_features=256, bias=False)
             (v_proj): Linear(in_features=8, out_features=128, bias=False)
             (o_proj): Linear(in_features=256, out_features=8, bias=False)
           )
@@ -196,11 +242,11 @@ Gemma4ForConditionalGeneration(
         )
         (1): Gemma4TextDecoderLayer(
           (self_attn): Gemma4TextAttention(
+            (q_proj): Linear(in_features=8, out_features=512, bias=False)
             (q_norm): Gemma4RMSNorm()
             (k_norm): Gemma4RMSNorm()
             (v_norm): Gemma4RMSNorm()
             (k_proj): Linear(in_features=8, out_features=256, bias=False)
-            (q_proj): Linear(in_features=8, out_features=512, bias=False)
             (o_proj): Linear(in_features=512, out_features=8, bias=False)
           )
           (mlp): Gemma4TextMLP(
@@ -216,11 +262,11 @@ Gemma4ForConditionalGeneration(
         )
         (2): Gemma4TextDecoderLayer(
           (self_attn): Gemma4TextAttention(
+            (q_proj): Linear(in_features=8, out_features=256, bias=False)
             (q_norm): Gemma4RMSNorm()
             (k_norm): Gemma4RMSNorm()
             (v_norm): Gemma4RMSNorm()
             (k_proj): Linear(in_features=8, out_features=128, bias=False)
-            (q_proj): Linear(in_features=8, out_features=256, bias=False)
             (v_proj): Linear(in_features=8, out_features=128, bias=False)
             (o_proj): Linear(in_features=256, out_features=8, bias=False)
           )
@@ -237,11 +283,11 @@ Gemma4ForConditionalGeneration(
         )
         (3): Gemma4TextDecoderLayer(
           (self_attn): Gemma4TextAttention(
+            (q_proj): Linear(in_features=8, out_features=512, bias=False)
             (q_norm): Gemma4RMSNorm()
             (k_norm): Gemma4RMSNorm()
             (v_norm): Gemma4RMSNorm()
             (k_proj): Linear(in_features=8, out_features=256, bias=False)
-            (q_proj): Linear(in_features=8, out_features=512, bias=False)
             (o_proj): Linear(in_features=512, out_features=8, bias=False)
           )
           (mlp): Gemma4TextMLP(
@@ -259,52 +305,6 @@ Gemma4ForConditionalGeneration(
       (norm): Gemma4RMSNorm()
       (rotary_emb): Gemma4TextRotaryEmbedding()
     )
-    (vision_tower): Gemma4VisionModel(
-      (patch_embedder): Gemma4VisionPatchEmbedder(
-        (input_proj): Linear(in_features=768, out_features=8, bias=False)
-      )
-      (encoder): Gemma4VisionEncoder(
-        (rotary_emb): Gemma4VisionRotaryEmbedding()
-        (layers): ModuleList(
-          (0-1): 2 x Gemma4VisionEncoderLayer(
-            (self_attn): Gemma4VisionAttention(
-              (q_proj): Gemma4ClippableLinear(
-                (linear): Linear(in_features=8, out_features=128, bias=False)
-              )
-              (k_proj): Gemma4ClippableLinear(
-                (linear): Linear(in_features=8, out_features=128, bias=False)
-              )
-              (v_proj): Gemma4ClippableLinear(
-                (linear): Linear(in_features=8, out_features=128, bias=False)
-              )
-              (o_proj): Gemma4ClippableLinear(
-                (linear): Linear(in_features=128, out_features=8, bias=False)
-              )
-              (q_norm): Gemma4RMSNorm()
-              (k_norm): Gemma4RMSNorm()
-              (v_norm): Gemma4RMSNorm()
-            )
-            (mlp): Gemma4VisionMLP(
-              (gate_proj): Gemma4ClippableLinear(
-                (linear): Linear(in_features=8, out_features=64, bias=False)
-              )
-              (up_proj): Gemma4ClippableLinear(
-                (linear): Linear(in_features=8, out_features=64, bias=False)
-              )
-              (down_proj): Gemma4ClippableLinear(
-                (linear): Linear(in_features=64, out_features=8, bias=False)
-              )
-              (act_fn): GELUTanh()
-            )
-            (input_layernorm): Gemma4RMSNorm()
-            (post_attention_layernorm): Gemma4RMSNorm()
-            (pre_feedforward_layernorm): Gemma4RMSNorm()
-            (post_feedforward_layernorm): Gemma4RMSNorm()
-          )
-        )
-      )
-      (pooler): Gemma4VisionPooler()
-    )
     (embed_vision): Gemma4MultimodalEmbedder(
       (embedding_projection): Linear(in_features=8, out_features=8, bias=False)
       (embedding_pre_projection_norm): Gemma4RMSNorm()
@@ -318,5 +318,5 @@ Gemma4ForConditionalGeneration(
 
 ### Test environment:
 
-- torch: 2.11.0
-- transformers: 5.5.0
+- torch: 2.11.0+cu129
+- transformers: 5.7.0
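The architecture dump above can be reproduced by loading the saved checkpoint and printing the module tree. A minimal sketch, assuming the multimodal checkpoint loads through `AutoModelForImageTextToText` (the auto class and folder path are assumptions; the README's earlier snippet defines the real `save_folder`):

```python
from transformers import AutoModelForImageTextToText

save_folder = "./tiny-random-gemma4"  # placeholder for the README's save_folder

# print(model) walks submodules in registration order; the reordering in
# this diff (q_proj ahead of q_norm, vision_tower ahead of language_model)
# reflects the registration order used by the transformers 5.7 modeling code.
model = AutoModelForImageTextToText.from_pretrained(save_folder)
print(model)
```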
chat_template.jinja CHANGED
@@ -1,9 +1,9 @@
-{%- macro format_parameters(properties, required) -%}
+{%- macro format_parameters(properties, required, filter_keys=false) -%}
 {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
 {%- set ns = namespace(found_first=false) -%}
 {%- for key, value in properties | dictsort -%}
 {%- set add_comma = false -%}
-{%- if key not in standard_keys -%}
+{%- if not filter_keys or key not in standard_keys -%}
 {%- if ns.found_first %},{% endif -%}
 {%- set ns.found_first = true -%}
 {{ key }}:{
@@ -65,7 +65,7 @@
 {%- elif value is mapping -%}
 {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
 properties:{
-{{- format_parameters(value, value['required'] | default([])) -}}
+{{- format_parameters(value, value['required'] | default([]), filter_keys=true) -}}
 }
 {%- endif -%}
 {%- if value['required'] -%}
@@ -178,18 +178,21 @@
 {#- Handle System/Tool Definitions Block -#}
 {%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
 {{- '<|turn>system\n' -}}
-
 {#- Inject Thinking token at the very top of the FIRST system turn -#}
 {%- if enable_thinking is defined and enable_thinking -%}
 {{- '<|think|>\n' -}}
 {%- set ns.prev_message_type = 'think' -%}
 {%- endif -%}
-
 {%- if messages[0]['role'] in ['system', 'developer'] -%}
-{{- messages[0]['content'] | trim -}}
+{%- if messages[0]['content'] is string -%}
+{{- messages[0]['content'] | trim -}}
+{%- elif messages[0]['content'] is sequence -%}
+{%- for item in messages[0]['content'] -%}
+{{- item['text'] | trim + ' '-}}
+{%- endfor -%}
+{%- endif -%}
 {%- set loop_messages = messages[1:] -%}
 {%- endif -%}
-
 {%- if tools -%}
 {%- for tool in tools %}
 {{- '<|tool>' -}}
@@ -198,7 +201,6 @@
 {%- endfor %}
 {%- set ns.prev_message_type = 'tool' -%}
 {%- endif -%}
-
 {{- '<turn|>\n' -}}
 {%- endif %}
 
@@ -302,6 +304,7 @@
 {%- endfor -%}
 {%- endif -%}
 
+{%- set captured_content -%}
 {%- if message['content'] is string -%}
 {%- if role == 'model' -%}
 {{- strip_thinking(message['content']) -}}
@@ -328,10 +331,14 @@
 {%- endif -%}
 {%- endfor -%}
 {%- endif -%}
+{%- endset -%}
+
+{{- captured_content -}}
+{%- set has_content = captured_content | trim | length > 0 -%}
 
 {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
 {{- '<|tool_response>' -}}
-{%- elif not (ns_tr_out.flag and not message.get('content')) -%}
+{%- elif not (ns_tr_out.flag and not has_content) -%}
 {{- '<turn|>\n' -}}
 {%- endif -%}
 {%- endif -%}
@@ -344,4 +351,4 @@
 {{- '<|channel>thought\n<channel|>' -}}
 {%- endif -%}
 {%- endif -%}
-{%- endif -%}
+{%- endif -%}
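The largest template change replaces the fragile `message.get('content')` truthiness test with a render-then-check pattern: the message body is captured via `{% set %} … {% endset %}` and emptiness is decided from the rendered text, which also covers list-form content. A minimal standalone sketch of that Jinja2 pattern (variable names here are illustrative, not the template's own):

```python
from jinja2 import Template

# Block assignment ({% set %} ... {% endset %}) stores the rendered body,
# so emptiness is tested after rendering rather than by inspecting the
# raw message dict.
template = Template(
    "{%- set captured -%}"
    "{%- for item in content -%}{{ item['text'] | trim }}{%- endfor -%}"
    "{%- endset -%}"
    "{{ captured }}"
    "{%- if captured | trim | length > 0 -%}<turn|>{%- endif -%}"
)

print(template.render(content=[{"text": "hello"}]))  # -> hello<turn|>
print(template.render(content=[]))                   # -> empty, no closing tag
```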
config.json CHANGED
@@ -72,7 +72,7 @@
     "vocab_size_per_layer_input": 262144
   },
   "tie_word_embeddings": true,
-  "transformers_version": "5.5.0",
+  "transformers_version": "5.7.0",
   "video_token_id": 258884,
   "vision_config": {
     "_name_or_path": "",
generation_config.json CHANGED
@@ -10,6 +10,6 @@
   "temperature": 1.0,
   "top_k": 64,
   "top_p": 0.95,
-  "transformers_version": "5.5.0",
+  "transformers_version": "5.7.0",
   "trust_remote_code": true
 }
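Both JSON version bumps come from regenerating the files with transformers 5.7.0, which stamps the writing library's version into each config. A quick sketch of inspecting that stamp (the folder path is a placeholder for a local copy of this repo):

```python
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("./tiny-random-gemma4")  # placeholder path

# Sampling defaults from the file, plus the version of transformers that
# serialized it ("5.7.0" after this commit).
print(gen_config.temperature, gen_config.top_k, gen_config.top_p)
print(gen_config.transformers_version)
```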
tokenizer_config.json CHANGED
@@ -18,6 +18,7 @@
   ],
   "image_token": "<|image|>",
   "is_local": false,
+  "local_files_only": false,
   "mask_token": "<mask>",
   "model_max_length": 1000000000000000019884624838656,
   "model_specific_special_tokens": {