GadflyII committed on
Commit
adad1ef
·
verified ·
1 Parent(s): 15cf279

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
calibrated_input_scales.json ADDED
The diff for this file is too large to render. See raw diff
 
chat_template.jinja ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [gMASK]<sop>
2
+ {%- if tools -%}
3
+ <|system|>
4
+ # Tools
5
+
6
+ You may call one or more functions to assist with the user query.
7
+
8
+ You are provided with function signatures within <tools></tools> XML tags:
9
+ <tools>
10
+ {% for tool in tools %}
11
+ {{ tool | tojson(ensure_ascii=False) }}
12
+ {% endfor %}
13
+ </tools>
14
+
15
+ For each function call, output the function name and arguments within the following XML format:
16
+ <tool_call>{function-name}
17
+ <arg_key>{arg-key-1}</arg_key>
18
+ <arg_value>{arg-value-1}</arg_value>
19
+ <arg_key>{arg-key-2}</arg_key>
20
+ <arg_value>{arg-value-2}</arg_value>
21
+ ...
22
+ </tool_call>{%- endif -%}
23
+ {%- macro visible_text(content) -%}
24
+ {%- if content is string -%}
25
+ {{- content }}
26
+ {%- elif content is iterable and content is not mapping -%}
27
+ {%- for item in content -%}
28
+ {%- if item is mapping and item.type == 'text' -%}
29
+ {{- item.text }}
30
+ {%- elif item is mapping and (item.type == 'image' or 'image' in item) -%}
31
+ <|begin_of_image|><|image|><|end_of_image|>
32
+ {%- elif item is mapping and (item.type == 'video' or 'video' in item) -%}
33
+ <|begin_of_video|><|video|><|end_of_video|>
34
+ {%- elif item is string -%}
35
+ {{- item }}
36
+ {%- endif -%}
37
+ {%- endfor -%}
38
+ {%- else -%}
39
+ {{- content }}
40
+ {%- endif -%}
41
+ {%- endmacro -%}
42
+ {%- set ns = namespace(last_user_index=-1) %}
43
+ {%- for m in messages %}
44
+ {%- if m.role == 'user' %}
45
+ {% set ns.last_user_index = loop.index0 -%}
46
+ {%- endif %}
47
+ {%- endfor %}
48
+ {% for m in messages %}
49
+ {%- if m.role == 'user' -%}<|user|>
50
+ {% if m.content is string %}
51
+ {{ m.content }}
52
+ {%- else %}
53
+ {%- for item in m.content %}
54
+ {% if item.type == 'video' or 'video' in item %}
55
+ <|begin_of_video|><|video|><|end_of_video|>{% elif item.type == 'image' or 'image' in item %}
56
+ <|begin_of_image|><|image|><|end_of_image|>{% elif item.type == 'text' %}
57
+ {{ item.text }}
58
+ {%- endif %}
59
+ {%- endfor %}
60
+ {%- endif %}
61
+ {{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}}
62
+ {%- elif m.role == 'assistant' -%}
63
+ <|assistant|>
64
+ {%- set reasoning_content = '' %}
65
+ {%- set content = visible_text(m.content) %}
66
+ {%- if m.reasoning_content is string %}
67
+ {%- set reasoning_content = m.reasoning_content %}
68
+ {%- else %}
69
+ {%- if '</think>' in content %}
70
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
71
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
72
+ {%- endif %}
73
+ {%- endif %}
74
+ {%- if loop.index0 > ns.last_user_index and reasoning_content -%}
75
+ {{ '\n<think>' + reasoning_content.strip() + '</think>'}}
76
+ {%- else -%}
77
+ {{ '\n<think></think>' }}
78
+ {%- endif -%}
79
+ {%- if content.strip() -%}
80
+ {{ '\n' + content.strip() }}
81
+ {%- endif -%}
82
+ {% if m.tool_calls %}
83
+ {% for tc in m.tool_calls %}
84
+ {%- if tc.function %}
85
+ {%- set tc = tc.function %}
86
+ {%- endif %}
87
+ {{ '\n<tool_call>' + tc.name }}
88
+ {% set _args = tc.arguments %}
89
+ {% for k, v in _args.items() %}
90
+ <arg_key>{{ k }}</arg_key>
91
+ <arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>
92
+ {% endfor %}
93
+ </tool_call>{% endfor %}
94
+ {% endif %}
95
+ {%- elif m.role == 'tool' -%}
96
+ {%- if m.content is string -%}
97
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
98
+ {{- '<|observation|>' }}
99
+ {%- endif %}
100
+ {{- '\n<tool_response>\n' }}
101
+ {{- m.content }}
102
+ {{- '\n</tool_response>' }}
103
+ {% elif m.content is iterable and m.content is not mapping %}
104
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
105
+ {{- '<|observation|>' }}
106
+ {%- endif %}
107
+ {{- '\n<tool_response>\n' }}
108
+ {%- for tr in m.content -%}
109
+ {%- if tr is mapping and tr.type is defined -%}
110
+ {%- set t = tr.type | lower -%}
111
+ {%- if t == 'text' and tr.text is defined -%}
112
+ {{ tr.text }}
113
+ {%- elif t in ['image', 'image_url'] -%}
114
+ <|begin_of_image|><|image|><|end_of_image|>
115
+ {%- elif t in ['video', 'video_url'] -%}
116
+ <|begin_of_video|><|video|><|end_of_video|>
117
+ {%- else -%}
118
+ {{ tr | tojson(ensure_ascii=False) }}
119
+ {%- endif -%}
120
+ {%- else -%}
121
+ {{ tr.output if tr.output is defined else tr }}
122
+ {%- endif -%}
123
+ {%- endfor -%}
124
+ {{- '\n</tool_response>' }}
125
+ {%- else -%}
126
+ <|observation|>{% for tr in m.content %}
127
+
128
+ <tool_response>
129
+ {{ tr.output if tr.output is defined else tr }}
130
+ </tool_response>{% endfor -%}
131
+ {% endif -%}
132
+ {# ====== 逻辑结束 ====== #}
133
+ {%- elif m.role == 'system' -%}
134
+ <|system|>
135
+ {{ visible_text(m.content) }}
136
+ {%- endif -%}
137
+ {%- endfor -%}
138
+ {%- if add_generation_prompt -%}
139
+ <|assistant|>
140
+ {{'<think></think>\n' if (enable_thinking is defined and not enable_thinking) else ''}}
141
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Glm4vMoeForConditionalGeneration"
4
+ ],
5
+ "model_type": "glm4v_moe",
6
+ "text_config": {
7
+ "attention_bias": true,
8
+ "attention_dropout": 0.0,
9
+ "dtype": "bfloat16",
10
+ "eos_token_id": [
11
+ 151329,
12
+ 151336,
13
+ 151338
14
+ ],
15
+ "first_k_dense_replace": 1,
16
+ "head_dim": 128,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 4096,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 10944,
21
+ "max_position_embeddings": 131072,
22
+ "model_type": "glm4v_moe_text",
23
+ "moe_intermediate_size": 1408,
24
+ "n_group": 1,
25
+ "n_routed_experts": 128,
26
+ "n_shared_experts": 1,
27
+ "norm_topk_prob": true,
28
+ "num_attention_heads": 96,
29
+ "num_experts_per_tok": 8,
30
+ "num_hidden_layers": 46,
31
+ "num_key_value_heads": 8,
32
+ "num_nextn_predict_layers": 0,
33
+ "pad_token_id": 151329,
34
+ "partial_rotary_factor": 0.5,
35
+ "qk_layernorm": false,
36
+ "rms_norm_eps": 1e-05,
37
+ "rope_parameters": {
38
+ "mrope_section": [
39
+ 8,
40
+ 12,
41
+ 12
42
+ ],
43
+ "partial_rotary_factor": 0.5,
44
+ "rope_theta": 500000,
45
+ "rope_type": "default"
46
+ },
47
+ "routed_scaling_factor": 1.0,
48
+ "topk_group": 1,
49
+ "use_cache": true,
50
+ "use_qk_norm": false,
51
+ "vocab_size": 151552
52
+ },
53
+ "tie_word_embeddings": false,
54
+ "transformers_version": "5.0.0rc0",
55
+ "image_start_token_id": 151339,
56
+ "image_end_token_id": 151340,
57
+ "video_start_token_id": 151341,
58
+ "video_end_token_id": 151342,
59
+ "image_token_id": 151363,
60
+ "video_token_id": 151364,
61
+ "vision_config": {
62
+ "attention_bias": false,
63
+ "attention_dropout": 0.0,
64
+ "depth": 24,
65
+ "hidden_act": "silu",
66
+ "hidden_dropout_prob": 0.0,
67
+ "hidden_size": 1536,
68
+ "image_size": 336,
69
+ "in_channels": 3,
70
+ "initializer_range": 0.02,
71
+ "intermediate_size": 10944,
72
+ "model_type": "glm4v_moe_vision",
73
+ "num_heads": 12,
74
+ "out_hidden_size": 4096,
75
+ "patch_size": 14,
76
+ "rms_norm_eps": 1e-05,
77
+ "spatial_merge_size": 2,
78
+ "temporal_patch_size": 2
79
+ },
80
+ "quantization_config": {
81
+ "quant_method": "compressed-tensors",
82
+ "config_groups": {
83
+ "group_0": {
84
+ "input_activations": {
85
+ "num_bits": 4,
86
+ "type": "float",
87
+ "strategy": "tensor_group",
88
+ "group_size": 16,
89
+ "symmetric": true,
90
+ "dynamic": false
91
+ },
92
+ "weights": {
93
+ "num_bits": 4,
94
+ "type": "float",
95
+ "strategy": "tensor_group",
96
+ "group_size": 16,
97
+ "symmetric": true
98
+ },
99
+ "targets": [
100
+ "Linear"
101
+ ]
102
+ }
103
+ },
104
+ "format": "nvfp4-pack-quantized",
105
+ "ignore": [
106
+ "lm_head",
107
+ "re:visual.*",
108
+ "re:model.visual.*"
109
+ ],
110
+ "dynamic": true
111
+ }
112
+ }
eval_results/mmlu_results_20260112_072142.json ADDED
@@ -0,0 +1,373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "timestamp": "20260112_072142",
3
+ "model": "glm-4.6v",
4
+ "overall_accuracy": 0.7355789773536533,
5
+ "total_correct": 10329,
6
+ "total_questions": 14042,
7
+ "category_results": {
8
+ "STEM": {
9
+ "accuracy": 0.7025055502695845,
10
+ "correct": 2215,
11
+ "total": 3153
12
+ },
13
+ "Other": {
14
+ "accuracy": 0.7837141937560348,
15
+ "correct": 2435,
16
+ "total": 3107
17
+ },
18
+ "Social Sciences": {
19
+ "accuracy": 0.8189795255118621,
20
+ "correct": 2520,
21
+ "total": 3077
22
+ },
23
+ "Humanities": {
24
+ "accuracy": 0.6714133900106269,
25
+ "correct": 3159,
26
+ "total": 4705
27
+ }
28
+ },
29
+ "subject_results": [
30
+ {
31
+ "subject": "abstract_algebra",
32
+ "correct": 54,
33
+ "total": 100,
34
+ "accuracy": 0.54
35
+ },
36
+ {
37
+ "subject": "anatomy",
38
+ "correct": 104,
39
+ "total": 135,
40
+ "accuracy": 0.7703703703703704
41
+ },
42
+ {
43
+ "subject": "astronomy",
44
+ "correct": 131,
45
+ "total": 152,
46
+ "accuracy": 0.8618421052631579
47
+ },
48
+ {
49
+ "subject": "business_ethics",
50
+ "correct": 83,
51
+ "total": 100,
52
+ "accuracy": 0.83
53
+ },
54
+ {
55
+ "subject": "clinical_knowledge",
56
+ "correct": 201,
57
+ "total": 265,
58
+ "accuracy": 0.7584905660377359
59
+ },
60
+ {
61
+ "subject": "college_biology",
62
+ "correct": 123,
63
+ "total": 144,
64
+ "accuracy": 0.8541666666666666
65
+ },
66
+ {
67
+ "subject": "college_chemistry",
68
+ "correct": 55,
69
+ "total": 100,
70
+ "accuracy": 0.55
71
+ },
72
+ {
73
+ "subject": "college_computer_science",
74
+ "correct": 76,
75
+ "total": 100,
76
+ "accuracy": 0.76
77
+ },
78
+ {
79
+ "subject": "college_mathematics",
80
+ "correct": 56,
81
+ "total": 100,
82
+ "accuracy": 0.56
83
+ },
84
+ {
85
+ "subject": "college_medicine",
86
+ "correct": 130,
87
+ "total": 173,
88
+ "accuracy": 0.7514450867052023
89
+ },
90
+ {
91
+ "subject": "college_physics",
92
+ "correct": 66,
93
+ "total": 102,
94
+ "accuracy": 0.6470588235294118
95
+ },
96
+ {
97
+ "subject": "computer_security",
98
+ "correct": 72,
99
+ "total": 100,
100
+ "accuracy": 0.72
101
+ },
102
+ {
103
+ "subject": "conceptual_physics",
104
+ "correct": 191,
105
+ "total": 235,
106
+ "accuracy": 0.8127659574468085
107
+ },
108
+ {
109
+ "subject": "econometrics",
110
+ "correct": 72,
111
+ "total": 114,
112
+ "accuracy": 0.631578947368421
113
+ },
114
+ {
115
+ "subject": "electrical_engineering",
116
+ "correct": 104,
117
+ "total": 145,
118
+ "accuracy": 0.7172413793103448
119
+ },
120
+ {
121
+ "subject": "elementary_mathematics",
122
+ "correct": 250,
123
+ "total": 378,
124
+ "accuracy": 0.6613756613756614
125
+ },
126
+ {
127
+ "subject": "formal_logic",
128
+ "correct": 82,
129
+ "total": 126,
130
+ "accuracy": 0.6507936507936508
131
+ },
132
+ {
133
+ "subject": "global_facts",
134
+ "correct": 58,
135
+ "total": 100,
136
+ "accuracy": 0.58
137
+ },
138
+ {
139
+ "subject": "high_school_biology",
140
+ "correct": 273,
141
+ "total": 310,
142
+ "accuracy": 0.8806451612903226
143
+ },
144
+ {
145
+ "subject": "high_school_chemistry",
146
+ "correct": 140,
147
+ "total": 203,
148
+ "accuracy": 0.6896551724137931
149
+ },
150
+ {
151
+ "subject": "high_school_computer_science",
152
+ "correct": 68,
153
+ "total": 100,
154
+ "accuracy": 0.68
155
+ },
156
+ {
157
+ "subject": "high_school_european_history",
158
+ "correct": 141,
159
+ "total": 165,
160
+ "accuracy": 0.8545454545454545
161
+ },
162
+ {
163
+ "subject": "high_school_geography",
164
+ "correct": 171,
165
+ "total": 198,
166
+ "accuracy": 0.8636363636363636
167
+ },
168
+ {
169
+ "subject": "high_school_government_and_politics",
170
+ "correct": 175,
171
+ "total": 193,
172
+ "accuracy": 0.9067357512953368
173
+ },
174
+ {
175
+ "subject": "high_school_macroeconomics",
176
+ "correct": 314,
177
+ "total": 390,
178
+ "accuracy": 0.8051282051282052
179
+ },
180
+ {
181
+ "subject": "high_school_mathematics",
182
+ "correct": 128,
183
+ "total": 270,
184
+ "accuracy": 0.4740740740740741
185
+ },
186
+ {
187
+ "subject": "high_school_microeconomics",
188
+ "correct": 198,
189
+ "total": 238,
190
+ "accuracy": 0.8319327731092437
191
+ },
192
+ {
193
+ "subject": "high_school_physics",
194
+ "correct": 108,
195
+ "total": 151,
196
+ "accuracy": 0.7152317880794702
197
+ },
198
+ {
199
+ "subject": "high_school_psychology",
200
+ "correct": 488,
201
+ "total": 545,
202
+ "accuracy": 0.8954128440366973
203
+ },
204
+ {
205
+ "subject": "high_school_statistics",
206
+ "correct": 149,
207
+ "total": 216,
208
+ "accuracy": 0.6898148148148148
209
+ },
210
+ {
211
+ "subject": "high_school_us_history",
212
+ "correct": 180,
213
+ "total": 204,
214
+ "accuracy": 0.8823529411764706
215
+ },
216
+ {
217
+ "subject": "high_school_world_history",
218
+ "correct": 210,
219
+ "total": 237,
220
+ "accuracy": 0.8860759493670886
221
+ },
222
+ {
223
+ "subject": "human_aging",
224
+ "correct": 172,
225
+ "total": 223,
226
+ "accuracy": 0.7713004484304933
227
+ },
228
+ {
229
+ "subject": "human_sexuality",
230
+ "correct": 104,
231
+ "total": 131,
232
+ "accuracy": 0.7938931297709924
233
+ },
234
+ {
235
+ "subject": "international_law",
236
+ "correct": 96,
237
+ "total": 121,
238
+ "accuracy": 0.7933884297520661
239
+ },
240
+ {
241
+ "subject": "jurisprudence",
242
+ "correct": 89,
243
+ "total": 108,
244
+ "accuracy": 0.8240740740740741
245
+ },
246
+ {
247
+ "subject": "logical_fallacies",
248
+ "correct": 131,
249
+ "total": 163,
250
+ "accuracy": 0.803680981595092
251
+ },
252
+ {
253
+ "subject": "machine_learning",
254
+ "correct": 67,
255
+ "total": 112,
256
+ "accuracy": 0.5982142857142857
257
+ },
258
+ {
259
+ "subject": "management",
260
+ "correct": 87,
261
+ "total": 103,
262
+ "accuracy": 0.8446601941747572
263
+ },
264
+ {
265
+ "subject": "marketing",
266
+ "correct": 191,
267
+ "total": 234,
268
+ "accuracy": 0.8162393162393162
269
+ },
270
+ {
271
+ "subject": "medical_genetics",
272
+ "correct": 83,
273
+ "total": 100,
274
+ "accuracy": 0.83
275
+ },
276
+ {
277
+ "subject": "miscellaneous",
278
+ "correct": 686,
279
+ "total": 783,
280
+ "accuracy": 0.876117496807152
281
+ },
282
+ {
283
+ "subject": "moral_disputes",
284
+ "correct": 252,
285
+ "total": 346,
286
+ "accuracy": 0.7283236994219653
287
+ },
288
+ {
289
+ "subject": "moral_scenarios",
290
+ "correct": 409,
291
+ "total": 895,
292
+ "accuracy": 0.4569832402234637
293
+ },
294
+ {
295
+ "subject": "nutrition",
296
+ "correct": 246,
297
+ "total": 306,
298
+ "accuracy": 0.803921568627451
299
+ },
300
+ {
301
+ "subject": "philosophy",
302
+ "correct": 237,
303
+ "total": 311,
304
+ "accuracy": 0.7620578778135049
305
+ },
306
+ {
307
+ "subject": "prehistory",
308
+ "correct": 268,
309
+ "total": 324,
310
+ "accuracy": 0.8271604938271605
311
+ },
312
+ {
313
+ "subject": "professional_accounting",
314
+ "correct": 173,
315
+ "total": 282,
316
+ "accuracy": 0.6134751773049646
317
+ },
318
+ {
319
+ "subject": "professional_law",
320
+ "correct": 926,
321
+ "total": 1534,
322
+ "accuracy": 0.6036505867014341
323
+ },
324
+ {
325
+ "subject": "professional_medicine",
326
+ "correct": 240,
327
+ "total": 272,
328
+ "accuracy": 0.8823529411764706
329
+ },
330
+ {
331
+ "subject": "professional_psychology",
332
+ "correct": 495,
333
+ "total": 612,
334
+ "accuracy": 0.8088235294117647
335
+ },
336
+ {
337
+ "subject": "public_relations",
338
+ "correct": 74,
339
+ "total": 110,
340
+ "accuracy": 0.6727272727272727
341
+ },
342
+ {
343
+ "subject": "security_studies",
344
+ "correct": 185,
345
+ "total": 245,
346
+ "accuracy": 0.7551020408163265
347
+ },
348
+ {
349
+ "subject": "sociology",
350
+ "correct": 154,
351
+ "total": 201,
352
+ "accuracy": 0.7661691542288557
353
+ },
354
+ {
355
+ "subject": "us_foreign_policy",
356
+ "correct": 90,
357
+ "total": 100,
358
+ "accuracy": 0.9
359
+ },
360
+ {
361
+ "subject": "virology",
362
+ "correct": 85,
363
+ "total": 166,
364
+ "accuracy": 0.5120481927710844
365
+ },
366
+ {
367
+ "subject": "world_religions",
368
+ "correct": 138,
369
+ "total": 171,
370
+ "accuracy": 0.8070175438596491
371
+ }
372
+ ]
373
+ }
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151329,
6
+ 151336,
7
+ 151338
8
+ ],
9
+ "pad_token_id": 151329,
10
+ "top_p": 0.6,
11
+ "temperature": 0.8,
12
+ "top_k": 2,
13
+ "transformers_version": "5.0.0rc0"
14
+ }
hf_quant_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "producer": {
3
+ "name": "glm4v-nvfp4-layer-by-layer",
4
+ "version": "1.0.0"
5
+ },
6
+ "quantization": {
7
+ "quant_algo": "NVFP4",
8
+ "kv_cache_quant_algo": null,
9
+ "group_size": 16
10
+ }
11
+ }
model-00000-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c226f163bfa5dd3ff157003ebbc82d7d5b450e909c0e6c9520835063ae65594
3
+ size 5368529744
model-00001-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05db2e38a8211f9b3f6e2de358c30c195b194e684d7050a51a34a72f9391361c
3
+ size 5369359944
model-00002-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29649ab31148f1ca7694a9ea75345f16c421444ca4e400595b7bca8745298d67
3
+ size 5366840544
model-00003-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb2a63f302651db9ca473d4e9c7bab156d26542d973ec066af9b97efd129ed0c
3
+ size 5369366488
model-00004-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be2ba213afbe7f9b982074683b3b4df4f48de7e9a5b61c3a891e9482cb085ab5
3
+ size 5366843744
model-00005-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f6e4a9c32e4ef6c8a6fa6bb3e6ce539c03ed6e02437cbaff0720a34ca6abb25
3
+ size 5369366648
model-00006-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88be4823bcda218061f81e8528003dde65e6a185af085c300d09f65345de650e
3
+ size 5366843808
model-00007-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2548081d41090e456124d07e66391fd34d5fe2e9f922eb083fbe2af35df2c32f
3
+ size 5369366736
model-00008-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7862961f1285df37c4fe9ca0d7c246cdca5ef31bfac0ab53c0cca01e47374d8a
3
+ size 5366843928
model-00009-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1704d31f562aeaff92abd1e23319e8639cd10b683914064f59320e40b2681a84
3
+ size 5369366696
model-00010-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0d57bba9b7f4e473ef863081d7c743d2381e2a666f68c2d08ea978f6d9b8f67
3
+ size 5366843960
model-00011-of-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f714c97e96028748a2fd095af5a291ffd3dc24d4e11670770fc758abe3b877f2
3
+ size 4602256272
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
preprocessor_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "size": {"shortest_edge": 12544, "longest_edge": 9633792},
3
+ "do_rescale": true,
4
+ "patch_size": 14,
5
+ "temporal_patch_size": 2,
6
+ "merge_size": 2,
7
+ "image_mean": [0.48145466, 0.4578275, 0.40821073],
8
+ "image_std": [0.26862954, 0.26130258, 0.27577711],
9
+ "image_processor_type": "Glm46VImageProcessor",
10
+ "processor_class": "Glm46VProcessor"
11
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9340665016419c825c4bdabbcc9acc43b7ca2c68ce142724afa829abb1be5efd
3
+ size 19970699
tokenizer_config.json ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "151329": {
4
+ "content": "<|endoftext|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "151330": {
12
+ "content": "[MASK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "151331": {
20
+ "content": "[gMASK]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "151332": {
28
+ "content": "[sMASK]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "151333": {
36
+ "content": "<sop>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "151334": {
44
+ "content": "<eop>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "151335": {
52
+ "content": "<|system|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "151336": {
60
+ "content": "<|user|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "151337": {
68
+ "content": "<|assistant|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "151338": {
76
+ "content": "<|observation|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "151339": {
84
+ "content": "<|begin_of_image|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "151340": {
92
+ "content": "<|end_of_image|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "151341": {
100
+ "content": "<|begin_of_video|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "151342": {
108
+ "content": "<|end_of_video|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "151343": {
116
+ "content": "<|begin_of_audio|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "151344": {
124
+ "content": "<|end_of_audio|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "151345": {
132
+ "content": "<|begin_of_transcription|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "151346": {
140
+ "content": "<|end_of_transcription|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "151347": {
148
+ "content": "<|code_prefix|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "151348": {
156
+ "content": "<|code_middle|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "151349": {
164
+ "content": "<|code_suffix|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "151350": {
172
+ "content": "<think>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": false
178
+ },
179
+ "151351": {
180
+ "content": "</think>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": false
186
+ },
187
+ "151352": {
188
+ "content": "<tool_call>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": false
194
+ },
195
+ "151353": {
196
+ "content": "</tool_call>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": false
202
+ },
203
+ "151354": {
204
+ "content": "<tool_response>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": false
210
+ },
211
+ "151355": {
212
+ "content": "</tool_response>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": false
218
+ },
219
+ "151356": {
220
+ "content": "<arg_key>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": false
226
+ },
227
+ "151357": {
228
+ "content": "</arg_key>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": false
234
+ },
235
+ "151358": {
236
+ "content": "<arg_value>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": false
242
+ },
243
+ "151359": {
244
+ "content": "</arg_value>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": false
250
+ },
251
+ "151360": {
252
+ "content": "/nothink",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "151361": {
260
+ "content": "<|begin_of_box|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": false
266
+ },
267
+ "151362": {
268
+ "content": "<|end_of_box|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": false
274
+ },
275
+ "151363": {
276
+ "content": "<|image|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": false
282
+ },
283
+ "151364": {
284
+ "content": "<|video|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": false
290
+ }
291
+ },
292
+ "additional_special_tokens": [
293
+ "<|endoftext|>",
294
+ "[MASK]",
295
+ "[gMASK]",
296
+ "[sMASK]",
297
+ "<sop>",
298
+ "<eop>",
299
+ "<|system|>",
300
+ "<|user|>",
301
+ "<|assistant|>",
302
+ "<|observation|>",
303
+ "<|begin_of_image|>",
304
+ "<|end_of_image|>",
305
+ "<|begin_of_video|>",
306
+ "<|end_of_video|>",
307
+ "<|begin_of_audio|>",
308
+ "<|end_of_audio|>",
309
+ "<|image|>",
310
+ "<|video|>",
311
+ "<|begin_of_transcription|>",
312
+ "<|end_of_transcription|>",
313
+ "<|code_prefix|>",
314
+ "<|code_middle|>",
315
+ "<|code_suffix|>",
316
+ "/nothink"
317
+ ],
318
+ "clean_up_tokenization_spaces": false,
319
+ "do_lower_case": false,
320
+ "eos_token": "<|endoftext|>",
321
+ "extra_special_tokens": {},
322
+ "model_max_length": 128000,
323
+ "pad_token": "<|endoftext|>",
324
+ "padding_side": "left",
325
+ "remove_space": false,
326
+ "tokenizer_class": "PreTrainedTokenizer"
327
+ }
video_preprocessor_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "size": {"shortest_edge": 12544, "longest_edge": 47040000},
3
+ "do_rescale": true,
4
+ "patch_size": 14,
5
+ "temporal_patch_size": 2,
6
+ "merge_size": 2,
7
+ "image_mean": [0.48145466, 0.4578275, 0.40821073],
8
+ "image_std": [0.26862954, 0.26130258, 0.27577711],
9
+ "video_processor_type": "Glm46VVideoProcessor",
10
+ "processor_class": "Glm46VProcessor"
11
+ }