ekurtic committed on
Commit
e50edfe
·
verified ·
1 Parent(s): 212046d

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if messages | length % 2 == 0 -%}
2
+ {%- set role = 'Agent' -%}
3
+ {%- else -%}
4
+ {%- set role = 'User' -%}
5
+ {%- endif -%}
6
+
7
+ {%- if categories is not defined -%}
8
+ {%- set ns = namespace(contains_image=false) -%}
9
+
10
+ {%- for message in messages -%}
11
+ {%- for content in message['content'] | selectattr('type', 'equalto', 'image') -%}
12
+ {%- set ns.contains_image = true -%}
13
+ {%- endfor -%}
14
+ {%- endfor -%}
15
+
16
+ {%- if ns.contains_image -%}
17
+ {%- set categories = {
18
+ "S1": "Violent Crimes.",
19
+ "S2": "Non-Violent Crimes.",
20
+ "S3": "Sex Crimes.",
21
+ "S4": "Child Exploitation.",
22
+ "S5": "Defamation.",
23
+ "S6": "Specialized Advice.",
24
+ "S7": "Privacy.",
25
+ "S8": "Intellectual Property.",
26
+ "S9": "Indiscriminate Weapons.",
27
+ "S10": "Hate.",
28
+ "S11": "Self-Harm.",
29
+ "S12": "Sexual Content.",
30
+ "S13": "Elections.",
31
+ } -%}
32
+ {%- else -%}
33
+ {%- set categories = {
34
+ "S1": "Violent Crimes.",
35
+ "S2": "Non-Violent Crimes.",
36
+ "S3": "Sex Crimes.",
37
+ "S4": "Child Exploitation.",
38
+ "S5": "Defamation.",
39
+ "S6": "Specialized Advice.",
40
+ "S7": "Privacy.",
41
+ "S8": "Intellectual Property.",
42
+ "S9": "Indiscriminate Weapons.",
43
+ "S10": "Hate.",
44
+ "S11": "Self-Harm.",
45
+ "S12": "Sexual Content.",
46
+ "S13": "Elections.",
47
+ "S14": "Code Interpreter Abuse.",
48
+ } -%}
49
+ {%- endif -%}
50
+ {%- endif -%}
51
+
52
+ {{ "<|begin_of_text|><|header_start|>user<|header_end|>\n" }}
53
+ {{ "Task: Check if there is unsafe content in '" + role + "' messages in conversations according to our safety policy with the below categories.\n" }}
54
+ {{ "<BEGIN UNSAFE CONTENT CATEGORIES>\n" }}
55
+
56
+ {%- for key in categories -%}
57
+ {%- if key not in excluded_category_keys -%}
58
+ {{ key + ": " + categories[key] + "\n" }}
59
+ {%- endif -%}
60
+ {%- endfor -%}
61
+
62
+ {{ "<END UNSAFE CONTENT CATEGORIES>\n" }}
63
+ {{ "<BEGIN CONVERSATION>\n" }}
64
+
65
+ {%- for message in messages -%}
66
+ {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
67
+ {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
68
+ {%- endif -%}
69
+ {#- Render one conversation turn. Content may be a list of typed parts or a bare string; a bare string is wrapped as a single text part, because selectattr over a string matches nothing and the turn would be dropped silently. -#}
70
+ {%- set content = message['content'] -%}
71
+ {%- if content is string -%}{%- set content = [{'type': 'text', 'text': content}] -%}{%- endif -%}
72
+ {%- if message['role'] == 'user' -%}
73
+ {%- set role = 'User' -%}
74
+ {%- elif message['role'] == 'assistant' -%}
75
+ {%- set role = 'Agent' -%}
76
+ {%- endif -%}
77
+
78
+ {#— #1 Accumulate all images for this message —#}
79
+ {%- set ns = namespace(img_tokens='') -%}
80
+ {%- for img in content | selectattr('type', 'equalto', 'image') -%}
81
+ {%- set ns.img_tokens = ns.img_tokens + '<|image|>' -%}
82
+ {%- endfor -%}
83
+
84
+ {#— #2 Then emit all text —#}
85
+ {%- for txt in content | selectattr('type','equalto','text') -%}
86
+ {{ role + ': ' }}{{ ns.img_tokens }}{{ txt.text | trim }}{{ "\n\n" -}}
87
+ {#- The blank-line separator is emitted from a string literal: a bare \n\n in template text is not an escape sequence and would be printed as literal backslash characters. The trailing whitespace control keeps the separator at exactly two newlines. -#}
88
+ {#— clear the buffer so only first text prints images —#}
89
+ {%- if loop.first -%}
90
+ {%- set ns.img_tokens = '' -%}
91
+ {%- endif -%}
92
+ {%- endfor -%}
93
+ {%- endfor -%}
94
+
95
+ {{ "<END CONVERSATION>\n" }}
96
+ {{ "Provide your safety assessment for ONLY THE LAST " + role + " message in the above conversation:" }}
97
+ {{ " - First line must read 'safe' or 'unsafe'." }}
98
+ {{ " - If unsafe, a second line must include a comma-separated list of violated categories. <|eot|><|header_start|>assistant<|header_end|>" }}
config.json ADDED
@@ -0,0 +1,393 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Llama4ForConditionalGeneration"
4
+ ],
5
+ "boi_token_index": 200080,
6
+ "eoi_token_index": 200081,
7
+ "image_token_index": 200092,
8
+ "model_type": "llama4",
9
+ "text_config": {
10
+ "_attn_implementation_autoset": true,
11
+ "attention_bias": false,
12
+ "attention_chunk_size": 8192,
13
+ "attention_dropout": 0.0,
14
+ "attn_scale": 0.1,
15
+ "attn_temperature_tuning": true,
16
+ "bos_token_id": 200000,
17
+ "cache_implementation": "static",
18
+ "eos_token_id": [
19
+ 200001,
20
+ 200007,
21
+ 200008
22
+ ],
23
+ "floor_scale": 8192,
24
+ "for_llm_compressor": false,
25
+ "head_dim": 128,
26
+ "hidden_act": "silu",
27
+ "hidden_size": 5120,
28
+ "initializer_range": 0.02,
29
+ "interleave_moe_layer_step": 0,
30
+ "intermediate_size": 8192,
31
+ "intermediate_size_mlp": 8192,
32
+ "max_position_embeddings": 10485760,
33
+ "model_type": "llama4_text",
34
+ "moe_layers": [],
35
+ "no_rope_layers": [
36
+ 1,
37
+ 1,
38
+ 1,
39
+ 1,
40
+ 1,
41
+ 1,
42
+ 1,
43
+ 1,
44
+ 1,
45
+ 1,
46
+ 1,
47
+ 1,
48
+ 1,
49
+ 1,
50
+ 1,
51
+ 1,
52
+ 1,
53
+ 1,
54
+ 1,
55
+ 1,
56
+ 1,
57
+ 1,
58
+ 1,
59
+ 1,
60
+ 1,
61
+ 1,
62
+ 1,
63
+ 1,
64
+ 1,
65
+ 1,
66
+ 1,
67
+ 1,
68
+ 1,
69
+ 1,
70
+ 1,
71
+ 1,
72
+ 1,
73
+ 1,
74
+ 1,
75
+ 1,
76
+ 1,
77
+ 1,
78
+ 1,
79
+ 1,
80
+ 1,
81
+ 1,
82
+ 1,
83
+ 1
84
+ ],
85
+ "num_attention_heads": 40,
86
+ "num_experts_per_tok": 1,
87
+ "num_hidden_layers": 48,
88
+ "num_key_value_heads": 8,
89
+ "num_local_experts": 0,
90
+ "output_router_logits": false,
91
+ "pad_token_id": 200018,
92
+ "rms_norm_eps": 1e-05,
93
+ "rope_scaling": {
94
+ "factor": 16.0,
95
+ "high_freq_factor": 1.0,
96
+ "low_freq_factor": 1.0,
97
+ "original_max_position_embeddings": 8192,
98
+ "rope_type": "llama3"
99
+ },
100
+ "rope_theta": 500000.0,
101
+ "router_aux_loss_coef": 0.001,
102
+ "router_jitter_noise": 0.0,
103
+ "torch_dtype": "bfloat16",
104
+ "use_cache": true,
105
+ "use_qk_norm": true,
106
+ "vocab_size": 202048
107
+ },
108
+ "tie_word_embeddings": false,
109
+ "torch_dtype": "bfloat16",
110
+ "transformers_version": "4.52.0.dev0",
111
+ "vision_config": {
112
+ "_attn_implementation_autoset": true,
113
+ "attention_dropout": 0.0,
114
+ "hidden_act": "gelu",
115
+ "hidden_size": 1408,
116
+ "image_size": 336,
117
+ "initializer_range": 0.02,
118
+ "intermediate_size": 5632,
119
+ "model_type": "llama4_vision_model",
120
+ "multi_modal_projector_bias": false,
121
+ "norm_eps": 1e-05,
122
+ "num_attention_heads": 16,
123
+ "num_channels": 3,
124
+ "num_hidden_layers": 34,
125
+ "patch_size": 14,
126
+ "pixel_shuffle_ratio": 0.5,
127
+ "projector_dropout": 0.0,
128
+ "projector_input_dim": 4096,
129
+ "projector_output_dim": 4096,
130
+ "rope_theta": 10000,
131
+ "vision_feature_layer": -1,
132
+ "vision_feature_select_strategy": "default",
133
+ "vision_output_dim": 4096
134
+ },
135
+ "quantization_config": {
136
+ "config_groups": {
137
+ "group_0": {
138
+ "format": "int-quantized",
139
+ "input_activations": {
140
+ "actorder": null,
141
+ "block_structure": null,
142
+ "dynamic": true,
143
+ "group_size": null,
144
+ "num_bits": 8,
145
+ "observer": null,
146
+ "observer_kwargs": {},
147
+ "scale_dtype": null,
148
+ "strategy": "token",
149
+ "symmetric": true,
150
+ "type": "int",
151
+ "zp_dtype": null
152
+ },
153
+ "output_activations": null,
154
+ "targets": [
155
+ "Linear"
156
+ ],
157
+ "weights": {
158
+ "actorder": null,
159
+ "block_structure": null,
160
+ "dynamic": false,
161
+ "group_size": null,
162
+ "num_bits": 8,
163
+ "observer": "minmax",
164
+ "observer_kwargs": {},
165
+ "scale_dtype": null,
166
+ "strategy": "channel",
167
+ "symmetric": true,
168
+ "type": "int",
169
+ "zp_dtype": null
170
+ }
171
+ }
172
+ },
173
+ "format": "int-quantized",
174
+ "global_compression_ratio": null,
175
+ "ignore": [
176
+ "vision_model.patch_embedding.linear",
177
+ "vision_model.model.layers.0.self_attn.q_proj",
178
+ "vision_model.model.layers.0.self_attn.k_proj",
179
+ "vision_model.model.layers.0.self_attn.v_proj",
180
+ "vision_model.model.layers.0.self_attn.o_proj",
181
+ "vision_model.model.layers.0.mlp.fc1",
182
+ "vision_model.model.layers.0.mlp.fc2",
183
+ "vision_model.model.layers.1.self_attn.q_proj",
184
+ "vision_model.model.layers.1.self_attn.k_proj",
185
+ "vision_model.model.layers.1.self_attn.v_proj",
186
+ "vision_model.model.layers.1.self_attn.o_proj",
187
+ "vision_model.model.layers.1.mlp.fc1",
188
+ "vision_model.model.layers.1.mlp.fc2",
189
+ "vision_model.model.layers.2.self_attn.q_proj",
190
+ "vision_model.model.layers.2.self_attn.k_proj",
191
+ "vision_model.model.layers.2.self_attn.v_proj",
192
+ "vision_model.model.layers.2.self_attn.o_proj",
193
+ "vision_model.model.layers.2.mlp.fc1",
194
+ "vision_model.model.layers.2.mlp.fc2",
195
+ "vision_model.model.layers.3.self_attn.q_proj",
196
+ "vision_model.model.layers.3.self_attn.k_proj",
197
+ "vision_model.model.layers.3.self_attn.v_proj",
198
+ "vision_model.model.layers.3.self_attn.o_proj",
199
+ "vision_model.model.layers.3.mlp.fc1",
200
+ "vision_model.model.layers.3.mlp.fc2",
201
+ "vision_model.model.layers.4.self_attn.q_proj",
202
+ "vision_model.model.layers.4.self_attn.k_proj",
203
+ "vision_model.model.layers.4.self_attn.v_proj",
204
+ "vision_model.model.layers.4.self_attn.o_proj",
205
+ "vision_model.model.layers.4.mlp.fc1",
206
+ "vision_model.model.layers.4.mlp.fc2",
207
+ "vision_model.model.layers.5.self_attn.q_proj",
208
+ "vision_model.model.layers.5.self_attn.k_proj",
209
+ "vision_model.model.layers.5.self_attn.v_proj",
210
+ "vision_model.model.layers.5.self_attn.o_proj",
211
+ "vision_model.model.layers.5.mlp.fc1",
212
+ "vision_model.model.layers.5.mlp.fc2",
213
+ "vision_model.model.layers.6.self_attn.q_proj",
214
+ "vision_model.model.layers.6.self_attn.k_proj",
215
+ "vision_model.model.layers.6.self_attn.v_proj",
216
+ "vision_model.model.layers.6.self_attn.o_proj",
217
+ "vision_model.model.layers.6.mlp.fc1",
218
+ "vision_model.model.layers.6.mlp.fc2",
219
+ "vision_model.model.layers.7.self_attn.q_proj",
220
+ "vision_model.model.layers.7.self_attn.k_proj",
221
+ "vision_model.model.layers.7.self_attn.v_proj",
222
+ "vision_model.model.layers.7.self_attn.o_proj",
223
+ "vision_model.model.layers.7.mlp.fc1",
224
+ "vision_model.model.layers.7.mlp.fc2",
225
+ "vision_model.model.layers.8.self_attn.q_proj",
226
+ "vision_model.model.layers.8.self_attn.k_proj",
227
+ "vision_model.model.layers.8.self_attn.v_proj",
228
+ "vision_model.model.layers.8.self_attn.o_proj",
229
+ "vision_model.model.layers.8.mlp.fc1",
230
+ "vision_model.model.layers.8.mlp.fc2",
231
+ "vision_model.model.layers.9.self_attn.q_proj",
232
+ "vision_model.model.layers.9.self_attn.k_proj",
233
+ "vision_model.model.layers.9.self_attn.v_proj",
234
+ "vision_model.model.layers.9.self_attn.o_proj",
235
+ "vision_model.model.layers.9.mlp.fc1",
236
+ "vision_model.model.layers.9.mlp.fc2",
237
+ "vision_model.model.layers.10.self_attn.q_proj",
238
+ "vision_model.model.layers.10.self_attn.k_proj",
239
+ "vision_model.model.layers.10.self_attn.v_proj",
240
+ "vision_model.model.layers.10.self_attn.o_proj",
241
+ "vision_model.model.layers.10.mlp.fc1",
242
+ "vision_model.model.layers.10.mlp.fc2",
243
+ "vision_model.model.layers.11.self_attn.q_proj",
244
+ "vision_model.model.layers.11.self_attn.k_proj",
245
+ "vision_model.model.layers.11.self_attn.v_proj",
246
+ "vision_model.model.layers.11.self_attn.o_proj",
247
+ "vision_model.model.layers.11.mlp.fc1",
248
+ "vision_model.model.layers.11.mlp.fc2",
249
+ "vision_model.model.layers.12.self_attn.q_proj",
250
+ "vision_model.model.layers.12.self_attn.k_proj",
251
+ "vision_model.model.layers.12.self_attn.v_proj",
252
+ "vision_model.model.layers.12.self_attn.o_proj",
253
+ "vision_model.model.layers.12.mlp.fc1",
254
+ "vision_model.model.layers.12.mlp.fc2",
255
+ "vision_model.model.layers.13.self_attn.q_proj",
256
+ "vision_model.model.layers.13.self_attn.k_proj",
257
+ "vision_model.model.layers.13.self_attn.v_proj",
258
+ "vision_model.model.layers.13.self_attn.o_proj",
259
+ "vision_model.model.layers.13.mlp.fc1",
260
+ "vision_model.model.layers.13.mlp.fc2",
261
+ "vision_model.model.layers.14.self_attn.q_proj",
262
+ "vision_model.model.layers.14.self_attn.k_proj",
263
+ "vision_model.model.layers.14.self_attn.v_proj",
264
+ "vision_model.model.layers.14.self_attn.o_proj",
265
+ "vision_model.model.layers.14.mlp.fc1",
266
+ "vision_model.model.layers.14.mlp.fc2",
267
+ "vision_model.model.layers.15.self_attn.q_proj",
268
+ "vision_model.model.layers.15.self_attn.k_proj",
269
+ "vision_model.model.layers.15.self_attn.v_proj",
270
+ "vision_model.model.layers.15.self_attn.o_proj",
271
+ "vision_model.model.layers.15.mlp.fc1",
272
+ "vision_model.model.layers.15.mlp.fc2",
273
+ "vision_model.model.layers.16.self_attn.q_proj",
274
+ "vision_model.model.layers.16.self_attn.k_proj",
275
+ "vision_model.model.layers.16.self_attn.v_proj",
276
+ "vision_model.model.layers.16.self_attn.o_proj",
277
+ "vision_model.model.layers.16.mlp.fc1",
278
+ "vision_model.model.layers.16.mlp.fc2",
279
+ "vision_model.model.layers.17.self_attn.q_proj",
280
+ "vision_model.model.layers.17.self_attn.k_proj",
281
+ "vision_model.model.layers.17.self_attn.v_proj",
282
+ "vision_model.model.layers.17.self_attn.o_proj",
283
+ "vision_model.model.layers.17.mlp.fc1",
284
+ "vision_model.model.layers.17.mlp.fc2",
285
+ "vision_model.model.layers.18.self_attn.q_proj",
286
+ "vision_model.model.layers.18.self_attn.k_proj",
287
+ "vision_model.model.layers.18.self_attn.v_proj",
288
+ "vision_model.model.layers.18.self_attn.o_proj",
289
+ "vision_model.model.layers.18.mlp.fc1",
290
+ "vision_model.model.layers.18.mlp.fc2",
291
+ "vision_model.model.layers.19.self_attn.q_proj",
292
+ "vision_model.model.layers.19.self_attn.k_proj",
293
+ "vision_model.model.layers.19.self_attn.v_proj",
294
+ "vision_model.model.layers.19.self_attn.o_proj",
295
+ "vision_model.model.layers.19.mlp.fc1",
296
+ "vision_model.model.layers.19.mlp.fc2",
297
+ "vision_model.model.layers.20.self_attn.q_proj",
298
+ "vision_model.model.layers.20.self_attn.k_proj",
299
+ "vision_model.model.layers.20.self_attn.v_proj",
300
+ "vision_model.model.layers.20.self_attn.o_proj",
301
+ "vision_model.model.layers.20.mlp.fc1",
302
+ "vision_model.model.layers.20.mlp.fc2",
303
+ "vision_model.model.layers.21.self_attn.q_proj",
304
+ "vision_model.model.layers.21.self_attn.k_proj",
305
+ "vision_model.model.layers.21.self_attn.v_proj",
306
+ "vision_model.model.layers.21.self_attn.o_proj",
307
+ "vision_model.model.layers.21.mlp.fc1",
308
+ "vision_model.model.layers.21.mlp.fc2",
309
+ "vision_model.model.layers.22.self_attn.q_proj",
310
+ "vision_model.model.layers.22.self_attn.k_proj",
311
+ "vision_model.model.layers.22.self_attn.v_proj",
312
+ "vision_model.model.layers.22.self_attn.o_proj",
313
+ "vision_model.model.layers.22.mlp.fc1",
314
+ "vision_model.model.layers.22.mlp.fc2",
315
+ "vision_model.model.layers.23.self_attn.q_proj",
316
+ "vision_model.model.layers.23.self_attn.k_proj",
317
+ "vision_model.model.layers.23.self_attn.v_proj",
318
+ "vision_model.model.layers.23.self_attn.o_proj",
319
+ "vision_model.model.layers.23.mlp.fc1",
320
+ "vision_model.model.layers.23.mlp.fc2",
321
+ "vision_model.model.layers.24.self_attn.q_proj",
322
+ "vision_model.model.layers.24.self_attn.k_proj",
323
+ "vision_model.model.layers.24.self_attn.v_proj",
324
+ "vision_model.model.layers.24.self_attn.o_proj",
325
+ "vision_model.model.layers.24.mlp.fc1",
326
+ "vision_model.model.layers.24.mlp.fc2",
327
+ "vision_model.model.layers.25.self_attn.q_proj",
328
+ "vision_model.model.layers.25.self_attn.k_proj",
329
+ "vision_model.model.layers.25.self_attn.v_proj",
330
+ "vision_model.model.layers.25.self_attn.o_proj",
331
+ "vision_model.model.layers.25.mlp.fc1",
332
+ "vision_model.model.layers.25.mlp.fc2",
333
+ "vision_model.model.layers.26.self_attn.q_proj",
334
+ "vision_model.model.layers.26.self_attn.k_proj",
335
+ "vision_model.model.layers.26.self_attn.v_proj",
336
+ "vision_model.model.layers.26.self_attn.o_proj",
337
+ "vision_model.model.layers.26.mlp.fc1",
338
+ "vision_model.model.layers.26.mlp.fc2",
339
+ "vision_model.model.layers.27.self_attn.q_proj",
340
+ "vision_model.model.layers.27.self_attn.k_proj",
341
+ "vision_model.model.layers.27.self_attn.v_proj",
342
+ "vision_model.model.layers.27.self_attn.o_proj",
343
+ "vision_model.model.layers.27.mlp.fc1",
344
+ "vision_model.model.layers.27.mlp.fc2",
345
+ "vision_model.model.layers.28.self_attn.q_proj",
346
+ "vision_model.model.layers.28.self_attn.k_proj",
347
+ "vision_model.model.layers.28.self_attn.v_proj",
348
+ "vision_model.model.layers.28.self_attn.o_proj",
349
+ "vision_model.model.layers.28.mlp.fc1",
350
+ "vision_model.model.layers.28.mlp.fc2",
351
+ "vision_model.model.layers.29.self_attn.q_proj",
352
+ "vision_model.model.layers.29.self_attn.k_proj",
353
+ "vision_model.model.layers.29.self_attn.v_proj",
354
+ "vision_model.model.layers.29.self_attn.o_proj",
355
+ "vision_model.model.layers.29.mlp.fc1",
356
+ "vision_model.model.layers.29.mlp.fc2",
357
+ "vision_model.model.layers.30.self_attn.q_proj",
358
+ "vision_model.model.layers.30.self_attn.k_proj",
359
+ "vision_model.model.layers.30.self_attn.v_proj",
360
+ "vision_model.model.layers.30.self_attn.o_proj",
361
+ "vision_model.model.layers.30.mlp.fc1",
362
+ "vision_model.model.layers.30.mlp.fc2",
363
+ "vision_model.model.layers.31.self_attn.q_proj",
364
+ "vision_model.model.layers.31.self_attn.k_proj",
365
+ "vision_model.model.layers.31.self_attn.v_proj",
366
+ "vision_model.model.layers.31.self_attn.o_proj",
367
+ "vision_model.model.layers.31.mlp.fc1",
368
+ "vision_model.model.layers.31.mlp.fc2",
369
+ "vision_model.model.layers.32.self_attn.q_proj",
370
+ "vision_model.model.layers.32.self_attn.k_proj",
371
+ "vision_model.model.layers.32.self_attn.v_proj",
372
+ "vision_model.model.layers.32.self_attn.o_proj",
373
+ "vision_model.model.layers.32.mlp.fc1",
374
+ "vision_model.model.layers.32.mlp.fc2",
375
+ "vision_model.model.layers.33.self_attn.q_proj",
376
+ "vision_model.model.layers.33.self_attn.k_proj",
377
+ "vision_model.model.layers.33.self_attn.v_proj",
378
+ "vision_model.model.layers.33.self_attn.o_proj",
379
+ "vision_model.model.layers.33.mlp.fc1",
380
+ "vision_model.model.layers.33.mlp.fc2",
381
+ "vision_model.vision_adapter.mlp.fc1",
382
+ "vision_model.vision_adapter.mlp.fc2",
383
+ "multi_modal_projector.linear_1",
384
+ "language_model.lm_head"
385
+ ],
386
+ "kv_cache_scheme": null,
387
+ "quant_method": "compressed-tensors",
388
+ "quantization_status": "compressed",
389
+ "sparsity_config": {},
390
+ "transform_config": {},
391
+ "version": "0.13.0"
392
+ }
393
+ }
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 200000,
4
+ "cache_implementation": "static",
5
+ "eos_token_id": [
6
+ 200001,
7
+ 200007,
8
+ 200008
9
+ ],
10
+ "pad_token_id": 200018,
11
+ "transformers_version": "4.52.0.dev0"
12
+ }
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95bab8e772e1d5b78846d8e9d3c0b79976b397d33ae84739d6c3ce46d92d0872
3
+ size 4982624384
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d629ab718e2b33a34d3be6f8f7961d0d46a6c420fc7a67380a76e101882e64d
3
+ size 4977864480
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df5ed4df2e3c673416e0f6e15f13136df83965e6422a3c0be4866b05fdbe04b2
3
+ size 4985424632
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
preprocessor_config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": null,
3
+ "data_format": "channels_first",
4
+ "default_to_square": true,
5
+ "device": null,
6
+ "do_center_crop": null,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "image_processor_type": "Llama4ImageProcessorFast",
17
+ "image_std": [
18
+ 0.5,
19
+ 0.5,
20
+ 0.5
21
+ ],
22
+ "input_data_format": null,
23
+ "max_patches": 16,
24
+ "processor_class": "Llama4Processor",
25
+ "resample": 2,
26
+ "rescale_factor": 0.00392156862745098,
27
+ "resize_to_max_canvas": false,
28
+ "return_tensors": null,
29
+ "size": {
30
+ "height": 336,
31
+ "width": 336
32
+ }
33
+ }
processor_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "fake_image_token": "<|image|>",
3
+ "image_token": "<|image|>",
4
+ "patch_size": 14,
5
+ "processor_class": "Llama4Processor"
6
+ }
recipe.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ default_stage:
2
+ default_modifiers:
3
+ GPTQModifier:
4
+ config_groups:
5
+ group_0:
6
+ targets: [Linear]
7
+ weights:
8
+ num_bits: 8
9
+ type: int
10
+ symmetric: true
11
+ group_size: null
12
+ strategy: channel
13
+ block_structure: null
14
+ dynamic: false
15
+ actorder: null
16
+ scale_dtype: null
17
+ zp_dtype: null
18
+ observer: minmax
19
+ observer_kwargs: {}
20
+ input_activations:
21
+ num_bits: 8
22
+ type: int
23
+ symmetric: true
24
+ group_size: null
25
+ strategy: token
26
+ block_structure: null
27
+ dynamic: true
28
+ actorder: null
29
+ scale_dtype: null
30
+ zp_dtype: null
31
+ observer: null
32
+ observer_kwargs: {}
33
+ output_activations: null
34
+ format: null
35
+ targets: [Linear]
36
+ ignore: ['re:.*lm_head', 're:.*multi_modal_projector', 're:.*vision_model']
37
+ block_size: 128
38
+ dampening_frac: 0.03
39
+ actorder: static
40
+ offload_hessians: false
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|begin_of_text|>",
3
+ "eos_token": "<|eot|>",
4
+ "pad_token": "<|finetune_right_pad_id|>"
5
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22e009b4fcb58eddbabf347e71b9881ea1e6eb72d44e5ea9477c7587df68fd8d
3
+ size 27948580
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff