WinstonDeng committed on
Commit
6578499
·
verified ·
1 Parent(s): dc3047b

step-3.7-flash bf16 model

Browse files
chat_template.jinja ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {#- render_message_content(message): flattens message.content to text. none gives an empty string; a string is returned as-is; a mapping yields its 'value' key when present, otherwise its 'text' key; any other iterable is walked item by item: items of type 'text' contribute their 'value' (or 'text'), items of type 'image' contribute the literal <im_patch>, and items of any other type emit nothing. A single space is written before a text item when a text item has already been written and no image item has been seen since. -#}{% macro render_message_content(message) %}{% if message.content is none %}{{- '' }}{% elif message.content is string %}{{- message.content }}{% elif message.content is mapping %}{{- message.content['value'] if 'value' in message.content else message.content['text'] }}{% elif message.content is iterable %}{% set ns = namespace(needs_text_separator=false) %}{% for item in message.content %}{% if item.type == 'text' %}{% if ns.needs_text_separator %}{{- ' ' }}{% endif %}{{- item['value'] if 'value' in item else item['text'] }}{% set ns.needs_text_separator = true %}{% elif item.type == 'image' %}<im_patch>{% set ns.needs_text_separator = false %}{% endif %}{% endfor %}{% endif %}{% endmacro %}
2
+ {{bos_token}}{#- System header. With tools: always open a system turn holding the optional "Reasoning:" line, the optional leading system message, the tool schemas and the tool-call format instructions. Without tools: emit a system turn only when there is a leading system message or a reasoning effort. -#}{%- if tools %}
3
+ {{- '<|im_start|>system\n' }}
4
+ {#- reasoning_effort may be passed explicitly as null; "is defined" alone would then let the string concatenation below fail, so null is treated the same as absent. -#}{%- if reasoning_effort is defined and reasoning_effort is not none %}
5
+ {{- "Reasoning: " + reasoning_effort + '\n\n' }}
6
+ {%- endif %}
7
+ {#- Guard against an empty conversation: messages[0] is undefined there and reading .role from it would abort rendering. -#}{%- if messages and messages[0].role == 'system' %}
8
+ {{- render_message_content(messages[0]) + '\n\n' }}
9
+ {%- endif %}
10
+ {{- "# Tools\n\nYou have access to the following functions in JSONSchema format:\n\n<tools>" }}
11
+ {#- One JSON-serialized tool definition per line inside the tools element. -#}{%- for tool in tools %}
12
+ {{- "\n" }}
13
+ {{- tool | tojson(ensure_ascii=False) }}
14
+ {%- endfor %}
15
+ {{- "\n</tools>\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...>\n...\n</function> block must be nested within <tool_call>\n...\n</tool_call> XML tags\n- Required parameters MUST be specified\n</IMPORTANT><|im_end|>\n" }}
16
+ {%- else %}
17
+ {#- Same empty-conversation guard as in the tools branch. -#}{%- if messages and messages[0].role == 'system' %}
18
+ {{- '<|im_start|>system\n' }}
19
+ {%- if reasoning_effort is defined and reasoning_effort is not none %}
20
+ {{- "Reasoning: " + reasoning_effort + '\n\n' }}
21
+ {%- endif %}
22
+ {{- render_message_content(messages[0]) + '<|im_end|>\n' }}
23
+ {#- No leading system message: a system turn is still emitted when a (non-null) reasoning effort was requested. -#}{%- elif reasoning_effort is defined and reasoning_effort is not none %}
24
+ {{- '<|im_start|>system\n' + "Reasoning: " + reasoning_effort + '\n\n' + '<|im_end|>\n' }}
25
+ {%- endif %}
26
+ {%- endif %}
27
+ {#- Locate the last "real" user query. The conversation is scanned from the end; the first user message whose rendered content is not wrapped in <tool_response>...</tool_response> (does not both start and end with those tags) sets ns.last_query_index, and ns.multi_step_tool is cleared so earlier messages are ignored. If no such message exists, last_query_index keeps its initial value of messages|length - 1. -#}{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
28
+ {%- for message in messages[::-1] %}
29
+ {#- Convert the position in the reversed list back to an index into messages. -#}{%- set index = (messages|length - 1) - loop.index0 %}
30
+ {%- if ns.multi_step_tool and message.role == "user" and render_message_content(message) is string and not(render_message_content(message).startswith('<tool_response>') and render_message_content(message).endswith('</tool_response>')) %}
31
+ {%- set ns.multi_step_tool = false %}
32
+ {%- set ns.last_query_index = index %}
33
+ {%- endif %}
34
+ {%- endfor %}
35
+ {#- Main pass: emit one chat turn per message, in order. A system message in first position matches none of the branches below, so this loop emits nothing for it. -#}{%- for message in messages %}
36
+ {%- set content = render_message_content(message) %}
37
+ {#- User turns and any system message that is not first are emitted verbatim. -#}{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
38
+ {#- A non-first system message whose name is 'observation' is labelled with the role 'observation'; everything else keeps its own role. -#}{%- set role_name = 'observation' if (message.role == "system" and not loop.first and message.name == 'observation') else message.role %}
39
+ {{- '<|im_start|>' + role_name + '\n' + content + '<|im_end|>' + '\n' }}
40
+ {%- elif message.role == "assistant" %}
41
+ {#- Reasoning comes from message.reasoning_content when that is a string; otherwise it is split out of the content if the content contains a closing think tag. -#}{%- if message.reasoning_content is string %}
42
+ {%- set reasoning_content = message.reasoning_content %}
43
+ {%- else %}
44
+ {%- if '</think>' in content %}
45
+ {#- Reasoning = text before the first closing think tag, taken after the last opening think tag in that part; content = text after the last closing think tag. Surrounding newlines are trimmed. -#}{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
46
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
47
+ {%- else %}
48
+ {%- set reasoning_content = '' %}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {#- Only assistant turns positioned after ns.last_query_index are written with a think block; earlier assistant turns are written with their content only. -#}{%- if loop.index0 > ns.last_query_index %}
52
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n' + content }}
53
+ {%- else %}
54
+ {{- '<|im_start|>' + message.role + '\n' + content }}
55
+ {%- endif %}
56
+ {#- Tool calls are appended to the same assistant turn as tool_call / function / parameter blocks. -#}{%- if message.tool_calls %}
57
+ {%- for tool_call in message.tool_calls %}
58
+ {#- Entries that carry a .function attribute are unwrapped so name and arguments are read from it. -#}{%- if tool_call.function is defined %}
59
+ {%- set tool_call = tool_call.function %}
60
+ {%- endif %}
61
+ {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
62
+ {%- if tool_call.arguments is defined %}
63
+ {#- String arguments are parsed with the fromjson filter (presumably supplied by the rendering environment; it is not defined in this template). -#}{%- set arguments = tool_call.arguments | fromjson if tool_call.arguments is string else tool_call.arguments %}
64
+ {%- for args_name, args_value in arguments|items %}
65
+ {{- '<parameter=' + args_name + '>\n' }}
66
+ {#- Mappings and non-string sequences are written as JSON; every other value goes through the string filter. -#}{%- set args_value = args_value | tojson(ensure_ascii=False) | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
67
+ {{- args_value }}
68
+ {{- '\n</parameter>\n' }}
69
+ {%- endfor %}
70
+ {%- endif %}
71
+ {{- '</function>\n</tool_call>' }}
72
+ {%- endfor %}
73
+ {%- endif %}
74
+ {{- '<|im_end|>\n' }}
75
+ {%- elif message.role == "tool" %}
76
+ {#- Consecutive tool messages share one tool_response turn: the turn header is written only for the first message of a run, and the end token only after the last one. -#}{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
77
+ {{- '<|im_start|>tool_response\n' }}
78
+ {%- endif %}
79
+ {{- '<tool_response>' }}
80
+ {{- content }}
81
+ {{- '</tool_response>' }}
82
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
83
+ {{- '<|im_end|>\n' }}
84
+ {%- endif %}
85
+ {%- endif %}
86
+ {%- endfor %}
87
+ {#- When add_generation_prompt is truthy, open an assistant turn followed by an opening think tag and a newline, leaving the turn unterminated. -#}{%- if add_generation_prompt %}
88
+ {{- '<|im_start|>assistant\n<think>\n' }}
89
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MMGPTStepRoboticsForCausalLM"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_step_robotics.StepRoboticsConfig"
7
+ },
8
+ "model_type": "step3p5v",
9
+ "im_end_token": "<im_end>",
10
+ "im_patch_token": "<im_patch>",
11
+ "im_start_token": "<im_start>",
12
+ "image_token_len": 169,
13
+ "patch_token_len": 81,
14
+ "image_token_id": 128001,
15
+ "understand_projector_stride": 2,
16
+ "use_im_start_end": "true",
17
+ "vision_select_layer": -1,
18
+ "projector_bias": false,
19
+ "vision_config": {
20
+ "model_type": "perception_encoder",
21
+ "image_size": 728,
22
+ "patch_size": 14,
23
+ "width": 1536,
24
+ "layers": 47,
25
+ "heads": 16,
26
+ "pool_type": "none",
27
+ "output_dim": null,
28
+ "use_cls_token": false,
29
+ "ls_init_value": 0.1,
30
+ "use_ln_post": false,
31
+ "hidden_act": "quick_gelu"
32
+ },
33
+ "text_config": {
34
+ "architectures": [
35
+ "Step3p5ForCausalLM"
36
+ ],
37
+ "rope_scaling": {
38
+ "rope_type": "llama3",
39
+ "factor": 2.0,
40
+ "original_max_position_embeddings": 131072,
41
+ "low_freq_factor": 1.0,
42
+ "high_freq_factor": 32.0
43
+ },
44
+ "yarn_only_types": [
45
+ "full_attention"
46
+ ],
47
+ "model_type": "step3p5",
48
+ "hidden_size": 4096,
49
+ "intermediate_size": 11264,
50
+ "num_hidden_layers": 45,
51
+ "max_seq_len": 262144,
52
+ "max_position_embeddings": 262144,
53
+ "vocab_size": 128896,
54
+ "torch_dtype": "bfloat16",
55
+ "use_qk_norm": false,
56
+ "moe_layers_enum": "3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44",
57
+ "use_mfa": false,
58
+ "num_attention_heads": 64,
59
+ "num_attention_groups": 8,
60
+ "head_dim": 128,
61
+ "use_moe": true,
62
+ "moe_num_experts": 288,
63
+ "moe_top_k": 8,
64
+ "moe_intermediate_size": 1280,
65
+ "share_expert_dim": 1280,
66
+ "moe_layer_offset": 0,
67
+ "moe_every_n_layer": 1,
68
+ "norm_expert_weight": true,
69
+ "moe_router_activation": "sigmoid",
70
+ "moe_router_scaling_factor": 3.0,
71
+ "att_impl_type": "GQA",
72
+ "num_nextn_predict_layers": 3,
73
+ "rope_theta": [
74
+ 5000000.0,
75
+ 10000.0,
76
+ 10000.0,
77
+ 10000.0,
78
+ 5000000.0,
79
+ 10000.0,
80
+ 10000.0,
81
+ 10000.0,
82
+ 5000000.0,
83
+ 10000.0,
84
+ 10000.0,
85
+ 10000.0,
86
+ 5000000.0,
87
+ 10000.0,
88
+ 10000.0,
89
+ 10000.0,
90
+ 5000000.0,
91
+ 10000.0,
92
+ 10000.0,
93
+ 10000.0,
94
+ 5000000.0,
95
+ 10000.0,
96
+ 10000.0,
97
+ 10000.0,
98
+ 5000000.0,
99
+ 10000.0,
100
+ 10000.0,
101
+ 10000.0,
102
+ 5000000.0,
103
+ 10000.0,
104
+ 10000.0,
105
+ 10000.0,
106
+ 5000000.0,
107
+ 10000.0,
108
+ 10000.0,
109
+ 10000.0,
110
+ 5000000.0,
111
+ 10000.0,
112
+ 10000.0,
113
+ 10000.0,
114
+ 5000000.0,
115
+ 10000.0,
116
+ 10000.0,
117
+ 10000.0,
118
+ 5000000.0,
119
+ 10000.0,
120
+ 10000.0,
121
+ 10000.0
122
+ ],
123
+ "use_head_wise_attn_gate": true,
124
+ "sliding_window": 512,
125
+ "use_moe_router_bias": true,
126
+ "need_fp32_gate": true,
127
+ "sink": false,
128
+ "layer_types": [
129
+ "full_attention",
130
+ "sliding_attention",
131
+ "sliding_attention",
132
+ "sliding_attention",
133
+ "full_attention",
134
+ "sliding_attention",
135
+ "sliding_attention",
136
+ "sliding_attention",
137
+ "full_attention",
138
+ "sliding_attention",
139
+ "sliding_attention",
140
+ "sliding_attention",
141
+ "full_attention",
142
+ "sliding_attention",
143
+ "sliding_attention",
144
+ "sliding_attention",
145
+ "full_attention",
146
+ "sliding_attention",
147
+ "sliding_attention",
148
+ "sliding_attention",
149
+ "full_attention",
150
+ "sliding_attention",
151
+ "sliding_attention",
152
+ "sliding_attention",
153
+ "full_attention",
154
+ "sliding_attention",
155
+ "sliding_attention",
156
+ "sliding_attention",
157
+ "full_attention",
158
+ "sliding_attention",
159
+ "sliding_attention",
160
+ "sliding_attention",
161
+ "full_attention",
162
+ "sliding_attention",
163
+ "sliding_attention",
164
+ "sliding_attention",
165
+ "full_attention",
166
+ "sliding_attention",
167
+ "sliding_attention",
168
+ "sliding_attention",
169
+ "full_attention",
170
+ "sliding_attention",
171
+ "sliding_attention",
172
+ "sliding_attention",
173
+ "full_attention",
174
+ "sliding_attention",
175
+ "sliding_attention",
176
+ "sliding_attention"
177
+ ],
178
+ "use_rope_layers": [],
179
+ "partial_rotary_factors": [
180
+ 0.5,
181
+ 1.0,
182
+ 1.0,
183
+ 1.0,
184
+ 0.5,
185
+ 1.0,
186
+ 1.0,
187
+ 1.0,
188
+ 0.5,
189
+ 1.0,
190
+ 1.0,
191
+ 1.0,
192
+ 0.5,
193
+ 1.0,
194
+ 1.0,
195
+ 1.0,
196
+ 0.5,
197
+ 1.0,
198
+ 1.0,
199
+ 1.0,
200
+ 0.5,
201
+ 1.0,
202
+ 1.0,
203
+ 1.0,
204
+ 0.5,
205
+ 1.0,
206
+ 1.0,
207
+ 1.0,
208
+ 0.5,
209
+ 1.0,
210
+ 1.0,
211
+ 1.0,
212
+ 0.5,
213
+ 1.0,
214
+ 1.0,
215
+ 1.0,
216
+ 0.5,
217
+ 1.0,
218
+ 1.0,
219
+ 1.0,
220
+ 0.5,
221
+ 1.0,
222
+ 1.0,
223
+ 1.0,
224
+ 0.5,
225
+ 1.0,
226
+ 1.0,
227
+ 1.0
228
+ ],
229
+ "eos_token_id": [
230
+ 1,
231
+ 2,
232
+ 128007
233
+ ],
234
+ "bos_token_id": 0,
235
+ "attention_other_setting": {
236
+ "attention_type": "sliding_attention",
237
+ "num_attention_heads": 96,
238
+ "num_attention_groups": 8,
239
+ "head_dim": 128,
240
+ "true_head_dim": 128
241
+ },
242
+ "swiglu_limits": [
243
+ 0.0,
244
+ 0.0,
245
+ 0.0,
246
+ 0.0,
247
+ 0.0,
248
+ 0.0,
249
+ 0.0,
250
+ 0.0,
251
+ 0.0,
252
+ 0.0,
253
+ 0.0,
254
+ 0.0,
255
+ 0.0,
256
+ 0.0,
257
+ 0.0,
258
+ 0.0,
259
+ 0.0,
260
+ 0.0,
261
+ 0.0,
262
+ 0.0,
263
+ 0.0,
264
+ 0.0,
265
+ 0.0,
266
+ 0.0,
267
+ 0.0,
268
+ 0.0,
269
+ 0.0,
270
+ 0.0,
271
+ 0.0,
272
+ 0.0,
273
+ 0.0,
274
+ 0.0,
275
+ 0.0,
276
+ 0.0,
277
+ 0.0,
278
+ 0.0,
279
+ 0.0,
280
+ 0.0,
281
+ 0.0,
282
+ 0.0,
283
+ 0.0,
284
+ 0.0,
285
+ 0.0,
286
+ 7,
287
+ 7,
288
+ 0.0,
289
+ 0.0,
290
+ 0.0
291
+ ],
292
+ "swiglu_limits_shared": [
293
+ 0.0,
294
+ 0.0,
295
+ 0.0,
296
+ 0.0,
297
+ 0.0,
298
+ 0.0,
299
+ 0.0,
300
+ 0.0,
301
+ 0.0,
302
+ 0.0,
303
+ 0.0,
304
+ 0.0,
305
+ 0.0,
306
+ 0.0,
307
+ 0.0,
308
+ 0.0,
309
+ 0.0,
310
+ 0.0,
311
+ 0.0,
312
+ 0.0,
313
+ 0.0,
314
+ 0.0,
315
+ 0.0,
316
+ 0.0,
317
+ 0.0,
318
+ 0.0,
319
+ 0.0,
320
+ 0.0,
321
+ 0.0,
322
+ 0.0,
323
+ 0.0,
324
+ 0.0,
325
+ 0.0,
326
+ 0.0,
327
+ 0.0,
328
+ 0.0,
329
+ 0.0,
330
+ 0.0,
331
+ 0.0,
332
+ 0.0,
333
+ 0.0,
334
+ 0.0,
335
+ 0.0,
336
+ 16,
337
+ 16,
338
+ 0.0,
339
+ 0.0,
340
+ 0.0
341
+ ]
342
+ }
343
+ }
model-00001.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a2d47133d0ffa22f50a24ad4974c559c1b31f26f5baca24fc4f4dfe198b46c6
3
+ size 924094096
model-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67c13067deed696b62763643b7d531fd2cfde4c6e81cfcaba5460551e510d0af
3
+ size 9808156008
model-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f3567584681f4d2792e4d949c9440198f792a5afd93220d3770b509728b6ef1
3
+ size 18557475928
model-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d035fb813758ed63f1d537bbf41f6cbb2c5c8eb05f187de18a448c7766a64960
3
+ size 18624846944
model-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9a2c0daa3a49fc88e53e0b6419f2e4db7e412f40760488d49ca0f834fe83725
3
+ size 18557475928
model-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fee76c5fb28547ad0d4094a0bae7755a292dd439cc23b054210a24c965b093f
3
+ size 18624846976
model-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccad5d228ec280d95419fbbcf2590f2cdfc4c932a7249a7669dc7f509dc7fe66
3
+ size 18557475968
model-00008.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d537acabde8deace533c23df8e43268f1423b41e7b6e27c79232955283f4e44
3
+ size 18624846976
model-00009.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48be665fd9bce6e2fdac06d03a1a9916794fce4231b03009e6a4cfca1055a2c9
3
+ size 18557475968
model-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd61c7f6d62725005a07fe778dc572b9642972054424b2a12d1494e7ca241d91
3
+ size 18624846976
model-00011.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51c5fe0dce035dd7fc01333fe3ba0fff46e65412ad7a71c09fa8e2992b8d26a7
3
+ size 18557475968
model-00012.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f3e890ede3949af958a72da0beb99db6834853ee22978eb7782a600d013abac
3
+ size 18624846976
model-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98802ed9091498df2ef7a73b2697f5ac275a64892d984b9045a0a99f7b459c78
3
+ size 18557475968
model-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:459e5814b710f888b6763385fb179d52f746f59e702dd165f0c5d5cc73417b03
3
+ size 18624846976
model-00015.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13a51f345afa384b930387d40ac79ed6614f02129d61a9714e213f726970f47c
3
+ size 18557475968
model-00016.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3475a9dcaff31af71b6183371f8e355bdedea5f4dbb1ade6e84dcfe28ddc9517
3
+ size 18624846976
model-00017.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92917af53ef59cd99d43d49de2ffcbec3d21db7ebc59107a66aa2438da2eca14
3
+ size 18557475968
model-00018.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aba73fb3d39556bba83fe864f7a7b60e8b2085204b074101500531e69525ee4f
3
+ size 18624846976
model-00019.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:617c98c96871403936caa0dcea602e7650cb947493555c142dc80e6c991adad8
3
+ size 18557475968
model-00020.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ccea8f04adaeeb446b8def20c6042c96f6da4eb68da6bf2a76bacf65350e4e9
3
+ size 18624846976
model-00021.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af8c9ca65f1830163f6d5741569b4dd4c62468a1c21556e7b760e303bc3b7818
3
+ size 18557475968
model-00022.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cc5137141b5e2522fd3e69a4c828a0dbb602569ab8a0afcce5151b06800339f
3
+ size 18624846976
model-00023.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05c2c2a08df421f617794e137429246a6ea60dd908fc691263242a12325dae7f
3
+ size 9245052456
model-00024.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7688adfc7748c12fdc8504187c57fe6ec6005798a02defc0d3372f921b1400a1
3
+ size 6968188464
model-vit-00001.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22aa3f3679feffb57c2fb0bc885db0f5613db3536efef5d4b0984e8d769f6017
3
+ size 1613990904
model-vit-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f63ca4700a4184459d3ddb3a86c54a62914d359cedfddcfc14739ae782be082
3
+ size 2348122376
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin▁of▁sentence|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|im_end|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|end▁of▁sentence|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff