rdz-falcon committed on
Commit
e0099a4
·
verified ·
1 Parent(s): d86ccf5

★ Best checkpoint | epoch=1 | loss=4.4424

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-best/tokenizer.json filter=lfs diff=lfs merge=lfs -text
checkpoint-best/added_tokens.json ADDED
@@ -0,0 +1,471 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<M0>": 262147,
3
+ "<M100>": 262247,
4
+ "<M101>": 262248,
5
+ "<M102>": 262249,
6
+ "<M103>": 262250,
7
+ "<M104>": 262251,
8
+ "<M105>": 262252,
9
+ "<M106>": 262253,
10
+ "<M107>": 262254,
11
+ "<M108>": 262255,
12
+ "<M109>": 262256,
13
+ "<M10>": 262157,
14
+ "<M110>": 262257,
15
+ "<M111>": 262258,
16
+ "<M112>": 262259,
17
+ "<M113>": 262260,
18
+ "<M114>": 262261,
19
+ "<M115>": 262262,
20
+ "<M116>": 262263,
21
+ "<M117>": 262264,
22
+ "<M118>": 262265,
23
+ "<M119>": 262266,
24
+ "<M11>": 262158,
25
+ "<M120>": 262267,
26
+ "<M121>": 262268,
27
+ "<M122>": 262269,
28
+ "<M123>": 262270,
29
+ "<M124>": 262271,
30
+ "<M125>": 262272,
31
+ "<M126>": 262273,
32
+ "<M127>": 262274,
33
+ "<M128>": 262275,
34
+ "<M129>": 262276,
35
+ "<M12>": 262159,
36
+ "<M130>": 262277,
37
+ "<M131>": 262278,
38
+ "<M132>": 262279,
39
+ "<M133>": 262280,
40
+ "<M134>": 262281,
41
+ "<M135>": 262282,
42
+ "<M136>": 262283,
43
+ "<M137>": 262284,
44
+ "<M138>": 262285,
45
+ "<M139>": 262286,
46
+ "<M13>": 262160,
47
+ "<M140>": 262287,
48
+ "<M141>": 262288,
49
+ "<M142>": 262289,
50
+ "<M143>": 262290,
51
+ "<M144>": 262291,
52
+ "<M145>": 262292,
53
+ "<M146>": 262293,
54
+ "<M147>": 262294,
55
+ "<M148>": 262295,
56
+ "<M149>": 262296,
57
+ "<M14>": 262161,
58
+ "<M150>": 262297,
59
+ "<M151>": 262298,
60
+ "<M152>": 262299,
61
+ "<M153>": 262300,
62
+ "<M154>": 262301,
63
+ "<M155>": 262302,
64
+ "<M156>": 262303,
65
+ "<M157>": 262304,
66
+ "<M158>": 262305,
67
+ "<M159>": 262306,
68
+ "<M15>": 262162,
69
+ "<M160>": 262307,
70
+ "<M161>": 262308,
71
+ "<M162>": 262309,
72
+ "<M163>": 262310,
73
+ "<M164>": 262311,
74
+ "<M165>": 262312,
75
+ "<M166>": 262313,
76
+ "<M167>": 262314,
77
+ "<M168>": 262315,
78
+ "<M169>": 262316,
79
+ "<M16>": 262163,
80
+ "<M170>": 262317,
81
+ "<M171>": 262318,
82
+ "<M172>": 262319,
83
+ "<M173>": 262320,
84
+ "<M174>": 262321,
85
+ "<M175>": 262322,
86
+ "<M176>": 262323,
87
+ "<M177>": 262324,
88
+ "<M178>": 262325,
89
+ "<M179>": 262326,
90
+ "<M17>": 262164,
91
+ "<M180>": 262327,
92
+ "<M181>": 262328,
93
+ "<M182>": 262329,
94
+ "<M183>": 262330,
95
+ "<M184>": 262331,
96
+ "<M185>": 262332,
97
+ "<M186>": 262333,
98
+ "<M187>": 262334,
99
+ "<M188>": 262335,
100
+ "<M18>": 262165,
101
+ "<M190>": 262336,
102
+ "<M191>": 262337,
103
+ "<M192>": 262338,
104
+ "<M194>": 262339,
105
+ "<M196>": 262340,
106
+ "<M197>": 262341,
107
+ "<M198>": 262342,
108
+ "<M199>": 262343,
109
+ "<M19>": 262166,
110
+ "<M1>": 262148,
111
+ "<M200>": 262344,
112
+ "<M205>": 262345,
113
+ "<M206>": 262346,
114
+ "<M207>": 262347,
115
+ "<M208>": 262348,
116
+ "<M209>": 262349,
117
+ "<M20>": 262167,
118
+ "<M210>": 262350,
119
+ "<M211>": 262351,
120
+ "<M212>": 262352,
121
+ "<M213>": 262353,
122
+ "<M214>": 262354,
123
+ "<M215>": 262355,
124
+ "<M216>": 262356,
125
+ "<M217>": 262357,
126
+ "<M218>": 262358,
127
+ "<M219>": 262359,
128
+ "<M21>": 262168,
129
+ "<M220>": 262360,
130
+ "<M221>": 262361,
131
+ "<M222>": 262362,
132
+ "<M223>": 262363,
133
+ "<M224>": 262364,
134
+ "<M225>": 262365,
135
+ "<M226>": 262366,
136
+ "<M227>": 262367,
137
+ "<M228>": 262368,
138
+ "<M229>": 262369,
139
+ "<M22>": 262169,
140
+ "<M230>": 262370,
141
+ "<M231>": 262371,
142
+ "<M232>": 262372,
143
+ "<M233>": 262373,
144
+ "<M234>": 262374,
145
+ "<M235>": 262375,
146
+ "<M236>": 262376,
147
+ "<M238>": 262377,
148
+ "<M239>": 262378,
149
+ "<M23>": 262170,
150
+ "<M240>": 262379,
151
+ "<M241>": 262380,
152
+ "<M242>": 262381,
153
+ "<M243>": 262382,
154
+ "<M244>": 262383,
155
+ "<M245>": 262384,
156
+ "<M246>": 262385,
157
+ "<M247>": 262386,
158
+ "<M248>": 262387,
159
+ "<M24>": 262171,
160
+ "<M250>": 262388,
161
+ "<M252>": 262389,
162
+ "<M254>": 262390,
163
+ "<M256>": 262391,
164
+ "<M257>": 262392,
165
+ "<M258>": 262393,
166
+ "<M259>": 262394,
167
+ "<M25>": 262172,
168
+ "<M260>": 262395,
169
+ "<M261>": 262396,
170
+ "<M262>": 262397,
171
+ "<M263>": 262398,
172
+ "<M264>": 262399,
173
+ "<M265>": 262400,
174
+ "<M266>": 262401,
175
+ "<M267>": 262402,
176
+ "<M268>": 262403,
177
+ "<M269>": 262404,
178
+ "<M26>": 262173,
179
+ "<M270>": 262405,
180
+ "<M271>": 262406,
181
+ "<M272>": 262407,
182
+ "<M273>": 262408,
183
+ "<M274>": 262409,
184
+ "<M275>": 262410,
185
+ "<M276>": 262411,
186
+ "<M277>": 262412,
187
+ "<M278>": 262413,
188
+ "<M27>": 262174,
189
+ "<M280>": 262414,
190
+ "<M282>": 262415,
191
+ "<M283>": 262416,
192
+ "<M284>": 262417,
193
+ "<M285>": 262418,
194
+ "<M286>": 262419,
195
+ "<M287>": 262420,
196
+ "<M289>": 262421,
197
+ "<M28>": 262175,
198
+ "<M290>": 262422,
199
+ "<M291>": 262423,
200
+ "<M292>": 262424,
201
+ "<M293>": 262425,
202
+ "<M295>": 262426,
203
+ "<M296>": 262427,
204
+ "<M297>": 262428,
205
+ "<M298>": 262429,
206
+ "<M29>": 262176,
207
+ "<M2>": 262149,
208
+ "<M300>": 262430,
209
+ "<M303>": 262431,
210
+ "<M304>": 262432,
211
+ "<M306>": 262433,
212
+ "<M307>": 262434,
213
+ "<M308>": 262435,
214
+ "<M309>": 262436,
215
+ "<M30>": 262177,
216
+ "<M310>": 262437,
217
+ "<M311>": 262438,
218
+ "<M312>": 262439,
219
+ "<M313>": 262440,
220
+ "<M315>": 262441,
221
+ "<M316>": 262442,
222
+ "<M317>": 262443,
223
+ "<M318>": 262444,
224
+ "<M319>": 262445,
225
+ "<M31>": 262178,
226
+ "<M320>": 262446,
227
+ "<M321>": 262447,
228
+ "<M322>": 262448,
229
+ "<M323>": 262449,
230
+ "<M324>": 262450,
231
+ "<M325>": 262451,
232
+ "<M326>": 262452,
233
+ "<M327>": 262453,
234
+ "<M328>": 262454,
235
+ "<M329>": 262455,
236
+ "<M32>": 262179,
237
+ "<M330>": 262456,
238
+ "<M332>": 262457,
239
+ "<M333>": 262458,
240
+ "<M334>": 262459,
241
+ "<M335>": 262460,
242
+ "<M336>": 262461,
243
+ "<M337>": 262462,
244
+ "<M338>": 262463,
245
+ "<M33>": 262180,
246
+ "<M342>": 262464,
247
+ "<M343>": 262465,
248
+ "<M345>": 262466,
249
+ "<M346>": 262467,
250
+ "<M347>": 262468,
251
+ "<M348>": 262469,
252
+ "<M349>": 262470,
253
+ "<M34>": 262181,
254
+ "<M350>": 262471,
255
+ "<M351>": 262472,
256
+ "<M352>": 262473,
257
+ "<M353>": 262474,
258
+ "<M354>": 262475,
259
+ "<M355>": 262476,
260
+ "<M356>": 262477,
261
+ "<M357>": 262478,
262
+ "<M359>": 262479,
263
+ "<M35>": 262182,
264
+ "<M360>": 262480,
265
+ "<M362>": 262481,
266
+ "<M363>": 262482,
267
+ "<M364>": 262483,
268
+ "<M365>": 262484,
269
+ "<M366>": 262485,
270
+ "<M367>": 262486,
271
+ "<M368>": 262487,
272
+ "<M369>": 262488,
273
+ "<M36>": 262183,
274
+ "<M370>": 262489,
275
+ "<M371>": 262490,
276
+ "<M372>": 262491,
277
+ "<M373>": 262492,
278
+ "<M374>": 262493,
279
+ "<M375>": 262494,
280
+ "<M376>": 262495,
281
+ "<M378>": 262496,
282
+ "<M379>": 262497,
283
+ "<M37>": 262184,
284
+ "<M380>": 262498,
285
+ "<M381>": 262499,
286
+ "<M382>": 262500,
287
+ "<M383>": 262501,
288
+ "<M384>": 262502,
289
+ "<M385>": 262503,
290
+ "<M386>": 262504,
291
+ "<M387>": 262505,
292
+ "<M388>": 262506,
293
+ "<M389>": 262507,
294
+ "<M38>": 262185,
295
+ "<M390>": 262508,
296
+ "<M391>": 262509,
297
+ "<M392>": 262510,
298
+ "<M393>": 262511,
299
+ "<M395>": 262512,
300
+ "<M396>": 262513,
301
+ "<M398>": 262514,
302
+ "<M39>": 262186,
303
+ "<M3>": 262150,
304
+ "<M402>": 262515,
305
+ "<M403>": 262516,
306
+ "<M404>": 262517,
307
+ "<M405>": 262518,
308
+ "<M406>": 262519,
309
+ "<M408>": 262520,
310
+ "<M409>": 262521,
311
+ "<M40>": 262187,
312
+ "<M410>": 262522,
313
+ "<M411>": 262523,
314
+ "<M412>": 262524,
315
+ "<M413>": 262525,
316
+ "<M414>": 262526,
317
+ "<M416>": 262527,
318
+ "<M418>": 262528,
319
+ "<M419>": 262529,
320
+ "<M41>": 262188,
321
+ "<M421>": 262530,
322
+ "<M422>": 262531,
323
+ "<M423>": 262532,
324
+ "<M424>": 262533,
325
+ "<M426>": 262534,
326
+ "<M427>": 262535,
327
+ "<M428>": 262536,
328
+ "<M429>": 262537,
329
+ "<M42>": 262189,
330
+ "<M430>": 262538,
331
+ "<M431>": 262539,
332
+ "<M432>": 262540,
333
+ "<M433>": 262541,
334
+ "<M434>": 262542,
335
+ "<M435>": 262543,
336
+ "<M436>": 262544,
337
+ "<M437>": 262545,
338
+ "<M438>": 262546,
339
+ "<M439>": 262547,
340
+ "<M43>": 262190,
341
+ "<M440>": 262548,
342
+ "<M441>": 262549,
343
+ "<M442>": 262550,
344
+ "<M443>": 262551,
345
+ "<M444>": 262552,
346
+ "<M445>": 262553,
347
+ "<M446>": 262554,
348
+ "<M447>": 262555,
349
+ "<M448>": 262556,
350
+ "<M449>": 262557,
351
+ "<M44>": 262191,
352
+ "<M450>": 262558,
353
+ "<M451>": 262559,
354
+ "<M452>": 262560,
355
+ "<M454>": 262561,
356
+ "<M455>": 262562,
357
+ "<M456>": 262563,
358
+ "<M457>": 262564,
359
+ "<M458>": 262565,
360
+ "<M459>": 262566,
361
+ "<M45>": 262192,
362
+ "<M460>": 262567,
363
+ "<M461>": 262568,
364
+ "<M462>": 262569,
365
+ "<M463>": 262570,
366
+ "<M464>": 262571,
367
+ "<M465>": 262572,
368
+ "<M466>": 262573,
369
+ "<M467>": 262574,
370
+ "<M468>": 262575,
371
+ "<M469>": 262576,
372
+ "<M46>": 262193,
373
+ "<M470>": 262577,
374
+ "<M471>": 262578,
375
+ "<M472>": 262579,
376
+ "<M473>": 262580,
377
+ "<M474>": 262581,
378
+ "<M475>": 262582,
379
+ "<M477>": 262583,
380
+ "<M479>": 262584,
381
+ "<M47>": 262194,
382
+ "<M481>": 262585,
383
+ "<M483>": 262586,
384
+ "<M484>": 262587,
385
+ "<M485>": 262588,
386
+ "<M487>": 262589,
387
+ "<M489>": 262590,
388
+ "<M48>": 262195,
389
+ "<M490>": 262591,
390
+ "<M491>": 262592,
391
+ "<M492>": 262593,
392
+ "<M493>": 262594,
393
+ "<M494>": 262595,
394
+ "<M495>": 262596,
395
+ "<M496>": 262597,
396
+ "<M497>": 262598,
397
+ "<M498>": 262599,
398
+ "<M499>": 262600,
399
+ "<M49>": 262196,
400
+ "<M4>": 262151,
401
+ "<M500>": 262601,
402
+ "<M501>": 262602,
403
+ "<M502>": 262603,
404
+ "<M503>": 262604,
405
+ "<M504>": 262605,
406
+ "<M505>": 262606,
407
+ "<M506>": 262607,
408
+ "<M507>": 262608,
409
+ "<M508>": 262609,
410
+ "<M509>": 262610,
411
+ "<M50>": 262197,
412
+ "<M510>": 262611,
413
+ "<M511>": 262612,
414
+ "<M51>": 262198,
415
+ "<M52>": 262199,
416
+ "<M53>": 262200,
417
+ "<M54>": 262201,
418
+ "<M55>": 262202,
419
+ "<M56>": 262203,
420
+ "<M57>": 262204,
421
+ "<M58>": 262205,
422
+ "<M59>": 262206,
423
+ "<M5>": 262152,
424
+ "<M60>": 262207,
425
+ "<M61>": 262208,
426
+ "<M62>": 262209,
427
+ "<M63>": 262210,
428
+ "<M64>": 262211,
429
+ "<M65>": 262212,
430
+ "<M66>": 262213,
431
+ "<M67>": 262214,
432
+ "<M68>": 262215,
433
+ "<M69>": 262216,
434
+ "<M6>": 262153,
435
+ "<M70>": 262217,
436
+ "<M71>": 262218,
437
+ "<M72>": 262219,
438
+ "<M73>": 262220,
439
+ "<M74>": 262221,
440
+ "<M75>": 262222,
441
+ "<M76>": 262223,
442
+ "<M77>": 262224,
443
+ "<M78>": 262225,
444
+ "<M79>": 262226,
445
+ "<M7>": 262154,
446
+ "<M80>": 262227,
447
+ "<M81>": 262228,
448
+ "<M82>": 262229,
449
+ "<M83>": 262230,
450
+ "<M84>": 262231,
451
+ "<M85>": 262232,
452
+ "<M86>": 262233,
453
+ "<M87>": 262234,
454
+ "<M88>": 262235,
455
+ "<M89>": 262236,
456
+ "<M8>": 262155,
457
+ "<M90>": 262237,
458
+ "<M91>": 262238,
459
+ "<M92>": 262239,
460
+ "<M93>": 262240,
461
+ "<M94>": 262241,
462
+ "<M95>": 262242,
463
+ "<M96>": 262243,
464
+ "<M97>": 262244,
465
+ "<M98>": 262245,
466
+ "<M99>": 262246,
467
+ "<M9>": 262156,
468
+ "<M_END>": 262146,
469
+ "<M_START>": 262145,
470
+ "<image_soft_token>": 262144
471
+ }
checkpoint-best/chat_template.jinja ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{ bos_token }}
2
+ {%- if messages[0]['role'] == 'system' -%}
3
+ {%- if messages[0]['content'] is string -%}
4
+ {%- set first_user_prefix = messages[0]['content'] + '
5
+
6
+ ' -%}
7
+ {%- else -%}
8
+ {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
9
+
10
+ ' -%}
11
+ {%- endif -%}
12
+ {%- set loop_messages = messages[1:] -%}
13
+ {%- else -%}
14
+ {%- set first_user_prefix = "" -%}
15
+ {%- set loop_messages = messages -%}
16
+ {%- endif -%}
17
+ {%- for message in loop_messages -%}
18
+ {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
19
+ {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
20
+ {%- endif -%}
21
+ {%- if (message['role'] == 'assistant') -%}
22
+ {%- set role = "model" -%}
23
+ {%- else -%}
24
+ {%- set role = message['role'] -%}
25
+ {%- endif -%}
26
+ {{ '<start_of_turn>' + role + '
27
+ ' + (first_user_prefix if loop.first else "") }}
28
+ {%- if message['content'] is string -%}
29
+ {{ message['content'] | trim }}
30
+ {%- elif message['content'] is iterable -%}
31
+ {%- for item in message['content'] -%}
32
+ {%- if item['type'] == 'image' -%}
33
+ {{ '<start_of_image>' }}
34
+ {%- elif item['type'] == 'text' -%}
35
+ {{ item['text'] | trim }}
36
+ {%- endif -%}
37
+ {%- endfor -%}
38
+ {%- else -%}
39
+ {{ raise_exception("Invalid content type") }}
40
+ {%- endif -%}
41
+ {{ '<end_of_turn>
42
+ ' }}
43
+ {%- endfor -%}
44
+ {%- if add_generation_prompt -%}
45
+ {{'<start_of_turn>model
46
+ '}}
47
+ {%- endif -%}
checkpoint-best/config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_sliding_window_pattern": 6,
3
+ "architectures": [
4
+ "Gemma3ForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "attn_logit_softcapping": null,
9
+ "bos_token_id": 2,
10
+ "dtype": "float32",
11
+ "eos_token_id": 1,
12
+ "final_logit_softcapping": null,
13
+ "head_dim": 256,
14
+ "hidden_activation": "gelu_pytorch_tanh",
15
+ "hidden_size": 640,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 2048,
18
+ "layer_types": [
19
+ "sliding_attention",
20
+ "sliding_attention",
21
+ "sliding_attention",
22
+ "sliding_attention",
23
+ "sliding_attention",
24
+ "full_attention",
25
+ "sliding_attention",
26
+ "sliding_attention",
27
+ "sliding_attention",
28
+ "sliding_attention",
29
+ "sliding_attention",
30
+ "full_attention",
31
+ "sliding_attention",
32
+ "sliding_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "sliding_attention",
36
+ "full_attention"
37
+ ],
38
+ "max_position_embeddings": 32768,
39
+ "model_type": "gemma3_text",
40
+ "num_attention_heads": 4,
41
+ "num_hidden_layers": 18,
42
+ "num_key_value_heads": 1,
43
+ "pad_token_id": 0,
44
+ "query_pre_attn_scalar": 256,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_local_base_freq": 10000.0,
47
+ "rope_scaling": null,
48
+ "rope_theta": 1000000.0,
49
+ "sliding_window": 512,
50
+ "transformers_version": "4.57.6",
51
+ "use_bidirectional_attention": false,
52
+ "use_cache": true,
53
+ "vocab_size": 262613
54
+ }
checkpoint-best/generation_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cache_implementation": "hybrid",
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 1,
6
+ 106
7
+ ],
8
+ "top_k": 64,
9
+ "top_p": 0.95,
10
+ "transformers_version": "4.57.6"
11
+ }
checkpoint-best/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7546287b518c6589b330634007b238a5f4532f8adc69915faaf08ebe0364e2a0
3
+ size 1073619904
checkpoint-best/special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<eos>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
checkpoint-best/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4508188ef586fd9b7974e820d55442a0c83a8a1842a23bdfd33407d276be9176
3
+ size 33470204
checkpoint-best/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
checkpoint-best/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff