ChiefTheLord commited on
Commit
8129284
·
verified ·
1 Parent(s): 0234bfa

Delete checkpoints

Browse files
checkpoints/checkpoint-1280/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:69e7936faa0f4f96da50c28e7fc64aa98b67f8da2d8c84d20a6c2a1111b17e0f
3
- size 2297612372
 
 
 
 
checkpoints/checkpoint-1280/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:44c88d6887930c13f119d5ac5d9efd93094abb40e4255a84aa20844c912d5e44
3
- size 548599104
 
 
 
 
checkpoints/checkpoint-1280/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9d223714e45bc425e11bbcc5a937a01cf97e4f8bbd782e3737caf11063855d0
3
- size 14180
 
 
 
 
checkpoints/checkpoint-1280/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:29eaea96c12c4154ffed8572a07eb967ef6c8db7290858962558445a58bedf6e
3
- size 1064
 
 
 
 
checkpoints/checkpoint-1280/trainer_state.json DELETED
@@ -1,503 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 6.320987654320987,
5
- "eval_steps": 128,
6
- "global_step": 1280,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.1580246913580247,
13
- "grad_norm": 9.916343688964844,
14
- "learning_rate": 1.4851485148514851e-05,
15
- "loss": 13.4879,
16
- "step": 32
17
- },
18
- {
19
- "epoch": 0.3160493827160494,
20
- "grad_norm": 8.410324096679688,
21
- "learning_rate": 3.06930693069307e-05,
22
- "loss": 11.0681,
23
- "step": 64
24
- },
25
- {
26
- "epoch": 0.4740740740740741,
27
- "grad_norm": 7.2831854820251465,
28
- "learning_rate": 4.653465346534654e-05,
29
- "loss": 10.2179,
30
- "step": 96
31
- },
32
- {
33
- "epoch": 0.6320987654320988,
34
- "grad_norm": 5.446238040924072,
35
- "learning_rate": 6.237623762376238e-05,
36
- "loss": 9.8657,
37
- "step": 128
38
- },
39
- {
40
- "epoch": 0.6320987654320988,
41
- "eval_bleu": 0.009206539746414727,
42
- "eval_cap_loss": 4.180750540658539,
43
- "eval_con_loss": 2.05906052682914,
44
- "eval_loss": 8.298871559255263,
45
- "step": 128
46
- },
47
- {
48
- "epoch": 0.6320987654320988,
49
- "eval_bleu": 0.009206539746414727,
50
- "eval_cap_loss": 4.180750540658539,
51
- "eval_con_loss": 2.05906052682914,
52
- "eval_loss": 8.298871559255263,
53
- "eval_runtime": 160.1737,
54
- "eval_samples_per_second": 5.051,
55
- "eval_steps_per_second": 0.637,
56
- "step": 128
57
- },
58
- {
59
- "epoch": 0.7901234567901234,
60
- "grad_norm": 6.679168224334717,
61
- "learning_rate": 7.821782178217822e-05,
62
- "loss": 9.6441,
63
- "step": 160
64
- },
65
- {
66
- "epoch": 0.9481481481481482,
67
- "grad_norm": 3.790262222290039,
68
- "learning_rate": 9.405940594059406e-05,
69
- "loss": 9.5422,
70
- "step": 192
71
- },
72
- {
73
- "epoch": 1.106172839506173,
74
- "grad_norm": 5.2132487297058105,
75
- "learning_rate": 9.99701414469309e-05,
76
- "loss": 9.2999,
77
- "step": 224
78
- },
79
- {
80
- "epoch": 1.2641975308641975,
81
- "grad_norm": 3.9284615516662598,
82
- "learning_rate": 9.979827188241365e-05,
83
- "loss": 9.2528,
84
- "step": 256
85
- },
86
- {
87
- "epoch": 1.2641975308641975,
88
- "eval_bleu": 0.010232611843559726,
89
- "eval_cap_loss": 3.7518067640416763,
90
- "eval_con_loss": 2.0590475727530086,
91
- "eval_loss": 7.869901939934375,
92
- "step": 256
93
- },
94
- {
95
- "epoch": 1.2641975308641975,
96
- "eval_bleu": 0.010232611843559726,
97
- "eval_cap_loss": 3.7518067640416763,
98
- "eval_con_loss": 2.0590475727530086,
99
- "eval_loss": 7.869901939934375,
100
- "eval_runtime": 163.0281,
101
- "eval_samples_per_second": 4.962,
102
- "eval_steps_per_second": 0.626,
103
- "step": 256
104
- },
105
- {
106
- "epoch": 1.4222222222222223,
107
- "grad_norm": 2.924140691757202,
108
- "learning_rate": 9.947416695486633e-05,
109
- "loss": 9.2148,
110
- "step": 288
111
- },
112
- {
113
- "epoch": 1.5802469135802468,
114
- "grad_norm": 2.8188695907592773,
115
- "learning_rate": 9.899881746636785e-05,
116
- "loss": 9.2119,
117
- "step": 320
118
- },
119
- {
120
- "epoch": 1.7382716049382716,
121
- "grad_norm": 2.9822909832000732,
122
- "learning_rate": 9.837367657983356e-05,
123
- "loss": 9.1222,
124
- "step": 352
125
- },
126
- {
127
- "epoch": 1.8962962962962964,
128
- "grad_norm": 2.3413400650024414,
129
- "learning_rate": 9.760065537663649e-05,
130
- "loss": 9.1112,
131
- "step": 384
132
- },
133
- {
134
- "epoch": 1.8962962962962964,
135
- "eval_bleu": 0.014012859200134394,
136
- "eval_cap_loss": 3.5916168689727783,
137
- "eval_con_loss": 2.0590362034591974,
138
- "eval_loss": 7.709689268878862,
139
- "step": 384
140
- },
141
- {
142
- "epoch": 1.8962962962962964,
143
- "eval_bleu": 0.014012859200134394,
144
- "eval_cap_loss": 3.5916168689727783,
145
- "eval_con_loss": 2.0590362034591974,
146
- "eval_loss": 7.709689268878862,
147
- "eval_runtime": 160.6733,
148
- "eval_samples_per_second": 5.035,
149
- "eval_steps_per_second": 0.635,
150
- "step": 384
151
- },
152
- {
153
- "epoch": 2.054320987654321,
154
- "grad_norm": 3.7009811401367188,
155
- "learning_rate": 9.668211701435327e-05,
156
- "loss": 9.0482,
157
- "step": 416
158
- },
159
- {
160
- "epoch": 2.212345679012346,
161
- "grad_norm": 3.00201153755188,
162
- "learning_rate": 9.562086950249409e-05,
163
- "loss": 9.0231,
164
- "step": 448
165
- },
166
- {
167
- "epoch": 2.3703703703703702,
168
- "grad_norm": 2.188750743865967,
169
- "learning_rate": 9.442015711830245e-05,
170
- "loss": 9.0336,
171
- "step": 480
172
- },
173
- {
174
- "epoch": 2.528395061728395,
175
- "grad_norm": 2.7350165843963623,
176
- "learning_rate": 9.308365048886625e-05,
177
- "loss": 9.0336,
178
- "step": 512
179
- },
180
- {
181
- "epoch": 2.528395061728395,
182
- "eval_bleu": 0.019149744160649594,
183
- "eval_cap_loss": 3.5493307885001686,
184
- "eval_con_loss": 2.059034511154773,
185
- "eval_loss": 7.6673998201594635,
186
- "step": 512
187
- },
188
- {
189
- "epoch": 2.528395061728395,
190
- "eval_bleu": 0.019149744160649594,
191
- "eval_cap_loss": 3.5493307885001686,
192
- "eval_con_loss": 2.059034511154773,
193
- "eval_loss": 7.6673998201594635,
194
- "eval_runtime": 161.0817,
195
- "eval_samples_per_second": 5.022,
196
- "eval_steps_per_second": 0.633,
197
- "step": 512
198
- },
199
- {
200
- "epoch": 2.68641975308642,
201
- "grad_norm": 2.4831721782684326,
202
- "learning_rate": 9.161543536985996e-05,
203
- "loss": 8.9656,
204
- "step": 544
205
- },
206
- {
207
- "epoch": 2.8444444444444446,
208
- "grad_norm": 3.178410768508911,
209
- "learning_rate": 9.00200001552218e-05,
210
- "loss": 9.0066,
211
- "step": 576
212
- },
213
- {
214
- "epoch": 3.0024691358024693,
215
- "grad_norm": 2.4883828163146973,
216
- "learning_rate": 8.83022221559489e-05,
217
- "loss": 8.8965,
218
- "step": 608
219
- },
220
- {
221
- "epoch": 3.1604938271604937,
222
- "grad_norm": 3.184849500656128,
223
- "learning_rate": 8.646735268995731e-05,
224
- "loss": 8.8889,
225
- "step": 640
226
- },
227
- {
228
- "epoch": 3.1604938271604937,
229
- "eval_bleu": 0.016690347492961013,
230
- "eval_cap_loss": 3.445917959306754,
231
- "eval_con_loss": 2.059011185870451,
232
- "eval_loss": 7.563940347409716,
233
- "step": 640
234
- },
235
- {
236
- "epoch": 3.1604938271604937,
237
- "eval_bleu": 0.016690347492961013,
238
- "eval_cap_loss": 3.445917959306754,
239
- "eval_con_loss": 2.059011185870451,
240
- "eval_loss": 7.563940347409716,
241
- "eval_runtime": 160.8496,
242
- "eval_samples_per_second": 5.03,
243
- "eval_steps_per_second": 0.634,
244
- "step": 640
245
- },
246
- {
247
- "epoch": 3.3185185185185184,
248
- "grad_norm": 2.856328248977661,
249
- "learning_rate": 8.452100102858734e-05,
250
- "loss": 8.8877,
251
- "step": 672
252
- },
253
- {
254
- "epoch": 3.476543209876543,
255
- "grad_norm": 3.4148852825164795,
256
- "learning_rate": 8.246911724883068e-05,
257
- "loss": 8.8006,
258
- "step": 704
259
- },
260
- {
261
- "epoch": 3.634567901234568,
262
- "grad_norm": 3.2651753425598145,
263
- "learning_rate": 8.031797404370057e-05,
264
- "loss": 8.8842,
265
- "step": 736
266
- },
267
- {
268
- "epoch": 3.7925925925925927,
269
- "grad_norm": 3.55135440826416,
270
- "learning_rate": 7.807414754635145e-05,
271
- "loss": 8.8534,
272
- "step": 768
273
- },
274
- {
275
- "epoch": 3.7925925925925927,
276
- "eval_bleu": 0.018187566983297536,
277
- "eval_cap_loss": 3.4394626383687936,
278
- "eval_con_loss": 2.0590050898346246,
279
- "eval_loss": 7.557472799338546,
280
- "step": 768
281
- },
282
- {
283
- "epoch": 3.7925925925925927,
284
- "eval_bleu": 0.018187566983297536,
285
- "eval_cap_loss": 3.4394626383687936,
286
- "eval_con_loss": 2.0590050898346246,
287
- "eval_loss": 7.557472799338546,
288
- "eval_runtime": 164.2478,
289
- "eval_samples_per_second": 4.925,
290
- "eval_steps_per_second": 0.621,
291
- "step": 768
292
- },
293
- {
294
- "epoch": 3.950617283950617,
295
- "grad_norm": 2.6633050441741943,
296
- "learning_rate": 7.574449722656991e-05,
297
- "loss": 8.8075,
298
- "step": 800
299
- },
300
- {
301
- "epoch": 4.108641975308642,
302
- "grad_norm": 2.6695327758789062,
303
- "learning_rate": 7.333614492109364e-05,
304
- "loss": 8.7768,
305
- "step": 832
306
- },
307
- {
308
- "epoch": 4.266666666666667,
309
- "grad_norm": 2.1547067165374756,
310
- "learning_rate": 7.08564530618639e-05,
311
- "loss": 8.7896,
312
- "step": 864
313
- },
314
- {
315
- "epoch": 4.424691358024692,
316
- "grad_norm": 2.484839916229248,
317
- "learning_rate": 6.831300216876873e-05,
318
- "loss": 8.7546,
319
- "step": 896
320
- },
321
- {
322
- "epoch": 4.424691358024692,
323
- "eval_bleu": 0.017297916814474356,
324
- "eval_cap_loss": 3.4138665620018456,
325
- "eval_con_loss": 2.058998935362872,
326
- "eval_loss": 7.531864435065026,
327
- "step": 896
328
- },
329
- {
330
- "epoch": 4.424691358024692,
331
- "eval_bleu": 0.017297916814474356,
332
- "eval_cap_loss": 3.4138665620018456,
333
- "eval_con_loss": 2.058998935362872,
334
- "eval_loss": 7.531864435065026,
335
- "eval_runtime": 163.539,
336
- "eval_samples_per_second": 4.947,
337
- "eval_steps_per_second": 0.624,
338
- "step": 896
339
- },
340
- {
341
- "epoch": 4.582716049382716,
342
- "grad_norm": 2.8547232151031494,
343
- "learning_rate": 6.571356767568207e-05,
344
- "loss": 8.7195,
345
- "step": 928
346
- },
347
- {
348
- "epoch": 4.7407407407407405,
349
- "grad_norm": 3.8819704055786133,
350
- "learning_rate": 6.306609616064304e-05,
351
- "loss": 8.7259,
352
- "step": 960
353
- },
354
- {
355
- "epoch": 4.898765432098766,
356
- "grad_norm": 3.2503316402435303,
357
- "learning_rate": 6.037868105284045e-05,
358
- "loss": 8.7204,
359
- "step": 992
360
- },
361
- {
362
- "epoch": 5.05679012345679,
363
- "grad_norm": 2.2758522033691406,
364
- "learning_rate": 5.7659537890667145e-05,
365
- "loss": 8.717,
366
- "step": 1024
367
- },
368
- {
369
- "epoch": 5.05679012345679,
370
- "eval_bleu": 0.0211266020043842,
371
- "eval_cap_loss": 3.340357939402262,
372
- "eval_con_loss": 2.059003666335461,
373
- "eval_loss": 7.45836528376037,
374
- "step": 1024
375
- },
376
- {
377
- "epoch": 5.05679012345679,
378
- "eval_bleu": 0.0211266020043842,
379
- "eval_cap_loss": 3.340357939402262,
380
- "eval_con_loss": 2.059003666335461,
381
- "eval_loss": 7.45836528376037,
382
- "eval_runtime": 164.548,
383
- "eval_samples_per_second": 4.916,
384
- "eval_steps_per_second": 0.62,
385
- "step": 1024
386
- },
387
- {
388
- "epoch": 5.214814814814815,
389
- "grad_norm": 2.7144651412963867,
390
- "learning_rate": 5.491697920648174e-05,
391
- "loss": 8.6562,
392
- "step": 1056
393
- },
394
- {
395
- "epoch": 5.37283950617284,
396
- "grad_norm": 2.6319425106048584,
397
- "learning_rate": 5.2159389114855585e-05,
398
- "loss": 8.667,
399
- "step": 1088
400
- },
401
- {
402
- "epoch": 5.530864197530864,
403
- "grad_norm": 3.410334348678589,
404
- "learning_rate": 4.939519768199012e-05,
405
- "loss": 8.6587,
406
- "step": 1120
407
- },
408
- {
409
- "epoch": 5.688888888888889,
410
- "grad_norm": 4.210638523101807,
411
- "learning_rate": 4.663285515465818e-05,
412
- "loss": 8.6541,
413
- "step": 1152
414
- },
415
- {
416
- "epoch": 5.688888888888889,
417
- "eval_bleu": 0.02044929088069335,
418
- "eval_cap_loss": 3.2661077111375096,
419
- "eval_con_loss": 2.059012539246503,
420
- "eval_loss": 7.384132796642827,
421
- "step": 1152
422
- },
423
- {
424
- "epoch": 5.688888888888889,
425
- "eval_bleu": 0.02044929088069335,
426
- "eval_cap_loss": 3.2661077111375096,
427
- "eval_con_loss": 2.059012539246503,
428
- "eval_loss": 7.384132796642827,
429
- "eval_runtime": 163.0673,
430
- "eval_samples_per_second": 4.961,
431
- "eval_steps_per_second": 0.626,
432
- "step": 1152
433
- },
434
- {
435
- "epoch": 5.846913580246913,
436
- "grad_norm": 4.617128372192383,
437
- "learning_rate": 4.388080612745244e-05,
438
- "loss": 8.6481,
439
- "step": 1184
440
- },
441
- {
442
- "epoch": 6.004938271604939,
443
- "grad_norm": 2.4542462825775146,
444
- "learning_rate": 4.114746372731275e-05,
445
- "loss": 8.6291,
446
- "step": 1216
447
- },
448
- {
449
- "epoch": 6.162962962962963,
450
- "grad_norm": 4.868436813354492,
451
- "learning_rate": 3.844118389425153e-05,
452
- "loss": 8.5827,
453
- "step": 1248
454
- },
455
- {
456
- "epoch": 6.320987654320987,
457
- "grad_norm": 3.225403308868408,
458
- "learning_rate": 3.577023983690177e-05,
459
- "loss": 8.5461,
460
- "step": 1280
461
- },
462
- {
463
- "epoch": 6.320987654320987,
464
- "eval_bleu": 0.023454420732711045,
465
- "eval_cap_loss": 3.2792342223373114,
466
- "eval_con_loss": 2.0589846162235035,
467
- "eval_loss": 7.397203454784319,
468
- "step": 1280
469
- },
470
- {
471
- "epoch": 6.320987654320987,
472
- "eval_bleu": 0.023454420732711045,
473
- "eval_cap_loss": 3.2792342223373114,
474
- "eval_con_loss": 2.0589846162235035,
475
- "eval_loss": 7.397203454784319,
476
- "eval_runtime": 161.0376,
477
- "eval_samples_per_second": 5.024,
478
- "eval_steps_per_second": 0.633,
479
- "step": 1280
480
- }
481
- ],
482
- "logging_steps": 32,
483
- "max_steps": 2020,
484
- "num_input_tokens_seen": 0,
485
- "num_train_epochs": 10,
486
- "save_steps": 128,
487
- "stateful_callbacks": {
488
- "TrainerControl": {
489
- "args": {
490
- "should_epoch_stop": false,
491
- "should_evaluate": false,
492
- "should_log": false,
493
- "should_save": true,
494
- "should_training_stop": false
495
- },
496
- "attributes": {}
497
- }
498
- },
499
- "total_flos": 0.0,
500
- "train_batch_size": 16,
501
- "trial_name": null,
502
- "trial_params": null
503
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoints/checkpoint-1280/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee0fb7ff03fa7d579a0122f63c8133057dbe8dded973c9246203fc477a16730e
3
- size 5112
 
 
 
 
checkpoints/checkpoint-202/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dffee69756e606b8b3871e102c92751a40f17cbd2e6e307e668c6d6967a32c6
3
- size 5821173932
 
 
 
 
checkpoints/checkpoint-202/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bc77f1f9f675b5a7d79bee225ca0f8410101081232d6ece7d43854ab8b66d84
3
- size 312119520
 
 
 
 
checkpoints/checkpoint-202/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0cff64e8412933e783886fbdffd3f6efbcf0ae4d2d1512c2e684b0f3d664dd3
3
- size 14244
 
 
 
 
checkpoints/checkpoint-202/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:17ee40d21180a1ccc4e69d8fdf2bfff2f3c4b3a31fe3c79203430eab430365bf
3
- size 1064
 
 
 
 
checkpoints/checkpoint-202/trainer_state.json DELETED
@@ -1,132 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.9975308641975309,
5
- "eval_steps": 64,
6
- "global_step": 202,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.1580246913580247,
13
- "grad_norm": 3.374070882797241,
14
- "learning_rate": 2.972743532698138e-05,
15
- "loss": 12.1032,
16
- "step": 32
17
- },
18
- {
19
- "epoch": 0.3160493827160494,
20
- "grad_norm": 3.2159523963928223,
21
- "learning_rate": 2.601262828482597e-05,
22
- "loss": 11.7808,
23
- "step": 64
24
- },
25
- {
26
- "epoch": 0.3160493827160494,
27
- "eval_bleu": 0.0,
28
- "eval_cap_loss": 8.910892418452672,
29
- "eval_con_loss": 1.3794510922408456,
30
- "eval_loss": 10.290343505408377,
31
- "step": 64
32
- },
33
- {
34
- "epoch": 0.3160493827160494,
35
- "eval_bleu": 0.0,
36
- "eval_cap_loss": 8.910892418452672,
37
- "eval_con_loss": 1.3794510922408456,
38
- "eval_loss": 10.290343505408377,
39
- "eval_runtime": 220.4456,
40
- "eval_samples_per_second": 3.67,
41
- "eval_steps_per_second": 0.921,
42
- "step": 64
43
- },
44
- {
45
- "epoch": 0.4740740740740741,
46
- "grad_norm": 2.8607146739959717,
47
- "learning_rate": 1.8986967266497293e-05,
48
- "loss": 11.5303,
49
- "step": 96
50
- },
51
- {
52
- "epoch": 0.6320987654320988,
53
- "grad_norm": 2.915891408920288,
54
- "learning_rate": 1.0762658106621542e-05,
55
- "loss": 11.4537,
56
- "step": 128
57
- },
58
- {
59
- "epoch": 0.6320987654320988,
60
- "eval_bleu": 0.0,
61
- "eval_cap_loss": 8.568840851337452,
62
- "eval_con_loss": 1.379400504046473,
63
- "eval_loss": 9.94824136771592,
64
- "step": 128
65
- },
66
- {
67
- "epoch": 0.6320987654320988,
68
- "eval_bleu": 0.0,
69
- "eval_cap_loss": 8.568840851337452,
70
- "eval_con_loss": 1.379400504046473,
71
- "eval_loss": 9.94824136771592,
72
- "eval_runtime": 220.8837,
73
- "eval_samples_per_second": 3.663,
74
- "eval_steps_per_second": 0.919,
75
- "step": 128
76
- },
77
- {
78
- "epoch": 0.7901234567901234,
79
- "grad_norm": 3.0462822914123535,
80
- "learning_rate": 3.812270111907451e-06,
81
- "loss": 11.2808,
82
- "step": 160
83
- },
84
- {
85
- "epoch": 0.9481481481481482,
86
- "grad_norm": 2.9391884803771973,
87
- "learning_rate": 2.2537891617109508e-07,
88
- "loss": 11.3221,
89
- "step": 192
90
- },
91
- {
92
- "epoch": 0.9481481481481482,
93
- "eval_bleu": 0.0,
94
- "eval_cap_loss": 8.536650624768487,
95
- "eval_con_loss": 1.379456711520115,
96
- "eval_loss": 9.916107299879855,
97
- "step": 192
98
- },
99
- {
100
- "epoch": 0.9481481481481482,
101
- "eval_bleu": 0.0,
102
- "eval_cap_loss": 8.536650624768487,
103
- "eval_con_loss": 1.379456711520115,
104
- "eval_loss": 9.916107299879855,
105
- "eval_runtime": 220.5088,
106
- "eval_samples_per_second": 3.669,
107
- "eval_steps_per_second": 0.921,
108
- "step": 192
109
- }
110
- ],
111
- "logging_steps": 32,
112
- "max_steps": 202,
113
- "num_input_tokens_seen": 0,
114
- "num_train_epochs": 1,
115
- "save_steps": 64,
116
- "stateful_callbacks": {
117
- "TrainerControl": {
118
- "args": {
119
- "should_epoch_stop": false,
120
- "should_evaluate": false,
121
- "should_log": false,
122
- "should_save": true,
123
- "should_training_stop": true
124
- },
125
- "attributes": {}
126
- }
127
- },
128
- "total_flos": 0.0,
129
- "train_batch_size": 16,
130
- "trial_name": null,
131
- "trial_params": null
132
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoints/checkpoint-202/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd2b23c0025977f304dc794f158c7bad81d7c588ae408e484fc9c79f6fec2528
3
- size 5112
 
 
 
 
checkpoints/checkpoint-606-2/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:30b715a98d0790402e95d413d3b3d080f453af0c8f70b74f5171a5e6a5a39f15
3
- size 2297612372
 
 
 
 
checkpoints/checkpoint-606-2/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ebde8deb09d4e8e6e449042dcefc4cc0997334c8ac8fa04c8acdcd6c76c089e
3
- size 178998372
 
 
 
 
checkpoints/checkpoint-606-2/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:51d23a921626f1fecc8b752c0dc40ad68da4137994e71ad7c66137caf507a3e6
3
- size 14180
 
 
 
 
checkpoints/checkpoint-606-2/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e51415869b2f0df9eed69859df5822396e1c56ea421d89bb22b2c580ba0e2803
3
- size 1064
 
 
 
 
checkpoints/checkpoint-606-2/trainer_state.json DELETED
@@ -1,235 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.9925925925925925,
5
- "eval_steps": 128,
6
- "global_step": 606,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.1580246913580247,
13
- "grad_norm": 3.291957378387451,
14
- "learning_rate": 4.918032786885246e-06,
15
- "loss": 9.3959,
16
- "step": 32
17
- },
18
- {
19
- "epoch": 0.3160493827160494,
20
- "grad_norm": 4.1592698097229,
21
- "learning_rate": 9.999916929744365e-06,
22
- "loss": 9.151,
23
- "step": 64
24
- },
25
- {
26
- "epoch": 0.4740740740740741,
27
- "grad_norm": 5.414369583129883,
28
- "learning_rate": 9.909808702018315e-06,
29
- "loss": 9.1311,
30
- "step": 96
31
- },
32
- {
33
- "epoch": 0.6320987654320988,
34
- "grad_norm": 4.628862380981445,
35
- "learning_rate": 9.653114094889128e-06,
36
- "loss": 9.1676,
37
- "step": 128
38
- },
39
- {
40
- "epoch": 0.6320987654320988,
41
- "eval_bleu": 0.018568904197283402,
42
- "eval_cap_loss": 3.457257219389373,
43
- "eval_con_loss": 2.059046186652838,
44
- "eval_loss": 8.97356055764591,
45
- "step": 128
46
- },
47
- {
48
- "epoch": 0.6320987654320988,
49
- "eval_bleu": 0.018568904197283402,
50
- "eval_cap_loss": 3.457257219389373,
51
- "eval_con_loss": 2.059046186652838,
52
- "eval_loss": 8.97356055764591,
53
- "eval_runtime": 161.0868,
54
- "eval_samples_per_second": 5.022,
55
- "eval_steps_per_second": 0.633,
56
- "step": 128
57
- },
58
- {
59
- "epoch": 0.7901234567901234,
60
- "grad_norm": 6.027233123779297,
61
- "learning_rate": 9.253761031990218e-06,
62
- "loss": 9.1153,
63
- "step": 160
64
- },
65
- {
66
- "epoch": 0.9481481481481482,
67
- "grad_norm": 6.049582004547119,
68
- "learning_rate": 8.699609944263219e-06,
69
- "loss": 9.1281,
70
- "step": 192
71
- },
72
- {
73
- "epoch": 1.106172839506173,
74
- "grad_norm": 4.76241397857666,
75
- "learning_rate": 8.019933675572389e-06,
76
- "loss": 9.021,
77
- "step": 224
78
- },
79
- {
80
- "epoch": 1.2641975308641975,
81
- "grad_norm": 4.239706516265869,
82
- "learning_rate": 7.2377931669113934e-06,
83
- "loss": 9.0272,
84
- "step": 256
85
- },
86
- {
87
- "epoch": 1.2641975308641975,
88
- "eval_bleu": 0.016148135533212146,
89
- "eval_cap_loss": 3.4138525163426117,
90
- "eval_con_loss": 2.059033068956113,
91
- "eval_loss": 8.886738094629026,
92
- "step": 256
93
- },
94
- {
95
- "epoch": 1.2641975308641975,
96
- "eval_bleu": 0.016148135533212146,
97
- "eval_cap_loss": 3.4138525163426117,
98
- "eval_con_loss": 2.059033068956113,
99
- "eval_loss": 8.886738094629026,
100
- "eval_runtime": 161.0562,
101
- "eval_samples_per_second": 5.023,
102
- "eval_steps_per_second": 0.633,
103
- "step": 256
104
- },
105
- {
106
- "epoch": 1.4222222222222223,
107
- "grad_norm": 5.311102867126465,
108
- "learning_rate": 6.379725899357408e-06,
109
- "loss": 9.0054,
110
- "step": 288
111
- },
112
- {
113
- "epoch": 1.5802469135802468,
114
- "grad_norm": 5.357556343078613,
115
- "learning_rate": 5.474845495876518e-06,
116
- "loss": 9.1177,
117
- "step": 320
118
- },
119
- {
120
- "epoch": 1.7382716049382716,
121
- "grad_norm": 5.401686668395996,
122
- "learning_rate": 4.553853916434448e-06,
123
- "loss": 9.0223,
124
- "step": 352
125
- },
126
- {
127
- "epoch": 1.8962962962962964,
128
- "grad_norm": 6.359274387359619,
129
- "learning_rate": 3.6479997619424605e-06,
130
- "loss": 9.083,
131
- "step": 384
132
- },
133
- {
134
- "epoch": 1.8962962962962964,
135
- "eval_bleu": 0.017365712049326326,
136
- "eval_cap_loss": 3.416674846527623,
137
- "eval_con_loss": 2.0590534537446263,
138
- "eval_loss": 8.892403146799873,
139
- "step": 384
140
- },
141
- {
142
- "epoch": 1.8962962962962964,
143
- "eval_bleu": 0.017365712049326326,
144
- "eval_cap_loss": 3.416674846527623,
145
- "eval_con_loss": 2.0590534537446263,
146
- "eval_loss": 8.892403146799873,
147
- "eval_runtime": 159.3342,
148
- "eval_samples_per_second": 5.077,
149
- "eval_steps_per_second": 0.64,
150
- "step": 384
151
- },
152
- {
153
- "epoch": 2.054320987654321,
154
- "grad_norm": 3.601047992706299,
155
- "learning_rate": 2.7880180310578546e-06,
156
- "loss": 9.1251,
157
- "step": 416
158
- },
159
- {
160
- "epoch": 2.212345679012346,
161
- "grad_norm": 4.511282920837402,
162
- "learning_rate": 2.0030873031501274e-06,
163
- "loss": 9.1476,
164
- "step": 448
165
- },
166
- {
167
- "epoch": 2.3703703703703702,
168
- "grad_norm": 3.5873196125030518,
169
- "learning_rate": 1.3198397294863285e-06,
170
- "loss": 9.2698,
171
- "step": 480
172
- },
173
- {
174
- "epoch": 2.528395061728395,
175
- "grad_norm": 2.907109498977661,
176
- "learning_rate": 7.614574229430432e-07,
177
- "loss": 9.3526,
178
- "step": 512
179
- },
180
- {
181
- "epoch": 2.528395061728395,
182
- "eval_bleu": 0.018224741893114085,
183
- "eval_cap_loss": 3.4440931338889924,
184
- "eval_con_loss": 2.059036243195627,
185
- "eval_loss": 8.947222527335672,
186
- "step": 512
187
- },
188
- {
189
- "epoch": 2.528395061728395,
190
- "eval_bleu": 0.018224741893114085,
191
- "eval_cap_loss": 3.4440931338889924,
192
- "eval_con_loss": 2.059036243195627,
193
- "eval_loss": 8.947222527335672,
194
- "eval_runtime": 159.1467,
195
- "eval_samples_per_second": 5.083,
196
- "eval_steps_per_second": 0.641,
197
- "step": 512
198
- },
199
- {
200
- "epoch": 2.68641975308642,
201
- "grad_norm": 5.598337650299072,
202
- "learning_rate": 3.4688590511087304e-07,
203
- "loss": 9.3263,
204
- "step": 544
205
- },
206
- {
207
- "epoch": 2.8444444444444446,
208
- "grad_norm": 5.659440040588379,
209
- "learning_rate": 9.019129798168658e-08,
210
- "loss": 9.4575,
211
- "step": 576
212
- }
213
- ],
214
- "logging_steps": 32,
215
- "max_steps": 606,
216
- "num_input_tokens_seen": 0,
217
- "num_train_epochs": 3,
218
- "save_steps": 128,
219
- "stateful_callbacks": {
220
- "TrainerControl": {
221
- "args": {
222
- "should_epoch_stop": false,
223
- "should_evaluate": false,
224
- "should_log": false,
225
- "should_save": true,
226
- "should_training_stop": true
227
- },
228
- "attributes": {}
229
- }
230
- },
231
- "total_flos": 0.0,
232
- "train_batch_size": 16,
233
- "trial_name": null,
234
- "trial_params": null
235
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoints/checkpoint-606-2/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c7cc8cc74cd8f844a12e1e98a8984236a6126dfae3836b014d3e373369f69d7
3
- size 5112
 
 
 
 
checkpoints/checkpoint-606/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dab7fa42fa66d5815df8b7bbb5b720fa77fdbc70f25393dc810583332dfc3da
3
- size 2297612372
 
 
 
 
checkpoints/checkpoint-606/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:95bad9522fdb87255cb62c18caec57e7911e2ff092e39c135fe2882ebcf7bbd6
3
- size 178998372
 
 
 
 
checkpoints/checkpoint-606/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd68f9468bdece9db631b8983960a037ce75a3b363c645663d54244d569fdce3
3
- size 14180
 
 
 
 
checkpoints/checkpoint-606/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa2b7f16f016bea816df5df16d92c79d6f816d8a4d91bb613af70a9f91f3326d
3
- size 1064
 
 
 
 
checkpoints/checkpoint-606/trainer_state.json DELETED
@@ -1,235 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.996291718170581,
5
- "eval_steps": 128,
6
- "global_step": 606,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.15822002472187885,
13
- "grad_norm": 11.1969633102417,
14
- "learning_rate": 0.00015737704918032785,
15
- "loss": 14.339,
16
- "step": 32
17
- },
18
- {
19
- "epoch": 0.3164400494437577,
20
- "grad_norm": 12.77004337310791,
21
- "learning_rate": 0.00029997757152782376,
22
- "loss": 10.8537,
23
- "step": 64
24
- },
25
- {
26
- "epoch": 0.4746600741656366,
27
- "grad_norm": 8.878470420837402,
28
- "learning_rate": 0.0002969575009832261,
29
- "loss": 10.1762,
30
- "step": 96
31
- },
32
- {
33
- "epoch": 0.6328800988875154,
34
- "grad_norm": 7.734447479248047,
35
- "learning_rate": 0.00028895126509070673,
36
- "loss": 9.9993,
37
- "step": 128
38
- },
39
- {
40
- "epoch": 0.6328800988875154,
41
- "eval_bleu": 0.009866227706267426,
42
- "eval_cap_loss": 3.8397970962994203,
43
- "eval_con_loss": 1.3794401672673342,
44
- "eval_loss": 9.059034354581033,
45
- "step": 128
46
- },
47
- {
48
- "epoch": 0.6328800988875154,
49
- "eval_bleu": 0.009866227706267426,
50
- "eval_cap_loss": 3.8397970962994203,
51
- "eval_con_loss": 1.3794401672673342,
52
- "eval_loss": 9.059034354581033,
53
- "eval_runtime": 165.4257,
54
- "eval_samples_per_second": 4.89,
55
- "eval_steps_per_second": 1.227,
56
- "step": 128
57
- },
58
- {
59
- "epoch": 0.7911001236093943,
60
- "grad_norm": 5.295111179351807,
61
- "learning_rate": 0.0002766954985019261,
62
- "loss": 9.7893,
63
- "step": 160
64
- },
65
- {
66
- "epoch": 0.9493201483312732,
67
- "grad_norm": 6.028670310974121,
68
- "learning_rate": 0.0002598176493606703,
69
- "loss": 9.7303,
70
- "step": 192
71
- },
72
- {
73
- "epoch": 1.107540173053152,
74
- "grad_norm": 6.170614719390869,
75
- "learning_rate": 0.00023921376409217457,
76
- "loss": 9.3777,
77
- "step": 224
78
- },
79
- {
80
- "epoch": 1.2657601977750308,
81
- "grad_norm": 8.26496410369873,
82
- "learning_rate": 0.00021558291813029553,
83
- "loss": 9.2487,
84
- "step": 256
85
- },
86
- {
87
- "epoch": 1.2657601977750308,
88
- "eval_bleu": 0.012158325589574045,
89
- "eval_cap_loss": 3.663852495512939,
90
- "eval_con_loss": 1.3794238802247447,
91
- "eval_loss": 8.70712886068034,
92
- "step": 256
93
- },
94
- {
95
- "epoch": 1.2657601977750308,
96
- "eval_bleu": 0.012158325589574045,
97
- "eval_cap_loss": 3.663852495512939,
98
- "eval_con_loss": 1.3794238802247447,
99
- "eval_loss": 8.70712886068034,
100
- "eval_runtime": 165.1008,
101
- "eval_samples_per_second": 4.9,
102
- "eval_steps_per_second": 1.23,
103
- "step": 256
104
- },
105
- {
106
- "epoch": 1.4239802224969098,
107
- "grad_norm": 5.113515377044678,
108
- "learning_rate": 0.00018972688957066537,
109
- "loss": 9.2305,
110
- "step": 288
111
- },
112
- {
113
- "epoch": 1.5822002472187886,
114
- "grad_norm": 4.397431373596191,
115
- "learning_rate": 0.00016252295540000706,
116
- "loss": 9.2278,
117
- "step": 320
118
- },
119
- {
120
- "epoch": 1.7404202719406676,
121
- "grad_norm": 4.647693157196045,
122
- "learning_rate": 0.00013489412610081624,
123
- "loss": 9.0332,
124
- "step": 352
125
- },
126
- {
127
- "epoch": 1.8986402966625464,
128
- "grad_norm": 6.768193244934082,
129
- "learning_rate": 0.00010777782855047967,
130
- "loss": 9.0303,
131
- "step": 384
132
- },
133
- {
134
- "epoch": 1.8986402966625464,
135
- "eval_bleu": 0.015940976494587023,
136
- "eval_cap_loss": 3.5265488871212662,
137
- "eval_con_loss": 1.3794590686929638,
138
- "eval_loss": 8.432556859378156,
139
- "step": 384
140
- },
141
- {
142
- "epoch": 1.8986402966625464,
143
- "eval_bleu": 0.015940976494587023,
144
- "eval_cap_loss": 3.5265488871212662,
145
- "eval_con_loss": 1.3794590686929638,
146
- "eval_loss": 8.432556859378156,
147
- "eval_runtime": 164.9408,
148
- "eval_samples_per_second": 4.905,
149
- "eval_steps_per_second": 1.231,
150
- "step": 384
151
- },
152
- {
153
- "epoch": 2.056860321384425,
154
- "grad_norm": 3.732090473175049,
155
- "learning_rate": 8.286620499265821e-05,
156
- "loss": 8.9538,
157
- "step": 416
158
- },
159
- {
160
- "epoch": 2.215080346106304,
161
- "grad_norm": 4.6480584144592285,
162
- "learning_rate": 5.9401989732828384e-05,
163
- "loss": 8.8722,
164
- "step": 448
165
- },
166
- {
167
- "epoch": 2.373300370828183,
168
- "grad_norm": 5.697710037231445,
169
- "learning_rate": 3.901170167210344e-05,
170
- "loss": 8.8853,
171
- "step": 480
172
- },
173
- {
174
- "epoch": 2.5315203955500616,
175
- "grad_norm": 3.8718154430389404,
176
- "learning_rate": 2.238716904029349e-05,
177
- "loss": 8.8722,
178
- "step": 512
179
- },
180
- {
181
- "epoch": 2.5315203955500616,
182
- "eval_bleu": 0.022739422488822453,
183
- "eval_cap_loss": 3.4747910476083237,
184
- "eval_con_loss": 1.37942426369108,
185
- "eval_loss": 8.329006359494965,
186
- "step": 512
187
- },
188
- {
189
- "epoch": 2.5315203955500616,
190
- "eval_bleu": 0.022739422488822453,
191
- "eval_cap_loss": 3.4747910476083237,
192
- "eval_con_loss": 1.37942426369108,
193
- "eval_loss": 8.329006359494965,
194
- "eval_runtime": 163.9894,
195
- "eval_samples_per_second": 4.933,
196
- "eval_steps_per_second": 1.238,
197
- "step": 512
198
- },
199
- {
200
- "epoch": 2.689740420271941,
201
- "grad_norm": 4.082241535186768,
202
- "learning_rate": 1.0092450616157804e-05,
203
- "loss": 8.7704,
204
- "step": 544
205
- },
206
- {
207
- "epoch": 2.8479604449938196,
208
- "grad_norm": 3.609088897705078,
209
- "learning_rate": 2.544697607077684e-06,
210
- "loss": 8.8435,
211
- "step": 576
212
- }
213
- ],
214
- "logging_steps": 32,
215
- "max_steps": 606,
216
- "num_input_tokens_seen": 0,
217
- "num_train_epochs": 3,
218
- "save_steps": 128,
219
- "stateful_callbacks": {
220
- "TrainerControl": {
221
- "args": {
222
- "should_epoch_stop": false,
223
- "should_evaluate": false,
224
- "should_log": false,
225
- "should_save": true,
226
- "should_training_stop": true
227
- },
228
- "attributes": {}
229
- }
230
- },
231
- "total_flos": 0.0,
232
- "train_batch_size": 8,
233
- "trial_name": null,
234
- "trial_params": null
235
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoints/checkpoint-606/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c438a6e15f14bb991774d44e2f18eed0b82a51ff2a702113d87a3659795d03d1
3
- size 5112