shorecode commited on
Commit
4744ff8
·
verified ·
1 Parent(s): 891dc8a

Upload folder using huggingface_hub

Browse files
checkpoint-latest/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca78270c1eb66602a5b2542766d42a3fc8e325de3b96fce543a19e8e8bef7ca7
3
  size 62293080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03190bb45309bbfedccad7959d6dd1878e0a3a2c49476630690e4b0b715ba571
3
  size 62293080
checkpoint-latest/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8e440d79ef49bcf712a1d5de898513d16e87c6d8ce5161401d087749466aa41
3
  size 124642443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08db5c721a857b9e376429cfbe60e90ff49062398fa0ce0ed683edf407f96d3c
3
  size 124642443
checkpoint-latest/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5484747db8664edead11a8224a290c2af74f43eba04cf19c64277c5316deb36e
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b182b3dedf0ef16e42c7c650dc1dabae1c16baddbc2124f22d1cc0600b2e9222
3
  size 14645
checkpoint-latest/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47578247ee117cf4bc0a0c2af4d329dc3c61e90f1c21bdab88f7e138719d3e25
3
+ size 1383
checkpoint-latest/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1017972ceaff444cd6ce76f4cbc6776ac15954ae5dab86e347a964413d5e9fd6
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6a5728e17488e0af5b9594b7a742e2aa414b404c43f45b9b8c470ef12744e2b
3
  size 1465
checkpoint-latest/trainer_state.json CHANGED
@@ -2,516 +2,84 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.2317437972901637,
6
  "eval_steps": 500,
7
- "global_step": 7000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.021995424951610065,
14
- "grad_norm": 0.7161325216293335,
15
  "learning_rate": 0.00029781805384480024,
16
- "loss": 3.7283,
17
  "step": 125
18
  },
19
  {
20
  "epoch": 0.04399084990322013,
21
- "grad_norm": 0.7104535102844238,
22
  "learning_rate": 0.00029561851134963927,
23
- "loss": 3.6892,
24
  "step": 250
25
  },
26
  {
27
  "epoch": 0.06598627485483019,
28
- "grad_norm": 0.6470844149589539,
29
  "learning_rate": 0.00029341896885447824,
30
- "loss": 3.6564,
31
  "step": 375
32
  },
33
  {
34
  "epoch": 0.08798169980644026,
35
- "grad_norm": 0.8748793601989746,
36
  "learning_rate": 0.0002912194263593172,
37
- "loss": 3.6363,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 0.08798169980644026,
42
- "eval_loss": 3.00447678565979,
43
- "eval_runtime": 85.5252,
44
- "eval_samples_per_second": 115.171,
45
- "eval_steps_per_second": 4.431,
46
  "step": 500
47
  },
48
  {
49
  "epoch": 0.10997712475805033,
50
- "grad_norm": 0.794264554977417,
51
  "learning_rate": 0.00028901988386415624,
52
- "loss": 3.5804,
53
  "step": 625
54
  },
55
  {
56
  "epoch": 0.13197254970966038,
57
- "grad_norm": 0.7026092410087585,
58
  "learning_rate": 0.0002868203413689952,
59
- "loss": 3.5513,
60
  "step": 750
61
  },
62
  {
63
  "epoch": 0.15396797466127046,
64
- "grad_norm": 0.7747848629951477,
65
  "learning_rate": 0.00028462079887383424,
66
- "loss": 3.5586,
67
  "step": 875
68
  },
69
  {
70
  "epoch": 0.17596339961288052,
71
- "grad_norm": 0.8074367642402649,
72
  "learning_rate": 0.0002824212563786732,
73
- "loss": 3.5231,
74
  "step": 1000
75
  },
76
  {
77
  "epoch": 0.17596339961288052,
78
- "eval_loss": 2.9320695400238037,
79
- "eval_runtime": 85.7152,
80
- "eval_samples_per_second": 114.915,
81
- "eval_steps_per_second": 4.422,
82
  "step": 1000
83
- },
84
- {
85
- "epoch": 0.1979588245644906,
86
- "grad_norm": 0.8066490292549133,
87
- "learning_rate": 0.0002802217138835122,
88
- "loss": 3.5005,
89
- "step": 1125
90
- },
91
- {
92
- "epoch": 0.21995424951610065,
93
- "grad_norm": 0.7767718434333801,
94
- "learning_rate": 0.0002780221713883512,
95
- "loss": 3.4749,
96
- "step": 1250
97
- },
98
- {
99
- "epoch": 0.2419496744677107,
100
- "grad_norm": 0.7831096053123474,
101
- "learning_rate": 0.0002758226288931902,
102
- "loss": 3.4609,
103
- "step": 1375
104
- },
105
- {
106
- "epoch": 0.26394509941932076,
107
- "grad_norm": 0.9593456387519836,
108
- "learning_rate": 0.0002736230863980292,
109
- "loss": 3.4383,
110
- "step": 1500
111
- },
112
- {
113
- "epoch": 0.26394509941932076,
114
- "eval_loss": 2.885742664337158,
115
- "eval_runtime": 91.283,
116
- "eval_samples_per_second": 107.906,
117
- "eval_steps_per_second": 4.152,
118
- "step": 1500
119
- },
120
- {
121
- "epoch": 0.28594052437093087,
122
- "grad_norm": 0.7641668915748596,
123
- "learning_rate": 0.0002714235439028682,
124
- "loss": 3.4404,
125
- "step": 1625
126
- },
127
- {
128
- "epoch": 0.3079359493225409,
129
- "grad_norm": 0.71437668800354,
130
- "learning_rate": 0.00026922400140770716,
131
- "loss": 3.4147,
132
- "step": 1750
133
- },
134
- {
135
- "epoch": 0.329931374274151,
136
- "grad_norm": 0.7094811201095581,
137
- "learning_rate": 0.0002670244589125462,
138
- "loss": 3.4125,
139
- "step": 1875
140
- },
141
- {
142
- "epoch": 0.35192679922576103,
143
- "grad_norm": 0.722710907459259,
144
- "learning_rate": 0.00026482491641738516,
145
- "loss": 3.3959,
146
- "step": 2000
147
- },
148
- {
149
- "epoch": 0.35192679922576103,
150
- "eval_loss": 2.8463189601898193,
151
- "eval_runtime": 91.3088,
152
- "eval_samples_per_second": 107.876,
153
- "eval_steps_per_second": 4.151,
154
- "step": 2000
155
- },
156
- {
157
- "epoch": 0.3739222241773711,
158
- "grad_norm": 0.6693059802055359,
159
- "learning_rate": 0.00026262537392222413,
160
- "loss": 3.3749,
161
- "step": 2125
162
- },
163
- {
164
- "epoch": 0.3959176491289812,
165
- "grad_norm": 0.8750757575035095,
166
- "learning_rate": 0.00026042583142706316,
167
- "loss": 3.3698,
168
- "step": 2250
169
- },
170
- {
171
- "epoch": 0.41791307408059125,
172
- "grad_norm": 0.8450427651405334,
173
- "learning_rate": 0.00025822628893190213,
174
- "loss": 3.3556,
175
- "step": 2375
176
- },
177
- {
178
- "epoch": 0.4399084990322013,
179
- "grad_norm": 0.660953938961029,
180
- "learning_rate": 0.00025602674643674116,
181
- "loss": 3.3503,
182
- "step": 2500
183
- },
184
- {
185
- "epoch": 0.4399084990322013,
186
- "eval_loss": 2.8230228424072266,
187
- "eval_runtime": 91.2675,
188
- "eval_samples_per_second": 107.924,
189
- "eval_steps_per_second": 4.153,
190
- "step": 2500
191
- },
192
- {
193
- "epoch": 0.46190392398381136,
194
- "grad_norm": 0.7277446389198303,
195
- "learning_rate": 0.00025382720394158013,
196
- "loss": 3.358,
197
- "step": 2625
198
- },
199
- {
200
- "epoch": 0.4838993489354214,
201
- "grad_norm": 0.7908076047897339,
202
- "learning_rate": 0.0002516276614464191,
203
- "loss": 3.3433,
204
- "step": 2750
205
- },
206
- {
207
- "epoch": 0.5058947738870315,
208
- "grad_norm": 0.847932755947113,
209
- "learning_rate": 0.00024942811895125813,
210
- "loss": 3.3363,
211
- "step": 2875
212
- },
213
- {
214
- "epoch": 0.5278901988386415,
215
- "grad_norm": 0.7486276626586914,
216
- "learning_rate": 0.0002472285764560971,
217
- "loss": 3.3197,
218
- "step": 3000
219
- },
220
- {
221
- "epoch": 0.5278901988386415,
222
- "eval_loss": 2.7938477993011475,
223
- "eval_runtime": 91.2355,
224
- "eval_samples_per_second": 107.962,
225
- "eval_steps_per_second": 4.154,
226
- "step": 3000
227
- },
228
- {
229
- "epoch": 0.5498856237902516,
230
- "grad_norm": 0.7471196055412292,
231
- "learning_rate": 0.0002450290339609361,
232
- "loss": 3.3313,
233
- "step": 3125
234
- },
235
- {
236
- "epoch": 0.5718810487418617,
237
- "grad_norm": 0.7734588384628296,
238
- "learning_rate": 0.00024282949146577507,
239
- "loss": 3.3004,
240
- "step": 3250
241
- },
242
- {
243
- "epoch": 0.5938764736934717,
244
- "grad_norm": 0.7516855597496033,
245
- "learning_rate": 0.0002406299489706141,
246
- "loss": 3.304,
247
- "step": 3375
248
- },
249
- {
250
- "epoch": 0.6158718986450818,
251
- "grad_norm": 0.7859501242637634,
252
- "learning_rate": 0.0002384304064754531,
253
- "loss": 3.2958,
254
- "step": 3500
255
- },
256
- {
257
- "epoch": 0.6158718986450818,
258
- "eval_loss": 2.7763619422912598,
259
- "eval_runtime": 91.3446,
260
- "eval_samples_per_second": 107.833,
261
- "eval_steps_per_second": 4.149,
262
- "step": 3500
263
- },
264
- {
265
- "epoch": 0.6378673235966918,
266
- "grad_norm": 0.754370927810669,
267
- "learning_rate": 0.00023623086398029207,
268
- "loss": 3.2671,
269
- "step": 3625
270
- },
271
- {
272
- "epoch": 0.659862748548302,
273
- "grad_norm": 0.8835521936416626,
274
- "learning_rate": 0.00023403132148513107,
275
- "loss": 3.284,
276
- "step": 3750
277
- },
278
- {
279
- "epoch": 0.6818581734999121,
280
- "grad_norm": 0.7852393984794617,
281
- "learning_rate": 0.00023183177898997007,
282
- "loss": 3.2879,
283
- "step": 3875
284
- },
285
- {
286
- "epoch": 0.7038535984515221,
287
- "grad_norm": 0.6869296431541443,
288
- "learning_rate": 0.00022963223649480905,
289
- "loss": 3.2702,
290
- "step": 4000
291
- },
292
- {
293
- "epoch": 0.7038535984515221,
294
- "eval_loss": 2.7806477546691895,
295
- "eval_runtime": 91.4522,
296
- "eval_samples_per_second": 107.707,
297
- "eval_steps_per_second": 4.144,
298
- "step": 4000
299
- },
300
- {
301
- "epoch": 0.7258490234031322,
302
- "grad_norm": 1.0874834060668945,
303
- "learning_rate": 0.00022743269399964805,
304
- "loss": 3.254,
305
- "step": 4125
306
- },
307
- {
308
- "epoch": 0.7478444483547422,
309
- "grad_norm": 0.6965116262435913,
310
- "learning_rate": 0.00022523315150448702,
311
- "loss": 3.2525,
312
- "step": 4250
313
- },
314
- {
315
- "epoch": 0.7698398733063523,
316
- "grad_norm": 0.6829759478569031,
317
- "learning_rate": 0.00022303360900932605,
318
- "loss": 3.2502,
319
- "step": 4375
320
- },
321
- {
322
- "epoch": 0.7918352982579624,
323
- "grad_norm": 0.7037951946258545,
324
- "learning_rate": 0.00022083406651416505,
325
- "loss": 3.2419,
326
- "step": 4500
327
- },
328
- {
329
- "epoch": 0.7918352982579624,
330
- "eval_loss": 2.7353272438049316,
331
- "eval_runtime": 90.7352,
332
- "eval_samples_per_second": 108.558,
333
- "eval_steps_per_second": 4.177,
334
- "step": 4500
335
- },
336
- {
337
- "epoch": 0.8138307232095724,
338
- "grad_norm": 0.9032262563705444,
339
- "learning_rate": 0.00021863452401900402,
340
- "loss": 3.2426,
341
- "step": 4625
342
- },
343
- {
344
- "epoch": 0.8358261481611825,
345
- "grad_norm": 0.6463780403137207,
346
- "learning_rate": 0.00021643498152384302,
347
- "loss": 3.2408,
348
- "step": 4750
349
- },
350
- {
351
- "epoch": 0.8578215731127925,
352
- "grad_norm": 0.8066023588180542,
353
- "learning_rate": 0.00021423543902868202,
354
- "loss": 3.2178,
355
- "step": 4875
356
- },
357
- {
358
- "epoch": 0.8798169980644026,
359
- "grad_norm": 0.7497230172157288,
360
- "learning_rate": 0.000212035896533521,
361
- "loss": 3.2268,
362
- "step": 5000
363
- },
364
- {
365
- "epoch": 0.8798169980644026,
366
- "eval_loss": 2.721640110015869,
367
- "eval_runtime": 90.4581,
368
- "eval_samples_per_second": 108.89,
369
- "eval_steps_per_second": 4.19,
370
- "step": 5000
371
- },
372
- {
373
- "epoch": 0.9018124230160127,
374
- "grad_norm": 0.6645826101303101,
375
- "learning_rate": 0.00020983635403836,
376
- "loss": 3.1946,
377
- "step": 5125
378
- },
379
- {
380
- "epoch": 0.9238078479676227,
381
- "grad_norm": 0.7062675952911377,
382
- "learning_rate": 0.00020763681154319902,
383
- "loss": 3.2228,
384
- "step": 5250
385
- },
386
- {
387
- "epoch": 0.9458032729192328,
388
- "grad_norm": 0.7374680638313293,
389
- "learning_rate": 0.000205437269048038,
390
- "loss": 3.1905,
391
- "step": 5375
392
- },
393
- {
394
- "epoch": 0.9677986978708428,
395
- "grad_norm": 0.853108286857605,
396
- "learning_rate": 0.000203237726552877,
397
- "loss": 3.2011,
398
- "step": 5500
399
- },
400
- {
401
- "epoch": 0.9677986978708428,
402
- "eval_loss": 2.714653491973877,
403
- "eval_runtime": 90.854,
404
- "eval_samples_per_second": 108.416,
405
- "eval_steps_per_second": 4.172,
406
- "step": 5500
407
- },
408
- {
409
- "epoch": 0.9897941228224529,
410
- "grad_norm": 0.6738927364349365,
411
- "learning_rate": 0.000201038184057716,
412
- "loss": 3.1888,
413
- "step": 5625
414
- },
415
- {
416
- "epoch": 1.011789547774063,
417
- "grad_norm": 0.7420991659164429,
418
- "learning_rate": 0.00019883864156255496,
419
- "loss": 3.1835,
420
- "step": 5750
421
- },
422
- {
423
- "epoch": 1.0337849727256732,
424
- "grad_norm": 0.8556201457977295,
425
- "learning_rate": 0.00019663909906739396,
426
- "loss": 3.1885,
427
- "step": 5875
428
- },
429
- {
430
- "epoch": 1.055780397677283,
431
- "grad_norm": 0.7332561016082764,
432
- "learning_rate": 0.00019443955657223294,
433
- "loss": 3.1691,
434
- "step": 6000
435
- },
436
- {
437
- "epoch": 1.055780397677283,
438
- "eval_loss": 2.692098617553711,
439
- "eval_runtime": 90.3724,
440
- "eval_samples_per_second": 108.993,
441
- "eval_steps_per_second": 4.194,
442
- "step": 6000
443
- },
444
- {
445
- "epoch": 1.0777758226288932,
446
- "grad_norm": 0.8108923435211182,
447
- "learning_rate": 0.00019224001407707196,
448
- "loss": 3.1627,
449
- "step": 6125
450
- },
451
- {
452
- "epoch": 1.0997712475805033,
453
- "grad_norm": 0.8860889077186584,
454
- "learning_rate": 0.00019004047158191096,
455
- "loss": 3.1703,
456
- "step": 6250
457
- },
458
- {
459
- "epoch": 1.1217666725321134,
460
- "grad_norm": 0.6584394574165344,
461
- "learning_rate": 0.00018784092908674993,
462
- "loss": 3.1704,
463
- "step": 6375
464
- },
465
- {
466
- "epoch": 1.1437620974837235,
467
- "grad_norm": 0.7717780470848083,
468
- "learning_rate": 0.00018564138659158893,
469
- "loss": 3.1712,
470
- "step": 6500
471
- },
472
- {
473
- "epoch": 1.1437620974837235,
474
- "eval_loss": 2.6807186603546143,
475
- "eval_runtime": 90.3407,
476
- "eval_samples_per_second": 109.032,
477
- "eval_steps_per_second": 4.195,
478
- "step": 6500
479
- },
480
- {
481
- "epoch": 1.1657575224353334,
482
- "grad_norm": 0.666558563709259,
483
- "learning_rate": 0.00018344184409642793,
484
- "loss": 3.1564,
485
- "step": 6625
486
- },
487
- {
488
- "epoch": 1.1877529473869435,
489
- "grad_norm": 0.8080345392227173,
490
- "learning_rate": 0.0001812423016012669,
491
- "loss": 3.1725,
492
- "step": 6750
493
- },
494
- {
495
- "epoch": 1.2097483723385536,
496
- "grad_norm": 0.7759010791778564,
497
- "learning_rate": 0.0001790427591061059,
498
- "loss": 3.1469,
499
- "step": 6875
500
- },
501
- {
502
- "epoch": 1.2317437972901637,
503
- "grad_norm": 0.7258739471435547,
504
- "learning_rate": 0.00017684321661094488,
505
- "loss": 3.147,
506
- "step": 7000
507
- },
508
- {
509
- "epoch": 1.2317437972901637,
510
- "eval_loss": 2.674105405807495,
511
- "eval_runtime": 90.5221,
512
- "eval_samples_per_second": 108.813,
513
- "eval_steps_per_second": 4.187,
514
- "step": 7000
515
  }
516
  ],
517
  "logging_steps": 125,
@@ -531,7 +99,7 @@
531
  "attributes": {}
532
  }
533
  },
534
- "total_flos": 4106809598017536.0,
535
  "train_batch_size": 26,
536
  "trial_name": null,
537
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.17596339961288052,
6
  "eval_steps": 500,
7
+ "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.021995424951610065,
14
+ "grad_norm": 0.7317402958869934,
15
  "learning_rate": 0.00029781805384480024,
16
+ "loss": 3.1594,
17
  "step": 125
18
  },
19
  {
20
  "epoch": 0.04399084990322013,
21
+ "grad_norm": 0.7342692613601685,
22
  "learning_rate": 0.00029561851134963927,
23
+ "loss": 3.1518,
24
  "step": 250
25
  },
26
  {
27
  "epoch": 0.06598627485483019,
28
+ "grad_norm": 0.6049332022666931,
29
  "learning_rate": 0.00029341896885447824,
30
+ "loss": 3.1479,
31
  "step": 375
32
  },
33
  {
34
  "epoch": 0.08798169980644026,
35
+ "grad_norm": 0.7177291512489319,
36
  "learning_rate": 0.0002912194263593172,
37
+ "loss": 3.1586,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 0.08798169980644026,
42
+ "eval_loss": 2.6734766960144043,
43
+ "eval_runtime": 59.2675,
44
+ "eval_samples_per_second": 166.196,
45
+ "eval_steps_per_second": 6.395,
46
  "step": 500
47
  },
48
  {
49
  "epoch": 0.10997712475805033,
50
+ "grad_norm": 0.7241224050521851,
51
  "learning_rate": 0.00028901988386415624,
52
+ "loss": 3.1257,
53
  "step": 625
54
  },
55
  {
56
  "epoch": 0.13197254970966038,
57
+ "grad_norm": 1.1103954315185547,
58
  "learning_rate": 0.0002868203413689952,
59
+ "loss": 3.1179,
60
  "step": 750
61
  },
62
  {
63
  "epoch": 0.15396797466127046,
64
+ "grad_norm": 0.7277866005897522,
65
  "learning_rate": 0.00028462079887383424,
66
+ "loss": 3.1451,
67
  "step": 875
68
  },
69
  {
70
  "epoch": 0.17596339961288052,
71
+ "grad_norm": 0.7038848400115967,
72
  "learning_rate": 0.0002824212563786732,
73
+ "loss": 3.1223,
74
  "step": 1000
75
  },
76
  {
77
  "epoch": 0.17596339961288052,
78
+ "eval_loss": 2.664580821990967,
79
+ "eval_runtime": 59.2093,
80
+ "eval_samples_per_second": 166.359,
81
+ "eval_steps_per_second": 6.401,
82
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  }
84
  ],
85
  "logging_steps": 125,
 
99
  "attributes": {}
100
  }
101
  },
102
+ "total_flos": 586712875008000.0,
103
  "train_batch_size": 26,
104
  "trial_name": null,
105
  "trial_params": null
checkpoint-latest/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12927a523338558b1d44dd8df078024b60936add92e7554d0264326927e1cba5
3
  size 6033
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bef956c560dcdc3bac6492fdc576c6dff35538e184d8d9734adbb5fe3c9b01e
3
  size 6033