irishprancer commited on
Commit
66aebbe
·
verified ·
1 Parent(s): c62be4e

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84029e8f70d12b2c7137d5b303188195f30521bd1c82299d6d37b24c0269a65a
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b09791637006ca8005c03c2da6ace411f4e8914b2ca2e679ed4878cbb2b2df8c
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d1059ec2c30fa5b1aacfd6d9895b6233f1694495648c48fedab4d2b8e820425
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe55d7bdffac085201a4baf66b734c199969f282bc122f5a5726cafc99279b80
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7622d316fe354db40f60bc22ab635af3869b60bf5a6c816cb74ee6598c94be27
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5feb56512e955691dc9bb9a1e37b9dd590e06a961d7d94560b679e2730b03194
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:351fd5edffc48c8b46106c61b298184039dcb3c5ee48faa68a22154873155edd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cf3f988e8fed2daa2e801eb1f19b681872781cf57f0fb7b896e859a12cfe2bb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,1172 +1,157 @@
1
  {
2
- "best_metric": 0.7168284058570862,
3
- "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 52.17391304347826,
5
  "eval_steps": 150,
6
- "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.43478260869565216,
13
- "grad_norm": 1.502160668373108,
14
  "learning_rate": 3e-06,
15
- "loss": 0.906,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.8695652173913043,
20
- "grad_norm": 1.6870523691177368,
21
  "learning_rate": 6e-06,
22
- "loss": 0.9023,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 1.3043478260869565,
27
- "grad_norm": 1.7296977043151855,
28
  "learning_rate": 9e-06,
29
- "loss": 0.9002,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 1.7391304347826086,
34
- "grad_norm": 1.4458173513412476,
35
  "learning_rate": 1.2e-05,
36
- "loss": 0.9095,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 2.1739130434782608,
41
- "grad_norm": 1.351696252822876,
42
  "learning_rate": 1.5e-05,
43
- "loss": 0.8362,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 2.608695652173913,
48
- "grad_norm": 2.046602964401245,
49
  "learning_rate": 1.8e-05,
50
- "loss": 0.8897,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 3.0434782608695654,
55
- "grad_norm": 1.4842596054077148,
56
  "learning_rate": 2.1e-05,
57
- "loss": 0.8916,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 3.4782608695652173,
62
- "grad_norm": 1.7293957471847534,
63
  "learning_rate": 2.4e-05,
64
- "loss": 0.8233,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 3.9130434782608696,
69
- "grad_norm": 1.4240052700042725,
70
  "learning_rate": 2.7000000000000002e-05,
71
- "loss": 0.8529,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 4.3478260869565215,
76
- "grad_norm": 1.3658534288406372,
77
  "learning_rate": 3e-05,
78
- "loss": 0.8646,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 4.782608695652174,
83
- "grad_norm": 2.2143099308013916,
84
  "learning_rate": 2.999999702723963e-05,
85
- "loss": 0.8225,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 5.217391304347826,
90
- "grad_norm": 1.0725128650665283,
91
  "learning_rate": 2.9999988108959687e-05,
92
- "loss": 0.7653,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 5.6521739130434785,
97
- "grad_norm": 1.5600417852401733,
98
  "learning_rate": 2.9999973245163716e-05,
99
- "loss": 0.7415,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 6.086956521739131,
104
- "grad_norm": 1.907906413078308,
105
  "learning_rate": 2.99999524358576e-05,
106
- "loss": 0.7656,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 6.521739130434782,
111
- "grad_norm": 1.121804118156433,
112
  "learning_rate": 2.9999925681049593e-05,
113
  "loss": 0.7858,
114
  "step": 150
115
  },
116
  {
117
  "epoch": 6.521739130434782,
118
- "eval_loss": 0.7960879802703857,
119
- "eval_runtime": 0.4775,
120
- "eval_samples_per_second": 20.942,
121
- "eval_steps_per_second": 20.942,
122
  "step": 150
123
  },
124
  {
 
 
 
 
125
  "epoch": 6.521739130434782,
126
- "eval_loss": 0.8616224527359009,
127
- "eval_runtime": 0.3994,
128
- "eval_samples_per_second": 25.038,
129
- "eval_steps_per_second": 25.038,
130
  "step": 150
131
  },
132
  {
 
 
 
 
133
  "epoch": 6.521739130434782,
134
- "eval_loss": 0.7960879802703857,
135
- "eval_runtime": 0.3973,
136
- "eval_samples_per_second": 25.173,
137
- "eval_steps_per_second": 25.173,
138
  "step": 150
139
  },
140
  {
 
 
 
 
141
  "epoch": 6.521739130434782,
142
- "eval_loss": 0.8616224527359009,
143
- "eval_runtime": 0.4031,
144
- "eval_samples_per_second": 24.805,
145
- "eval_steps_per_second": 24.805,
146
  "step": 150
147
  },
148
  {
 
 
 
 
149
  "epoch": 6.521739130434782,
150
- "eval_loss": 0.8619683384895325,
151
- "eval_runtime": 0.4079,
152
- "eval_samples_per_second": 24.517,
153
- "eval_steps_per_second": 24.517,
154
  "step": 150
155
- },
156
- {
157
- "epoch": 6.956521739130435,
158
- "grad_norm": 1.541015625,
159
- "learning_rate": 2.9999892980750297e-05,
160
- "loss": 0.6586,
161
- "step": 160
162
- },
163
- {
164
- "epoch": 7.391304347826087,
165
- "grad_norm": 1.3462358713150024,
166
- "learning_rate": 2.9999854334972675e-05,
167
- "loss": 0.7387,
168
- "step": 170
169
- },
170
- {
171
- "epoch": 7.826086956521739,
172
- "grad_norm": 1.7260987758636475,
173
- "learning_rate": 2.999980974373204e-05,
174
- "loss": 0.7294,
175
- "step": 180
176
- },
177
- {
178
- "epoch": 8.26086956521739,
179
- "grad_norm": 1.5400819778442383,
180
- "learning_rate": 2.9999759207046075e-05,
181
- "loss": 0.6245,
182
- "step": 190
183
- },
184
- {
185
- "epoch": 8.695652173913043,
186
- "grad_norm": 1.7347906827926636,
187
- "learning_rate": 2.9999702724934804e-05,
188
- "loss": 0.6768,
189
- "step": 200
190
- },
191
- {
192
- "epoch": 9.130434782608695,
193
- "grad_norm": 1.0418808460235596,
194
- "learning_rate": 2.999964029742062e-05,
195
- "loss": 0.6522,
196
- "step": 210
197
- },
198
- {
199
- "epoch": 9.565217391304348,
200
- "grad_norm": 1.2194719314575195,
201
- "learning_rate": 2.9999571924528263e-05,
202
- "loss": 0.5593,
203
- "step": 220
204
- },
205
- {
206
- "epoch": 10.0,
207
- "grad_norm": 1.5290558338165283,
208
- "learning_rate": 2.9999497606284837e-05,
209
- "loss": 0.7557,
210
- "step": 230
211
- },
212
- {
213
- "epoch": 10.434782608695652,
214
- "grad_norm": 1.4276846647262573,
215
- "learning_rate": 2.9999417342719796e-05,
216
- "loss": 0.7117,
217
- "step": 240
218
- },
219
- {
220
- "epoch": 10.869565217391305,
221
- "grad_norm": 0.9797449111938477,
222
- "learning_rate": 2.9999331133864956e-05,
223
- "loss": 0.59,
224
- "step": 250
225
- },
226
- {
227
- "epoch": 11.304347826086957,
228
- "grad_norm": 1.1942695379257202,
229
- "learning_rate": 2.9999238979754485e-05,
230
- "loss": 0.6548,
231
- "step": 260
232
- },
233
- {
234
- "epoch": 11.73913043478261,
235
- "grad_norm": 1.0486805438995361,
236
- "learning_rate": 2.999914088042492e-05,
237
- "loss": 0.6474,
238
- "step": 270
239
- },
240
- {
241
- "epoch": 12.173913043478262,
242
- "grad_norm": 1.3112999200820923,
243
- "learning_rate": 2.9999036835915132e-05,
244
- "loss": 0.5941,
245
- "step": 280
246
- },
247
- {
248
- "epoch": 12.608695652173914,
249
- "grad_norm": 1.0798743963241577,
250
- "learning_rate": 2.9998926846266365e-05,
251
- "loss": 0.6325,
252
- "step": 290
253
- },
254
- {
255
- "epoch": 13.043478260869565,
256
- "grad_norm": 1.3873789310455322,
257
- "learning_rate": 2.9998810911522213e-05,
258
- "loss": 0.5808,
259
- "step": 300
260
- },
261
- {
262
- "epoch": 13.043478260869565,
263
- "eval_loss": 0.7309858202934265,
264
- "eval_runtime": 0.5483,
265
- "eval_samples_per_second": 18.239,
266
- "eval_steps_per_second": 18.239,
267
- "step": 300
268
- },
269
- {
270
- "epoch": 13.043478260869565,
271
- "eval_loss": 0.8616224527359009,
272
- "eval_runtime": 0.4277,
273
- "eval_samples_per_second": 23.38,
274
- "eval_steps_per_second": 23.38,
275
- "step": 300
276
- },
277
- {
278
- "epoch": 13.043478260869565,
279
- "eval_loss": 0.7309858202934265,
280
- "eval_runtime": 0.414,
281
- "eval_samples_per_second": 24.156,
282
- "eval_steps_per_second": 24.156,
283
- "step": 300
284
- },
285
- {
286
- "epoch": 13.043478260869565,
287
- "eval_loss": 0.7762144804000854,
288
- "eval_runtime": 0.4059,
289
- "eval_samples_per_second": 24.639,
290
- "eval_steps_per_second": 24.639,
291
- "step": 300
292
- },
293
- {
294
- "epoch": 13.043478260869565,
295
- "eval_loss": 0.8606348037719727,
296
- "eval_runtime": 0.4045,
297
- "eval_samples_per_second": 24.719,
298
- "eval_steps_per_second": 24.719,
299
- "step": 300
300
- },
301
- {
302
- "epoch": 13.478260869565217,
303
- "grad_norm": 1.783539056777954,
304
- "learning_rate": 2.9998689031728636e-05,
305
- "loss": 0.5145,
306
- "step": 310
307
- },
308
- {
309
- "epoch": 13.91304347826087,
310
- "grad_norm": 1.5309405326843262,
311
- "learning_rate": 2.9998561206933938e-05,
312
- "loss": 0.6497,
313
- "step": 320
314
- },
315
- {
316
- "epoch": 14.347826086956522,
317
- "grad_norm": 1.4793602228164673,
318
- "learning_rate": 2.9998427437188786e-05,
319
- "loss": 0.5743,
320
- "step": 330
321
- },
322
- {
323
- "epoch": 14.782608695652174,
324
- "grad_norm": 1.3178294897079468,
325
- "learning_rate": 2.99982877225462e-05,
326
- "loss": 0.6015,
327
- "step": 340
328
- },
329
- {
330
- "epoch": 15.217391304347826,
331
- "grad_norm": 0.978071928024292,
332
- "learning_rate": 2.9998142063061564e-05,
333
- "loss": 0.4987,
334
- "step": 350
335
- },
336
- {
337
- "epoch": 15.652173913043478,
338
- "grad_norm": 1.6408658027648926,
339
- "learning_rate": 2.9997990458792603e-05,
340
- "loss": 0.5627,
341
- "step": 360
342
- },
343
- {
344
- "epoch": 16.08695652173913,
345
- "grad_norm": 1.6342864036560059,
346
- "learning_rate": 2.9997832909799417e-05,
347
- "loss": 0.6672,
348
- "step": 370
349
- },
350
- {
351
- "epoch": 16.52173913043478,
352
- "grad_norm": 0.9524793028831482,
353
- "learning_rate": 2.9997669416144452e-05,
354
- "loss": 0.513,
355
- "step": 380
356
- },
357
- {
358
- "epoch": 16.956521739130434,
359
- "grad_norm": 0.9344761371612549,
360
- "learning_rate": 2.999749997789251e-05,
361
- "loss": 0.5794,
362
- "step": 390
363
- },
364
- {
365
- "epoch": 17.391304347826086,
366
- "grad_norm": 1.1158229112625122,
367
- "learning_rate": 2.9997324595110743e-05,
368
- "loss": 0.518,
369
- "step": 400
370
- },
371
- {
372
- "epoch": 17.82608695652174,
373
- "grad_norm": 1.2849094867706299,
374
- "learning_rate": 2.9997143267868683e-05,
375
- "loss": 0.5879,
376
- "step": 410
377
- },
378
- {
379
- "epoch": 18.26086956521739,
380
- "grad_norm": 1.1642646789550781,
381
- "learning_rate": 2.9996955996238192e-05,
382
- "loss": 0.5056,
383
- "step": 420
384
- },
385
- {
386
- "epoch": 18.695652173913043,
387
- "grad_norm": 1.2012473344802856,
388
- "learning_rate": 2.9996762780293503e-05,
389
- "loss": 0.5318,
390
- "step": 430
391
- },
392
- {
393
- "epoch": 19.130434782608695,
394
- "grad_norm": 1.213199257850647,
395
- "learning_rate": 2.9996563620111197e-05,
396
- "loss": 0.5336,
397
- "step": 440
398
- },
399
- {
400
- "epoch": 19.565217391304348,
401
- "grad_norm": 1.4254536628723145,
402
- "learning_rate": 2.9996358515770218e-05,
403
- "loss": 0.568,
404
- "step": 450
405
- },
406
- {
407
- "epoch": 19.565217391304348,
408
- "eval_loss": 0.7168284058570862,
409
- "eval_runtime": 0.5799,
410
- "eval_samples_per_second": 17.244,
411
- "eval_steps_per_second": 17.244,
412
- "step": 450
413
- },
414
- {
415
- "epoch": 19.565217391304348,
416
- "eval_loss": 0.8616224527359009,
417
- "eval_runtime": 0.5422,
418
- "eval_samples_per_second": 18.444,
419
- "eval_steps_per_second": 18.444,
420
- "step": 450
421
- },
422
- {
423
- "epoch": 19.565217391304348,
424
- "eval_loss": 0.7168284058570862,
425
- "eval_runtime": 0.5615,
426
- "eval_samples_per_second": 17.809,
427
- "eval_steps_per_second": 17.809,
428
- "step": 450
429
- },
430
- {
431
- "epoch": 19.565217391304348,
432
- "eval_loss": 0.7562109231948853,
433
- "eval_runtime": 0.4866,
434
- "eval_samples_per_second": 20.549,
435
- "eval_steps_per_second": 20.549,
436
- "step": 450
437
- },
438
- {
439
- "epoch": 19.565217391304348,
440
- "eval_loss": 0.8607499003410339,
441
- "eval_runtime": 0.4084,
442
- "eval_samples_per_second": 24.487,
443
- "eval_steps_per_second": 24.487,
444
- "step": 450
445
- },
446
- {
447
- "epoch": 20.0,
448
- "grad_norm": 2.1195826530456543,
449
- "learning_rate": 2.9996147467351856e-05,
450
- "loss": 0.5146,
451
- "step": 460
452
- },
453
- {
454
- "epoch": 20.434782608695652,
455
- "grad_norm": 1.268523097038269,
456
- "learning_rate": 2.9995930474939773e-05,
457
- "loss": 0.4781,
458
- "step": 470
459
- },
460
- {
461
- "epoch": 20.869565217391305,
462
- "grad_norm": 1.4743558168411255,
463
- "learning_rate": 2.9995707538619975e-05,
464
- "loss": 0.5705,
465
- "step": 480
466
- },
467
- {
468
- "epoch": 21.304347826086957,
469
- "grad_norm": 1.3239866495132446,
470
- "learning_rate": 2.9995478658480822e-05,
471
- "loss": 0.5163,
472
- "step": 490
473
- },
474
- {
475
- "epoch": 21.73913043478261,
476
- "grad_norm": 1.2419785261154175,
477
- "learning_rate": 2.9995243834613043e-05,
478
- "loss": 0.5205,
479
- "step": 500
480
- },
481
- {
482
- "epoch": 22.17391304347826,
483
- "grad_norm": 1.7764437198638916,
484
- "learning_rate": 2.9995003067109707e-05,
485
- "loss": 0.4838,
486
- "step": 510
487
- },
488
- {
489
- "epoch": 22.608695652173914,
490
- "grad_norm": 1.5313807725906372,
491
- "learning_rate": 2.9994756356066246e-05,
492
- "loss": 0.5616,
493
- "step": 520
494
- },
495
- {
496
- "epoch": 23.043478260869566,
497
- "grad_norm": 1.7396149635314941,
498
- "learning_rate": 2.999450370158046e-05,
499
- "loss": 0.4929,
500
- "step": 530
501
- },
502
- {
503
- "epoch": 23.47826086956522,
504
- "grad_norm": 1.310137152671814,
505
- "learning_rate": 2.9994245103752478e-05,
506
- "loss": 0.4385,
507
- "step": 540
508
- },
509
- {
510
- "epoch": 23.91304347826087,
511
- "grad_norm": 1.2314122915267944,
512
- "learning_rate": 2.999398056268481e-05,
513
- "loss": 0.5265,
514
- "step": 550
515
- },
516
- {
517
- "epoch": 24.347826086956523,
518
- "grad_norm": 1.405901312828064,
519
- "learning_rate": 2.9993710078482306e-05,
520
- "loss": 0.5203,
521
- "step": 560
522
- },
523
- {
524
- "epoch": 24.782608695652176,
525
- "grad_norm": 0.9515899419784546,
526
- "learning_rate": 2.9993433651252185e-05,
527
- "loss": 0.443,
528
- "step": 570
529
- },
530
- {
531
- "epoch": 25.217391304347824,
532
- "grad_norm": 1.7217934131622314,
533
- "learning_rate": 2.9993151281104006e-05,
534
- "loss": 0.5327,
535
- "step": 580
536
- },
537
- {
538
- "epoch": 25.652173913043477,
539
- "grad_norm": 1.1352386474609375,
540
- "learning_rate": 2.9992862968149695e-05,
541
- "loss": 0.4737,
542
- "step": 590
543
- },
544
- {
545
- "epoch": 26.08695652173913,
546
- "grad_norm": 1.1713489294052124,
547
- "learning_rate": 2.9992568712503533e-05,
548
- "loss": 0.4607,
549
- "step": 600
550
- },
551
- {
552
- "epoch": 26.08695652173913,
553
- "eval_loss": 0.7201142311096191,
554
- "eval_runtime": 0.4104,
555
- "eval_samples_per_second": 24.369,
556
- "eval_steps_per_second": 24.369,
557
- "step": 600
558
- },
559
- {
560
- "epoch": 26.08695652173913,
561
- "eval_loss": 0.8616224527359009,
562
- "eval_runtime": 0.4009,
563
- "eval_samples_per_second": 24.945,
564
- "eval_steps_per_second": 24.945,
565
- "step": 600
566
- },
567
- {
568
- "epoch": 26.08695652173913,
569
- "eval_loss": 0.7201142311096191,
570
- "eval_runtime": 0.3978,
571
- "eval_samples_per_second": 25.14,
572
- "eval_steps_per_second": 25.14,
573
- "step": 600
574
- },
575
- {
576
- "epoch": 26.08695652173913,
577
- "eval_loss": 0.7344290018081665,
578
- "eval_runtime": 0.3994,
579
- "eval_samples_per_second": 25.038,
580
- "eval_steps_per_second": 25.038,
581
- "step": 600
582
- },
583
- {
584
- "epoch": 26.08695652173913,
585
- "eval_loss": 0.8599739074707031,
586
- "eval_runtime": 0.3997,
587
- "eval_samples_per_second": 25.02,
588
- "eval_steps_per_second": 25.02,
589
- "step": 600
590
- },
591
- {
592
- "epoch": 26.52173913043478,
593
- "grad_norm": 1.0763235092163086,
594
- "learning_rate": 2.180702116052084e-06,
595
- "loss": 0.5123,
596
- "step": 610
597
- },
598
- {
599
- "epoch": 26.956521739130434,
600
- "grad_norm": 1.3133342266082764,
601
- "learning_rate": 4.361404232104168e-06,
602
- "loss": 0.4305,
603
- "step": 620
604
- },
605
- {
606
- "epoch": 27.391304347826086,
607
- "grad_norm": 1.196905493736267,
608
- "learning_rate": 6.5421063481562515e-06,
609
- "loss": 0.483,
610
- "step": 630
611
- },
612
- {
613
- "epoch": 27.82608695652174,
614
- "grad_norm": 1.3611772060394287,
615
- "learning_rate": 8.722808464208335e-06,
616
- "loss": 0.5073,
617
- "step": 640
618
- },
619
- {
620
- "epoch": 28.26086956521739,
621
- "grad_norm": 1.3929076194763184,
622
- "learning_rate": 1.0903510580260419e-05,
623
- "loss": 0.4196,
624
- "step": 650
625
- },
626
- {
627
- "epoch": 28.695652173913043,
628
- "grad_norm": 1.5381708145141602,
629
- "learning_rate": 1.3084212696312503e-05,
630
- "loss": 0.5196,
631
- "step": 660
632
- },
633
- {
634
- "epoch": 29.130434782608695,
635
- "grad_norm": 1.2946442365646362,
636
- "learning_rate": 1.5264914812364585e-05,
637
- "loss": 0.4475,
638
- "step": 670
639
- },
640
- {
641
- "epoch": 29.565217391304348,
642
- "grad_norm": 1.0526130199432373,
643
- "learning_rate": 1.744561692841667e-05,
644
- "loss": 0.4891,
645
- "step": 680
646
- },
647
- {
648
- "epoch": 30.0,
649
- "grad_norm": 2.4151837825775146,
650
- "learning_rate": 1.9626319044468756e-05,
651
- "loss": 0.4298,
652
- "step": 690
653
- },
654
- {
655
- "epoch": 30.434782608695652,
656
- "grad_norm": 1.2968952655792236,
657
- "learning_rate": 2.1807021160520838e-05,
658
- "loss": 0.4978,
659
- "step": 700
660
- },
661
- {
662
- "epoch": 30.869565217391305,
663
- "grad_norm": 1.5897126197814941,
664
- "learning_rate": 2.1807018999619227e-05,
665
- "loss": 0.4322,
666
- "step": 710
667
- },
668
- {
669
- "epoch": 31.304347826086957,
670
- "grad_norm": 0.903261661529541,
671
- "learning_rate": 2.180701251691525e-05,
672
- "loss": 0.4453,
673
- "step": 720
674
- },
675
- {
676
- "epoch": 31.73913043478261,
677
- "grad_norm": 1.5805950164794922,
678
- "learning_rate": 2.180700171241147e-05,
679
- "loss": 0.5081,
680
- "step": 730
681
- },
682
- {
683
- "epoch": 32.17391304347826,
684
- "grad_norm": 1.1670500040054321,
685
- "learning_rate": 2.1806986586112178e-05,
686
- "loss": 0.4616,
687
- "step": 740
688
- },
689
- {
690
- "epoch": 32.608695652173914,
691
- "grad_norm": 1.7125110626220703,
692
- "learning_rate": 2.1806967138023368e-05,
693
- "loss": 0.5002,
694
- "step": 750
695
- },
696
- {
697
- "epoch": 32.608695652173914,
698
- "eval_loss": 0.7243400812149048,
699
- "eval_runtime": 0.5459,
700
- "eval_samples_per_second": 18.32,
701
- "eval_steps_per_second": 18.32,
702
- "step": 750
703
- },
704
- {
705
- "epoch": 32.608695652173914,
706
- "eval_loss": 0.8616224527359009,
707
- "eval_runtime": 0.4034,
708
- "eval_samples_per_second": 24.789,
709
- "eval_steps_per_second": 24.789,
710
- "step": 750
711
- },
712
- {
713
- "epoch": 32.608695652173914,
714
- "eval_loss": 0.7243400812149048,
715
- "eval_runtime": 0.4067,
716
- "eval_samples_per_second": 24.59,
717
- "eval_steps_per_second": 24.59,
718
- "step": 750
719
- },
720
- {
721
- "epoch": 32.608695652173914,
722
- "eval_loss": 0.7293505668640137,
723
- "eval_runtime": 0.4035,
724
- "eval_samples_per_second": 24.782,
725
- "eval_steps_per_second": 24.782,
726
- "step": 750
727
- },
728
- {
729
- "epoch": 32.608695652173914,
730
- "eval_loss": 0.8606684803962708,
731
- "eval_runtime": 0.4152,
732
- "eval_samples_per_second": 24.083,
733
- "eval_steps_per_second": 24.083,
734
- "step": 750
735
- },
736
- {
737
- "epoch": 33.04347826086956,
738
- "grad_norm": 1.426059603691101,
739
- "learning_rate": 1.5715375657489587e-06,
740
- "loss": 0.4543,
741
- "step": 760
742
- },
743
- {
744
- "epoch": 33.47826086956522,
745
- "grad_norm": 1.5478020906448364,
746
- "learning_rate": 3.1430751314979174e-06,
747
- "loss": 0.4608,
748
- "step": 770
749
- },
750
- {
751
- "epoch": 33.91304347826087,
752
- "grad_norm": 1.2852143049240112,
753
- "learning_rate": 4.714612697246876e-06,
754
- "loss": 0.4359,
755
- "step": 780
756
- },
757
- {
758
- "epoch": 34.34782608695652,
759
- "grad_norm": 1.3916325569152832,
760
- "learning_rate": 6.286150262995835e-06,
761
- "loss": 0.4366,
762
- "step": 790
763
- },
764
- {
765
- "epoch": 34.78260869565217,
766
- "grad_norm": 1.4073759317398071,
767
- "learning_rate": 7.857687828744793e-06,
768
- "loss": 0.4145,
769
- "step": 800
770
- },
771
- {
772
- "epoch": 35.21739130434783,
773
- "grad_norm": 1.258367896080017,
774
- "learning_rate": 9.429225394493751e-06,
775
- "loss": 0.5149,
776
- "step": 810
777
- },
778
- {
779
- "epoch": 35.65217391304348,
780
- "grad_norm": 1.2376227378845215,
781
- "learning_rate": 1.100076296024271e-05,
782
- "loss": 0.4429,
783
- "step": 820
784
- },
785
- {
786
- "epoch": 36.08695652173913,
787
- "grad_norm": 1.071475625038147,
788
- "learning_rate": 1.257230052599167e-05,
789
- "loss": 0.4587,
790
- "step": 830
791
- },
792
- {
793
- "epoch": 36.52173913043478,
794
- "grad_norm": 1.109466552734375,
795
- "learning_rate": 1.4143838091740628e-05,
796
- "loss": 0.4244,
797
- "step": 840
798
- },
799
- {
800
- "epoch": 36.95652173913044,
801
- "grad_norm": 1.3434367179870605,
802
- "learning_rate": 1.5715375657489586e-05,
803
- "loss": 0.4951,
804
- "step": 850
805
- },
806
- {
807
- "epoch": 37.391304347826086,
808
- "grad_norm": 1.8077468872070312,
809
- "learning_rate": 1.5715374100221386e-05,
810
- "loss": 0.4675,
811
- "step": 860
812
- },
813
- {
814
- "epoch": 37.82608695652174,
815
- "grad_norm": 1.2127968072891235,
816
- "learning_rate": 1.5715369428417403e-05,
817
- "loss": 0.4156,
818
- "step": 870
819
- },
820
- {
821
- "epoch": 38.26086956521739,
822
- "grad_norm": 1.1871669292449951,
823
- "learning_rate": 1.571536164207949e-05,
824
- "loss": 0.4515,
825
- "step": 880
826
- },
827
- {
828
- "epoch": 38.69565217391305,
829
- "grad_norm": 1.3719384670257568,
830
- "learning_rate": 1.571535074121073e-05,
831
- "loss": 0.4019,
832
- "step": 890
833
- },
834
- {
835
- "epoch": 39.130434782608695,
836
- "grad_norm": 1.8886760473251343,
837
- "learning_rate": 1.5715336725815448e-05,
838
- "loss": 0.5218,
839
- "step": 900
840
- },
841
- {
842
- "epoch": 39.130434782608695,
843
- "eval_loss": 0.7287566661834717,
844
- "eval_runtime": 0.5312,
845
- "eval_samples_per_second": 18.825,
846
- "eval_steps_per_second": 18.825,
847
- "step": 900
848
- },
849
- {
850
- "epoch": 39.130434782608695,
851
- "eval_loss": 0.8616224527359009,
852
- "eval_runtime": 0.4551,
853
- "eval_samples_per_second": 21.972,
854
- "eval_steps_per_second": 21.972,
855
- "step": 900
856
- },
857
- {
858
- "epoch": 39.130434782608695,
859
- "eval_loss": 0.7287566661834717,
860
- "eval_runtime": 0.4445,
861
- "eval_samples_per_second": 22.5,
862
- "eval_steps_per_second": 22.5,
863
- "step": 900
864
- },
865
- {
866
- "epoch": 39.130434782608695,
867
- "eval_loss": 0.7235647439956665,
868
- "eval_runtime": 0.4458,
869
- "eval_samples_per_second": 22.434,
870
- "eval_steps_per_second": 22.434,
871
- "step": 900
872
- },
873
- {
874
- "epoch": 39.130434782608695,
875
- "eval_loss": 0.8611491322517395,
876
- "eval_runtime": 0.4432,
877
- "eval_samples_per_second": 22.563,
878
- "eval_steps_per_second": 22.563,
879
- "step": 900
880
- },
881
- {
882
- "epoch": 39.56521739130435,
883
- "grad_norm": 1.4088988304138184,
884
- "learning_rate": 1.1307389750804807e-06,
885
- "loss": 0.4355,
886
- "step": 910
887
- },
888
- {
889
- "epoch": 40.0,
890
- "grad_norm": 2.766087532043457,
891
- "learning_rate": 2.2614779501609614e-06,
892
- "loss": 0.436,
893
- "step": 920
894
- },
895
- {
896
- "epoch": 40.43478260869565,
897
- "grad_norm": 1.4608732461929321,
898
- "learning_rate": 3.392216925241442e-06,
899
- "loss": 0.5145,
900
- "step": 930
901
- },
902
- {
903
- "epoch": 40.869565217391305,
904
- "grad_norm": 1.435799241065979,
905
- "learning_rate": 4.522955900321923e-06,
906
- "loss": 0.3916,
907
- "step": 940
908
- },
909
- {
910
- "epoch": 41.30434782608695,
911
- "grad_norm": 1.9943156242370605,
912
- "learning_rate": 5.653694875402403e-06,
913
- "loss": 0.4035,
914
- "step": 950
915
- },
916
- {
917
- "epoch": 41.73913043478261,
918
- "grad_norm": 1.3566862344741821,
919
- "learning_rate": 6.784433850482884e-06,
920
- "loss": 0.4228,
921
- "step": 960
922
- },
923
- {
924
- "epoch": 42.17391304347826,
925
- "grad_norm": 1.3916350603103638,
926
- "learning_rate": 7.915172825563364e-06,
927
- "loss": 0.5037,
928
- "step": 970
929
- },
930
- {
931
- "epoch": 42.608695652173914,
932
- "grad_norm": 1.3377119302749634,
933
- "learning_rate": 9.045911800643846e-06,
934
- "loss": 0.4422,
935
- "step": 980
936
- },
937
- {
938
- "epoch": 43.04347826086956,
939
- "grad_norm": 1.5022424459457397,
940
- "learning_rate": 1.0176650775724327e-05,
941
- "loss": 0.4335,
942
- "step": 990
943
- },
944
- {
945
- "epoch": 43.47826086956522,
946
- "grad_norm": 1.2574431896209717,
947
- "learning_rate": 1.1307389750804806e-05,
948
- "loss": 0.4644,
949
- "step": 1000
950
- },
951
- {
952
- "epoch": 43.91304347826087,
953
- "grad_norm": 1.1123943328857422,
954
- "learning_rate": 1.1307388630332802e-05,
955
- "loss": 0.414,
956
- "step": 1010
957
- },
958
- {
959
- "epoch": 44.34782608695652,
960
- "grad_norm": 1.9059613943099976,
961
- "learning_rate": 1.1307385268917228e-05,
962
- "loss": 0.4696,
963
- "step": 1020
964
- },
965
- {
966
- "epoch": 44.78260869565217,
967
- "grad_norm": 1.5019381046295166,
968
- "learning_rate": 1.1307379666559419e-05,
969
- "loss": 0.4581,
970
- "step": 1030
971
- },
972
- {
973
- "epoch": 45.21739130434783,
974
- "grad_norm": 1.169459342956543,
975
- "learning_rate": 1.1307371823261596e-05,
976
- "loss": 0.33,
977
- "step": 1040
978
- },
979
- {
980
- "epoch": 45.65217391304348,
981
- "grad_norm": 1.5735044479370117,
982
- "learning_rate": 1.130736173902687e-05,
983
- "loss": 0.4415,
984
- "step": 1050
985
- },
986
- {
987
- "epoch": 45.65217391304348,
988
- "eval_loss": 0.7305982708930969,
989
- "eval_runtime": 0.4644,
990
- "eval_samples_per_second": 21.532,
991
- "eval_steps_per_second": 21.532,
992
- "step": 1050
993
- },
994
- {
995
- "epoch": 45.65217391304348,
996
- "eval_loss": 0.8616224527359009,
997
- "eval_runtime": 0.4337,
998
- "eval_samples_per_second": 23.059,
999
- "eval_steps_per_second": 23.059,
1000
- "step": 1050
1001
- },
1002
- {
1003
- "epoch": 45.65217391304348,
1004
- "eval_loss": 0.7305982708930969,
1005
- "eval_runtime": 0.4105,
1006
- "eval_samples_per_second": 24.359,
1007
- "eval_steps_per_second": 24.359,
1008
- "step": 1050
1009
- },
1010
- {
1011
- "epoch": 45.65217391304348,
1012
- "eval_loss": 0.723252534866333,
1013
- "eval_runtime": 0.3944,
1014
- "eval_samples_per_second": 25.352,
1015
- "eval_steps_per_second": 25.352,
1016
- "step": 1050
1017
- },
1018
- {
1019
- "epoch": 45.65217391304348,
1020
- "eval_loss": 0.8612099885940552,
1021
- "eval_runtime": 0.3961,
1022
- "eval_samples_per_second": 25.244,
1023
- "eval_steps_per_second": 25.244,
1024
- "step": 1050
1025
- },
1026
- {
1027
- "epoch": 46.08695652173913,
1028
- "grad_norm": 1.668047308921814,
1029
- "learning_rate": 1.130734941385923e-05,
1030
- "loss": 0.4905,
1031
- "step": 1060
1032
- },
1033
- {
1034
- "epoch": 46.52173913043478,
1035
- "grad_norm": 1.8213101625442505,
1036
- "learning_rate": 1.1307334847763571e-05,
1037
- "loss": 0.4654,
1038
- "step": 1070
1039
- },
1040
- {
1041
- "epoch": 46.95652173913044,
1042
- "grad_norm": 1.7041969299316406,
1043
- "learning_rate": 1.1307318040745661e-05,
1044
- "loss": 0.4089,
1045
- "step": 1080
1046
- },
1047
- {
1048
- "epoch": 47.391304347826086,
1049
- "grad_norm": 1.9170663356781006,
1050
- "learning_rate": 1.1307298992812163e-05,
1051
- "loss": 0.4169,
1052
- "step": 1090
1053
- },
1054
- {
1055
- "epoch": 47.82608695652174,
1056
- "grad_norm": 1.3400579690933228,
1057
- "learning_rate": 1.1307277703970627e-05,
1058
- "loss": 0.4451,
1059
- "step": 1100
1060
- },
1061
- {
1062
- "epoch": 48.26086956521739,
1063
- "grad_norm": 1.8620245456695557,
1064
- "learning_rate": 1.1307254174229492e-05,
1065
- "loss": 0.4606,
1066
- "step": 1110
1067
- },
1068
- {
1069
- "epoch": 48.69565217391305,
1070
- "grad_norm": 1.4493643045425415,
1071
- "learning_rate": 1.1307228403598083e-05,
1072
- "loss": 0.3842,
1073
- "step": 1120
1074
- },
1075
- {
1076
- "epoch": 49.130434782608695,
1077
- "grad_norm": 1.5963612794876099,
1078
- "learning_rate": 1.1307200392086617e-05,
1079
- "loss": 0.5088,
1080
- "step": 1130
1081
- },
1082
- {
1083
- "epoch": 49.56521739130435,
1084
- "grad_norm": 1.0860666036605835,
1085
- "learning_rate": 1.1307170139706193e-05,
1086
- "loss": 0.4661,
1087
- "step": 1140
1088
- },
1089
- {
1090
- "epoch": 50.0,
1091
- "grad_norm": 2.400817632675171,
1092
- "learning_rate": 1.1307137646468805e-05,
1093
- "loss": 0.3732,
1094
- "step": 1150
1095
- },
1096
- {
1097
- "epoch": 50.43478260869565,
1098
- "grad_norm": 1.2243698835372925,
1099
- "learning_rate": 1.130710291238733e-05,
1100
- "loss": 0.3852,
1101
- "step": 1160
1102
- },
1103
- {
1104
- "epoch": 50.869565217391305,
1105
- "grad_norm": 1.5243916511535645,
1106
- "learning_rate": 1.130706593747554e-05,
1107
- "loss": 0.4324,
1108
- "step": 1170
1109
- },
1110
- {
1111
- "epoch": 51.30434782608695,
1112
- "grad_norm": 1.778385877609253,
1113
- "learning_rate": 1.1307026721748087e-05,
1114
- "loss": 0.45,
1115
- "step": 1180
1116
- },
1117
- {
1118
- "epoch": 51.73913043478261,
1119
- "grad_norm": 1.0479800701141357,
1120
- "learning_rate": 1.1306985265220515e-05,
1121
- "loss": 0.3661,
1122
- "step": 1190
1123
- },
1124
- {
1125
- "epoch": 52.17391304347826,
1126
- "grad_norm": 2.22280216217041,
1127
- "learning_rate": 1.1306941567909254e-05,
1128
- "loss": 0.518,
1129
- "step": 1200
1130
- },
1131
- {
1132
- "epoch": 52.17391304347826,
1133
- "eval_loss": 0.7372099161148071,
1134
- "eval_runtime": 0.5807,
1135
- "eval_samples_per_second": 17.222,
1136
- "eval_steps_per_second": 17.222,
1137
- "step": 1200
1138
- },
1139
- {
1140
- "epoch": 52.17391304347826,
1141
- "eval_loss": 0.8616224527359009,
1142
- "eval_runtime": 0.4577,
1143
- "eval_samples_per_second": 21.848,
1144
- "eval_steps_per_second": 21.848,
1145
- "step": 1200
1146
- },
1147
- {
1148
- "epoch": 52.17391304347826,
1149
- "eval_loss": 0.7372099161148071,
1150
- "eval_runtime": 0.4574,
1151
- "eval_samples_per_second": 21.862,
1152
- "eval_steps_per_second": 21.862,
1153
- "step": 1200
1154
- },
1155
- {
1156
- "epoch": 52.17391304347826,
1157
- "eval_loss": 0.7205449938774109,
1158
- "eval_runtime": 0.4458,
1159
- "eval_samples_per_second": 22.433,
1160
- "eval_steps_per_second": 22.433,
1161
- "step": 1200
1162
- },
1163
- {
1164
- "epoch": 52.17391304347826,
1165
- "eval_loss": 0.8604005575180054,
1166
- "eval_runtime": 0.4457,
1167
- "eval_samples_per_second": 22.437,
1168
- "eval_steps_per_second": 22.437,
1169
- "step": 1200
1170
  }
1171
  ],
1172
  "logging_steps": 10,
@@ -1186,7 +171,7 @@
1186
  "attributes": {}
1187
  }
1188
  },
1189
- "total_flos": 3.076671992345395e+16,
1190
  "train_batch_size": 4,
1191
  "trial_name": null,
1192
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7961059212684631,
3
+ "best_model_checkpoint": "./output/checkpoint-150",
4
+ "epoch": 6.521739130434782,
5
  "eval_steps": 150,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.43478260869565216,
13
+ "grad_norm": 1.5021616220474243,
14
  "learning_rate": 3e-06,
15
+ "loss": 0.9065,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.8695652173913043,
20
+ "grad_norm": 1.6870683431625366,
21
  "learning_rate": 6e-06,
22
+ "loss": 0.9027,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 1.3043478260869565,
27
+ "grad_norm": 1.7296483516693115,
28
  "learning_rate": 9e-06,
29
+ "loss": 0.9001,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 1.7391304347826086,
34
+ "grad_norm": 1.4536631107330322,
35
  "learning_rate": 1.2e-05,
36
+ "loss": 0.9092,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 2.1739130434782608,
41
+ "grad_norm": 1.3518139123916626,
42
  "learning_rate": 1.5e-05,
43
+ "loss": 0.8358,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 2.608695652173913,
48
+ "grad_norm": 2.031013250350952,
49
  "learning_rate": 1.8e-05,
50
+ "loss": 0.8898,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 3.0434782608695654,
55
+ "grad_norm": 1.4844363927841187,
56
  "learning_rate": 2.1e-05,
57
+ "loss": 0.8913,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 3.4782608695652173,
62
+ "grad_norm": 1.7294501066207886,
63
  "learning_rate": 2.4e-05,
64
+ "loss": 0.8231,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 3.9130434782608696,
69
+ "grad_norm": 1.423990249633789,
70
  "learning_rate": 2.7000000000000002e-05,
71
+ "loss": 0.8527,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 4.3478260869565215,
76
+ "grad_norm": 1.3655840158462524,
77
  "learning_rate": 3e-05,
78
+ "loss": 0.8647,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 4.782608695652174,
83
+ "grad_norm": 2.1975016593933105,
84
  "learning_rate": 2.999999702723963e-05,
85
+ "loss": 0.8229,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 5.217391304347826,
90
+ "grad_norm": 1.0727310180664062,
91
  "learning_rate": 2.9999988108959687e-05,
92
+ "loss": 0.7648,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 5.6521739130434785,
97
+ "grad_norm": 1.5595622062683105,
98
  "learning_rate": 2.9999973245163716e-05,
99
+ "loss": 0.741,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 6.086956521739131,
104
+ "grad_norm": 1.9067057371139526,
105
  "learning_rate": 2.99999524358576e-05,
106
+ "loss": 0.7655,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 6.521739130434782,
111
+ "grad_norm": 1.1219594478607178,
112
  "learning_rate": 2.9999925681049593e-05,
113
  "loss": 0.7858,
114
  "step": 150
115
  },
116
  {
117
  "epoch": 6.521739130434782,
118
+ "eval_loss": 0.7961059212684631,
119
+ "eval_runtime": 0.4905,
120
+ "eval_samples_per_second": 20.389,
121
+ "eval_steps_per_second": 20.389,
122
  "step": 150
123
  },
124
  {
125
+ "Start_State_loss": 0.8601926565170288,
126
+ "Start_State_runtime": 0.4411,
127
+ "Start_State_samples_per_second": 22.672,
128
+ "Start_State_steps_per_second": 22.672,
129
  "epoch": 6.521739130434782,
 
 
 
 
130
  "step": 150
131
  },
132
  {
133
+ "Raw_Model_loss": 0.7961059212684631,
134
+ "Raw_Model_runtime": 0.4281,
135
+ "Raw_Model_samples_per_second": 23.358,
136
+ "Raw_Model_steps_per_second": 23.358,
137
  "epoch": 6.521739130434782,
 
 
 
 
138
  "step": 150
139
  },
140
  {
141
+ "SWA_loss": 0.8601926565170288,
142
+ "SWA_runtime": 0.4847,
143
+ "SWA_samples_per_second": 20.629,
144
+ "SWA_steps_per_second": 20.629,
145
  "epoch": 6.521739130434782,
 
 
 
 
146
  "step": 150
147
  },
148
  {
149
+ "EMA_loss": 0.8603938221931458,
150
+ "EMA_runtime": 0.454,
151
+ "EMA_samples_per_second": 22.026,
152
+ "EMA_steps_per_second": 22.026,
153
  "epoch": 6.521739130434782,
 
 
 
 
154
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  }
156
  ],
157
  "logging_steps": 10,
 
171
  "attributes": {}
172
  }
173
  },
174
+ "total_flos": 3894839614291968.0,
175
  "train_batch_size": 4,
176
  "trial_name": null,
177
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c27b0feffa55e9783adeb1945da4877c05b7d99c3eb25293fa4481c312fbc7a4
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04cc93848fede83aea266ada8d3a6a175e9ab0ace29af265b71333398b052846
3
  size 5368