YeBhoneLin10 commited on
Commit
30a332c
·
verified ·
1 Parent(s): 6bfbe37

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/content/YeBhoneLin-Whiper-Small-Stream-2.0/last-checkpoint",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
 
1
  {
2
+ "_name_or_path": "openai/whisper-small",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c221c59c353a7c481ef8fb848e7a2c3921f470fcb417beec2b5bef362aa3bd5b
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d18664503eeefe73d902122981c82e338589f72a78e1805df1a86211ffccefc
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa70d6d073579eac4a8bf8a9837094800d8adea492cb54aab25e06e08ad6d60b
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:814aee5aaf254d346a1bf9a7a5da84ade3549eadd13d79d058bea07adbd2e108
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d71c2c7ed0cfd7a1644a65fd7297e581eb7c34b11ce37eddc62f761ad914001e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad46ce4e5894519be6dee3b5c4bef67745e115bf01464ced9f219f603b4ee39e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd1612242bb0477b7b98b59bf0fa7f281d428f72225aa5cd84f5ffc18b7c2bcc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d35a84a90afac6ec40a34ed4fb70cbf6f66e82eb3de0a29476df83e41dc581f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,615 +1,56 @@
1
  {
2
- "best_metric": 31.710700132100396,
3
- "best_model_checkpoint": "./whisper-small-lt/checkpoint-2000",
4
- "epoch": 5.0275,
5
- "eval_steps": 500,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0125,
13
- "grad_norm": 1.318737506866455,
14
  "learning_rate": 5.000000000000001e-07,
15
- "loss": 0.023,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 0.025,
20
- "grad_norm": 0.8892257213592529,
21
  "learning_rate": 1.0000000000000002e-06,
22
- "loss": 0.0129,
23
  "step": 50
24
  },
25
  {
26
  "epoch": 0.0375,
27
- "grad_norm": 1.2649672031402588,
28
  "learning_rate": 1.5e-06,
29
- "loss": 0.019,
30
  "step": 75
31
  },
32
  {
33
  "epoch": 0.05,
34
- "grad_norm": 0.9079497456550598,
35
  "learning_rate": 2.0000000000000003e-06,
36
- "loss": 0.0201,
37
  "step": 100
38
  },
39
  {
40
- "epoch": 0.0625,
41
- "grad_norm": 2.3915770053863525,
42
- "learning_rate": 2.5e-06,
43
- "loss": 0.0192,
44
- "step": 125
45
- },
46
- {
47
- "epoch": 0.075,
48
- "grad_norm": 1.043415904045105,
49
- "learning_rate": 3e-06,
50
- "loss": 0.0183,
51
- "step": 150
52
- },
53
- {
54
- "epoch": 0.0875,
55
- "grad_norm": 1.3370391130447388,
56
- "learning_rate": 3.5e-06,
57
- "loss": 0.0153,
58
- "step": 175
59
- },
60
- {
61
- "epoch": 0.1,
62
- "grad_norm": 1.7550990581512451,
63
- "learning_rate": 4.000000000000001e-06,
64
- "loss": 0.0196,
65
- "step": 200
66
- },
67
- {
68
- "epoch": 0.1125,
69
- "grad_norm": 3.453934669494629,
70
- "learning_rate": 4.5e-06,
71
- "loss": 0.0215,
72
- "step": 225
73
- },
74
- {
75
- "epoch": 0.125,
76
- "grad_norm": 2.3508036136627197,
77
- "learning_rate": 5e-06,
78
- "loss": 0.0214,
79
- "step": 250
80
- },
81
- {
82
- "epoch": 0.1375,
83
- "grad_norm": 1.381133794784546,
84
- "learning_rate": 5.500000000000001e-06,
85
- "loss": 0.0253,
86
- "step": 275
87
- },
88
- {
89
- "epoch": 0.15,
90
- "grad_norm": 1.2868778705596924,
91
- "learning_rate": 6e-06,
92
- "loss": 0.0168,
93
- "step": 300
94
- },
95
- {
96
- "epoch": 0.1625,
97
- "grad_norm": 1.7141369581222534,
98
- "learning_rate": 6.5000000000000004e-06,
99
- "loss": 0.0182,
100
- "step": 325
101
- },
102
- {
103
- "epoch": 0.175,
104
- "grad_norm": 2.0160534381866455,
105
- "learning_rate": 7e-06,
106
- "loss": 0.0197,
107
- "step": 350
108
- },
109
- {
110
- "epoch": 0.1875,
111
- "grad_norm": 1.8141695261001587,
112
- "learning_rate": 7.500000000000001e-06,
113
- "loss": 0.0157,
114
- "step": 375
115
- },
116
- {
117
- "epoch": 1.0055,
118
- "grad_norm": 2.000669002532959,
119
- "learning_rate": 8.000000000000001e-06,
120
- "loss": 0.0235,
121
- "step": 400
122
- },
123
- {
124
- "epoch": 1.018,
125
- "grad_norm": 0.7991472482681274,
126
- "learning_rate": 8.5e-06,
127
- "loss": 0.0186,
128
- "step": 425
129
- },
130
- {
131
- "epoch": 1.0305,
132
- "grad_norm": 1.885020136833191,
133
- "learning_rate": 9e-06,
134
- "loss": 0.0188,
135
- "step": 450
136
- },
137
- {
138
- "epoch": 1.043,
139
- "grad_norm": 1.6318445205688477,
140
- "learning_rate": 9.5e-06,
141
- "loss": 0.0196,
142
- "step": 475
143
- },
144
- {
145
- "epoch": 1.0555,
146
- "grad_norm": 1.235178828239441,
147
- "learning_rate": 1e-05,
148
- "loss": 0.0183,
149
- "step": 500
150
- },
151
- {
152
- "epoch": 1.0555,
153
- "eval_loss": 0.16427084803581238,
154
- "eval_runtime": 628.8932,
155
- "eval_samples_per_second": 1.013,
156
- "eval_steps_per_second": 0.127,
157
- "eval_wer": 36.38044914134743,
158
- "step": 500
159
- },
160
- {
161
- "epoch": 1.068,
162
- "grad_norm": 1.9209116697311401,
163
- "learning_rate": 9.833333333333333e-06,
164
- "loss": 0.0171,
165
- "step": 525
166
- },
167
- {
168
- "epoch": 1.0805,
169
- "grad_norm": 1.8610273599624634,
170
- "learning_rate": 9.666666666666667e-06,
171
- "loss": 0.0159,
172
- "step": 550
173
- },
174
- {
175
- "epoch": 1.093,
176
- "grad_norm": 1.6190500259399414,
177
- "learning_rate": 9.5e-06,
178
- "loss": 0.0138,
179
- "step": 575
180
- },
181
- {
182
- "epoch": 1.1055,
183
- "grad_norm": 1.2552204132080078,
184
- "learning_rate": 9.333333333333334e-06,
185
- "loss": 0.0178,
186
- "step": 600
187
- },
188
- {
189
- "epoch": 1.1179999999999999,
190
- "grad_norm": 1.5186320543289185,
191
- "learning_rate": 9.166666666666666e-06,
192
- "loss": 0.0165,
193
- "step": 625
194
- },
195
- {
196
- "epoch": 1.1305,
197
- "grad_norm": 2.233752727508545,
198
- "learning_rate": 9e-06,
199
- "loss": 0.0155,
200
- "step": 650
201
- },
202
- {
203
- "epoch": 1.143,
204
- "grad_norm": 1.0048011541366577,
205
- "learning_rate": 8.833333333333334e-06,
206
- "loss": 0.0165,
207
- "step": 675
208
- },
209
- {
210
- "epoch": 1.1555,
211
- "grad_norm": 2.4041740894317627,
212
- "learning_rate": 8.666666666666668e-06,
213
- "loss": 0.0111,
214
- "step": 700
215
- },
216
- {
217
- "epoch": 1.168,
218
- "grad_norm": 2.370142936706543,
219
- "learning_rate": 8.506666666666668e-06,
220
- "loss": 0.0135,
221
- "step": 725
222
- },
223
- {
224
- "epoch": 1.1804999999999999,
225
- "grad_norm": 1.4969311952590942,
226
- "learning_rate": 8.34e-06,
227
- "loss": 0.0155,
228
- "step": 750
229
- },
230
- {
231
- "epoch": 1.193,
232
- "grad_norm": 2.3001623153686523,
233
- "learning_rate": 8.173333333333334e-06,
234
- "loss": 0.0179,
235
- "step": 775
236
- },
237
- {
238
- "epoch": 2.011,
239
- "grad_norm": 1.519529938697815,
240
- "learning_rate": 8.006666666666667e-06,
241
- "loss": 0.0212,
242
- "step": 800
243
- },
244
- {
245
- "epoch": 2.0235,
246
- "grad_norm": 2.144901752471924,
247
- "learning_rate": 7.840000000000001e-06,
248
- "loss": 0.0136,
249
- "step": 825
250
- },
251
- {
252
- "epoch": 2.036,
253
- "grad_norm": 1.2353721857070923,
254
- "learning_rate": 7.673333333333333e-06,
255
- "loss": 0.0149,
256
- "step": 850
257
- },
258
- {
259
- "epoch": 2.0485,
260
- "grad_norm": 1.0761120319366455,
261
- "learning_rate": 7.506666666666668e-06,
262
- "loss": 0.0159,
263
- "step": 875
264
- },
265
- {
266
- "epoch": 2.061,
267
- "grad_norm": 1.4343351125717163,
268
- "learning_rate": 7.340000000000001e-06,
269
- "loss": 0.0117,
270
- "step": 900
271
- },
272
- {
273
- "epoch": 2.0735,
274
- "grad_norm": 0.9911390542984009,
275
- "learning_rate": 7.173333333333335e-06,
276
- "loss": 0.0132,
277
- "step": 925
278
- },
279
- {
280
- "epoch": 2.086,
281
- "grad_norm": 1.0542834997177124,
282
- "learning_rate": 7.006666666666667e-06,
283
- "loss": 0.0091,
284
- "step": 950
285
- },
286
- {
287
- "epoch": 2.0985,
288
- "grad_norm": 1.383226990699768,
289
- "learning_rate": 6.8400000000000014e-06,
290
- "loss": 0.0108,
291
- "step": 975
292
- },
293
- {
294
- "epoch": 2.111,
295
- "grad_norm": 1.7129534482955933,
296
- "learning_rate": 6.6733333333333335e-06,
297
- "loss": 0.0107,
298
- "step": 1000
299
- },
300
- {
301
- "epoch": 2.111,
302
- "eval_loss": 0.17394371330738068,
303
- "eval_runtime": 633.8826,
304
- "eval_samples_per_second": 1.005,
305
- "eval_steps_per_second": 0.126,
306
- "eval_wer": 35.19154557463672,
307
- "step": 1000
308
- },
309
- {
310
- "epoch": 2.1235,
311
- "grad_norm": 1.8474119901657104,
312
- "learning_rate": 6.5066666666666665e-06,
313
- "loss": 0.011,
314
- "step": 1025
315
- },
316
- {
317
- "epoch": 2.136,
318
- "grad_norm": 1.9425179958343506,
319
- "learning_rate": 6.34e-06,
320
- "loss": 0.0134,
321
- "step": 1050
322
- },
323
- {
324
- "epoch": 2.1485,
325
- "grad_norm": 1.1808143854141235,
326
- "learning_rate": 6.173333333333333e-06,
327
- "loss": 0.0088,
328
- "step": 1075
329
- },
330
- {
331
- "epoch": 2.161,
332
- "grad_norm": 1.567855715751648,
333
- "learning_rate": 6.006666666666667e-06,
334
- "loss": 0.0108,
335
- "step": 1100
336
- },
337
- {
338
- "epoch": 2.1734999999999998,
339
- "grad_norm": 2.328662395477295,
340
- "learning_rate": 5.84e-06,
341
- "loss": 0.0129,
342
- "step": 1125
343
- },
344
- {
345
- "epoch": 2.186,
346
- "grad_norm": 1.0424344539642334,
347
- "learning_rate": 5.673333333333334e-06,
348
- "loss": 0.0116,
349
- "step": 1150
350
- },
351
- {
352
- "epoch": 3.004,
353
- "grad_norm": 1.357283592224121,
354
- "learning_rate": 5.506666666666667e-06,
355
- "loss": 0.014,
356
- "step": 1175
357
- },
358
- {
359
- "epoch": 3.0165,
360
- "grad_norm": 1.666524887084961,
361
- "learning_rate": 5.3400000000000005e-06,
362
- "loss": 0.0111,
363
- "step": 1200
364
- },
365
- {
366
- "epoch": 3.029,
367
- "grad_norm": 1.2688168287277222,
368
- "learning_rate": 5.1733333333333335e-06,
369
- "loss": 0.0084,
370
- "step": 1225
371
- },
372
- {
373
- "epoch": 3.0415,
374
- "grad_norm": 1.1632572412490845,
375
- "learning_rate": 5.006666666666667e-06,
376
- "loss": 0.0078,
377
- "step": 1250
378
- },
379
- {
380
- "epoch": 3.054,
381
- "grad_norm": 1.4629402160644531,
382
- "learning_rate": 4.84e-06,
383
- "loss": 0.0059,
384
- "step": 1275
385
- },
386
- {
387
- "epoch": 3.0665,
388
- "grad_norm": 0.6573676466941833,
389
- "learning_rate": 4.673333333333333e-06,
390
- "loss": 0.0054,
391
- "step": 1300
392
- },
393
- {
394
- "epoch": 3.079,
395
- "grad_norm": 0.9792174100875854,
396
- "learning_rate": 4.506666666666667e-06,
397
- "loss": 0.0053,
398
- "step": 1325
399
- },
400
- {
401
- "epoch": 3.0915,
402
- "grad_norm": 0.8290985226631165,
403
- "learning_rate": 4.34e-06,
404
- "loss": 0.0042,
405
- "step": 1350
406
- },
407
- {
408
- "epoch": 3.104,
409
- "grad_norm": 1.0542296171188354,
410
- "learning_rate": 4.173333333333334e-06,
411
- "loss": 0.0058,
412
- "step": 1375
413
- },
414
- {
415
- "epoch": 3.1165,
416
- "grad_norm": 0.30175602436065674,
417
- "learning_rate": 4.006666666666667e-06,
418
- "loss": 0.0073,
419
- "step": 1400
420
- },
421
- {
422
- "epoch": 3.129,
423
- "grad_norm": 1.601818561553955,
424
- "learning_rate": 3.8400000000000005e-06,
425
- "loss": 0.0066,
426
- "step": 1425
427
- },
428
- {
429
- "epoch": 3.1415,
430
- "grad_norm": 1.0414142608642578,
431
- "learning_rate": 3.673333333333334e-06,
432
- "loss": 0.0082,
433
- "step": 1450
434
- },
435
- {
436
- "epoch": 3.154,
437
- "grad_norm": 0.8256711959838867,
438
- "learning_rate": 3.5066666666666673e-06,
439
- "loss": 0.0052,
440
- "step": 1475
441
- },
442
- {
443
- "epoch": 3.1665,
444
- "grad_norm": 1.2712088823318481,
445
- "learning_rate": 3.3400000000000006e-06,
446
- "loss": 0.0053,
447
- "step": 1500
448
- },
449
- {
450
- "epoch": 3.1665,
451
- "eval_loss": 0.19207946956157684,
452
- "eval_runtime": 631.9168,
453
- "eval_samples_per_second": 1.008,
454
- "eval_steps_per_second": 0.127,
455
- "eval_wer": 35.165125495376486,
456
- "step": 1500
457
- },
458
- {
459
- "epoch": 3.179,
460
- "grad_norm": 1.0322113037109375,
461
- "learning_rate": 3.173333333333334e-06,
462
- "loss": 0.0059,
463
- "step": 1525
464
- },
465
- {
466
- "epoch": 3.1915,
467
- "grad_norm": 1.2165710926055908,
468
- "learning_rate": 3.0066666666666674e-06,
469
- "loss": 0.0047,
470
- "step": 1550
471
- },
472
- {
473
- "epoch": 4.0095,
474
- "grad_norm": 1.093904972076416,
475
- "learning_rate": 2.84e-06,
476
- "loss": 0.0061,
477
- "step": 1575
478
- },
479
- {
480
- "epoch": 4.022,
481
- "grad_norm": 0.7058098912239075,
482
- "learning_rate": 2.6733333333333333e-06,
483
- "loss": 0.0046,
484
- "step": 1600
485
- },
486
- {
487
- "epoch": 4.0345,
488
- "grad_norm": 0.35674819350242615,
489
- "learning_rate": 2.5066666666666667e-06,
490
- "loss": 0.0035,
491
- "step": 1625
492
- },
493
- {
494
- "epoch": 4.047,
495
- "grad_norm": 0.7753161191940308,
496
- "learning_rate": 2.3400000000000005e-06,
497
- "loss": 0.0023,
498
- "step": 1650
499
- },
500
- {
501
- "epoch": 4.0595,
502
- "grad_norm": 0.5088989734649658,
503
- "learning_rate": 2.1733333333333334e-06,
504
- "loss": 0.002,
505
- "step": 1675
506
- },
507
- {
508
- "epoch": 4.072,
509
- "grad_norm": 1.716977596282959,
510
- "learning_rate": 2.006666666666667e-06,
511
- "loss": 0.0025,
512
- "step": 1700
513
- },
514
- {
515
- "epoch": 4.0845,
516
- "grad_norm": 0.2785784900188446,
517
- "learning_rate": 1.8400000000000002e-06,
518
- "loss": 0.0017,
519
- "step": 1725
520
- },
521
- {
522
- "epoch": 4.097,
523
- "grad_norm": 0.34034302830696106,
524
- "learning_rate": 1.6733333333333335e-06,
525
- "loss": 0.0018,
526
- "step": 1750
527
- },
528
- {
529
- "epoch": 4.1095,
530
- "grad_norm": 0.626815140247345,
531
- "learning_rate": 1.506666666666667e-06,
532
- "loss": 0.0026,
533
- "step": 1775
534
- },
535
- {
536
- "epoch": 4.122,
537
- "grad_norm": 0.41825541853904724,
538
- "learning_rate": 1.34e-06,
539
- "loss": 0.0025,
540
- "step": 1800
541
- },
542
- {
543
- "epoch": 4.1345,
544
- "grad_norm": 0.6465583443641663,
545
- "learning_rate": 1.1733333333333335e-06,
546
- "loss": 0.0033,
547
- "step": 1825
548
- },
549
- {
550
- "epoch": 4.147,
551
- "grad_norm": 0.3226906359195709,
552
- "learning_rate": 1.0066666666666668e-06,
553
- "loss": 0.0024,
554
- "step": 1850
555
- },
556
- {
557
- "epoch": 4.1595,
558
- "grad_norm": 0.4798910915851593,
559
- "learning_rate": 8.400000000000001e-07,
560
- "loss": 0.002,
561
- "step": 1875
562
- },
563
- {
564
- "epoch": 4.172,
565
- "grad_norm": 0.41319090127944946,
566
- "learning_rate": 6.733333333333334e-07,
567
- "loss": 0.0019,
568
- "step": 1900
569
- },
570
- {
571
- "epoch": 4.1845,
572
- "grad_norm": 0.6397626996040344,
573
- "learning_rate": 5.066666666666667e-07,
574
- "loss": 0.0016,
575
- "step": 1925
576
- },
577
- {
578
- "epoch": 5.0025,
579
- "grad_norm": 0.5262264013290405,
580
- "learning_rate": 3.4000000000000003e-07,
581
- "loss": 0.0017,
582
- "step": 1950
583
- },
584
- {
585
- "epoch": 5.015,
586
- "grad_norm": 0.509278416633606,
587
- "learning_rate": 1.7333333333333335e-07,
588
- "loss": 0.0037,
589
- "step": 1975
590
- },
591
- {
592
- "epoch": 5.0275,
593
- "grad_norm": 0.8492897748947144,
594
- "learning_rate": 6.666666666666667e-09,
595
- "loss": 0.0022,
596
- "step": 2000
597
- },
598
- {
599
- "epoch": 5.0275,
600
- "eval_loss": 0.1655821055173874,
601
- "eval_runtime": 636.0021,
602
- "eval_samples_per_second": 1.002,
603
- "eval_steps_per_second": 0.126,
604
- "eval_wer": 31.710700132100396,
605
- "step": 2000
606
  }
607
  ],
608
  "logging_steps": 25,
609
  "max_steps": 2000,
610
  "num_input_tokens_seen": 0,
611
  "num_train_epochs": 9223372036854775807,
612
- "save_steps": 500,
613
  "stateful_callbacks": {
614
  "TrainerControl": {
615
  "args": {
@@ -617,12 +58,12 @@
617
  "should_evaluate": false,
618
  "should_log": false,
619
  "should_save": true,
620
- "should_training_stop": true
621
  },
622
  "attributes": {}
623
  }
624
  },
625
- "total_flos": 9.2217464672256e+18,
626
  "train_batch_size": 16,
627
  "trial_name": null,
628
  "trial_params": null
 
1
  {
2
+ "best_metric": 34.02245706737121,
3
+ "best_model_checkpoint": "./whisper-small-lt/checkpoint-100",
4
+ "epoch": 0.05,
5
+ "eval_steps": 100,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0125,
13
+ "grad_norm": 2.394010305404663,
14
  "learning_rate": 5.000000000000001e-07,
15
+ "loss": 0.1152,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 0.025,
20
+ "grad_norm": 1.771360158920288,
21
  "learning_rate": 1.0000000000000002e-06,
22
+ "loss": 0.0713,
23
  "step": 50
24
  },
25
  {
26
  "epoch": 0.0375,
27
+ "grad_norm": 2.0305898189544678,
28
  "learning_rate": 1.5e-06,
29
+ "loss": 0.056,
30
  "step": 75
31
  },
32
  {
33
  "epoch": 0.05,
34
+ "grad_norm": 2.1570658683776855,
35
  "learning_rate": 2.0000000000000003e-06,
36
+ "loss": 0.0489,
37
  "step": 100
38
  },
39
  {
40
+ "epoch": 0.05,
41
+ "eval_loss": 0.1409192979335785,
42
+ "eval_runtime": 624.8165,
43
+ "eval_samples_per_second": 1.019,
44
+ "eval_steps_per_second": 0.128,
45
+ "eval_wer": 34.02245706737121,
46
+ "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  }
48
  ],
49
  "logging_steps": 25,
50
  "max_steps": 2000,
51
  "num_input_tokens_seen": 0,
52
  "num_train_epochs": 9223372036854775807,
53
+ "save_steps": 100,
54
  "stateful_callbacks": {
55
  "TrainerControl": {
56
  "args": {
 
58
  "should_evaluate": false,
59
  "should_log": false,
60
  "should_save": true,
61
+ "should_training_stop": false
62
  },
63
  "attributes": {}
64
  }
65
  },
66
+ "total_flos": 4.61736640512e+17,
67
  "train_batch_size": 16,
68
  "trial_name": null,
69
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e34c60b814e2c49bccf2034b010f909e8e6f936f8fc498b53be4bfd23aeeae6
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c315f0b3f3b9c04dd0f73fe7a958b5781a698b4bc9f0eb0dbcedca8beb018820
3
  size 5304