schnell commited on
Commit
33833a4
·
1 Parent(s): f820086

Training in progress, epoch 13

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef2ed6e9c5769612ee83bbcff1434945866238417fae08918c197f74b46418b8
3
  size 236469913
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd2f1419c45952d183434e915bc48855048f7e7221baef55ee75c2ec918ea8a9
3
  size 236469913
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c098f75814bf0d67b7b2990383c442d386c5dedd85129e03b179a63a78cf10e
3
  size 118242180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ec13bee1191623dd89f547f05ab2d8f9b47f3c9077b8d581674d22ee640388d
3
  size 118242180
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd9765584191118ad624ada4294aa4c7c4b446e8275a08afe002751786a580c3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a47ecc6944cdde782b572b7a351e6a250bc225e29461826a5154c96dc4832901
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85a3eb859005f1a2e2b3cdf3c05d91017c106fb92e1c7a7c1999929d8f053b63
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b55971e7d5600536d0c32f692c32c0762404f751d6cde4f3d9062910e7ef1f7
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76b7fc01a18c41ee4d5f6a3a44379550533c02e5ae590743297a4e13668b3d82
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68c328438a7d04830fa9147610ceddf4844bd5db4e595b25c7f824689bdaa43d
3
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb87d42e1d664298b59b2f6667ddacc7adf88687ba59853e16a623f40650b2d6
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cfbfb8ae404ff126d864dc533080bc6f6ebba58e419973b1342f14752109cb0
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd0be3c29b40bb10e427fd603007010cb5d019dfe3909eb92cb96939b7adcda3
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87bfb5b4b88c1f3652dfbcbd080f3298e694046d1bf7d69c5b0519876a04d3f0
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.0,
5
- "global_step": 833676,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10116,11 +10116,854 @@
10116
  "eval_samples_per_second": 969.198,
10117
  "eval_steps_per_second": 40.384,
10118
  "step": 833676
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10119
  }
10120
  ],
10121
  "max_steps": 972622,
10122
  "num_train_epochs": 14,
10123
- "total_flos": 5.126958960674341e+18,
10124
  "trial_name": null,
10125
  "trial_params": null
10126
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.0,
5
+ "global_step": 903149,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10116
  "eval_samples_per_second": 969.198,
10117
  "eval_steps_per_second": 40.384,
10118
  "step": 833676
10119
+ },
10120
+ {
10121
+ "epoch": 12.0,
10122
+ "learning_rate": 1.4433349430623277e-05,
10123
+ "loss": 2.7869,
10124
+ "step": 834000
10125
+ },
10126
+ {
10127
+ "epoch": 12.01,
10128
+ "learning_rate": 1.4381422688870542e-05,
10129
+ "loss": 2.784,
10130
+ "step": 834500
10131
+ },
10132
+ {
10133
+ "epoch": 12.02,
10134
+ "learning_rate": 1.4329495947117808e-05,
10135
+ "loss": 2.7852,
10136
+ "step": 835000
10137
+ },
10138
+ {
10139
+ "epoch": 12.03,
10140
+ "learning_rate": 1.4277569205365072e-05,
10141
+ "loss": 2.7812,
10142
+ "step": 835500
10143
+ },
10144
+ {
10145
+ "epoch": 12.03,
10146
+ "learning_rate": 1.4225746317095843e-05,
10147
+ "loss": 2.7795,
10148
+ "step": 836000
10149
+ },
10150
+ {
10151
+ "epoch": 12.04,
10152
+ "learning_rate": 1.4173819575343106e-05,
10153
+ "loss": 2.7886,
10154
+ "step": 836500
10155
+ },
10156
+ {
10157
+ "epoch": 12.05,
10158
+ "learning_rate": 1.4121892833590373e-05,
10159
+ "loss": 2.7786,
10160
+ "step": 837000
10161
+ },
10162
+ {
10163
+ "epoch": 12.06,
10164
+ "learning_rate": 1.4069966091837636e-05,
10165
+ "loss": 2.7819,
10166
+ "step": 837500
10167
+ },
10168
+ {
10169
+ "epoch": 12.06,
10170
+ "learning_rate": 1.4018039350084902e-05,
10171
+ "loss": 2.7792,
10172
+ "step": 838000
10173
+ },
10174
+ {
10175
+ "epoch": 12.07,
10176
+ "learning_rate": 1.396621646181567e-05,
10177
+ "loss": 2.7834,
10178
+ "step": 838500
10179
+ },
10180
+ {
10181
+ "epoch": 12.08,
10182
+ "learning_rate": 1.3914289720062937e-05,
10183
+ "loss": 2.7768,
10184
+ "step": 839000
10185
+ },
10186
+ {
10187
+ "epoch": 12.08,
10188
+ "learning_rate": 1.38623629783102e-05,
10189
+ "loss": 2.7793,
10190
+ "step": 839500
10191
+ },
10192
+ {
10193
+ "epoch": 12.09,
10194
+ "learning_rate": 1.3810436236557463e-05,
10195
+ "loss": 2.7784,
10196
+ "step": 840000
10197
+ },
10198
+ {
10199
+ "epoch": 12.1,
10200
+ "learning_rate": 1.3758613348288235e-05,
10201
+ "loss": 2.7776,
10202
+ "step": 840500
10203
+ },
10204
+ {
10205
+ "epoch": 12.11,
10206
+ "learning_rate": 1.3706686606535501e-05,
10207
+ "loss": 2.7822,
10208
+ "step": 841000
10209
+ },
10210
+ {
10211
+ "epoch": 12.11,
10212
+ "learning_rate": 1.3654759864782765e-05,
10213
+ "loss": 2.7877,
10214
+ "step": 841500
10215
+ },
10216
+ {
10217
+ "epoch": 12.12,
10218
+ "learning_rate": 1.360283312303003e-05,
10219
+ "loss": 2.7802,
10220
+ "step": 842000
10221
+ },
10222
+ {
10223
+ "epoch": 12.13,
10224
+ "learning_rate": 1.3551010234760799e-05,
10225
+ "loss": 2.7854,
10226
+ "step": 842500
10227
+ },
10228
+ {
10229
+ "epoch": 12.13,
10230
+ "learning_rate": 1.3499083493008066e-05,
10231
+ "loss": 2.7821,
10232
+ "step": 843000
10233
+ },
10234
+ {
10235
+ "epoch": 12.14,
10236
+ "learning_rate": 1.344715675125533e-05,
10237
+ "loss": 2.7788,
10238
+ "step": 843500
10239
+ },
10240
+ {
10241
+ "epoch": 12.15,
10242
+ "learning_rate": 1.3395230009502594e-05,
10243
+ "loss": 2.7802,
10244
+ "step": 844000
10245
+ },
10246
+ {
10247
+ "epoch": 12.16,
10248
+ "learning_rate": 1.334330326774986e-05,
10249
+ "loss": 2.7772,
10250
+ "step": 844500
10251
+ },
10252
+ {
10253
+ "epoch": 12.16,
10254
+ "learning_rate": 1.3291376525997124e-05,
10255
+ "loss": 2.7777,
10256
+ "step": 845000
10257
+ },
10258
+ {
10259
+ "epoch": 12.17,
10260
+ "learning_rate": 1.323944978424439e-05,
10261
+ "loss": 2.7751,
10262
+ "step": 845500
10263
+ },
10264
+ {
10265
+ "epoch": 12.18,
10266
+ "learning_rate": 1.3187523042491653e-05,
10267
+ "loss": 2.7846,
10268
+ "step": 846000
10269
+ },
10270
+ {
10271
+ "epoch": 12.18,
10272
+ "learning_rate": 1.3135700154222425e-05,
10273
+ "loss": 2.7809,
10274
+ "step": 846500
10275
+ },
10276
+ {
10277
+ "epoch": 12.19,
10278
+ "learning_rate": 1.3083773412469688e-05,
10279
+ "loss": 2.776,
10280
+ "step": 847000
10281
+ },
10282
+ {
10283
+ "epoch": 12.2,
10284
+ "learning_rate": 1.3031846670716955e-05,
10285
+ "loss": 2.7767,
10286
+ "step": 847500
10287
+ },
10288
+ {
10289
+ "epoch": 12.21,
10290
+ "learning_rate": 1.2980023782447722e-05,
10291
+ "loss": 2.7834,
10292
+ "step": 848000
10293
+ },
10294
+ {
10295
+ "epoch": 12.21,
10296
+ "learning_rate": 1.2928097040694989e-05,
10297
+ "loss": 2.7801,
10298
+ "step": 848500
10299
+ },
10300
+ {
10301
+ "epoch": 12.22,
10302
+ "learning_rate": 1.2876170298942252e-05,
10303
+ "loss": 2.7782,
10304
+ "step": 849000
10305
+ },
10306
+ {
10307
+ "epoch": 12.23,
10308
+ "learning_rate": 1.2824243557189517e-05,
10309
+ "loss": 2.7877,
10310
+ "step": 849500
10311
+ },
10312
+ {
10313
+ "epoch": 12.23,
10314
+ "learning_rate": 1.2772420668920287e-05,
10315
+ "loss": 2.783,
10316
+ "step": 850000
10317
+ },
10318
+ {
10319
+ "epoch": 12.24,
10320
+ "learning_rate": 1.2720493927167553e-05,
10321
+ "loss": 2.7825,
10322
+ "step": 850500
10323
+ },
10324
+ {
10325
+ "epoch": 12.25,
10326
+ "learning_rate": 1.2668567185414818e-05,
10327
+ "loss": 2.78,
10328
+ "step": 851000
10329
+ },
10330
+ {
10331
+ "epoch": 12.26,
10332
+ "learning_rate": 1.2616640443662082e-05,
10333
+ "loss": 2.7763,
10334
+ "step": 851500
10335
+ },
10336
+ {
10337
+ "epoch": 12.26,
10338
+ "learning_rate": 1.2564713701909348e-05,
10339
+ "loss": 2.7805,
10340
+ "step": 852000
10341
+ },
10342
+ {
10343
+ "epoch": 12.27,
10344
+ "learning_rate": 1.2512890813640118e-05,
10345
+ "loss": 2.7834,
10346
+ "step": 852500
10347
+ },
10348
+ {
10349
+ "epoch": 12.28,
10350
+ "learning_rate": 1.2460964071887383e-05,
10351
+ "loss": 2.7768,
10352
+ "step": 853000
10353
+ },
10354
+ {
10355
+ "epoch": 12.29,
10356
+ "learning_rate": 1.2409037330134648e-05,
10357
+ "loss": 2.7782,
10358
+ "step": 853500
10359
+ },
10360
+ {
10361
+ "epoch": 12.29,
10362
+ "learning_rate": 1.235711058838191e-05,
10363
+ "loss": 2.7797,
10364
+ "step": 854000
10365
+ },
10366
+ {
10367
+ "epoch": 12.3,
10368
+ "learning_rate": 1.2305183846629176e-05,
10369
+ "loss": 2.7775,
10370
+ "step": 854500
10371
+ },
10372
+ {
10373
+ "epoch": 12.31,
10374
+ "learning_rate": 1.225325710487644e-05,
10375
+ "loss": 2.7767,
10376
+ "step": 855000
10377
+ },
10378
+ {
10379
+ "epoch": 12.31,
10380
+ "learning_rate": 1.2201330363123705e-05,
10381
+ "loss": 2.781,
10382
+ "step": 855500
10383
+ },
10384
+ {
10385
+ "epoch": 12.32,
10386
+ "learning_rate": 1.214940362137097e-05,
10387
+ "loss": 2.7836,
10388
+ "step": 856000
10389
+ },
10390
+ {
10391
+ "epoch": 12.33,
10392
+ "learning_rate": 1.209758073310174e-05,
10393
+ "loss": 2.7769,
10394
+ "step": 856500
10395
+ },
10396
+ {
10397
+ "epoch": 12.34,
10398
+ "learning_rate": 1.2045757844832511e-05,
10399
+ "loss": 2.7768,
10400
+ "step": 857000
10401
+ },
10402
+ {
10403
+ "epoch": 12.34,
10404
+ "learning_rate": 1.1993831103079775e-05,
10405
+ "loss": 2.7803,
10406
+ "step": 857500
10407
+ },
10408
+ {
10409
+ "epoch": 12.35,
10410
+ "learning_rate": 1.194190436132704e-05,
10411
+ "loss": 2.7746,
10412
+ "step": 858000
10413
+ },
10414
+ {
10415
+ "epoch": 12.36,
10416
+ "learning_rate": 1.1889977619574304e-05,
10417
+ "loss": 2.7826,
10418
+ "step": 858500
10419
+ },
10420
+ {
10421
+ "epoch": 12.36,
10422
+ "learning_rate": 1.1838050877821571e-05,
10423
+ "loss": 2.7776,
10424
+ "step": 859000
10425
+ },
10426
+ {
10427
+ "epoch": 12.37,
10428
+ "learning_rate": 1.178622798955234e-05,
10429
+ "loss": 2.7823,
10430
+ "step": 859500
10431
+ },
10432
+ {
10433
+ "epoch": 12.38,
10434
+ "learning_rate": 1.1734301247799605e-05,
10435
+ "loss": 2.7855,
10436
+ "step": 860000
10437
+ },
10438
+ {
10439
+ "epoch": 12.39,
10440
+ "learning_rate": 1.168237450604687e-05,
10441
+ "loss": 2.7795,
10442
+ "step": 860500
10443
+ },
10444
+ {
10445
+ "epoch": 12.39,
10446
+ "learning_rate": 1.1630447764294135e-05,
10447
+ "loss": 2.7811,
10448
+ "step": 861000
10449
+ },
10450
+ {
10451
+ "epoch": 12.4,
10452
+ "learning_rate": 1.15785210225414e-05,
10453
+ "loss": 2.7801,
10454
+ "step": 861500
10455
+ },
10456
+ {
10457
+ "epoch": 12.41,
10458
+ "learning_rate": 1.1526594280788663e-05,
10459
+ "loss": 2.773,
10460
+ "step": 862000
10461
+ },
10462
+ {
10463
+ "epoch": 12.41,
10464
+ "learning_rate": 1.1474771392519435e-05,
10465
+ "loss": 2.7787,
10466
+ "step": 862500
10467
+ },
10468
+ {
10469
+ "epoch": 12.42,
10470
+ "learning_rate": 1.14228446507667e-05,
10471
+ "loss": 2.7825,
10472
+ "step": 863000
10473
+ },
10474
+ {
10475
+ "epoch": 12.43,
10476
+ "learning_rate": 1.1370917909013963e-05,
10477
+ "loss": 2.783,
10478
+ "step": 863500
10479
+ },
10480
+ {
10481
+ "epoch": 12.44,
10482
+ "learning_rate": 1.1318991167261228e-05,
10483
+ "loss": 2.7753,
10484
+ "step": 864000
10485
+ },
10486
+ {
10487
+ "epoch": 12.44,
10488
+ "learning_rate": 1.1267064425508493e-05,
10489
+ "loss": 2.7734,
10490
+ "step": 864500
10491
+ },
10492
+ {
10493
+ "epoch": 12.45,
10494
+ "learning_rate": 1.1215137683755758e-05,
10495
+ "loss": 2.7773,
10496
+ "step": 865000
10497
+ },
10498
+ {
10499
+ "epoch": 12.46,
10500
+ "learning_rate": 1.1163210942003022e-05,
10501
+ "loss": 2.7752,
10502
+ "step": 865500
10503
+ },
10504
+ {
10505
+ "epoch": 12.47,
10506
+ "learning_rate": 1.1111284200250287e-05,
10507
+ "loss": 2.7795,
10508
+ "step": 866000
10509
+ },
10510
+ {
10511
+ "epoch": 12.47,
10512
+ "learning_rate": 1.1059461311981057e-05,
10513
+ "loss": 2.7801,
10514
+ "step": 866500
10515
+ },
10516
+ {
10517
+ "epoch": 12.48,
10518
+ "learning_rate": 1.1007534570228324e-05,
10519
+ "loss": 2.7781,
10520
+ "step": 867000
10521
+ },
10522
+ {
10523
+ "epoch": 12.49,
10524
+ "learning_rate": 1.0955711681959093e-05,
10525
+ "loss": 2.7805,
10526
+ "step": 867500
10527
+ },
10528
+ {
10529
+ "epoch": 12.49,
10530
+ "learning_rate": 1.0903784940206358e-05,
10531
+ "loss": 2.7758,
10532
+ "step": 868000
10533
+ },
10534
+ {
10535
+ "epoch": 12.5,
10536
+ "learning_rate": 1.0851858198453623e-05,
10537
+ "loss": 2.7806,
10538
+ "step": 868500
10539
+ },
10540
+ {
10541
+ "epoch": 12.51,
10542
+ "learning_rate": 1.0799931456700888e-05,
10543
+ "loss": 2.7763,
10544
+ "step": 869000
10545
+ },
10546
+ {
10547
+ "epoch": 12.52,
10548
+ "learning_rate": 1.0748108568431658e-05,
10549
+ "loss": 2.7779,
10550
+ "step": 869500
10551
+ },
10552
+ {
10553
+ "epoch": 12.52,
10554
+ "learning_rate": 1.0696181826678922e-05,
10555
+ "loss": 2.7746,
10556
+ "step": 870000
10557
+ },
10558
+ {
10559
+ "epoch": 12.53,
10560
+ "learning_rate": 1.0644255084926187e-05,
10561
+ "loss": 2.7768,
10562
+ "step": 870500
10563
+ },
10564
+ {
10565
+ "epoch": 12.54,
10566
+ "learning_rate": 1.059232834317345e-05,
10567
+ "loss": 2.7761,
10568
+ "step": 871000
10569
+ },
10570
+ {
10571
+ "epoch": 12.54,
10572
+ "learning_rate": 1.0540401601420715e-05,
10573
+ "loss": 2.7745,
10574
+ "step": 871500
10575
+ },
10576
+ {
10577
+ "epoch": 12.55,
10578
+ "learning_rate": 1.048847485966798e-05,
10579
+ "loss": 2.7766,
10580
+ "step": 872000
10581
+ },
10582
+ {
10583
+ "epoch": 12.56,
10584
+ "learning_rate": 1.0436548117915245e-05,
10585
+ "loss": 2.7734,
10586
+ "step": 872500
10587
+ },
10588
+ {
10589
+ "epoch": 12.57,
10590
+ "learning_rate": 1.038462137616251e-05,
10591
+ "loss": 2.7794,
10592
+ "step": 873000
10593
+ },
10594
+ {
10595
+ "epoch": 12.57,
10596
+ "learning_rate": 1.033279848789328e-05,
10597
+ "loss": 2.7796,
10598
+ "step": 873500
10599
+ },
10600
+ {
10601
+ "epoch": 12.58,
10602
+ "learning_rate": 1.0280871746140545e-05,
10603
+ "loss": 2.7727,
10604
+ "step": 874000
10605
+ },
10606
+ {
10607
+ "epoch": 12.59,
10608
+ "learning_rate": 1.0229048857871314e-05,
10609
+ "loss": 2.7794,
10610
+ "step": 874500
10611
+ },
10612
+ {
10613
+ "epoch": 12.59,
10614
+ "learning_rate": 1.0177122116118581e-05,
10615
+ "loss": 2.7735,
10616
+ "step": 875000
10617
+ },
10618
+ {
10619
+ "epoch": 12.6,
10620
+ "learning_rate": 1.0125195374365846e-05,
10621
+ "loss": 2.7776,
10622
+ "step": 875500
10623
+ },
10624
+ {
10625
+ "epoch": 12.61,
10626
+ "learning_rate": 1.007326863261311e-05,
10627
+ "loss": 2.774,
10628
+ "step": 876000
10629
+ },
10630
+ {
10631
+ "epoch": 12.62,
10632
+ "learning_rate": 1.002144574434388e-05,
10633
+ "loss": 2.7807,
10634
+ "step": 876500
10635
+ },
10636
+ {
10637
+ "epoch": 12.62,
10638
+ "learning_rate": 9.969519002591145e-06,
10639
+ "loss": 2.7736,
10640
+ "step": 877000
10641
+ },
10642
+ {
10643
+ "epoch": 12.63,
10644
+ "learning_rate": 9.91759226083841e-06,
10645
+ "loss": 2.7706,
10646
+ "step": 877500
10647
+ },
10648
+ {
10649
+ "epoch": 12.64,
10650
+ "learning_rate": 9.865665519085675e-06,
10651
+ "loss": 2.774,
10652
+ "step": 878000
10653
+ },
10654
+ {
10655
+ "epoch": 12.65,
10656
+ "learning_rate": 9.81373877733294e-06,
10657
+ "loss": 2.7725,
10658
+ "step": 878500
10659
+ },
10660
+ {
10661
+ "epoch": 12.65,
10662
+ "learning_rate": 9.761812035580203e-06,
10663
+ "loss": 2.7807,
10664
+ "step": 879000
10665
+ },
10666
+ {
10667
+ "epoch": 12.66,
10668
+ "learning_rate": 9.709885293827468e-06,
10669
+ "loss": 2.7738,
10670
+ "step": 879500
10671
+ },
10672
+ {
10673
+ "epoch": 12.67,
10674
+ "learning_rate": 9.65806240555824e-06,
10675
+ "loss": 2.7749,
10676
+ "step": 880000
10677
+ },
10678
+ {
10679
+ "epoch": 12.67,
10680
+ "learning_rate": 9.606135663805503e-06,
10681
+ "loss": 2.7736,
10682
+ "step": 880500
10683
+ },
10684
+ {
10685
+ "epoch": 12.68,
10686
+ "learning_rate": 9.554208922052768e-06,
10687
+ "loss": 2.7764,
10688
+ "step": 881000
10689
+ },
10690
+ {
10691
+ "epoch": 12.69,
10692
+ "learning_rate": 9.502282180300032e-06,
10693
+ "loss": 2.7756,
10694
+ "step": 881500
10695
+ },
10696
+ {
10697
+ "epoch": 12.7,
10698
+ "learning_rate": 9.450459292030802e-06,
10699
+ "loss": 2.7762,
10700
+ "step": 882000
10701
+ },
10702
+ {
10703
+ "epoch": 12.7,
10704
+ "learning_rate": 9.398532550278067e-06,
10705
+ "loss": 2.7755,
10706
+ "step": 882500
10707
+ },
10708
+ {
10709
+ "epoch": 12.71,
10710
+ "learning_rate": 9.346605808525334e-06,
10711
+ "loss": 2.7792,
10712
+ "step": 883000
10713
+ },
10714
+ {
10715
+ "epoch": 12.72,
10716
+ "learning_rate": 9.294679066772598e-06,
10717
+ "loss": 2.7758,
10718
+ "step": 883500
10719
+ },
10720
+ {
10721
+ "epoch": 12.72,
10722
+ "learning_rate": 9.242752325019863e-06,
10723
+ "loss": 2.7782,
10724
+ "step": 884000
10725
+ },
10726
+ {
10727
+ "epoch": 12.73,
10728
+ "learning_rate": 9.190929436750633e-06,
10729
+ "loss": 2.7763,
10730
+ "step": 884500
10731
+ },
10732
+ {
10733
+ "epoch": 12.74,
10734
+ "learning_rate": 9.139002694997898e-06,
10735
+ "loss": 2.7738,
10736
+ "step": 885000
10737
+ },
10738
+ {
10739
+ "epoch": 12.75,
10740
+ "learning_rate": 9.087075953245163e-06,
10741
+ "loss": 2.7739,
10742
+ "step": 885500
10743
+ },
10744
+ {
10745
+ "epoch": 12.75,
10746
+ "learning_rate": 9.035149211492428e-06,
10747
+ "loss": 2.7787,
10748
+ "step": 886000
10749
+ },
10750
+ {
10751
+ "epoch": 12.76,
10752
+ "learning_rate": 8.983222469739691e-06,
10753
+ "loss": 2.7699,
10754
+ "step": 886500
10755
+ },
10756
+ {
10757
+ "epoch": 12.77,
10758
+ "learning_rate": 8.931399581470462e-06,
10759
+ "loss": 2.7746,
10760
+ "step": 887000
10761
+ },
10762
+ {
10763
+ "epoch": 12.77,
10764
+ "learning_rate": 8.879472839717727e-06,
10765
+ "loss": 2.774,
10766
+ "step": 887500
10767
+ },
10768
+ {
10769
+ "epoch": 12.78,
10770
+ "learning_rate": 8.827546097964992e-06,
10771
+ "loss": 2.7694,
10772
+ "step": 888000
10773
+ },
10774
+ {
10775
+ "epoch": 12.79,
10776
+ "learning_rate": 8.775619356212255e-06,
10777
+ "loss": 2.7754,
10778
+ "step": 888500
10779
+ },
10780
+ {
10781
+ "epoch": 12.8,
10782
+ "learning_rate": 8.72369261445952e-06,
10783
+ "loss": 2.7787,
10784
+ "step": 889000
10785
+ },
10786
+ {
10787
+ "epoch": 12.8,
10788
+ "learning_rate": 8.671765872706785e-06,
10789
+ "loss": 2.7732,
10790
+ "step": 889500
10791
+ },
10792
+ {
10793
+ "epoch": 12.81,
10794
+ "learning_rate": 8.61983913095405e-06,
10795
+ "loss": 2.7777,
10796
+ "step": 890000
10797
+ },
10798
+ {
10799
+ "epoch": 12.82,
10800
+ "learning_rate": 8.56801624268482e-06,
10801
+ "loss": 2.7704,
10802
+ "step": 890500
10803
+ },
10804
+ {
10805
+ "epoch": 12.83,
10806
+ "learning_rate": 8.516089500932086e-06,
10807
+ "loss": 2.7788,
10808
+ "step": 891000
10809
+ },
10810
+ {
10811
+ "epoch": 12.83,
10812
+ "learning_rate": 8.464162759179351e-06,
10813
+ "loss": 2.7706,
10814
+ "step": 891500
10815
+ },
10816
+ {
10817
+ "epoch": 12.84,
10818
+ "learning_rate": 8.412236017426616e-06,
10819
+ "loss": 2.7709,
10820
+ "step": 892000
10821
+ },
10822
+ {
10823
+ "epoch": 12.85,
10824
+ "learning_rate": 8.360413129157386e-06,
10825
+ "loss": 2.7779,
10826
+ "step": 892500
10827
+ },
10828
+ {
10829
+ "epoch": 12.85,
10830
+ "learning_rate": 8.30848638740465e-06,
10831
+ "loss": 2.774,
10832
+ "step": 893000
10833
+ },
10834
+ {
10835
+ "epoch": 12.86,
10836
+ "learning_rate": 8.256559645651915e-06,
10837
+ "loss": 2.7752,
10838
+ "step": 893500
10839
+ },
10840
+ {
10841
+ "epoch": 12.87,
10842
+ "learning_rate": 8.20463290389918e-06,
10843
+ "loss": 2.7729,
10844
+ "step": 894000
10845
+ },
10846
+ {
10847
+ "epoch": 12.88,
10848
+ "learning_rate": 8.152706162146444e-06,
10849
+ "loss": 2.7768,
10850
+ "step": 894500
10851
+ },
10852
+ {
10853
+ "epoch": 12.88,
10854
+ "learning_rate": 8.100779420393708e-06,
10855
+ "loss": 2.7796,
10856
+ "step": 895000
10857
+ },
10858
+ {
10859
+ "epoch": 12.89,
10860
+ "learning_rate": 8.048852678640973e-06,
10861
+ "loss": 2.773,
10862
+ "step": 895500
10863
+ },
10864
+ {
10865
+ "epoch": 12.9,
10866
+ "learning_rate": 7.997029790371743e-06,
10867
+ "loss": 2.7731,
10868
+ "step": 896000
10869
+ },
10870
+ {
10871
+ "epoch": 12.9,
10872
+ "learning_rate": 7.945103048619008e-06,
10873
+ "loss": 2.7717,
10874
+ "step": 896500
10875
+ },
10876
+ {
10877
+ "epoch": 12.91,
10878
+ "learning_rate": 7.893176306866273e-06,
10879
+ "loss": 2.774,
10880
+ "step": 897000
10881
+ },
10882
+ {
10883
+ "epoch": 12.92,
10884
+ "learning_rate": 7.841249565113538e-06,
10885
+ "loss": 2.7721,
10886
+ "step": 897500
10887
+ },
10888
+ {
10889
+ "epoch": 12.93,
10890
+ "learning_rate": 7.789322823360804e-06,
10891
+ "loss": 2.7745,
10892
+ "step": 898000
10893
+ },
10894
+ {
10895
+ "epoch": 12.93,
10896
+ "learning_rate": 7.737396081608067e-06,
10897
+ "loss": 2.7747,
10898
+ "step": 898500
10899
+ },
10900
+ {
10901
+ "epoch": 12.94,
10902
+ "learning_rate": 7.685469339855332e-06,
10903
+ "loss": 2.772,
10904
+ "step": 899000
10905
+ },
10906
+ {
10907
+ "epoch": 12.95,
10908
+ "learning_rate": 7.633542598102597e-06,
10909
+ "loss": 2.772,
10910
+ "step": 899500
10911
+ },
10912
+ {
10913
+ "epoch": 12.95,
10914
+ "learning_rate": 7.581719709833368e-06,
10915
+ "loss": 2.7748,
10916
+ "step": 900000
10917
+ },
10918
+ {
10919
+ "epoch": 12.96,
10920
+ "learning_rate": 7.529896821564137e-06,
10921
+ "loss": 2.7667,
10922
+ "step": 900500
10923
+ },
10924
+ {
10925
+ "epoch": 12.97,
10926
+ "learning_rate": 7.477970079811403e-06,
10927
+ "loss": 2.7705,
10928
+ "step": 901000
10929
+ },
10930
+ {
10931
+ "epoch": 12.98,
10932
+ "learning_rate": 7.426043338058668e-06,
10933
+ "loss": 2.7674,
10934
+ "step": 901500
10935
+ },
10936
+ {
10937
+ "epoch": 12.98,
10938
+ "learning_rate": 7.374116596305931e-06,
10939
+ "loss": 2.7729,
10940
+ "step": 902000
10941
+ },
10942
+ {
10943
+ "epoch": 12.99,
10944
+ "learning_rate": 7.322189854553196e-06,
10945
+ "loss": 2.7731,
10946
+ "step": 902500
10947
+ },
10948
+ {
10949
+ "epoch": 13.0,
10950
+ "learning_rate": 7.2703669662839675e-06,
10951
+ "loss": 2.7728,
10952
+ "step": 903000
10953
+ },
10954
+ {
10955
+ "epoch": 13.0,
10956
+ "eval_accuracy": 0.5083214034814523,
10957
+ "eval_loss": 2.5994162559509277,
10958
+ "eval_runtime": 554.8265,
10959
+ "eval_samples_per_second": 971.361,
10960
+ "eval_steps_per_second": 40.474,
10961
+ "step": 903149
10962
  }
10963
  ],
10964
  "max_steps": 972622,
10965
  "num_train_epochs": 14,
10966
+ "total_flos": 5.554259622962921e+18,
10967
  "trial_name": null,
10968
  "trial_params": null
10969
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c098f75814bf0d67b7b2990383c442d386c5dedd85129e03b179a63a78cf10e
3
  size 118242180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ec13bee1191623dd89f547f05ab2d8f9b47f3c9077b8d581674d22ee640388d
3
  size 118242180
runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3853a5a36f581057de1289aa6295c5ae41dbb1a1d29e8d6a716b1229b4dd2ec3
3
- size 273994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ec72c1e2fe2a362e45c53d1f17f2bc66e75e52404b7a8c7dc1900a55a00db23
3
+ size 296563