Training in progress, step 481, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +571 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4452d8ee5d1c4b3f248050b462aef67647d83e8aa2c819475c2561ad6988260f
 size 373077376

 version https://git-lfs.github.com/spec/v1
+oid sha256:d861b0c356377800f143df821f91f101184e3745994cd74adf5133106d79bde2
 size 373077376

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abc52b4912bc5a69beadb79f29cd708ffaef6d5ab82fd19a670038c92f29c313
 size 373225675

 version https://git-lfs.github.com/spec/v1
+oid sha256:799ccb4c58e58c79e5f8070f3d59b69f01779859a514d0911cf768c6e286cb76
 size 373225675

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f438d73941ac2939699522d3048115527267b7c8c06f9f728e1517b0c3c16832
 size 1401

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8b4903fb7be2c884aa62b335e9291d92240420b2bf52100c824048b850730b6
 size 1401

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8316008316008316,
   "eval_steps": 100,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2840,6 +2840,573 @@
       "eval_samples_per_second": 22.501,
       "eval_steps_per_second": 2.813,
       "step": 400
     }
   ],
   "logging_steps": 1,
@@ -2854,12 +3421,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.018894554759168e+17,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 100,
+  "global_step": 481,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.501,
       "eval_steps_per_second": 2.813,
       "step": 400
+    },
+    {
+      "epoch": 0.8336798336798337,
+      "grad_norm": 0.27734375,
+      "learning_rate": 7.585430144121319e-05,
+      "loss": 5.5678,
+      "step": 401
+    },
+    {
+      "epoch": 0.8357588357588358,
+      "grad_norm": 0.275390625,
+      "learning_rate": 7.404029558083653e-05,
+      "loss": 5.4751,
+      "step": 402
+    },
+    {
+      "epoch": 0.8378378378378378,
+      "grad_norm": 0.275390625,
+      "learning_rate": 7.224650765840613e-05,
+      "loss": 5.4895,
+      "step": 403
+    },
+    {
+      "epoch": 0.83991683991684,
+      "grad_norm": 0.2890625,
+      "learning_rate": 7.047302281505735e-05,
+      "loss": 5.381,
+      "step": 404
+    },
+    {
+      "epoch": 0.841995841995842,
+      "grad_norm": 0.64453125,
+      "learning_rate": 6.871992522825182e-05,
+      "loss": 5.3161,
+      "step": 405
+    },
+    {
+      "epoch": 0.8440748440748441,
+      "grad_norm": 1.078125,
+      "learning_rate": 6.698729810778065e-05,
+      "loss": 5.3665,
+      "step": 406
+    },
+    {
+      "epoch": 0.8461538461538461,
+      "grad_norm": 1.0859375,
+      "learning_rate": 6.527522369181655e-05,
+      "loss": 5.545,
+      "step": 407
+    },
+    {
+      "epoch": 0.8482328482328483,
+      "grad_norm": 0.259765625,
+      "learning_rate": 6.358378324300929e-05,
+      "loss": 5.1788,
+      "step": 408
+    },
+    {
+      "epoch": 0.8503118503118503,
+      "grad_norm": 0.26953125,
+      "learning_rate": 6.191305704462896e-05,
+      "loss": 5.2483,
+      "step": 409
+    },
+    {
+      "epoch": 0.8523908523908524,
+      "grad_norm": 0.28125,
+      "learning_rate": 6.026312439675552e-05,
+      "loss": 4.8411,
+      "step": 410
+    },
+    {
+      "epoch": 0.8544698544698545,
+      "grad_norm": 0.3359375,
+      "learning_rate": 5.863406361251472e-05,
+      "loss": 5.1144,
+      "step": 411
+    },
+    {
+      "epoch": 0.8565488565488566,
+      "grad_norm": 1.0546875,
+      "learning_rate": 5.7025952014361004e-05,
+      "loss": 5.1504,
+      "step": 412
+    },
+    {
+      "epoch": 0.8586278586278586,
+      "grad_norm": 0.337890625,
+      "learning_rate": 5.543886593040737e-05,
+      "loss": 5.2093,
+      "step": 413
+    },
+    {
+      "epoch": 0.8607068607068608,
+      "grad_norm": 0.287109375,
+      "learning_rate": 5.387288069080298e-05,
+      "loss": 5.0402,
+      "step": 414
+    },
+    {
+      "epoch": 0.8627858627858628,
+      "grad_norm": 0.32421875,
+      "learning_rate": 5.23280706241569e-05,
+      "loss": 4.7027,
+      "step": 415
+    },
+    {
+      "epoch": 0.8648648648648649,
+      "grad_norm": 0.25,
+      "learning_rate": 5.080450905401057e-05,
+      "loss": 5.2766,
+      "step": 416
+    },
+    {
+      "epoch": 0.8669438669438669,
+      "grad_norm": 0.28515625,
+      "learning_rate": 4.930226829535767e-05,
+      "loss": 5.3266,
+      "step": 417
+    },
+    {
+      "epoch": 0.8690228690228691,
+      "grad_norm": 0.279296875,
+      "learning_rate": 4.7821419651211284e-05,
+      "loss": 5.1296,
+      "step": 418
+    },
+    {
+      "epoch": 0.8711018711018711,
+      "grad_norm": 0.27734375,
+      "learning_rate": 4.636203340922007e-05,
+      "loss": 5.5194,
+      "step": 419
+    },
+    {
+      "epoch": 0.8731808731808732,
+      "grad_norm": 0.2578125,
+      "learning_rate": 4.492417883833155e-05,
+      "loss": 4.968,
+      "step": 420
+    },
+    {
+      "epoch": 0.8752598752598753,
+      "grad_norm": 0.2578125,
+      "learning_rate": 4.350792418550509e-05,
+      "loss": 5.6204,
+      "step": 421
+    },
+    {
+      "epoch": 0.8773388773388774,
+      "grad_norm": 0.2451171875,
+      "learning_rate": 4.211333667247125e-05,
+      "loss": 5.2991,
+      "step": 422
+    },
+    {
+      "epoch": 0.8794178794178794,
+      "grad_norm": 0.267578125,
+      "learning_rate": 4.074048249254286e-05,
+      "loss": 5.3253,
+      "step": 423
+    },
+    {
+      "epoch": 0.8814968814968815,
+      "grad_norm": 1.0546875,
+      "learning_rate": 3.938942680747176e-05,
+      "loss": 4.9877,
+      "step": 424
+    },
+    {
+      "epoch": 0.8835758835758836,
+      "grad_norm": 0.2373046875,
+      "learning_rate": 3.806023374435663e-05,
+      "loss": 5.5203,
+      "step": 425
+    },
+    {
+      "epoch": 0.8856548856548857,
+      "grad_norm": 0.2890625,
+      "learning_rate": 3.675296639259912e-05,
+      "loss": 4.9162,
+      "step": 426
+    },
+    {
+      "epoch": 0.8877338877338877,
+      "grad_norm": 0.267578125,
+      "learning_rate": 3.546768680090934e-05,
+      "loss": 5.3487,
+      "step": 427
+    },
+    {
+      "epoch": 0.8898128898128899,
+      "grad_norm": 0.26171875,
+      "learning_rate": 3.420445597436056e-05,
+      "loss": 4.8628,
+      "step": 428
+    },
+    {
+      "epoch": 0.8918918918918919,
+      "grad_norm": 0.271484375,
+      "learning_rate": 3.296333387149392e-05,
+      "loss": 5.2627,
+      "step": 429
+    },
+    {
+      "epoch": 0.893970893970894,
+      "grad_norm": 0.27734375,
+      "learning_rate": 3.174437940147268e-05,
+      "loss": 5.3865,
+      "step": 430
+    },
+    {
+      "epoch": 0.896049896049896,
+      "grad_norm": 0.296875,
+      "learning_rate": 3.054765042128521e-05,
+      "loss": 5.1347,
+      "step": 431
+    },
+    {
+      "epoch": 0.8981288981288982,
+      "grad_norm": 0.296875,
+      "learning_rate": 2.9373203733000232e-05,
+      "loss": 5.308,
+      "step": 432
+    },
+    {
+      "epoch": 0.9002079002079002,
+      "grad_norm": 0.26171875,
+      "learning_rate": 2.8221095081069513e-05,
+      "loss": 5.0021,
+      "step": 433
+    },
+    {
+      "epoch": 0.9022869022869023,
+      "grad_norm": 1.046875,
+      "learning_rate": 2.709137914968268e-05,
+      "loss": 5.0882,
+      "step": 434
+    },
+    {
+      "epoch": 0.9043659043659044,
+      "grad_norm": 1.0546875,
+      "learning_rate": 2.5984109560171388e-05,
+      "loss": 5.4556,
+      "step": 435
+    },
+    {
+      "epoch": 0.9064449064449065,
+      "grad_norm": 0.248046875,
+      "learning_rate": 2.4899338868464407e-05,
+      "loss": 5.4887,
+      "step": 436
+    },
+    {
+      "epoch": 0.9085239085239085,
+      "grad_norm": 0.2734375,
+      "learning_rate": 2.3837118562592797e-05,
+      "loss": 5.3905,
+      "step": 437
+    },
+    {
+      "epoch": 0.9106029106029107,
+      "grad_norm": 0.283203125,
+      "learning_rate": 2.2797499060246252e-05,
+      "loss": 5.1305,
+      "step": 438
+    },
+    {
+      "epoch": 0.9126819126819127,
+      "grad_norm": 0.2412109375,
+      "learning_rate": 2.1780529706380336e-05,
+      "loss": 5.3754,
+      "step": 439
+    },
+    {
+      "epoch": 0.9147609147609148,
+      "grad_norm": 0.27734375,
+      "learning_rate": 2.0786258770873646e-05,
+      "loss": 5.2316,
+      "step": 440
+    },
+    {
+      "epoch": 0.9168399168399168,
+      "grad_norm": 0.294921875,
+      "learning_rate": 1.9814733446237355e-05,
+      "loss": 5.37,
+      "step": 441
+    },
+    {
+      "epoch": 0.918918918918919,
+      "grad_norm": 0.2392578125,
+      "learning_rate": 1.886599984537479e-05,
+      "loss": 5.4565,
+      "step": 442
+    },
+    {
+      "epoch": 0.920997920997921,
+      "grad_norm": 0.31640625,
+      "learning_rate": 1.7940102999393192e-05,
+      "loss": 5.1685,
+      "step": 443
+    },
+    {
+      "epoch": 0.9230769230769231,
+      "grad_norm": 0.275390625,
+      "learning_rate": 1.70370868554659e-05,
+      "loss": 5.2651,
+      "step": 444
+    },
+    {
+      "epoch": 0.9251559251559252,
+      "grad_norm": 0.29296875,
+      "learning_rate": 1.6156994274746485e-05,
+      "loss": 5.1014,
+      "step": 445
+    },
+    {
+      "epoch": 0.9272349272349273,
+      "grad_norm": 0.328125,
+      "learning_rate": 1.5299867030334813e-05,
+      "loss": 4.4473,
+      "step": 446
+    },
+    {
+      "epoch": 0.9293139293139293,
+      "grad_norm": 0.275390625,
+      "learning_rate": 1.4465745805293584e-05,
+      "loss": 5.4957,
+      "step": 447
+    },
+    {
+      "epoch": 0.9313929313929314,
+      "grad_norm": 0.259765625,
+      "learning_rate": 1.3654670190718033e-05,
+      "loss": 5.4438,
+      "step": 448
+    },
+    {
+      "epoch": 0.9334719334719335,
+      "grad_norm": 0.263671875,
+      "learning_rate": 1.286667868385627e-05,
+      "loss": 5.1253,
+      "step": 449
+    },
+    {
+      "epoch": 0.9355509355509356,
+      "grad_norm": 0.25390625,
+      "learning_rate": 1.210180868628219e-05,
+      "loss": 5.2587,
+      "step": 450
+    },
+    {
+      "epoch": 0.9376299376299376,
+      "grad_norm": 0.28125,
+      "learning_rate": 1.1360096502120388e-05,
+      "loss": 5.2587,
+      "step": 451
+    },
+    {
+      "epoch": 0.9397089397089398,
+      "grad_norm": 0.349609375,
+      "learning_rate": 1.064157733632276e-05,
+      "loss": 5.0973,
+      "step": 452
+    },
+    {
+      "epoch": 0.9417879417879418,
+      "grad_norm": 0.263671875,
+      "learning_rate": 9.94628529299768e-06,
+      "loss": 5.1763,
+      "step": 453
+    },
+    {
+      "epoch": 0.9438669438669439,
+      "grad_norm": 0.271484375,
+      "learning_rate": 9.274253373791064e-06,
+      "loss": 5.2711,
+      "step": 454
+    },
+    {
+      "epoch": 0.9459459459459459,
+      "grad_norm": 0.279296875,
+      "learning_rate": 8.62551347632029e-06,
+      "loss": 5.1639,
+      "step": 455
+    },
+    {
+      "epoch": 0.9480249480249481,
+      "grad_norm": 0.26171875,
+      "learning_rate": 8.000096392660028e-06,
+      "loss": 5.2604,
+      "step": 456
+    },
+    {
+      "epoch": 0.9501039501039501,
+      "grad_norm": 0.7890625,
+      "learning_rate": 7.398031807880457e-06,
+      "loss": 4.9583,
+      "step": 457
+    },
+    {
+      "epoch": 0.9521829521829522,
+      "grad_norm": 0.28125,
+      "learning_rate": 6.819348298638839e-06,
+      "loss": 5.5333,
+      "step": 458
+    },
+    {
+      "epoch": 0.9542619542619543,
+      "grad_norm": 0.28515625,
+      "learning_rate": 6.264073331822551e-06,
+      "loss": 5.0472,
+      "step": 459
+    },
+    {
+      "epoch": 0.9563409563409564,
+      "grad_norm": 0.373046875,
+      "learning_rate": 5.732233263245845e-06,
+      "loss": 4.8395,
+      "step": 460
+    },
+    {
+      "epoch": 0.9584199584199584,
+      "grad_norm": 0.3046875,
+      "learning_rate": 5.223853336398632e-06,
+      "loss": 5.3617,
+      "step": 461
+    },
+    {
+      "epoch": 0.9604989604989606,
+      "grad_norm": 0.380859375,
+      "learning_rate": 4.738957681248379e-06,
+      "loss": 4.9956,
+      "step": 462
+    },
+    {
+      "epoch": 0.9625779625779626,
+      "grad_norm": 0.296875,
+      "learning_rate": 4.277569313094809e-06,
+      "loss": 5.0861,
+      "step": 463
+    },
+    {
+      "epoch": 0.9646569646569647,
+      "grad_norm": 0.267578125,
+      "learning_rate": 3.839710131477492e-06,
+      "loss": 5.1291,
+      "step": 464
+    },
+    {
+      "epoch": 0.9667359667359667,
+      "grad_norm": 0.26953125,
+      "learning_rate": 3.4254009191363455e-06,
+      "loss": 4.9485,
+      "step": 465
+    },
+    {
+      "epoch": 0.9688149688149689,
+      "grad_norm": 0.263671875,
+      "learning_rate": 3.034661341025258e-06,
+      "loss": 5.0859,
+      "step": 466
+    },
+    {
+      "epoch": 0.9708939708939709,
+      "grad_norm": 0.68359375,
+      "learning_rate": 2.6675099433787208e-06,
+      "loss": 5.1618,
+      "step": 467
+    },
+    {
+      "epoch": 0.972972972972973,
+      "grad_norm": 0.263671875,
+      "learning_rate": 2.323964152831426e-06,
+      "loss": 5.5762,
+      "step": 468
+    },
+    {
+      "epoch": 0.975051975051975,
+      "grad_norm": 0.36328125,
+      "learning_rate": 2.0040402755912013e-06,
+      "loss": 4.9616,
+      "step": 469
+    },
+    {
+      "epoch": 0.9771309771309772,
+      "grad_norm": 0.283203125,
+      "learning_rate": 1.7077534966650766e-06,
+      "loss": 5.3332,
+      "step": 470
+    },
+    {
+      "epoch": 0.9792099792099792,
+      "grad_norm": 0.26171875,
+      "learning_rate": 1.43511787913847e-06,
+      "loss": 4.9868,
+      "step": 471
+    },
+    {
+      "epoch": 0.9812889812889813,
+      "grad_norm": 0.251953125,
+      "learning_rate": 1.1861463635077786e-06,
+      "loss": 5.3489,
+      "step": 472
+    },
+    {
+      "epoch": 0.9833679833679834,
+      "grad_norm": 0.25390625,
+      "learning_rate": 9.60850767065924e-07,
+      "loss": 5.1146,
+      "step": 473
+    },
+    {
+      "epoch": 0.9854469854469855,
+      "grad_norm": 0.66015625,
+      "learning_rate": 7.592417833419129e-07,
+      "loss": 5.0727,
+      "step": 474
+    },
+    {
+      "epoch": 0.9875259875259875,
+      "grad_norm": 0.2421875,
+      "learning_rate": 5.81328981592688e-07,
+      "loss": 5.5923,
+      "step": 475
+    },
+    {
+      "epoch": 0.9896049896049897,
+      "grad_norm": 0.259765625,
+      "learning_rate": 4.2712080634949023e-07,
+      "loss": 5.2152,
+      "step": 476
+    },
+    {
+      "epoch": 0.9916839916839917,
+      "grad_norm": 0.25,
+      "learning_rate": 2.966245770166243e-07,
+      "loss": 5.2777,
+      "step": 477
+    },
+    {
+      "epoch": 0.9937629937629938,
+      "grad_norm": 0.279296875,
+      "learning_rate": 1.8984648752429223e-07,
+      "loss": 5.2911,
+      "step": 478
+    },
+    {
+      "epoch": 0.9958419958419958,
+      "grad_norm": 0.25390625,
+      "learning_rate": 1.0679160603449533e-07,
+      "loss": 5.1634,
+      "step": 479
+    },
+    {
+      "epoch": 0.997920997920998,
+      "grad_norm": 0.2470703125,
+      "learning_rate": 4.746387470044855e-08,
+      "loss": 5.0967,
+      "step": 480
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.25390625,
+      "learning_rate": 1.1866109479674591e-08,
+      "loss": 5.2176,
+      "step": 481
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.2252207020978995e+17,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null