Training in progress, step 1855, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +501 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5fee08fae4a270923d847f749b15fcd42f7381884da39c605eb17b3e0a9ce81
 size 791869518

 version https://git-lfs.github.com/spec/v1
+oid sha256:c8e57e667b960b56e96b51410b7155677ee5f6f65335d22becf61dd69a8772b6
 size 791869518

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f39ccf0568d82c2eefebb237f16c6e23c73c67d0f03adba0677b59e01b1871d
 size 2375752250

 version https://git-lfs.github.com/spec/v1
+oid sha256:836b2f95bd092426379cd5f1f88e3a5b0be2151ea5112d892b65276f45920021
 size 2375752250

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eebbda9031bbe83261af4068018fa312b4ce10a163bb58ed929c991e6ab70844
 size 1000

 version https://git-lfs.github.com/spec/v1
+oid sha256:35cfcc0427b746466615d2216d048b8c86e2bc0218bb45a5ecc4e1ea12650eab
 size 1000

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8084210526315789,
   "eval_steps": 500,
-  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2131,6 +2131,503 @@
       "eval_samples_per_second": 602.367,
       "eval_steps_per_second": 37.648,
       "step": 1500
     }
   ],
   "logging_steps": 5,
@@ -2145,12 +2642,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.498536989183181e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9997473684210526,
   "eval_steps": 500,
+  "global_step": 1855,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 602.367,
       "eval_steps_per_second": 37.648,
       "step": 1500
+    },
+    {
+      "epoch": 0.8111157894736842,
+      "grad_norm": 113.875,
+      "learning_rate": 2.0970641102456563e-06,
+      "loss": 47.3825,
+      "step": 1505
+    },
+    {
+      "epoch": 0.8138105263157894,
+      "grad_norm": 112.9375,
+      "learning_rate": 2.0671060515278613e-06,
+      "loss": 46.2206,
+      "step": 1510
+    },
+    {
+      "epoch": 0.8165052631578947,
+      "grad_norm": 109.6875,
+      "learning_rate": 2.037147992810066e-06,
+      "loss": 47.1773,
+      "step": 1515
+    },
+    {
+      "epoch": 0.8192,
+      "grad_norm": 106.1875,
+      "learning_rate": 2.007189934092271e-06,
+      "loss": 46.7185,
+      "step": 1520
+    },
+    {
+      "epoch": 0.8218947368421052,
+      "grad_norm": 107.25,
+      "learning_rate": 1.977231875374476e-06,
+      "loss": 47.2794,
+      "step": 1525
+    },
+    {
+      "epoch": 0.8245894736842105,
+      "grad_norm": 113.125,
+      "learning_rate": 1.9472738166566808e-06,
+      "loss": 46.1662,
+      "step": 1530
+    },
+    {
+      "epoch": 0.8272842105263158,
+      "grad_norm": 106.1875,
+      "learning_rate": 1.9173157579388858e-06,
+      "loss": 47.7226,
+      "step": 1535
+    },
+    {
+      "epoch": 0.829978947368421,
+      "grad_norm": 111.375,
+      "learning_rate": 1.8873576992210907e-06,
+      "loss": 46.4276,
+      "step": 1540
+    },
+    {
+      "epoch": 0.8326736842105263,
+      "grad_norm": 114.4375,
+      "learning_rate": 1.8573996405032955e-06,
+      "loss": 46.6981,
+      "step": 1545
+    },
+    {
+      "epoch": 0.8353684210526315,
+      "grad_norm": 104.0625,
+      "learning_rate": 1.8274415817855005e-06,
+      "loss": 46.4666,
+      "step": 1550
+    },
+    {
+      "epoch": 0.8380631578947368,
+      "grad_norm": 106.6875,
+      "learning_rate": 1.7974835230677055e-06,
+      "loss": 45.6698,
+      "step": 1555
+    },
+    {
+      "epoch": 0.8407578947368421,
+      "grad_norm": 108.625,
+      "learning_rate": 1.7675254643499104e-06,
+      "loss": 48.0453,
+      "step": 1560
+    },
+    {
+      "epoch": 0.8434526315789473,
+      "grad_norm": 107.1875,
+      "learning_rate": 1.7375674056321152e-06,
+      "loss": 46.3295,
+      "step": 1565
+    },
+    {
+      "epoch": 0.8461473684210526,
+      "grad_norm": 114.125,
+      "learning_rate": 1.7076093469143202e-06,
+      "loss": 46.0131,
+      "step": 1570
+    },
+    {
+      "epoch": 0.8488421052631578,
+      "grad_norm": 98.4375,
+      "learning_rate": 1.6776512881965251e-06,
+      "loss": 47.3054,
+      "step": 1575
+    },
+    {
+      "epoch": 0.8515368421052631,
+      "grad_norm": 109.5,
+      "learning_rate": 1.64769322947873e-06,
+      "loss": 46.6946,
+      "step": 1580
+    },
+    {
+      "epoch": 0.8542315789473685,
+      "grad_norm": 107.375,
+      "learning_rate": 1.6177351707609349e-06,
+      "loss": 46.8322,
+      "step": 1585
+    },
+    {
+      "epoch": 0.8569263157894736,
+      "grad_norm": 111.5625,
+      "learning_rate": 1.5877771120431399e-06,
+      "loss": 47.2501,
+      "step": 1590
+    },
+    {
+      "epoch": 0.859621052631579,
+      "grad_norm": 109.4375,
+      "learning_rate": 1.5578190533253446e-06,
+      "loss": 47.1194,
+      "step": 1595
+    },
+    {
+      "epoch": 0.8623157894736843,
+      "grad_norm": 110.625,
+      "learning_rate": 1.5278609946075496e-06,
+      "loss": 45.8835,
+      "step": 1600
+    },
+    {
+      "epoch": 0.8650105263157895,
+      "grad_norm": 104.3125,
+      "learning_rate": 1.4979029358897546e-06,
+      "loss": 46.3103,
+      "step": 1605
+    },
+    {
+      "epoch": 0.8677052631578948,
+      "grad_norm": 105.4375,
+      "learning_rate": 1.4679448771719593e-06,
+      "loss": 45.329,
+      "step": 1610
+    },
+    {
+      "epoch": 0.8704,
+      "grad_norm": 108.75,
+      "learning_rate": 1.4379868184541643e-06,
+      "loss": 46.3264,
+      "step": 1615
+    },
+    {
+      "epoch": 0.8730947368421053,
+      "grad_norm": 105.6875,
+      "learning_rate": 1.4080287597363693e-06,
+      "loss": 45.5464,
+      "step": 1620
+    },
+    {
+      "epoch": 0.8757894736842106,
+      "grad_norm": 111.5625,
+      "learning_rate": 1.378070701018574e-06,
+      "loss": 45.2749,
+      "step": 1625
+    },
+    {
+      "epoch": 0.8784842105263158,
+      "grad_norm": 106.125,
+      "learning_rate": 1.348112642300779e-06,
+      "loss": 46.0788,
+      "step": 1630
+    },
+    {
+      "epoch": 0.8811789473684211,
+      "grad_norm": 105.375,
+      "learning_rate": 1.318154583582984e-06,
+      "loss": 46.3733,
+      "step": 1635
+    },
+    {
+      "epoch": 0.8838736842105264,
+      "grad_norm": 102.9375,
+      "learning_rate": 1.288196524865189e-06,
+      "loss": 45.4384,
+      "step": 1640
+    },
+    {
+      "epoch": 0.8865684210526316,
+      "grad_norm": 110.4375,
+      "learning_rate": 1.2582384661473938e-06,
+      "loss": 46.3224,
+      "step": 1645
+    },
+    {
+      "epoch": 0.8892631578947369,
+      "grad_norm": 104.5,
+      "learning_rate": 1.2282804074295987e-06,
+      "loss": 44.5758,
+      "step": 1650
+    },
+    {
+      "epoch": 0.8919578947368421,
+      "grad_norm": 107.1875,
+      "learning_rate": 1.1983223487118035e-06,
+      "loss": 46.2027,
+      "step": 1655
+    },
+    {
+      "epoch": 0.8946526315789474,
+      "grad_norm": 105.375,
+      "learning_rate": 1.1683642899940085e-06,
+      "loss": 45.8361,
+      "step": 1660
+    },
+    {
+      "epoch": 0.8973473684210527,
+      "grad_norm": 103.1875,
+      "learning_rate": 1.1384062312762134e-06,
+      "loss": 46.9477,
+      "step": 1665
+    },
+    {
+      "epoch": 0.9000421052631579,
+      "grad_norm": 105.9375,
+      "learning_rate": 1.1084481725584182e-06,
+      "loss": 46.2056,
+      "step": 1670
+    },
+    {
+      "epoch": 0.9027368421052632,
+      "grad_norm": 105.0625,
+      "learning_rate": 1.0784901138406232e-06,
+      "loss": 46.3934,
+      "step": 1675
+    },
+    {
+      "epoch": 0.9054315789473685,
+      "grad_norm": 110.5,
+      "learning_rate": 1.0485320551228282e-06,
+      "loss": 45.8651,
+      "step": 1680
+    },
+    {
+      "epoch": 0.9081263157894737,
+      "grad_norm": 106.1875,
+      "learning_rate": 1.018573996405033e-06,
+      "loss": 44.2944,
+      "step": 1685
+    },
+    {
+      "epoch": 0.910821052631579,
+      "grad_norm": 100.6875,
+      "learning_rate": 9.88615937687238e-07,
+      "loss": 45.6455,
+      "step": 1690
+    },
+    {
+      "epoch": 0.9135157894736842,
+      "grad_norm": 102.625,
+      "learning_rate": 9.586578789694429e-07,
+      "loss": 45.1027,
+      "step": 1695
+    },
+    {
+      "epoch": 0.9162105263157895,
+      "grad_norm": 108.125,
+      "learning_rate": 9.286998202516478e-07,
+      "loss": 46.1671,
+      "step": 1700
+    },
+    {
+      "epoch": 0.9189052631578948,
+      "grad_norm": 108.4375,
+      "learning_rate": 8.987417615338527e-07,
+      "loss": 44.9381,
+      "step": 1705
+    },
+    {
+      "epoch": 0.9216,
+      "grad_norm": 98.875,
+      "learning_rate": 8.687837028160576e-07,
+      "loss": 45.2336,
+      "step": 1710
+    },
+    {
+      "epoch": 0.9242947368421053,
+      "grad_norm": 112.6875,
+      "learning_rate": 8.388256440982626e-07,
+      "loss": 45.8469,
+      "step": 1715
+    },
+    {
+      "epoch": 0.9269894736842105,
+      "grad_norm": 108.0,
+      "learning_rate": 8.088675853804674e-07,
+      "loss": 45.2639,
+      "step": 1720
+    },
+    {
+      "epoch": 0.9296842105263158,
+      "grad_norm": 107.9375,
+      "learning_rate": 7.789095266626723e-07,
+      "loss": 45.5577,
+      "step": 1725
+    },
+    {
+      "epoch": 0.9323789473684211,
+      "grad_norm": 100.6875,
+      "learning_rate": 7.489514679448773e-07,
+      "loss": 44.7973,
+      "step": 1730
+    },
+    {
+      "epoch": 0.9350736842105263,
+      "grad_norm": 110.0,
+      "learning_rate": 7.189934092270822e-07,
+      "loss": 45.1726,
+      "step": 1735
+    },
+    {
+      "epoch": 0.9377684210526316,
+      "grad_norm": 104.125,
+      "learning_rate": 6.89035350509287e-07,
+      "loss": 44.1223,
+      "step": 1740
+    },
+    {
+      "epoch": 0.9404631578947369,
+      "grad_norm": 101.5,
+      "learning_rate": 6.59077291791492e-07,
+      "loss": 45.1792,
+      "step": 1745
+    },
+    {
+      "epoch": 0.9431578947368421,
+      "grad_norm": 103.5625,
+      "learning_rate": 6.291192330736969e-07,
+      "loss": 46.004,
+      "step": 1750
+    },
+    {
+      "epoch": 0.9458526315789474,
+      "grad_norm": 108.0,
+      "learning_rate": 5.991611743559017e-07,
+      "loss": 44.949,
+      "step": 1755
+    },
+    {
+      "epoch": 0.9485473684210526,
+      "grad_norm": 103.5625,
+      "learning_rate": 5.692031156381067e-07,
+      "loss": 45.3909,
+      "step": 1760
+    },
+    {
+      "epoch": 0.9512421052631579,
+      "grad_norm": 105.375,
+      "learning_rate": 5.392450569203116e-07,
+      "loss": 45.4842,
+      "step": 1765
+    },
+    {
+      "epoch": 0.9539368421052632,
+      "grad_norm": 109.625,
+      "learning_rate": 5.092869982025165e-07,
+      "loss": 44.3717,
+      "step": 1770
+    },
+    {
+      "epoch": 0.9566315789473684,
+      "grad_norm": 98.9375,
+      "learning_rate": 4.793289394847214e-07,
+      "loss": 45.0351,
+      "step": 1775
+    },
+    {
+      "epoch": 0.9593263157894737,
+      "grad_norm": 104.0,
+      "learning_rate": 4.4937088076692636e-07,
+      "loss": 44.7382,
+      "step": 1780
+    },
+    {
+      "epoch": 0.962021052631579,
+      "grad_norm": 104.875,
+      "learning_rate": 4.194128220491313e-07,
+      "loss": 44.2659,
+      "step": 1785
+    },
+    {
+      "epoch": 0.9647157894736842,
+      "grad_norm": 107.6875,
+      "learning_rate": 3.8945476333133616e-07,
+      "loss": 44.9655,
+      "step": 1790
+    },
+    {
+      "epoch": 0.9674105263157895,
+      "grad_norm": 108.5625,
+      "learning_rate": 3.594967046135411e-07,
+      "loss": 46.1188,
+      "step": 1795
+    },
+    {
+      "epoch": 0.9701052631578947,
+      "grad_norm": 104.625,
+      "learning_rate": 3.29538645895746e-07,
+      "loss": 44.0362,
+      "step": 1800
+    },
+    {
+      "epoch": 0.9728,
+      "grad_norm": 99.1875,
+      "learning_rate": 2.995805871779509e-07,
+      "loss": 44.2291,
+      "step": 1805
+    },
+    {
+      "epoch": 0.9754947368421053,
+      "grad_norm": 107.75,
+      "learning_rate": 2.696225284601558e-07,
+      "loss": 44.9852,
+      "step": 1810
+    },
+    {
+      "epoch": 0.9781894736842105,
+      "grad_norm": 108.625,
+      "learning_rate": 2.396644697423607e-07,
+      "loss": 44.8573,
+      "step": 1815
+    },
+    {
+      "epoch": 0.9808842105263158,
+      "grad_norm": 108.125,
+      "learning_rate": 2.0970641102456564e-07,
+      "loss": 44.9958,
+      "step": 1820
+    },
+    {
+      "epoch": 0.983578947368421,
+      "grad_norm": 107.6875,
+      "learning_rate": 1.7974835230677054e-07,
+      "loss": 43.81,
+      "step": 1825
+    },
+    {
+      "epoch": 0.9862736842105263,
+      "grad_norm": 106.0625,
+      "learning_rate": 1.4979029358897544e-07,
+      "loss": 44.0496,
+      "step": 1830
+    },
+    {
+      "epoch": 0.9889684210526316,
+      "grad_norm": 104.9375,
+      "learning_rate": 1.1983223487118036e-07,
+      "loss": 43.5615,
+      "step": 1835
+    },
+    {
+      "epoch": 0.9916631578947368,
+      "grad_norm": 106.5625,
+      "learning_rate": 8.987417615338527e-08,
+      "loss": 44.9755,
+      "step": 1840
+    },
+    {
+      "epoch": 0.9943578947368421,
+      "grad_norm": 108.0,
+      "learning_rate": 5.991611743559018e-08,
+      "loss": 42.8911,
+      "step": 1845
+    },
+    {
+      "epoch": 0.9970526315789474,
+      "grad_norm": 104.1875,
+      "learning_rate": 2.995805871779509e-08,
+      "loss": 44.0027,
+      "step": 1850
+    },
+    {
+      "epoch": 0.9997473684210526,
+      "grad_norm": 110.8125,
+      "learning_rate": 0.0,
+      "loss": 44.4761,
+      "step": 1855
     }
   ],
   "logging_steps": 5,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8.0365240766232e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null