Training in progress, step 2282, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +396 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:524178631dea28d6a1be6624311bcec723e346dc14c66b3a74890d48f74535c0
 size 791781368

 version https://git-lfs.github.com/spec/v1
+oid sha256:b5c239fe048ae787ac9ba2071d45d41a1e490518eddc34d3cfaf3afacba9c7a8
 size 791781368

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7a1e9dee126e6e10988635b663718974d07307a09a0ef8f8e1a040065575d1d
 size 2375487866

 version https://git-lfs.github.com/spec/v1
+oid sha256:2bec5a99e167b104da1b38d30711ce7795a0e35b3f0e9682e07d97909de250cc
 size 2375487866

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd44b9ad3ef5591f2a0671f1ec04c21ad479cbd1d478859e3ba017f1c74bf027
 size 1000

 version https://git-lfs.github.com/spec/v1
+oid sha256:d305aeb5f66edef533fe495bedd6ae8dca8a6220560f62265310b5e9dbe7ba24
 size 1000

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8762202050902917,
   "eval_steps": 1000,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2823,6 +2823,398 @@
       "eval_samples_per_second": 988.943,
       "eval_steps_per_second": 30.906,
       "step": 2000
     }
   ],
   "logging_steps": 5,
@@ -2837,12 +3229,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.732943025316823e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9997672540080229,
   "eval_steps": 1000,
+  "global_step": 2282,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 988.943,
       "eval_steps_per_second": 30.906,
       "step": 2000
+    },
+    {
+      "epoch": 0.8784107556030175,
+      "grad_norm": 29.203125,
+      "learning_rate": 2.698490014612762e-06,
+      "loss": 24.1809,
+      "step": 2005
+    },
+    {
+      "epoch": 0.8806013061157432,
+      "grad_norm": 28.46875,
+      "learning_rate": 2.6497808085728203e-06,
+      "loss": 23.4948,
+      "step": 2010
+    },
+    {
+      "epoch": 0.8827918566284689,
+      "grad_norm": 16.171875,
+      "learning_rate": 2.601071602532879e-06,
+      "loss": 23.6219,
+      "step": 2015
+    },
+    {
+      "epoch": 0.8849824071411947,
+      "grad_norm": 31.9375,
+      "learning_rate": 2.5523623964929375e-06,
+      "loss": 22.9886,
+      "step": 2020
+    },
+    {
+      "epoch": 0.8871729576539203,
+      "grad_norm": 13.4453125,
+      "learning_rate": 2.503653190452996e-06,
+      "loss": 23.6685,
+      "step": 2025
+    },
+    {
+      "epoch": 0.8893635081666461,
+      "grad_norm": 27.390625,
+      "learning_rate": 2.4549439844130542e-06,
+      "loss": 23.4918,
+      "step": 2030
+    },
+    {
+      "epoch": 0.8915540586793719,
+      "grad_norm": 53.375,
+      "learning_rate": 2.4062347783731126e-06,
+      "loss": 23.38,
+      "step": 2035
+    },
+    {
+      "epoch": 0.8937446091920976,
+      "grad_norm": 18.734375,
+      "learning_rate": 2.357525572333171e-06,
+      "loss": 23.5452,
+      "step": 2040
+    },
+    {
+      "epoch": 0.8959351597048233,
+      "grad_norm": 25.671875,
+      "learning_rate": 2.3088163662932294e-06,
+      "loss": 23.5461,
+      "step": 2045
+    },
+    {
+      "epoch": 0.8981257102175491,
+      "grad_norm": 28.953125,
+      "learning_rate": 2.260107160253288e-06,
+      "loss": 23.2701,
+      "step": 2050
+    },
+    {
+      "epoch": 0.9003162607302748,
+      "grad_norm": 16.4375,
+      "learning_rate": 2.2113979542133465e-06,
+      "loss": 23.5859,
+      "step": 2055
+    },
+    {
+      "epoch": 0.9025068112430005,
+      "grad_norm": 20.59375,
+      "learning_rate": 2.162688748173405e-06,
+      "loss": 22.9554,
+      "step": 2060
+    },
+    {
+      "epoch": 0.9046973617557262,
+      "grad_norm": 31.21875,
+      "learning_rate": 2.1139795421334633e-06,
+      "loss": 23.3924,
+      "step": 2065
+    },
+    {
+      "epoch": 0.906887912268452,
+      "grad_norm": 29.359375,
+      "learning_rate": 2.065270336093522e-06,
+      "loss": 23.0099,
+      "step": 2070
+    },
+    {
+      "epoch": 0.9090784627811777,
+      "grad_norm": 22.734375,
+      "learning_rate": 2.0165611300535805e-06,
+      "loss": 23.4626,
+      "step": 2075
+    },
+    {
+      "epoch": 0.9112690132939034,
+      "grad_norm": 59.6875,
+      "learning_rate": 1.967851924013639e-06,
+      "loss": 22.6226,
+      "step": 2080
+    },
+    {
+      "epoch": 0.9134595638066292,
+      "grad_norm": 37.71875,
+      "learning_rate": 1.9191427179736972e-06,
+      "loss": 22.4083,
+      "step": 2085
+    },
+    {
+      "epoch": 0.9156501143193548,
+      "grad_norm": 23.28125,
+      "learning_rate": 1.8704335119337556e-06,
+      "loss": 23.4292,
+      "step": 2090
+    },
+    {
+      "epoch": 0.9178406648320806,
+      "grad_norm": 31.0625,
+      "learning_rate": 1.8217243058938142e-06,
+      "loss": 23.3258,
+      "step": 2095
+    },
+    {
+      "epoch": 0.9200312153448064,
+      "grad_norm": 16.359375,
+      "learning_rate": 1.7730150998538726e-06,
+      "loss": 23.5227,
+      "step": 2100
+    },
+    {
+      "epoch": 0.922221765857532,
+      "grad_norm": 63.59375,
+      "learning_rate": 1.724305893813931e-06,
+      "loss": 23.928,
+      "step": 2105
+    },
+    {
+      "epoch": 0.9244123163702578,
+      "grad_norm": 30.21875,
+      "learning_rate": 1.6755966877739893e-06,
+      "loss": 22.57,
+      "step": 2110
+    },
+    {
+      "epoch": 0.9266028668829835,
+      "grad_norm": 28.078125,
+      "learning_rate": 1.626887481734048e-06,
+      "loss": 23.4965,
+      "step": 2115
+    },
+    {
+      "epoch": 0.9287934173957093,
+      "grad_norm": 21.421875,
+      "learning_rate": 1.5781782756941063e-06,
+      "loss": 23.3915,
+      "step": 2120
+    },
+    {
+      "epoch": 0.930983967908435,
+      "grad_norm": 14.703125,
+      "learning_rate": 1.5294690696541647e-06,
+      "loss": 23.4732,
+      "step": 2125
+    },
+    {
+      "epoch": 0.9331745184211607,
+      "grad_norm": 21.5625,
+      "learning_rate": 1.480759863614223e-06,
+      "loss": 23.06,
+      "step": 2130
+    },
+    {
+      "epoch": 0.9353650689338865,
+      "grad_norm": 26.3125,
+      "learning_rate": 1.4320506575742814e-06,
+      "loss": 22.6973,
+      "step": 2135
+    },
+    {
+      "epoch": 0.9375556194466121,
+      "grad_norm": 19.078125,
+      "learning_rate": 1.3833414515343402e-06,
+      "loss": 22.8054,
+      "step": 2140
+    },
+    {
+      "epoch": 0.9397461699593379,
+      "grad_norm": 30.84375,
+      "learning_rate": 1.3346322454943986e-06,
+      "loss": 23.4105,
+      "step": 2145
+    },
+    {
+      "epoch": 0.9419367204720637,
+      "grad_norm": 62.21875,
+      "learning_rate": 1.285923039454457e-06,
+      "loss": 22.8058,
+      "step": 2150
+    },
+    {
+      "epoch": 0.9441272709847893,
+      "grad_norm": 15.203125,
+      "learning_rate": 1.2372138334145156e-06,
+      "loss": 23.0896,
+      "step": 2155
+    },
+    {
+      "epoch": 0.9463178214975151,
+      "grad_norm": 27.8125,
+      "learning_rate": 1.188504627374574e-06,
+      "loss": 23.386,
+      "step": 2160
+    },
+    {
+      "epoch": 0.9485083720102409,
+      "grad_norm": 17.546875,
+      "learning_rate": 1.1397954213346323e-06,
+      "loss": 23.0428,
+      "step": 2165
+    },
+    {
+      "epoch": 0.9506989225229665,
+      "grad_norm": 20.015625,
+      "learning_rate": 1.0910862152946907e-06,
+      "loss": 23.0953,
+      "step": 2170
+    },
+    {
+      "epoch": 0.9528894730356923,
+      "grad_norm": 24.59375,
+      "learning_rate": 1.0423770092547493e-06,
+      "loss": 22.9106,
+      "step": 2175
+    },
+    {
+      "epoch": 0.955080023548418,
+      "grad_norm": 27.9375,
+      "learning_rate": 9.936678032148077e-07,
+      "loss": 22.686,
+      "step": 2180
+    },
+    {
+      "epoch": 0.9572705740611437,
+      "grad_norm": 22.078125,
+      "learning_rate": 9.44958597174866e-07,
+      "loss": 23.1452,
+      "step": 2185
+    },
+    {
+      "epoch": 0.9594611245738695,
+      "grad_norm": 20.65625,
+      "learning_rate": 8.962493911349246e-07,
+      "loss": 22.4537,
+      "step": 2190
+    },
+    {
+      "epoch": 0.9616516750865952,
+      "grad_norm": 114.75,
+      "learning_rate": 8.47540185094983e-07,
+      "loss": 23.5824,
+      "step": 2195
+    },
+    {
+      "epoch": 0.963842225599321,
+      "grad_norm": 21.65625,
+      "learning_rate": 7.988309790550415e-07,
+      "loss": 23.2908,
+      "step": 2200
+    },
+    {
+      "epoch": 0.9660327761120466,
+      "grad_norm": 14.1875,
+      "learning_rate": 7.501217730150999e-07,
+      "loss": 22.8018,
+      "step": 2205
+    },
+    {
+      "epoch": 0.9682233266247724,
+      "grad_norm": 15.3125,
+      "learning_rate": 7.014125669751585e-07,
+      "loss": 22.5503,
+      "step": 2210
+    },
+    {
+      "epoch": 0.9704138771374982,
+      "grad_norm": 33.15625,
+      "learning_rate": 6.527033609352168e-07,
+      "loss": 22.7779,
+      "step": 2215
+    },
+    {
+      "epoch": 0.9726044276502238,
+      "grad_norm": 14.2890625,
+      "learning_rate": 6.039941548952752e-07,
+      "loss": 22.3891,
+      "step": 2220
+    },
+    {
+      "epoch": 0.9747949781629496,
+      "grad_norm": 61.40625,
+      "learning_rate": 5.552849488553337e-07,
+      "loss": 22.7335,
+      "step": 2225
+    },
+    {
+      "epoch": 0.9769855286756753,
+      "grad_norm": 48.46875,
+      "learning_rate": 5.065757428153922e-07,
+      "loss": 23.5739,
+      "step": 2230
+    },
+    {
+      "epoch": 0.979176079188401,
+      "grad_norm": 15.5703125,
+      "learning_rate": 4.578665367754506e-07,
+      "loss": 22.5284,
+      "step": 2235
+    },
+    {
+      "epoch": 0.9813666297011268,
+      "grad_norm": 21.296875,
+      "learning_rate": 4.091573307355091e-07,
+      "loss": 23.336,
+      "step": 2240
+    },
+    {
+      "epoch": 0.9835571802138525,
+      "grad_norm": 22.671875,
+      "learning_rate": 3.6044812469556747e-07,
+      "loss": 22.2249,
+      "step": 2245
+    },
+    {
+      "epoch": 0.9857477307265782,
+      "grad_norm": 31.71875,
+      "learning_rate": 3.1173891865562595e-07,
+      "loss": 22.8697,
+      "step": 2250
+    },
+    {
+      "epoch": 0.9879382812393039,
+      "grad_norm": 17.65625,
+      "learning_rate": 2.630297126156844e-07,
+      "loss": 22.5376,
+      "step": 2255
+    },
+    {
+      "epoch": 0.9901288317520297,
+      "grad_norm": 25.34375,
+      "learning_rate": 2.1432050657574284e-07,
+      "loss": 22.7322,
+      "step": 2260
+    },
+    {
+      "epoch": 0.9923193822647554,
+      "grad_norm": 57.375,
+      "learning_rate": 1.6561130053580127e-07,
+      "loss": 22.5554,
+      "step": 2265
+    },
+    {
+      "epoch": 0.9945099327774811,
+      "grad_norm": 26.25,
+      "learning_rate": 1.1690209449585972e-07,
+      "loss": 22.8525,
+      "step": 2270
+    },
+    {
+      "epoch": 0.9967004832902069,
+      "grad_norm": 32.375,
+      "learning_rate": 6.819288845591817e-08,
+      "loss": 22.8014,
+      "step": 2275
+    },
+    {
+      "epoch": 0.9988910338029326,
+      "grad_norm": 21.90625,
+      "learning_rate": 1.9483682415976622e-08,
+      "loss": 22.4073,
+      "step": 2280
     }
   ],
   "logging_steps": 5,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.9772879918220706e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null