Training in progress, step 2282, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +396 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:320c69b34dca6d0448bda999d261b9b7dc2acc0901006dcd199ce13e45d754f1
 size 791781368

 version https://git-lfs.github.com/spec/v1
+oid sha256:bc189396d250cd02b58eccc2bb364409eaee0c8425639afa61aa38ebc2574ad2
 size 791781368

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2219206f12e11aa97a1823b9d064ff66778aab5d93dea10195714a4421d40f4d
 size 2375487866

 version https://git-lfs.github.com/spec/v1
+oid sha256:f3a0e5ae6c5f327214099f137a7cc8083d4122fae307d1b2016880d7e75c10cd
 size 2375487866

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd44b9ad3ef5591f2a0671f1ec04c21ad479cbd1d478859e3ba017f1c74bf027
 size 1000

 version https://git-lfs.github.com/spec/v1
+oid sha256:d305aeb5f66edef533fe495bedd6ae8dca8a6220560f62265310b5e9dbe7ba24
 size 1000

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8762202050902917,
   "eval_steps": 1000,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2823,6 +2823,398 @@
       "eval_samples_per_second": 1013.774,
       "eval_steps_per_second": 31.682,
       "step": 2000
     }
   ],
   "logging_steps": 5,
@@ -2837,12 +3229,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.732943025316823e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9997672540080229,
   "eval_steps": 1000,
+  "global_step": 2282,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1013.774,
       "eval_steps_per_second": 31.682,
       "step": 2000
+    },
+    {
+      "epoch": 0.8784107556030175,
+      "grad_norm": 20.109375,
+      "learning_rate": 2.698490014612762e-06,
+      "loss": 25.8034,
+      "step": 2005
+    },
+    {
+      "epoch": 0.8806013061157432,
+      "grad_norm": 10.953125,
+      "learning_rate": 2.6497808085728203e-06,
+      "loss": 25.0685,
+      "step": 2010
+    },
+    {
+      "epoch": 0.8827918566284689,
+      "grad_norm": 31.0625,
+      "learning_rate": 2.601071602532879e-06,
+      "loss": 25.7096,
+      "step": 2015
+    },
+    {
+      "epoch": 0.8849824071411947,
+      "grad_norm": 24.046875,
+      "learning_rate": 2.5523623964929375e-06,
+      "loss": 25.3376,
+      "step": 2020
+    },
+    {
+      "epoch": 0.8871729576539203,
+      "grad_norm": 27.65625,
+      "learning_rate": 2.503653190452996e-06,
+      "loss": 24.6723,
+      "step": 2025
+    },
+    {
+      "epoch": 0.8893635081666461,
+      "grad_norm": 12.5078125,
+      "learning_rate": 2.4549439844130542e-06,
+      "loss": 25.1757,
+      "step": 2030
+    },
+    {
+      "epoch": 0.8915540586793719,
+      "grad_norm": 15.1015625,
+      "learning_rate": 2.4062347783731126e-06,
+      "loss": 25.6934,
+      "step": 2035
+    },
+    {
+      "epoch": 0.8937446091920976,
+      "grad_norm": 28.75,
+      "learning_rate": 2.357525572333171e-06,
+      "loss": 25.4336,
+      "step": 2040
+    },
+    {
+      "epoch": 0.8959351597048233,
+      "grad_norm": 18.53125,
+      "learning_rate": 2.3088163662932294e-06,
+      "loss": 24.4317,
+      "step": 2045
+    },
+    {
+      "epoch": 0.8981257102175491,
+      "grad_norm": 15.265625,
+      "learning_rate": 2.260107160253288e-06,
+      "loss": 24.6006,
+      "step": 2050
+    },
+    {
+      "epoch": 0.9003162607302748,
+      "grad_norm": 12.1796875,
+      "learning_rate": 2.2113979542133465e-06,
+      "loss": 24.7936,
+      "step": 2055
+    },
+    {
+      "epoch": 0.9025068112430005,
+      "grad_norm": 12.21875,
+      "learning_rate": 2.162688748173405e-06,
+      "loss": 26.129,
+      "step": 2060
+    },
+    {
+      "epoch": 0.9046973617557262,
+      "grad_norm": 12.984375,
+      "learning_rate": 2.1139795421334633e-06,
+      "loss": 24.6225,
+      "step": 2065
+    },
+    {
+      "epoch": 0.906887912268452,
+      "grad_norm": 15.65625,
+      "learning_rate": 2.065270336093522e-06,
+      "loss": 24.7391,
+      "step": 2070
+    },
+    {
+      "epoch": 0.9090784627811777,
+      "grad_norm": 13.3046875,
+      "learning_rate": 2.0165611300535805e-06,
+      "loss": 24.7855,
+      "step": 2075
+    },
+    {
+      "epoch": 0.9112690132939034,
+      "grad_norm": 15.015625,
+      "learning_rate": 1.967851924013639e-06,
+      "loss": 24.7362,
+      "step": 2080
+    },
+    {
+      "epoch": 0.9134595638066292,
+      "grad_norm": 18.53125,
+      "learning_rate": 1.9191427179736972e-06,
+      "loss": 24.8633,
+      "step": 2085
+    },
+    {
+      "epoch": 0.9156501143193548,
+      "grad_norm": 19.0625,
+      "learning_rate": 1.8704335119337556e-06,
+      "loss": 24.7622,
+      "step": 2090
+    },
+    {
+      "epoch": 0.9178406648320806,
+      "grad_norm": 22.96875,
+      "learning_rate": 1.8217243058938142e-06,
+      "loss": 24.8469,
+      "step": 2095
+    },
+    {
+      "epoch": 0.9200312153448064,
+      "grad_norm": 16.546875,
+      "learning_rate": 1.7730150998538726e-06,
+      "loss": 24.4148,
+      "step": 2100
+    },
+    {
+      "epoch": 0.922221765857532,
+      "grad_norm": 17.15625,
+      "learning_rate": 1.724305893813931e-06,
+      "loss": 24.5911,
+      "step": 2105
+    },
+    {
+      "epoch": 0.9244123163702578,
+      "grad_norm": 32.375,
+      "learning_rate": 1.6755966877739893e-06,
+      "loss": 24.3472,
+      "step": 2110
+    },
+    {
+      "epoch": 0.9266028668829835,
+      "grad_norm": 11.1796875,
+      "learning_rate": 1.626887481734048e-06,
+      "loss": 24.272,
+      "step": 2115
+    },
+    {
+      "epoch": 0.9287934173957093,
+      "grad_norm": 15.828125,
+      "learning_rate": 1.5781782756941063e-06,
+      "loss": 24.1789,
+      "step": 2120
+    },
+    {
+      "epoch": 0.930983967908435,
+      "grad_norm": 13.609375,
+      "learning_rate": 1.5294690696541647e-06,
+      "loss": 24.5268,
+      "step": 2125
+    },
+    {
+      "epoch": 0.9331745184211607,
+      "grad_norm": 11.7890625,
+      "learning_rate": 1.480759863614223e-06,
+      "loss": 24.4547,
+      "step": 2130
+    },
+    {
+      "epoch": 0.9353650689338865,
+      "grad_norm": 14.46875,
+      "learning_rate": 1.4320506575742814e-06,
+      "loss": 24.5342,
+      "step": 2135
+    },
+    {
+      "epoch": 0.9375556194466121,
+      "grad_norm": 12.9765625,
+      "learning_rate": 1.3833414515343402e-06,
+      "loss": 24.1581,
+      "step": 2140
+    },
+    {
+      "epoch": 0.9397461699593379,
+      "grad_norm": 13.4140625,
+      "learning_rate": 1.3346322454943986e-06,
+      "loss": 24.0952,
+      "step": 2145
+    },
+    {
+      "epoch": 0.9419367204720637,
+      "grad_norm": 11.921875,
+      "learning_rate": 1.285923039454457e-06,
+      "loss": 24.6826,
+      "step": 2150
+    },
+    {
+      "epoch": 0.9441272709847893,
+      "grad_norm": 13.7734375,
+      "learning_rate": 1.2372138334145156e-06,
+      "loss": 23.6338,
+      "step": 2155
+    },
+    {
+      "epoch": 0.9463178214975151,
+      "grad_norm": 12.1015625,
+      "learning_rate": 1.188504627374574e-06,
+      "loss": 24.3637,
+      "step": 2160
+    },
+    {
+      "epoch": 0.9485083720102409,
+      "grad_norm": 14.578125,
+      "learning_rate": 1.1397954213346323e-06,
+      "loss": 24.6624,
+      "step": 2165
+    },
+    {
+      "epoch": 0.9506989225229665,
+      "grad_norm": 12.7578125,
+      "learning_rate": 1.0910862152946907e-06,
+      "loss": 23.9978,
+      "step": 2170
+    },
+    {
+      "epoch": 0.9528894730356923,
+      "grad_norm": 13.2265625,
+      "learning_rate": 1.0423770092547493e-06,
+      "loss": 24.4468,
+      "step": 2175
+    },
+    {
+      "epoch": 0.955080023548418,
+      "grad_norm": 11.7890625,
+      "learning_rate": 9.936678032148077e-07,
+      "loss": 24.303,
+      "step": 2180
+    },
+    {
+      "epoch": 0.9572705740611437,
+      "grad_norm": 16.25,
+      "learning_rate": 9.44958597174866e-07,
+      "loss": 24.5248,
+      "step": 2185
+    },
+    {
+      "epoch": 0.9594611245738695,
+      "grad_norm": 18.859375,
+      "learning_rate": 8.962493911349246e-07,
+      "loss": 24.4316,
+      "step": 2190
+    },
+    {
+      "epoch": 0.9616516750865952,
+      "grad_norm": 10.6796875,
+      "learning_rate": 8.47540185094983e-07,
+      "loss": 23.8851,
+      "step": 2195
+    },
+    {
+      "epoch": 0.963842225599321,
+      "grad_norm": 12.140625,
+      "learning_rate": 7.988309790550415e-07,
+      "loss": 24.2927,
+      "step": 2200
+    },
+    {
+      "epoch": 0.9660327761120466,
+      "grad_norm": 18.078125,
+      "learning_rate": 7.501217730150999e-07,
+      "loss": 24.1125,
+      "step": 2205
+    },
+    {
+      "epoch": 0.9682233266247724,
+      "grad_norm": 9.7109375,
+      "learning_rate": 7.014125669751585e-07,
+      "loss": 24.1763,
+      "step": 2210
+    },
+    {
+      "epoch": 0.9704138771374982,
+      "grad_norm": 12.6953125,
+      "learning_rate": 6.527033609352168e-07,
+      "loss": 23.1948,
+      "step": 2215
+    },
+    {
+      "epoch": 0.9726044276502238,
+      "grad_norm": 18.671875,
+      "learning_rate": 6.039941548952752e-07,
+      "loss": 24.2813,
+      "step": 2220
+    },
+    {
+      "epoch": 0.9747949781629496,
+      "grad_norm": 9.96875,
+      "learning_rate": 5.552849488553337e-07,
+      "loss": 23.7533,
+      "step": 2225
+    },
+    {
+      "epoch": 0.9769855286756753,
+      "grad_norm": 13.71875,
+      "learning_rate": 5.065757428153922e-07,
+      "loss": 24.5382,
+      "step": 2230
+    },
+    {
+      "epoch": 0.979176079188401,
+      "grad_norm": 13.4296875,
+      "learning_rate": 4.578665367754506e-07,
+      "loss": 23.7636,
+      "step": 2235
+    },
+    {
+      "epoch": 0.9813666297011268,
+      "grad_norm": 13.640625,
+      "learning_rate": 4.091573307355091e-07,
+      "loss": 23.785,
+      "step": 2240
+    },
+    {
+      "epoch": 0.9835571802138525,
+      "grad_norm": 27.078125,
+      "learning_rate": 3.6044812469556747e-07,
+      "loss": 23.8269,
+      "step": 2245
+    },
+    {
+      "epoch": 0.9857477307265782,
+      "grad_norm": 13.4609375,
+      "learning_rate": 3.1173891865562595e-07,
+      "loss": 24.0428,
+      "step": 2250
+    },
+    {
+      "epoch": 0.9879382812393039,
+      "grad_norm": 12.109375,
+      "learning_rate": 2.630297126156844e-07,
+      "loss": 24.3293,
+      "step": 2255
+    },
+    {
+      "epoch": 0.9901288317520297,
+      "grad_norm": 20.84375,
+      "learning_rate": 2.1432050657574284e-07,
+      "loss": 24.0019,
+      "step": 2260
+    },
+    {
+      "epoch": 0.9923193822647554,
+      "grad_norm": 16.265625,
+      "learning_rate": 1.6561130053580127e-07,
+      "loss": 24.3604,
+      "step": 2265
+    },
+    {
+      "epoch": 0.9945099327774811,
+      "grad_norm": 19.96875,
+      "learning_rate": 1.1690209449585972e-07,
+      "loss": 24.1925,
+      "step": 2270
+    },
+    {
+      "epoch": 0.9967004832902069,
+      "grad_norm": 16.953125,
+      "learning_rate": 6.819288845591817e-08,
+      "loss": 23.6996,
+      "step": 2275
+    },
+    {
+      "epoch": 0.9988910338029326,
+      "grad_norm": 19.6875,
+      "learning_rate": 1.9483682415976622e-08,
+      "loss": 23.9507,
+      "step": 2280
     }
   ],
   "logging_steps": 5,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.9772879918220706e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null