Training in progress, step 8342, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +480 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa6538f15523715706c0cc48c49152bcfecc249e17e889feb643a91012caa9c6
 size 791869518

 version https://git-lfs.github.com/spec/v1
+oid sha256:03cc5d7b651a639b0220de3a2a1ccacf8b95355b5dcd8c8b028327a0da96fdfb
 size 791869518

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d439f2eace1ef00a678a5956526c66bcd5928b76f0a87826a26ada01bdd73735
 size 2375752250

 version https://git-lfs.github.com/spec/v1
+oid sha256:bed96b488a7dd948ce9646603587fe38f7e77e9afc4e5a26e9aef530b83068ba
 size 2375752250

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c554d2b052e63939cff49cc1a506e9844ff0d9bd378c89494a0df0ba1b188a15
 size 1000

 version https://git-lfs.github.com/spec/v1
+oid sha256:08b735cffdc42abe93b366df558ae724495aca3da952a5c2458609ec9e48fe3c
 size 1000

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9589738979292157,
   "eval_steps": 500,
-  "global_step": 8000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11335,6 +11335,482 @@
       "eval_samples_per_second": 1116.87,
       "eval_steps_per_second": 34.904,
       "step": 8000
     }
   ],
   "logging_steps": 5,
@@ -11349,12 +11825,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.4658863942310298e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9999700320656897,
   "eval_steps": 500,
+  "global_step": 8342,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1116.87,
       "eval_steps_per_second": 34.904,
       "step": 8000
+    },
+    {
+      "epoch": 0.9595732566154215,
+      "grad_norm": 102.375,
+      "learning_rate": 8.978286932196616e-08,
+      "loss": 64.3574,
+      "step": 8005
+    },
+    {
+      "epoch": 0.9601726153016272,
+      "grad_norm": 113.8125,
+      "learning_rate": 8.845077927267883e-08,
+      "loss": 62.3656,
+      "step": 8010
+    },
+    {
+      "epoch": 0.960771973987833,
+      "grad_norm": 106.625,
+      "learning_rate": 8.711868922339149e-08,
+      "loss": 64.0395,
+      "step": 8015
+    },
+    {
+      "epoch": 0.9613713326740387,
+      "grad_norm": 108.875,
+      "learning_rate": 8.578659917410417e-08,
+      "loss": 65.0463,
+      "step": 8020
+    },
+    {
+      "epoch": 0.9619706913602445,
+      "grad_norm": 106.0625,
+      "learning_rate": 8.445450912481683e-08,
+      "loss": 63.7409,
+      "step": 8025
+    },
+    {
+      "epoch": 0.9625700500464502,
+      "grad_norm": 108.375,
+      "learning_rate": 8.31224190755295e-08,
+      "loss": 63.3834,
+      "step": 8030
+    },
+    {
+      "epoch": 0.9631694087326561,
+      "grad_norm": 108.125,
+      "learning_rate": 8.179032902624216e-08,
+      "loss": 63.9561,
+      "step": 8035
+    },
+    {
+      "epoch": 0.9637687674188619,
+      "grad_norm": 107.5,
+      "learning_rate": 8.045823897695484e-08,
+      "loss": 61.5704,
+      "step": 8040
+    },
+    {
+      "epoch": 0.9643681261050676,
+      "grad_norm": 107.375,
+      "learning_rate": 7.91261489276675e-08,
+      "loss": 63.2019,
+      "step": 8045
+    },
+    {
+      "epoch": 0.9649674847912734,
+      "grad_norm": 105.375,
+      "learning_rate": 7.779405887838017e-08,
+      "loss": 64.2461,
+      "step": 8050
+    },
+    {
+      "epoch": 0.9655668434774791,
+      "grad_norm": 108.5,
+      "learning_rate": 7.646196882909283e-08,
+      "loss": 63.3329,
+      "step": 8055
+    },
+    {
+      "epoch": 0.9661662021636849,
+      "grad_norm": 108.625,
+      "learning_rate": 7.512987877980551e-08,
+      "loss": 64.4003,
+      "step": 8060
+    },
+    {
+      "epoch": 0.9667655608498906,
+      "grad_norm": 112.0625,
+      "learning_rate": 7.379778873051817e-08,
+      "loss": 63.6081,
+      "step": 8065
+    },
+    {
+      "epoch": 0.9673649195360964,
+      "grad_norm": 107.9375,
+      "learning_rate": 7.246569868123084e-08,
+      "loss": 63.1161,
+      "step": 8070
+    },
+    {
+      "epoch": 0.9679642782223021,
+      "grad_norm": 109.0,
+      "learning_rate": 7.113360863194353e-08,
+      "loss": 64.4234,
+      "step": 8075
+    },
+    {
+      "epoch": 0.9685636369085079,
+      "grad_norm": 102.6875,
+      "learning_rate": 6.98015185826562e-08,
+      "loss": 63.9451,
+      "step": 8080
+    },
+    {
+      "epoch": 0.9691629955947136,
+      "grad_norm": 106.1875,
+      "learning_rate": 6.846942853336886e-08,
+      "loss": 63.7961,
+      "step": 8085
+    },
+    {
+      "epoch": 0.9697623542809194,
+      "grad_norm": 107.0,
+      "learning_rate": 6.713733848408152e-08,
+      "loss": 62.8793,
+      "step": 8090
+    },
+    {
+      "epoch": 0.9703617129671251,
+      "grad_norm": 107.375,
+      "learning_rate": 6.58052484347942e-08,
+      "loss": 63.4959,
+      "step": 8095
+    },
+    {
+      "epoch": 0.9709610716533309,
+      "grad_norm": 103.375,
+      "learning_rate": 6.447315838550686e-08,
+      "loss": 62.9931,
+      "step": 8100
+    },
+    {
+      "epoch": 0.9715604303395367,
+      "grad_norm": 108.25,
+      "learning_rate": 6.314106833621953e-08,
+      "loss": 63.7424,
+      "step": 8105
+    },
+    {
+      "epoch": 0.9721597890257424,
+      "grad_norm": 111.4375,
+      "learning_rate": 6.180897828693219e-08,
+      "loss": 64.0168,
+      "step": 8110
+    },
+    {
+      "epoch": 0.9727591477119483,
+      "grad_norm": 109.8125,
+      "learning_rate": 6.047688823764486e-08,
+      "loss": 63.7016,
+      "step": 8115
+    },
+    {
+      "epoch": 0.973358506398154,
+      "grad_norm": 106.125,
+      "learning_rate": 5.9144798188357535e-08,
+      "loss": 62.0944,
+      "step": 8120
+    },
+    {
+      "epoch": 0.9739578650843598,
+      "grad_norm": 109.0,
+      "learning_rate": 5.78127081390702e-08,
+      "loss": 62.8097,
+      "step": 8125
+    },
+    {
+      "epoch": 0.9745572237705655,
+      "grad_norm": 106.6875,
+      "learning_rate": 5.648061808978287e-08,
+      "loss": 62.1002,
+      "step": 8130
+    },
+    {
+      "epoch": 0.9751565824567713,
+      "grad_norm": 108.3125,
+      "learning_rate": 5.5148528040495535e-08,
+      "loss": 62.1948,
+      "step": 8135
+    },
+    {
+      "epoch": 0.975755941142977,
+      "grad_norm": 107.1875,
+      "learning_rate": 5.3816437991208206e-08,
+      "loss": 62.8547,
+      "step": 8140
+    },
+    {
+      "epoch": 0.9763552998291828,
+      "grad_norm": 113.3125,
+      "learning_rate": 5.248434794192087e-08,
+      "loss": 64.7491,
+      "step": 8145
+    },
+    {
+      "epoch": 0.9769546585153885,
+      "grad_norm": 111.25,
+      "learning_rate": 5.115225789263354e-08,
+      "loss": 63.4233,
+      "step": 8150
+    },
+    {
+      "epoch": 0.9775540172015943,
+      "grad_norm": 108.375,
+      "learning_rate": 4.9820167843346206e-08,
+      "loss": 64.265,
+      "step": 8155
+    },
+    {
+      "epoch": 0.9781533758878,
+      "grad_norm": 107.75,
+      "learning_rate": 4.8488077794058877e-08,
+      "loss": 63.7708,
+      "step": 8160
+    },
+    {
+      "epoch": 0.9787527345740058,
+      "grad_norm": 107.8125,
+      "learning_rate": 4.715598774477154e-08,
+      "loss": 63.6366,
+      "step": 8165
+    },
+    {
+      "epoch": 0.9793520932602116,
+      "grad_norm": 108.5625,
+      "learning_rate": 4.582389769548421e-08,
+      "loss": 63.4673,
+      "step": 8170
+    },
+    {
+      "epoch": 0.9799514519464173,
+      "grad_norm": 109.8125,
+      "learning_rate": 4.449180764619688e-08,
+      "loss": 63.0172,
+      "step": 8175
+    },
+    {
+      "epoch": 0.9805508106326231,
+      "grad_norm": 111.25,
+      "learning_rate": 4.315971759690955e-08,
+      "loss": 64.0092,
+      "step": 8180
+    },
+    {
+      "epoch": 0.9811501693188288,
+      "grad_norm": 107.75,
+      "learning_rate": 4.182762754762222e-08,
+      "loss": 63.3634,
+      "step": 8185
+    },
+    {
+      "epoch": 0.9817495280050346,
+      "grad_norm": 105.375,
+      "learning_rate": 4.049553749833488e-08,
+      "loss": 62.8124,
+      "step": 8190
+    },
+    {
+      "epoch": 0.9823488866912403,
+      "grad_norm": 106.5625,
+      "learning_rate": 3.9163447449047554e-08,
+      "loss": 63.596,
+      "step": 8195
+    },
+    {
+      "epoch": 0.9829482453774462,
+      "grad_norm": 108.625,
+      "learning_rate": 3.783135739976022e-08,
+      "loss": 63.2134,
+      "step": 8200
+    },
+    {
+      "epoch": 0.9835476040636519,
+      "grad_norm": 110.0,
+      "learning_rate": 3.649926735047289e-08,
+      "loss": 63.8696,
+      "step": 8205
+    },
+    {
+      "epoch": 0.9841469627498577,
+      "grad_norm": 108.5,
+      "learning_rate": 3.516717730118556e-08,
+      "loss": 63.2911,
+      "step": 8210
+    },
+    {
+      "epoch": 0.9847463214360634,
+      "grad_norm": 109.3125,
+      "learning_rate": 3.383508725189823e-08,
+      "loss": 64.3479,
+      "step": 8215
+    },
+    {
+      "epoch": 0.9853456801222692,
+      "grad_norm": 104.3125,
+      "learning_rate": 3.2502997202610895e-08,
+      "loss": 63.6428,
+      "step": 8220
+    },
+    {
+      "epoch": 0.985945038808475,
+      "grad_norm": 105.625,
+      "learning_rate": 3.117090715332356e-08,
+      "loss": 63.695,
+      "step": 8225
+    },
+    {
+      "epoch": 0.9865443974946807,
+      "grad_norm": 107.5625,
+      "learning_rate": 2.983881710403623e-08,
+      "loss": 63.5868,
+      "step": 8230
+    },
+    {
+      "epoch": 0.9871437561808865,
+      "grad_norm": 106.8125,
+      "learning_rate": 2.8506727054748902e-08,
+      "loss": 62.9535,
+      "step": 8235
+    },
+    {
+      "epoch": 0.9877431148670922,
+      "grad_norm": 112.1875,
+      "learning_rate": 2.717463700546157e-08,
+      "loss": 63.9218,
+      "step": 8240
+    },
+    {
+      "epoch": 0.988342473553298,
+      "grad_norm": 110.9375,
+      "learning_rate": 2.5842546956174237e-08,
+      "loss": 63.0742,
+      "step": 8245
+    },
+    {
+      "epoch": 0.9889418322395037,
+      "grad_norm": 109.375,
+      "learning_rate": 2.4510456906886905e-08,
+      "loss": 63.139,
+      "step": 8250
+    },
+    {
+      "epoch": 0.9895411909257095,
+      "grad_norm": 106.875,
+      "learning_rate": 2.3178366857599572e-08,
+      "loss": 63.3816,
+      "step": 8255
+    },
+    {
+      "epoch": 0.9901405496119152,
+      "grad_norm": 108.8125,
+      "learning_rate": 2.184627680831224e-08,
+      "loss": 62.7286,
+      "step": 8260
+    },
+    {
+      "epoch": 0.990739908298121,
+      "grad_norm": 108.875,
+      "learning_rate": 2.0514186759024908e-08,
+      "loss": 64.1767,
+      "step": 8265
+    },
+    {
+      "epoch": 0.9913392669843267,
+      "grad_norm": 107.3125,
+      "learning_rate": 1.9182096709737575e-08,
+      "loss": 63.8556,
+      "step": 8270
+    },
+    {
+      "epoch": 0.9919386256705325,
+      "grad_norm": 105.875,
+      "learning_rate": 1.7850006660450246e-08,
+      "loss": 63.7212,
+      "step": 8275
+    },
+    {
+      "epoch": 0.9925379843567383,
+      "grad_norm": 108.375,
+      "learning_rate": 1.6517916611162914e-08,
+      "loss": 63.7732,
+      "step": 8280
+    },
+    {
+      "epoch": 0.9931373430429441,
+      "grad_norm": 113.875,
+      "learning_rate": 1.5185826561875582e-08,
+      "loss": 64.1175,
+      "step": 8285
+    },
+    {
+      "epoch": 0.9937367017291499,
+      "grad_norm": 106.25,
+      "learning_rate": 1.3853736512588251e-08,
+      "loss": 63.2011,
+      "step": 8290
+    },
+    {
+      "epoch": 0.9943360604153556,
+      "grad_norm": 105.5625,
+      "learning_rate": 1.2521646463300919e-08,
+      "loss": 63.6572,
+      "step": 8295
+    },
+    {
+      "epoch": 0.9949354191015614,
+      "grad_norm": 104.5625,
+      "learning_rate": 1.1189556414013587e-08,
+      "loss": 64.0572,
+      "step": 8300
+    },
+    {
+      "epoch": 0.9955347777877671,
+      "grad_norm": 111.0625,
+      "learning_rate": 9.857466364726254e-09,
+      "loss": 62.2712,
+      "step": 8305
+    },
+    {
+      "epoch": 0.9961341364739729,
+      "grad_norm": 109.1875,
+      "learning_rate": 8.525376315438924e-09,
+      "loss": 64.1278,
+      "step": 8310
+    },
+    {
+      "epoch": 0.9967334951601786,
+      "grad_norm": 103.9375,
+      "learning_rate": 7.193286266151592e-09,
+      "loss": 63.2825,
+      "step": 8315
+    },
+    {
+      "epoch": 0.9973328538463844,
+      "grad_norm": 108.3125,
+      "learning_rate": 5.86119621686426e-09,
+      "loss": 63.4902,
+      "step": 8320
+    },
+    {
+      "epoch": 0.9979322125325901,
+      "grad_norm": 110.0,
+      "learning_rate": 4.529106167576927e-09,
+      "loss": 63.8995,
+      "step": 8325
+    },
+    {
+      "epoch": 0.9985315712187959,
+      "grad_norm": 108.3125,
+      "learning_rate": 3.1970161182895963e-09,
+      "loss": 63.5888,
+      "step": 8330
+    },
+    {
+      "epoch": 0.9991309299050016,
+      "grad_norm": 103.75,
+      "learning_rate": 1.8649260690022644e-09,
+      "loss": 62.5743,
+      "step": 8335
+    },
+    {
+      "epoch": 0.9997302885912074,
+      "grad_norm": 107.0,
+      "learning_rate": 5.328360197149327e-10,
+      "loss": 62.6706,
+      "step": 8340
     }
   ],
   "logging_steps": 5,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.614053037573669e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null