Training in progress, step 204, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:740b9f616f65c988c61dc95217c9c0fdc273f6f55f09725e222b88496c72c59d
 size 9823216

 version https://git-lfs.github.com/spec/v1
+oid sha256:f41af54ef48385f1e9240a01ed0e4e9778b1bbdcbbeaa3976f0744a97cee781a
 size 9823216

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3fea744051c34f8a0de02d059c8345493f41f146deef35aa4b2cb0c6a8429f8b
 size 5962860

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d30bc5cfe87f818a6b3f472e279714bdad1dca3906fc14f6922585bbcdc9e13
 size 5962860

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6bd6748756db41b0863adfb7f8ef25e8a4b1b0052368551cdc549a64c7a648cc
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d1d1251a5e12d0ab0e0cece07fc1165a0b3630a00ab4bccd575f3d646ed3d1cd
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41c7581b7df531d3dbae8c2657d36090edae47306e1ded92753ec3676d20d1b4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e783401ecc99df9e68aa18c0fa2df70c08a04fbed0cb3e6ae60e028c1074e54a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.75,
   "eval_steps": 500,
-  "global_step": 153,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1078,6 +1078,363 @@
       "learning_rate": 1.5349188304533413e-05,
       "loss": 1.9898,
       "step": 153
     }
   ],
   "logging_steps": 1,
@@ -1092,12 +1449,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2023918081671168.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 500,
+  "global_step": 204,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.5349188304533413e-05,
       "loss": 1.9898,
       "step": 153
+    },
+    {
+      "epoch": 0.7549019607843137,
+      "grad_norm": 0.6448200941085815,
+      "learning_rate": 1.4784473717366387e-05,
+      "loss": 2.0231,
+      "step": 154
+    },
+    {
+      "epoch": 0.7598039215686274,
+      "grad_norm": 0.6143677234649658,
+      "learning_rate": 1.4228535577631442e-05,
+      "loss": 1.8083,
+      "step": 155
+    },
+    {
+      "epoch": 0.7647058823529411,
+      "grad_norm": 0.6792232990264893,
+      "learning_rate": 1.3681512436768045e-05,
+      "loss": 2.2576,
+      "step": 156
+    },
+    {
+      "epoch": 0.7696078431372549,
+      "grad_norm": 0.6397396922111511,
+      "learning_rate": 1.314354062441106e-05,
+      "loss": 1.9228,
+      "step": 157
+    },
+    {
+      "epoch": 0.7745098039215687,
+      "grad_norm": 0.6527466773986816,
+      "learning_rate": 1.2614754214414548e-05,
+      "loss": 1.9549,
+      "step": 158
+    },
+    {
+      "epoch": 0.7794117647058824,
+      "grad_norm": 0.558052659034729,
+      "learning_rate": 1.2095284991437733e-05,
+      "loss": 1.6198,
+      "step": 159
+    },
+    {
+      "epoch": 0.7843137254901961,
+      "grad_norm": 0.5108990669250488,
+      "learning_rate": 1.1585262418101467e-05,
+      "loss": 1.7669,
+      "step": 160
+    },
+    {
+      "epoch": 0.7892156862745098,
+      "grad_norm": 0.5814594030380249,
+      "learning_rate": 1.1084813602723515e-05,
+      "loss": 1.8603,
+      "step": 161
+    },
+    {
+      "epoch": 0.7941176470588235,
+      "grad_norm": 0.5403014421463013,
+      "learning_rate": 1.0594063267640386e-05,
+      "loss": 1.6909,
+      "step": 162
+    },
+    {
+      "epoch": 0.7990196078431373,
+      "grad_norm": 0.5353929400444031,
+      "learning_rate": 1.0113133718124035e-05,
+      "loss": 1.8579,
+      "step": 163
+    },
+    {
+      "epoch": 0.803921568627451,
+      "grad_norm": 0.6234535574913025,
+      "learning_rate": 9.642144811900739e-06,
+      "loss": 2.0002,
+      "step": 164
+    },
+    {
+      "epoch": 0.8088235294117647,
+      "grad_norm": 0.5333160161972046,
+      "learning_rate": 9.181213929280046e-06,
+      "loss": 1.7953,
+      "step": 165
+    },
+    {
+      "epoch": 0.8137254901960784,
+      "grad_norm": 0.5747147798538208,
+      "learning_rate": 8.7304559439012e-06,
+      "loss": 2.2429,
+      "step": 166
+    },
+    {
+      "epoch": 0.8186274509803921,
+      "grad_norm": 0.5337631702423096,
+      "learning_rate": 8.28998319410413e-06,
+      "loss": 1.7715,
+      "step": 167
+    },
+    {
+      "epoch": 0.8235294117647058,
+      "grad_norm": 0.6177130341529846,
+      "learning_rate": 7.859905454932471e-06,
+      "loss": 1.684,
+      "step": 168
+    },
+    {
+      "epoch": 0.8284313725490197,
+      "grad_norm": 0.5750278830528259,
+      "learning_rate": 7.440329910775273e-06,
+      "loss": 2.0278,
+      "step": 169
+    },
+    {
+      "epoch": 0.8333333333333334,
+      "grad_norm": 0.5838117599487305,
+      "learning_rate": 7.031361128654401e-06,
+      "loss": 1.9058,
+      "step": 170
+    },
+    {
+      "epoch": 0.8382352941176471,
+      "grad_norm": 0.573087215423584,
+      "learning_rate": 6.633101032164274e-06,
+      "loss": 2.153,
+      "step": 171
+    },
+    {
+      "epoch": 0.8431372549019608,
+      "grad_norm": 0.5336370468139648,
+      "learning_rate": 6.2456488760703205e-06,
+      "loss": 1.773,
+      "step": 172
+    },
+    {
+      "epoch": 0.8480392156862745,
+      "grad_norm": 0.628755509853363,
+      "learning_rate": 5.869101221572654e-06,
+      "loss": 1.949,
+      "step": 173
+    },
+    {
+      "epoch": 0.8529411764705882,
+      "grad_norm": 0.5883850455284119,
+      "learning_rate": 5.5035519122409895e-06,
+      "loss": 1.8569,
+      "step": 174
+    },
+    {
+      "epoch": 0.8578431372549019,
+      "grad_norm": 0.5533519983291626,
+      "learning_rate": 5.149092050626825e-06,
+      "loss": 1.9951,
+      "step": 175
+    },
+    {
+      "epoch": 0.8627450980392157,
+      "grad_norm": 0.5445258021354675,
+      "learning_rate": 4.805809975558828e-06,
+      "loss": 1.8357,
+      "step": 176
+    },
+    {
+      "epoch": 0.8676470588235294,
+      "grad_norm": 0.7583712339401245,
+      "learning_rate": 4.47379124012689e-06,
+      "loss": 1.9937,
+      "step": 177
+    },
+    {
+      "epoch": 0.8725490196078431,
+      "grad_norm": 0.550308883190155,
+      "learning_rate": 4.153118590360561e-06,
+      "loss": 1.7592,
+      "step": 178
+    },
+    {
+      "epoch": 0.8774509803921569,
+      "grad_norm": 0.5869921445846558,
+      "learning_rate": 3.843871944606969e-06,
+      "loss": 1.8233,
+      "step": 179
+    },
+    {
+      "epoch": 0.8823529411764706,
+      "grad_norm": 0.624453067779541,
+      "learning_rate": 3.5461283736134722e-06,
+      "loss": 1.7236,
+      "step": 180
+    },
+    {
+      "epoch": 0.8872549019607843,
+      "grad_norm": 0.5934841632843018,
+      "learning_rate": 3.2599620813200837e-06,
+      "loss": 2.2794,
+      "step": 181
+    },
+    {
+      "epoch": 0.8921568627450981,
+      "grad_norm": 0.6147695183753967,
+      "learning_rate": 2.9854443863662262e-06,
+      "loss": 2.0399,
+      "step": 182
+    },
+    {
+      "epoch": 0.8970588235294118,
+      "grad_norm": 0.6072641611099243,
+      "learning_rate": 2.722643704316652e-06,
+      "loss": 2.0926,
+      "step": 183
+    },
+    {
+      "epoch": 0.9019607843137255,
+      "grad_norm": 0.6701170206069946,
+      "learning_rate": 2.4716255306108605e-06,
+      "loss": 2.0332,
+      "step": 184
+    },
+    {
+      "epoch": 0.9068627450980392,
+      "grad_norm": 0.6028889417648315,
+      "learning_rate": 2.2324524242402613e-06,
+      "loss": 2.066,
+      "step": 185
+    },
+    {
+      "epoch": 0.9117647058823529,
+      "grad_norm": 0.5733740329742432,
+      "learning_rate": 2.0051839921571448e-06,
+      "loss": 2.119,
+      "step": 186
+    },
+    {
+      "epoch": 0.9166666666666666,
+      "grad_norm": 0.6689417958259583,
+      "learning_rate": 1.7898768744194162e-06,
+      "loss": 2.1784,
+      "step": 187
+    },
+    {
+      "epoch": 0.9215686274509803,
+      "grad_norm": 0.6812567114830017,
+      "learning_rate": 1.5865847300746417e-06,
+      "loss": 1.7479,
+      "step": 188
+    },
+    {
+      "epoch": 0.9264705882352942,
+      "grad_norm": 0.6046686172485352,
+      "learning_rate": 1.3953582237871521e-06,
+      "loss": 1.942,
+      "step": 189
+    },
+    {
+      "epoch": 0.9313725490196079,
+      "grad_norm": 0.6390747427940369,
+      "learning_rate": 1.2162450132113201e-06,
+      "loss": 2.1545,
+      "step": 190
+    },
+    {
+      "epoch": 0.9362745098039216,
+      "grad_norm": 0.6433112621307373,
+      "learning_rate": 1.049289737114273e-06,
+      "loss": 1.9614,
+      "step": 191
+    },
+    {
+      "epoch": 0.9411764705882353,
+      "grad_norm": 0.6422625780105591,
+      "learning_rate": 8.945340042509797e-07,
+      "loss": 2.0157,
+      "step": 192
+    },
+    {
+      "epoch": 0.946078431372549,
+      "grad_norm": 0.696194052696228,
+      "learning_rate": 7.520163829944804e-07,
+      "loss": 2.3783,
+      "step": 193
+    },
+    {
+      "epoch": 0.9509803921568627,
+      "grad_norm": 0.6186797618865967,
+      "learning_rate": 6.217723917238128e-07,
+      "loss": 2.0641,
+      "step": 194
+    },
+    {
+      "epoch": 0.9558823529411765,
+      "grad_norm": 0.7474207878112793,
+      "learning_rate": 5.038344899721436e-07,
+      "loss": 2.3323,
+      "step": 195
+    },
+    {
+      "epoch": 0.9607843137254902,
+      "grad_norm": 0.6354583501815796,
+      "learning_rate": 3.9823207033710676e-07,
+      "loss": 2.0189,
+      "step": 196
+    },
+    {
+      "epoch": 0.9656862745098039,
+      "grad_norm": 0.6712233424186707,
+      "learning_rate": 3.0499145115561176e-07,
+      "loss": 2.1329,
+      "step": 197
+    },
+    {
+      "epoch": 0.9705882352941176,
+      "grad_norm": 0.6209347248077393,
+      "learning_rate": 2.2413586994470825e-07,
+      "loss": 2.3842,
+      "step": 198
+    },
+    {
+      "epoch": 0.9754901960784313,
+      "grad_norm": 0.6732835173606873,
+      "learning_rate": 1.5568547761034004e-07,
+      "loss": 2.4314,
+      "step": 199
+    },
+    {
+      "epoch": 0.9803921568627451,
+      "grad_norm": 0.8073440194129944,
+      "learning_rate": 9.965733342532924e-08,
+      "loss": 2.5627,
+      "step": 200
+    },
+    {
+      "epoch": 0.9852941176470589,
+      "grad_norm": 0.6623374223709106,
+      "learning_rate": 5.606540077782163e-08,
+      "loss": 1.9204,
+      "step": 201
+    },
+    {
+      "epoch": 0.9901960784313726,
+      "grad_norm": 0.5475188493728638,
+      "learning_rate": 2.4920543691309138e-08,
+      "loss": 1.7823,
+      "step": 202
+    },
+    {
+      "epoch": 0.9950980392156863,
+      "grad_norm": 0.5792528390884399,
+      "learning_rate": 6.2305241171345395e-09,
+      "loss": 1.8127,
+      "step": 203
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.741917610168457,
+      "learning_rate": 0.0,
+      "loss": 2.1431,
+      "step": 204
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2652053607677952.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null