Training in progress, step 300, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09408008d6d7b34362def16448684fbf79127a0fa07e643a0aeeaff09e28de71
-size 124606

 version https://git-lfs.github.com/spec/v1
+oid sha256:7167716ea24b8d29d5dbe61735d7aa7044f3896076d6daa3ab9f0929b3b9a899
+size 124670

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6bca983b309063a996168bc9ba0246dee10aad731d5eafae85ac843af75455c4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d34319ff3df364313deadc6fed2a87af4dbaa25121577034e823762746bfdd8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90c3f5929acf9a48d389b53b92c7c5f336a9df7831dd2eeaa4b4512eed16ec0a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:007b96471e861c7aab51ec779dcb3b726506f4816d4cb4a61d83956481bc6738
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.6944444444444444,
   "eval_steps": 100,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1431,6 +1431,714 @@
       "eval_samples_per_second": 287.843,
       "eval_steps_per_second": 72.406,
       "step": 200
     }
   ],
   "logging_steps": 1,
@@ -1445,7 +2153,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -1454,12 +2162,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 75463150534656.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 1.0416666666666667,
   "eval_steps": 100,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 287.843,
       "eval_steps_per_second": 72.406,
       "step": 200
+    },
+    {
+      "epoch": 0.6979166666666666,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014887423068915388,
+      "loss": 0.0,
+      "step": 201
+    },
+    {
+      "epoch": 0.7013888888888888,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014838923755474746,
+      "loss": 0.0,
+      "step": 202
+    },
+    {
+      "epoch": 0.7048611111111112,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014790275363694446,
+      "loss": 0.0,
+      "step": 203
+    },
+    {
+      "epoch": 0.7083333333333334,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014741479392341942,
+      "loss": 0.0,
+      "step": 204
+    },
+    {
+      "epoch": 0.7118055555555556,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001469253734473135,
+      "loss": 0.0,
+      "step": 205
+    },
+    {
+      "epoch": 0.7152777777777778,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001464345072867713,
+      "loss": 0.0,
+      "step": 206
+    },
+    {
+      "epoch": 0.71875,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014594221056447632,
+      "loss": 0.0,
+      "step": 207
+    },
+    {
+      "epoch": 0.7222222222222222,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001454484984471849,
+      "loss": 0.0,
+      "step": 208
+    },
+    {
+      "epoch": 0.7256944444444444,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014495338614525926,
+      "loss": 0.0,
+      "step": 209
+    },
+    {
+      "epoch": 0.7291666666666666,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014445688891219862,
+      "loss": 0.0,
+      "step": 210
+    },
+    {
+      "epoch": 0.7326388888888888,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001439590220441694,
+      "loss": 0.0,
+      "step": 211
+    },
+    {
+      "epoch": 0.7361111111111112,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014345980087953405,
+      "loss": 0.0,
+      "step": 212
+    },
+    {
+      "epoch": 0.7395833333333334,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014295924079837826,
+      "loss": 0.0,
+      "step": 213
+    },
+    {
+      "epoch": 0.7430555555555556,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014245735722203735,
+      "loss": 0.0,
+      "step": 214
+    },
+    {
+      "epoch": 0.7465277777777778,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014195416561262113,
+      "loss": 0.0,
+      "step": 215
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014144968147253742,
+      "loss": 0.0,
+      "step": 216
+    },
+    {
+      "epoch": 0.7534722222222222,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014094392034401463,
+      "loss": 0.0,
+      "step": 217
+    },
+    {
+      "epoch": 0.7569444444444444,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014043689780862272,
+      "loss": 0.0,
+      "step": 218
+    },
+    {
+      "epoch": 0.7604166666666666,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013992862948679333,
+      "loss": 0.0,
+      "step": 219
+    },
+    {
+      "epoch": 0.7638888888888888,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001394191310373385,
+      "loss": 0.0,
+      "step": 220
+    },
+    {
+      "epoch": 0.7673611111111112,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013890841815696829,
+      "loss": 0.0,
+      "step": 221
+    },
+    {
+      "epoch": 0.7708333333333334,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013839650657980704,
+      "loss": 0.0,
+      "step": 222
+    },
+    {
+      "epoch": 0.7743055555555556,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013788341207690872,
+      "loss": 0.0,
+      "step": 223
+    },
+    {
+      "epoch": 0.7777777777777778,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013736915045577122,
+      "loss": 0.0,
+      "step": 224
+    },
+    {
+      "epoch": 0.78125,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013685373755984904,
+      "loss": 0.0,
+      "step": 225
+    },
+    {
+      "epoch": 0.7847222222222222,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001363371892680654,
+      "loss": 0.0,
+      "step": 226
+    },
+    {
+      "epoch": 0.7881944444444444,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013581952149432303,
+      "loss": 0.0,
+      "step": 227
+    },
+    {
+      "epoch": 0.7916666666666666,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001353007501870137,
+      "loss": 0.0,
+      "step": 228
+    },
+    {
+      "epoch": 0.7951388888888888,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013478089132852716,
+      "loss": 0.0,
+      "step": 229
+    },
+    {
+      "epoch": 0.7986111111111112,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001342599609347585,
+      "loss": 0.0,
+      "step": 230
+    },
+    {
+      "epoch": 0.8020833333333334,
+      "grad_norm": NaN,
+      "learning_rate": 0.000133737975054615,
+      "loss": 0.0,
+      "step": 231
+    },
+    {
+      "epoch": 0.8055555555555556,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013321494976952132,
+      "loss": 0.0,
+      "step": 232
+    },
+    {
+      "epoch": 0.8090277777777778,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013269090119292444,
+      "loss": 0.0,
+      "step": 233
+    },
+    {
+      "epoch": 0.8125,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013216584546979702,
+      "loss": 0.0,
+      "step": 234
+    },
+    {
+      "epoch": 0.8159722222222222,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013163979877614,
+      "loss": 0.0,
+      "step": 235
+    },
+    {
+      "epoch": 0.8194444444444444,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013111277731848442,
+      "loss": 0.0,
+      "step": 236
+    },
+    {
+      "epoch": 0.8229166666666666,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013058479733339185,
+      "loss": 0.0,
+      "step": 237
+    },
+    {
+      "epoch": 0.8263888888888888,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013005587508695444,
+      "loss": 0.0,
+      "step": 238
+    },
+    {
+      "epoch": 0.8298611111111112,
+      "grad_norm": NaN,
+      "learning_rate": 0.00012952602687429362,
+      "loss": 0.0,
+      "step": 239
+    },
+    {
+      "epoch": 0.8333333333333334,
+      "grad_norm": NaN,
+      "learning_rate": 0.00012899526901905822,
+      "loss": 0.0,
+      "step": 240
+    },
+    {
+      "epoch": 0.8368055555555556,
+      "grad_norm": NaN,
+      "learning_rate": 0.00012846361787292136,
+      "loss": 0.0,
+      "step": 241
+    },
+    {
+      "epoch": 0.8402777777777778,
+      "grad_norm": NaN,
+      "learning_rate": 0.00012793108981507694,
+      "loss": 0.0,
+      "step": 242
+    },
+    {
+      "epoch": 0.84375,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001273977012517348,
+      "loss": 0.0,
+      "step": 243
+    },
+    {
+      "epoch": 0.8472222222222222,
+      "grad_norm": NaN,
+      "learning_rate": 0.00012686346861561537,
+      "loss": 0.0,
+      "step": 244
+    },
+    {
+      "epoch": 0.8506944444444444,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001263284083654435,
+      "loss": 0.0,
+      "step": 245
+    },
+    {
+      "epoch": 0.8541666666666666,
+      "grad_norm": NaN,
+      "learning_rate": 0.00012579253698544125,
+      "loss": 0.0,
+      "step": 246
+    },
+    {
+      "epoch": 0.8576388888888888,
+      "grad_norm": NaN,
+      "learning_rate": 0.00012525587098482005,
+      "loss": 0.0,
+      "step": 247
+    },
+    {
+      "epoch": 0.8611111111111112,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001247184268972722,
+      "loss": 0.0,
+      "step": 248
+    },
+    {
+      "epoch": 0.8645833333333334,
+      "grad_norm": NaN,
+      "learning_rate": 0.00012418022128046142,
+      "loss": 0.0,
+      "step": 249
+    },
+    {
+      "epoch": 0.8680555555555556,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001236412707155127,
+      "loss": 0.0,
+      "step": 250
+    },
+    {
+      "epoch": 0.8715277777777778,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001231015918065016,
+      "loss": 0.0,
+      "step": 251
+    },
+    {
+      "epoch": 0.875,
+      "grad_norm": NaN,
+      "learning_rate": 0.00012256120117994245,
+      "loss": 0.0,
+      "step": 252
+    },
+    {
+      "epoch": 0.8784722222222222,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001220201154842765,
+      "loss": 0.0,
+      "step": 253
+    },
+    {
+      "epoch": 0.8819444444444444,
+      "grad_norm": NaN,
+      "learning_rate": 0.00012147835138935868,
+      "loss": 0.0,
+      "step": 254
+    },
+    {
+      "epoch": 0.8854166666666666,
+      "grad_norm": NaN,
+      "learning_rate": 0.00012093592558594416,
+      "loss": 0.0,
+      "step": 255
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": NaN,
+      "learning_rate": 0.00012039285478517417,
+      "loss": 0.0,
+      "step": 256
+    },
+    {
+      "epoch": 0.8923611111111112,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011984915571806108,
+      "loss": 0.0,
+      "step": 257
+    },
+    {
+      "epoch": 0.8958333333333334,
+      "grad_norm": NaN,
+      "learning_rate": 0.000119304845134973,
+      "loss": 0.0,
+      "step": 258
+    },
+    {
+      "epoch": 0.8993055555555556,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011875993980511773,
+      "loss": 0.0,
+      "step": 259
+    },
+    {
+      "epoch": 0.9027777777777778,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011821445651602616,
+      "loss": 0.0,
+      "step": 260
+    },
+    {
+      "epoch": 0.90625,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011766841207303498,
+      "loss": 0.0,
+      "step": 261
+    },
+    {
+      "epoch": 0.9097222222222222,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011712182329876902,
+      "loss": 0.0,
+      "step": 262
+    },
+    {
+      "epoch": 0.9131944444444444,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011657470703262296,
+      "loss": 0.0,
+      "step": 263
+    },
+    {
+      "epoch": 0.9166666666666666,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011602708013024255,
+      "loss": 0.0,
+      "step": 264
+    },
+    {
+      "epoch": 0.9201388888888888,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011547895946300528,
+      "loss": 0.0,
+      "step": 265
+    },
+    {
+      "epoch": 0.9236111111111112,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011493036191750066,
+      "loss": 0.0,
+      "step": 266
+    },
+    {
+      "epoch": 0.9270833333333334,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011438130439500986,
+      "loss": 0.0,
+      "step": 267
+    },
+    {
+      "epoch": 0.9305555555555556,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011383180381098517,
+      "loss": 0.0,
+      "step": 268
+    },
+    {
+      "epoch": 0.9340277777777778,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011328187709452884,
+      "loss": 0.0,
+      "step": 269
+    },
+    {
+      "epoch": 0.9375,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011273154118787132,
+      "loss": 0.0,
+      "step": 270
+    },
+    {
+      "epoch": 0.9409722222222222,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011218081304584957,
+      "loss": 0.0,
+      "step": 271
+    },
+    {
+      "epoch": 0.9444444444444444,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011162970963538462,
+      "loss": 0.0,
+      "step": 272
+    },
+    {
+      "epoch": 0.9479166666666666,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011107824793495876,
+      "loss": 0.0,
+      "step": 273
+    },
+    {
+      "epoch": 0.9513888888888888,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011052644493409255,
+      "loss": 0.0,
+      "step": 274
+    },
+    {
+      "epoch": 0.9548611111111112,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010997431763282137,
+      "loss": 0.0,
+      "step": 275
+    },
+    {
+      "epoch": 0.9583333333333334,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010942188304117182,
+      "loss": 0.0,
+      "step": 276
+    },
+    {
+      "epoch": 0.9618055555555556,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010886915817863736,
+      "loss": 0.0,
+      "step": 277
+    },
+    {
+      "epoch": 0.9652777777777778,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010831616007365435,
+      "loss": 0.0,
+      "step": 278
+    },
+    {
+      "epoch": 0.96875,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001077629057630771,
+      "loss": 0.0,
+      "step": 279
+    },
+    {
+      "epoch": 0.9722222222222222,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010720941229165322,
+      "loss": 0.0,
+      "step": 280
+    },
+    {
+      "epoch": 0.9756944444444444,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001066556967114984,
+      "loss": 0.0,
+      "step": 281
+    },
+    {
+      "epoch": 0.9791666666666666,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010610177608157117,
+      "loss": 0.0,
+      "step": 282
+    },
+    {
+      "epoch": 0.9826388888888888,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010554766746714706,
+      "loss": 0.0,
+      "step": 283
+    },
+    {
+      "epoch": 0.9861111111111112,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001049933879392933,
+      "loss": 0.0,
+      "step": 284
+    },
+    {
+      "epoch": 0.9895833333333334,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010443895457434252,
+      "loss": 0.0,
+      "step": 285
+    },
+    {
+      "epoch": 0.9930555555555556,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010388438445336676,
+      "loss": 0.0,
+      "step": 286
+    },
+    {
+      "epoch": 0.9965277777777778,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010332969466165143,
+      "loss": 0.0,
+      "step": 287
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010277490228816853,
+      "loss": 0.0,
+      "step": 288
+    },
+    {
+      "epoch": 1.0034722222222223,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010222002442505056,
+      "loss": 0.0,
+      "step": 289
+    },
+    {
+      "epoch": 1.0069444444444444,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010166507816706386,
+      "loss": 0.0,
+      "step": 290
+    },
+    {
+      "epoch": 1.0104166666666667,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010111008061108176,
+      "loss": 0.0,
+      "step": 291
+    },
+    {
+      "epoch": 1.0138888888888888,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010055504885555797,
+      "loss": 0.0,
+      "step": 292
+    },
+    {
+      "epoch": 1.0173611111111112,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 293
+    },
+    {
+      "epoch": 1.0208333333333333,
+      "grad_norm": NaN,
+      "learning_rate": 9.944495114444205e-05,
+      "loss": 0.0,
+      "step": 294
+    },
+    {
+      "epoch": 1.0243055555555556,
+      "grad_norm": NaN,
+      "learning_rate": 9.888991938891828e-05,
+      "loss": 0.0,
+      "step": 295
+    },
+    {
+      "epoch": 1.0277777777777777,
+      "grad_norm": NaN,
+      "learning_rate": 9.833492183293615e-05,
+      "loss": 0.0,
+      "step": 296
+    },
+    {
+      "epoch": 1.03125,
+      "grad_norm": NaN,
+      "learning_rate": 9.777997557494944e-05,
+      "loss": 0.0,
+      "step": 297
+    },
+    {
+      "epoch": 1.0347222222222223,
+      "grad_norm": NaN,
+      "learning_rate": 9.722509771183151e-05,
+      "loss": 0.0,
+      "step": 298
+    },
+    {
+      "epoch": 1.0381944444444444,
+      "grad_norm": NaN,
+      "learning_rate": 9.667030533834862e-05,
+      "loss": 0.0,
+      "step": 299
+    },
+    {
+      "epoch": 1.0416666666666667,
+      "grad_norm": NaN,
+      "learning_rate": 9.611561554663325e-05,
+      "loss": 0.0,
+      "step": 300
+    },
+    {
+      "epoch": 1.0416666666666667,
+      "eval_loss": NaN,
+      "eval_runtime": 1.6994,
+      "eval_samples_per_second": 285.4,
+      "eval_steps_per_second": 71.791,
+      "step": 300
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 113103460761600.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null