Training in progress, step 200, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b063b3efc31e864b01f1e5f00ff69fc6997d43d82d532890188f2690a2f9e74f
 size 12589528
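The file above is the PEFT adapter saved at this step (adapter weights only, roughly 12.6 MB). As a rough illustration of how such a checkpoint is usually consumed, the sketch below attaches the adapter to its base model with peft.PeftModel.from_pretrained; the base-model id is a placeholder (this commit does not name it), and it assumes the checkpoint directory also contains the adapter_config.json that PEFT writes alongside adapter_model.safetensors, which simply did not change here.

    # Illustrative only: attach the saved adapter to its (unnamed) base model.
    from transformers import AutoModelForCausalLM
    from peft import PeftModel

    BASE_MODEL_ID = "your-base-model-id"  # placeholder; not specified in this commit
    base = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID)
    # Reads adapter_config.json and adapter_model.safetensors from the checkpoint dir.
    model = PeftModel.from_pretrained(base, "last-checkpoint")
    model.eval()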
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6b37a8517149db473d53708204aa97d1e222c7f26c8324e15d3ba3ea7df2e4b8
 size 6448570
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e6c606a32343ac7e8e282a073eb25029faebf1e77ae5fcdfea0e69871be58948
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b60d6f1383abda4776549360effee800fe6cfe2c0604503e9e3fbaa79347f790
 size 1064
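All four files above are tracked with Git LFS, so the diff only shows the pointer text: the spec version, the SHA-256 of the actual blob, and its size in bytes. A minimal, standard-library sketch of checking a downloaded file against its pointer (the path, hash, and size are taken from the scheduler.pt pointer in this commit; the helper itself is illustrative, not part of the repo):

    import hashlib
    from pathlib import Path

    def sha256_of(path: Path) -> str:
        # Stream the file so large checkpoint blobs do not need to fit in memory.
        h = hashlib.sha256()
        with path.open("rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                h.update(chunk)
        return h.hexdigest()

    blob = Path("last-checkpoint/scheduler.pt")
    expected_oid = "b60d6f1383abda4776549360effee800fe6cfe2c0604503e9e3fbaa79347f790"
    expected_size = 1064

    assert blob.stat().st_size == expected_size, "size mismatch"
    assert sha256_of(blob) == expected_oid, "sha256 mismatch"
    print("pointer and blob agree")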
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 7.
-  "best_model_checkpoint": "miner_id_24/checkpoint-
-  "epoch": 0.
+  "best_metric": 7.732388019561768,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.19356399709654004,
   "eval_steps": 50,
-  "global_step":
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 278.679,
       "eval_steps_per_second": 69.67,
       "step": 150
+    },
+    {
+      "epoch": 0.14614081780788774,
+      "grad_norm": 7867.3369140625,
+      "learning_rate": 1.0872630680850196e-05,
+      "loss": 30.6436,
+      "step": 151
+    },
+    {
+      "epoch": 0.14710863779337044,
+      "grad_norm": 6650.4599609375,
+      "learning_rate": 1.0456712550462898e-05,
+      "loss": 29.6942,
+      "step": 152
+    },
+    {
+      "epoch": 0.14807645777885314,
+      "grad_norm": 8760.8125,
+      "learning_rate": 1.0047504309801104e-05,
+      "loss": 29.4166,
+      "step": 153
+    },
+    {
+      "epoch": 0.14904427776433585,
+      "grad_norm": 10957.2548828125,
+      "learning_rate": 9.645117832311886e-06,
+      "loss": 30.1486,
+      "step": 154
+    },
+    {
+      "epoch": 0.15001209774981852,
+      "grad_norm": 7686.2275390625,
+      "learning_rate": 9.249663126440394e-06,
+      "loss": 28.8037,
+      "step": 155
+    },
+    {
+      "epoch": 0.15097991773530123,
+      "grad_norm": 8248.8095703125,
+      "learning_rate": 8.861248305554624e-06,
+      "loss": 27.5932,
+      "step": 156
+    },
+    {
+      "epoch": 0.15194773772078393,
+      "grad_norm": 8967.9853515625,
+      "learning_rate": 8.47997955838829e-06,
+      "loss": 27.7874,
+      "step": 157
+    },
+    {
+      "epoch": 0.15291555770626664,
+      "grad_norm": 5873.86083984375,
+      "learning_rate": 8.10596112000994e-06,
+      "loss": 28.0216,
+      "step": 158
+    },
+    {
+      "epoch": 0.15388337769174934,
+      "grad_norm": 5138.7294921875,
+      "learning_rate": 7.739295243326067e-06,
+      "loss": 26.9419,
+      "step": 159
+    },
+    {
+      "epoch": 0.15485119767723204,
+      "grad_norm": 8216.4208984375,
+      "learning_rate": 7.380082171126228e-06,
+      "loss": 27.1432,
+      "step": 160
+    },
+    {
+      "epoch": 0.15581901766271475,
+      "grad_norm": 7475.0888671875,
+      "learning_rate": 7.028420108677635e-06,
+      "loss": 29.0104,
+      "step": 161
+    },
+    {
+      "epoch": 0.15678683764819742,
+      "grad_norm": 9634.138671875,
+      "learning_rate": 6.684405196876842e-06,
+      "loss": 29.0601,
+      "step": 162
+    },
+    {
+      "epoch": 0.15775465763368013,
+      "grad_norm": 5514.5380859375,
+      "learning_rate": 6.3481314859657675e-06,
+      "loss": 28.4319,
+      "step": 163
+    },
+    {
+      "epoch": 0.15872247761916283,
+      "grad_norm": 7093.63623046875,
+      "learning_rate": 6.019690909819298e-06,
+      "loss": 27.8845,
+      "step": 164
+    },
+    {
+      "epoch": 0.15969029760464554,
+      "grad_norm": 6054.31494140625,
+      "learning_rate": 5.6991732608115e-06,
+      "loss": 30.1447,
+      "step": 165
+    },
+    {
+      "epoch": 0.16065811759012824,
+      "grad_norm": 6758.61328125,
+      "learning_rate": 5.386666165267256e-06,
+      "loss": 26.6472,
+      "step": 166
+    },
+    {
+      "epoch": 0.16162593757561095,
+      "grad_norm": 5584.626953125,
+      "learning_rate": 5.08225505950613e-06,
+      "loss": 30.3575,
+      "step": 167
+    },
+    {
+      "epoch": 0.16259375756109365,
+      "grad_norm": 11202.7998046875,
+      "learning_rate": 4.786023166484913e-06,
+      "loss": 27.5054,
+      "step": 168
+    },
+    {
+      "epoch": 0.16356157754657633,
+      "grad_norm": 9054.27734375,
+      "learning_rate": 4.498051473045291e-06,
+      "loss": 28.3799,
+      "step": 169
+    },
+    {
+      "epoch": 0.16452939753205903,
+      "grad_norm": 8983.76953125,
+      "learning_rate": 4.218418707772886e-06,
+      "loss": 30.1843,
+      "step": 170
+    },
+    {
+      "epoch": 0.16549721751754173,
+      "grad_norm": 6682.33203125,
+      "learning_rate": 3.947201319473587e-06,
+      "loss": 27.3203,
+      "step": 171
+    },
+    {
+      "epoch": 0.16646503750302444,
+      "grad_norm": 7163.20849609375,
+      "learning_rate": 3.684473456273278e-06,
+      "loss": 30.9029,
+      "step": 172
+    },
+    {
+      "epoch": 0.16743285748850714,
+      "grad_norm": 8973.98046875,
+      "learning_rate": 3.4303069453464383e-06,
+      "loss": 31.2469,
+      "step": 173
+    },
+    {
+      "epoch": 0.16840067747398985,
+      "grad_norm": 10363.966796875,
+      "learning_rate": 3.184771273279312e-06,
+      "loss": 26.7013,
+      "step": 174
+    },
+    {
+      "epoch": 0.16936849745947255,
+      "grad_norm": 7765.19970703125,
+      "learning_rate": 2.947933567072987e-06,
+      "loss": 28.3753,
+      "step": 175
+    },
+    {
+      "epoch": 0.17033631744495523,
+      "grad_norm": 7376.22705078125,
+      "learning_rate": 2.719858575791534e-06,
+      "loss": 29.2217,
+      "step": 176
+    },
+    {
+      "epoch": 0.17130413743043793,
+      "grad_norm": 4901.16845703125,
+      "learning_rate": 2.500608652860256e-06,
+      "loss": 30.3474,
+      "step": 177
+    },
+    {
+      "epoch": 0.17227195741592063,
+      "grad_norm": 10761.5400390625,
+      "learning_rate": 2.2902437390188737e-06,
+      "loss": 33.722,
+      "step": 178
+    },
+    {
+      "epoch": 0.17323977740140334,
+      "grad_norm": 15606.84375,
+      "learning_rate": 2.0888213459343587e-06,
+      "loss": 28.4563,
+      "step": 179
+    },
+    {
+      "epoch": 0.17420759738688604,
+      "grad_norm": 12735.939453125,
+      "learning_rate": 1.8963965404777875e-06,
+      "loss": 30.7477,
+      "step": 180
+    },
+    {
+      "epoch": 0.17517541737236875,
+      "grad_norm": 15309.6728515625,
+      "learning_rate": 1.7130219296696263e-06,
+      "loss": 26.2339,
+      "step": 181
+    },
+    {
+      "epoch": 0.17614323735785145,
+      "grad_norm": 14952.8740234375,
+      "learning_rate": 1.5387476462974824e-06,
+      "loss": 29.1009,
+      "step": 182
+    },
+    {
+      "epoch": 0.17711105734333413,
+      "grad_norm": 8535.17578125,
+      "learning_rate": 1.3736213352103147e-06,
+      "loss": 31.472,
+      "step": 183
+    },
+    {
+      "epoch": 0.17807887732881683,
+      "grad_norm": 8864.3740234375,
+      "learning_rate": 1.2176881402928002e-06,
+      "loss": 32.8793,
+      "step": 184
+    },
+    {
+      "epoch": 0.17904669731429954,
+      "grad_norm": 16276.65234375,
+      "learning_rate": 1.0709906921234367e-06,
+      "loss": 32.0176,
+      "step": 185
+    },
+    {
+      "epoch": 0.18001451729978224,
+      "grad_norm": 11761.4423828125,
+      "learning_rate": 9.33569096319799e-07,
+      "loss": 33.7602,
+      "step": 186
+    },
+    {
+      "epoch": 0.18098233728526494,
+      "grad_norm": 16851.8828125,
+      "learning_rate": 8.054609225740255e-07,
+      "loss": 30.4344,
+      "step": 187
+    },
+    {
+      "epoch": 0.18195015727074765,
+      "grad_norm": 16993.369140625,
+      "learning_rate": 6.867011943816724e-07,
+      "loss": 32.7072,
+      "step": 188
+    },
+    {
+      "epoch": 0.18291797725623035,
+      "grad_norm": 7335.87451171875,
+      "learning_rate": 5.77322379466617e-07,
+      "loss": 29.7493,
+      "step": 189
+    },
+    {
+      "epoch": 0.18388579724171303,
+      "grad_norm": 29962.4609375,
+      "learning_rate": 4.773543809047186e-07,
+      "loss": 30.2235,
+      "step": 190
+    },
+    {
+      "epoch": 0.18485361722719573,
+      "grad_norm": 23285.44140625,
+      "learning_rate": 3.868245289486027e-07,
+      "loss": 29.8285,
+      "step": 191
+    },
+    {
+      "epoch": 0.18582143721267844,
+      "grad_norm": 16125.1630859375,
+      "learning_rate": 3.0575757355586817e-07,
+      "loss": 33.2887,
+      "step": 192
+    },
+    {
+      "epoch": 0.18678925719816114,
+      "grad_norm": 14776.724609375,
+      "learning_rate": 2.3417567762266497e-07,
+      "loss": 35.2345,
+      "step": 193
+    },
+    {
+      "epoch": 0.18775707718364384,
+      "grad_norm": 23442.9140625,
+      "learning_rate": 1.7209841092460043e-07,
+      "loss": 33.662,
+      "step": 194
+    },
+    {
+      "epoch": 0.18872489716912655,
+      "grad_norm": 18744.119140625,
+      "learning_rate": 1.1954274476655534e-07,
+      "loss": 34.7126,
+      "step": 195
+    },
+    {
+      "epoch": 0.18969271715460925,
+      "grad_norm": 16859.357421875,
+      "learning_rate": 7.652304734289127e-08,
+      "loss": 32.0933,
+      "step": 196
+    },
+    {
+      "epoch": 0.19066053714009193,
+      "grad_norm": 12730.04296875,
+      "learning_rate": 4.30510798093342e-08,
+      "loss": 36.5268,
+      "step": 197
+    },
+    {
+      "epoch": 0.19162835712557463,
+      "grad_norm": 12937.75390625,
+      "learning_rate": 1.9135993067588284e-08,
+      "loss": 35.9148,
+      "step": 198
+    },
+    {
+      "epoch": 0.19259617711105734,
+      "grad_norm": 13618.1806640625,
+      "learning_rate": 4.784325263584854e-09,
+      "loss": 36.4668,
+      "step": 199
+    },
+    {
+      "epoch": 0.19356399709654004,
+      "grad_norm": 27354.447265625,
+      "learning_rate": 0.0,
+      "loss": 47.5892,
+      "step": 200
+    },
+    {
+      "epoch": 0.19356399709654004,
+      "eval_loss": 7.732388019561768,
+      "eval_runtime": 6.2489,
+      "eval_samples_per_second": 278.448,
+      "eval_steps_per_second": 69.612,
+      "step": 200
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 1884929881276416.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
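The block added above is a run of per-step records for optimizer steps 151 through 200 (epoch, grad_norm, learning_rate, loss), closed by the step-200 evaluation whose eval_loss of 7.732388019561768 becomes the new best_metric, after which should_training_stop flips to true. A small sketch of reading that series back out of the saved state (standard library only; the path is the one used by this checkpoint, and log_history is the key the Hugging Face Trainer normally uses for this list, which the hunks above do not show by name):

    import json
    from pathlib import Path

    state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())
    print("best:", state["best_metric"], "from", state["best_model_checkpoint"])

    # Training records carry "loss"; evaluation records carry "eval_loss" instead.
    for record in state.get("log_history", []):  # key name assumed, not visible in the diff above
        if "loss" in record:
            print(record["step"], record["loss"], record["learning_rate"])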