Training in progress, step 120, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +153 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4415f68d0ad9aa86820b176ef18920044ae9992e448da9f88d5a4fe7bd4c2da
 size 522227376

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5ff808bb68c434ef997edf614255f797d07bb051473afe1e16e7400eef2d707
 size 522227376

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be584c76ae22a24595b5ef2a7ca110dec93dd9371debbbdd90dd6049669c081e
 size 1044601082

 version https://git-lfs.github.com/spec/v1
+oid sha256:72b605ba105bea4716a13c7a3589f3223c33a38a1550b5f4f5924a69ae1dfb5a
 size 1044601082

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b220ca7d0891e3ed41ffbbe0e13d38c90b8ebd3e153cdd98aa8bbe43696039ba
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:bb09e0c4906589c65d02131fb9256b534e79ccc5846887453f3386448bf9a73b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b97b319aeb7599bcb17c039089ff38d423203de2450979c1b08a5047fa1c81f3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fb8d16b823fc08a6bdb32c62fea04b073580a8731b10b24ab53c73ef146cf930
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 2.3958656787872314,
   "best_model_checkpoint": "miner_id_24/checkpoint-20",
-  "epoch": 0.10903267573000784,
   "eval_steps": 20,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -755,6 +755,154 @@
       "eval_samples_per_second": 1.999,
       "eval_steps_per_second": 1.999,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -769,7 +917,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 4
       }
     },
     "TrainerControl": {
@@ -778,12 +926,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.32844080857088e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 2.3958656787872314,
   "best_model_checkpoint": "miner_id_24/checkpoint-20",
+  "epoch": 0.13083921087600942,
   "eval_steps": 20,
+  "global_step": 120,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1.999,
       "eval_steps_per_second": 1.999,
       "step": 100
+    },
+    {
+      "epoch": 0.11012300248730791,
+      "grad_norm": 543.7817993164062,
+      "learning_rate": 0.0004986414340225785,
+      "loss": 161.0186,
+      "step": 101
+    },
+    {
+      "epoch": 0.111213329244608,
+      "grad_norm": 93.4175033569336,
+      "learning_rate": 0.0004986114391942372,
+      "loss": 95.1064,
+      "step": 102
+    },
+    {
+      "epoch": 0.11230365600190807,
+      "grad_norm": 42.3511848449707,
+      "learning_rate": 0.0004985811177760075,
+      "loss": 89.4123,
+      "step": 103
+    },
+    {
+      "epoch": 0.11339398275920815,
+      "grad_norm": 55.580867767333984,
+      "learning_rate": 0.0004985504698077212,
+      "loss": 87.5712,
+      "step": 104
+    },
+    {
+      "epoch": 0.11448430951650823,
+      "grad_norm": 31.689430236816406,
+      "learning_rate": 0.0004985194953296393,
+      "loss": 80.7536,
+      "step": 105
+    },
+    {
+      "epoch": 0.1155746362738083,
+      "grad_norm": 26.954017639160156,
+      "learning_rate": 0.0004984881943824515,
+      "loss": 75.9416,
+      "step": 106
+    },
+    {
+      "epoch": 0.11666496303110839,
+      "grad_norm": 29.87924575805664,
+      "learning_rate": 0.0004984565670072765,
+      "loss": 79.545,
+      "step": 107
+    },
+    {
+      "epoch": 0.11775528978840846,
+      "grad_norm": 28.872543334960938,
+      "learning_rate": 0.0004984246132456617,
+      "loss": 71.7809,
+      "step": 108
+    },
+    {
+      "epoch": 0.11884561654570855,
+      "grad_norm": 30.724918365478516,
+      "learning_rate": 0.0004983923331395836,
+      "loss": 71.9761,
+      "step": 109
+    },
+    {
+      "epoch": 0.11993594330300862,
+      "grad_norm": 31.383636474609375,
+      "learning_rate": 0.0004983597267314469,
+      "loss": 75.5666,
+      "step": 110
+    },
+    {
+      "epoch": 0.1210262700603087,
+      "grad_norm": 36.043067932128906,
+      "learning_rate": 0.0004983267940640854,
+      "loss": 75.1238,
+      "step": 111
+    },
+    {
+      "epoch": 0.12211659681760878,
+      "grad_norm": 35.503814697265625,
+      "learning_rate": 0.0004982935351807612,
+      "loss": 78.1812,
+      "step": 112
+    },
+    {
+      "epoch": 0.12320692357490885,
+      "grad_norm": 36.39851379394531,
+      "learning_rate": 0.0004982599501251649,
+      "loss": 72.4006,
+      "step": 113
+    },
+    {
+      "epoch": 0.12429725033220894,
+      "grad_norm": 32.95728302001953,
+      "learning_rate": 0.0004982260389414159,
+      "loss": 72.0358,
+      "step": 114
+    },
+    {
+      "epoch": 0.125387577089509,
+      "grad_norm": 34.838993072509766,
+      "learning_rate": 0.0004981918016740617,
+      "loss": 75.6014,
+      "step": 115
+    },
+    {
+      "epoch": 0.1264779038468091,
+      "grad_norm": 32.99583435058594,
+      "learning_rate": 0.0004981572383680784,
+      "loss": 72.1847,
+      "step": 116
+    },
+    {
+      "epoch": 0.12756823060410916,
+      "grad_norm": 34.666954040527344,
+      "learning_rate": 0.0004981223490688702,
+      "loss": 70.2274,
+      "step": 117
+    },
+    {
+      "epoch": 0.12865855736140924,
+      "grad_norm": 37.92626953125,
+      "learning_rate": 0.0004980871338222698,
+      "loss": 76.0094,
+      "step": 118
+    },
+    {
+      "epoch": 0.12974888411870933,
+      "grad_norm": 33.52912521362305,
+      "learning_rate": 0.0004980515926745376,
+      "loss": 77.3571,
+      "step": 119
+    },
+    {
+      "epoch": 0.13083921087600942,
+      "grad_norm": 34.129905700683594,
+      "learning_rate": 0.0004980157256723628,
+      "loss": 70.9215,
+      "step": 120
+    },
+    {
+      "epoch": 0.13083921087600942,
+      "eval_loss": 2.417283535003662,
+      "eval_runtime": 74.0366,
+      "eval_samples_per_second": 1.999,
+      "eval_steps_per_second": 1.999,
+      "step": 120
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 5
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.594128970285056e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null