Training in progress, step 9200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +153 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d9b5d664aa6ebbaadcc20c76e1cc625239e90c19d6c2942efd64c81c21119518
 size 100690288

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9cbca2f45d6b52d4f7b6ebb7322fe228c587105c317fa5a3486bcddea3632ee
 size 100690288

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e614f71ec99257e2c5001fbb713d139188b185558f5ca1ba1b27966467efa588
 size 51345082

 version https://git-lfs.github.com/spec/v1
+oid sha256:50254272993d00750d20519785aed6da515890c737ef88754fedb4a1125894be
 size 51345082

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61062bd98b5506d7ad36e1f5b2bc2732781c5fe4228f62da914c162e474d444e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:02d52ff618a83e9102020a0283499806564b6579fb7c6addaa728c8a804eedd0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ffa49f78e8a130eb7bcf8fc251143bab34bf14a54d0d14a5f4a0a18dba60dbec
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:57b75f147ee39bab59d6712296fd45b943162e0434e26215e766d88ba6e6e775
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 2.2019271850585938,
   "best_model_checkpoint": "miner_id_24/checkpoint-8400",
-  "epoch": 0.1418987631157815,
   "eval_steps": 200,
-  "global_step": 9000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6675,6 +6675,154 @@
       "eval_samples_per_second": 16.176,
       "eval_steps_per_second": 8.096,
       "step": 9000
     }
   ],
   "logging_steps": 10,
@@ -6689,7 +6837,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
@@ -6698,12 +6846,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.868014841233408e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 2.2019271850585938,
   "best_model_checkpoint": "miner_id_24/checkpoint-8400",
+  "epoch": 0.14505206896279887,
   "eval_steps": 200,
+  "global_step": 9200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 16.176,
       "eval_steps_per_second": 8.096,
       "step": 9000
+    },
+    {
+      "epoch": 0.14205642840813237,
+      "grad_norm": 5.197229862213135,
+      "learning_rate": 0.0001988978579208574,
+      "loss": 9.1033,
+      "step": 9010
+    },
+    {
+      "epoch": 0.14221409370048324,
+      "grad_norm": 4.565831661224365,
+      "learning_rate": 0.00019889541187699707,
+      "loss": 8.8077,
+      "step": 9020
+    },
+    {
+      "epoch": 0.1423717589928341,
+      "grad_norm": 4.8584513664245605,
+      "learning_rate": 0.00019889296313689968,
+      "loss": 9.0339,
+      "step": 9030
+    },
+    {
+      "epoch": 0.14252942428518497,
+      "grad_norm": 4.712079048156738,
+      "learning_rate": 0.00019889051170063194,
+      "loss": 8.8577,
+      "step": 9040
+    },
+    {
+      "epoch": 0.14268708957753584,
+      "grad_norm": 9.281351089477539,
+      "learning_rate": 0.00019888805756826074,
+      "loss": 8.3711,
+      "step": 9050
+    },
+    {
+      "epoch": 0.1428447548698867,
+      "grad_norm": 4.656757354736328,
+      "learning_rate": 0.00019888560073985295,
+      "loss": 9.3672,
+      "step": 9060
+    },
+    {
+      "epoch": 0.14300242016223758,
+      "grad_norm": 5.0217108726501465,
+      "learning_rate": 0.00019888314121547558,
+      "loss": 9.3907,
+      "step": 9070
+    },
+    {
+      "epoch": 0.14316008545458844,
+      "grad_norm": 4.683655738830566,
+      "learning_rate": 0.00019888067899519567,
+      "loss": 8.6511,
+      "step": 9080
+    },
+    {
+      "epoch": 0.1433177507469393,
+      "grad_norm": 4.991017818450928,
+      "learning_rate": 0.00019887821407908033,
+      "loss": 8.6748,
+      "step": 9090
+    },
+    {
+      "epoch": 0.1434754160392902,
+      "grad_norm": 13.076788902282715,
+      "learning_rate": 0.0001988757464671968,
+      "loss": 8.3435,
+      "step": 9100
+    },
+    {
+      "epoch": 0.14363308133164107,
+      "grad_norm": 4.446813583374023,
+      "learning_rate": 0.00019887327615961234,
+      "loss": 9.5288,
+      "step": 9110
+    },
+    {
+      "epoch": 0.14379074662399194,
+      "grad_norm": 4.752020835876465,
+      "learning_rate": 0.00019887080315639428,
+      "loss": 9.3455,
+      "step": 9120
+    },
+    {
+      "epoch": 0.1439484119163428,
+      "grad_norm": 5.036296367645264,
+      "learning_rate": 0.0001988683274576101,
+      "loss": 8.9484,
+      "step": 9130
+    },
+    {
+      "epoch": 0.14410607720869367,
+      "grad_norm": 4.9117841720581055,
+      "learning_rate": 0.0001988658490633272,
+      "loss": 8.4929,
+      "step": 9140
+    },
+    {
+      "epoch": 0.14426374250104454,
+      "grad_norm": 13.374736785888672,
+      "learning_rate": 0.00019886336797361325,
+      "loss": 8.4415,
+      "step": 9150
+    },
+    {
+      "epoch": 0.1444214077933954,
+      "grad_norm": 4.678138256072998,
+      "learning_rate": 0.00019886088418853581,
+      "loss": 9.2699,
+      "step": 9160
+    },
+    {
+      "epoch": 0.14457907308574627,
+      "grad_norm": 6.370858669281006,
+      "learning_rate": 0.00019885839770816268,
+      "loss": 8.5141,
+      "step": 9170
+    },
+    {
+      "epoch": 0.14473673837809714,
+      "grad_norm": 5.328512191772461,
+      "learning_rate": 0.0001988559085325616,
+      "loss": 8.5896,
+      "step": 9180
+    },
+    {
+      "epoch": 0.144894403670448,
+      "grad_norm": 5.4744391441345215,
+      "learning_rate": 0.0001988534166618004,
+      "loss": 8.6385,
+      "step": 9190
+    },
+    {
+      "epoch": 0.14505206896279887,
+      "grad_norm": 99.89271545410156,
+      "learning_rate": 0.00019885092209594708,
+      "loss": 8.5823,
+      "step": 9200
+    },
+    {
+      "epoch": 0.14505206896279887,
+      "eval_loss": 2.203756093978882,
+      "eval_runtime": 62.7646,
+      "eval_samples_per_second": 16.203,
+      "eval_steps_per_second": 8.11,
+      "step": 9200
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 4
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.0089740855200973e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null