Training in progress, step 2500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +83 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d736be1f9ccc37e19f6ace25457bd3caecf74b78b6bd0a9c3c82bf78a0aa09c
 size 500770656

 version https://git-lfs.github.com/spec/v1
+oid sha256:fb3ef2bdae1f8b5428086a199f5d745478de4ff3d5cf2109c1bda2a315b5376e
 size 500770656

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e155cc132677ef963555f9b24629bc9313c22c472ca708fd3b6d6b2c09a3b97c
 size 134320806

 version https://git-lfs.github.com/spec/v1
+oid sha256:e542f6a8d209acfb7e9646e9a861e1364535a21abf254ae7b28c1bc690aa6c2b
 size 134320806

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:228e42bc899d2ecf95afbe4078371d96704b6df2b057bad532f7a23a19fd7a31
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a71c117db3d2138ce3e4cb621e3704e64fd4eb5c5aee72c3b7e77780e034491a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:107d1b031d80cd6355934d12b914afcffa03995e0a0b9696f39588911c8f3447
 size 2080

 version https://git-lfs.github.com/spec/v1
+oid sha256:bdae9d1ef7ba78274fffa78822b9ce8b8494e222e6b4e5b3e212211abfddfe81
 size 2080

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 2.529033899307251,
   "best_model_checkpoint": "miner_id_24/checkpoint-2200",
-  "epoch": 0.7454573691567014,
   "eval_steps": 100,
-  "global_step": 2400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1887,6 +1887,84 @@
       "eval_samples_per_second": 2.513,
       "eval_steps_per_second": 2.513,
       "step": 2400
     }
   ],
   "logging_steps": 10,
@@ -1901,7 +1979,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -1910,12 +1988,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.82713144016896e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 2.529033899307251,
   "best_model_checkpoint": "miner_id_24/checkpoint-2200",
+  "epoch": 0.7765180928715639,
   "eval_steps": 100,
+  "global_step": 2500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 2.513,
       "eval_steps_per_second": 2.513,
       "step": 2400
+    },
+    {
+      "epoch": 0.7485634415281877,
+      "grad_norm": 3.974891424179077,
+      "learning_rate": 0.00017132522771134973,
+      "loss": 2.7161,
+      "step": 2410
+    },
+    {
+      "epoch": 0.7516695138996738,
+      "grad_norm": 4.18734073638916,
+      "learning_rate": 0.00017109612235799432,
+      "loss": 2.7066,
+      "step": 2420
+    },
+    {
+      "epoch": 0.7547755862711601,
+      "grad_norm": 5.06152868270874,
+      "learning_rate": 0.00017086626030504704,
+      "loss": 2.6885,
+      "step": 2430
+    },
+    {
+      "epoch": 0.7578816586426463,
+      "grad_norm": 4.947213172912598,
+      "learning_rate": 0.00017063564155250788,
+      "loss": 2.7594,
+      "step": 2440
+    },
+    {
+      "epoch": 0.7609877310141326,
+      "grad_norm": 8.47783088684082,
+      "learning_rate": 0.00017040426610037684,
+      "loss": 2.7865,
+      "step": 2450
+    },
+    {
+      "epoch": 0.7640938033856188,
+      "grad_norm": 4.791199207305908,
+      "learning_rate": 0.00017017214850056916,
+      "loss": 2.7591,
+      "step": 2460
+    },
+    {
+      "epoch": 0.7671998757571051,
+      "grad_norm": 4.4141411781311035,
+      "learning_rate": 0.0001699392742011696,
+      "loss": 2.7741,
+      "step": 2470
+    },
+    {
+      "epoch": 0.7703059481285914,
+      "grad_norm": 4.622017860412598,
+      "learning_rate": 0.0001697056577540934,
+      "loss": 2.5143,
+      "step": 2480
+    },
+    {
+      "epoch": 0.7734120205000776,
+      "grad_norm": 5.536156177520752,
+      "learning_rate": 0.0001694713137112558,
+      "loss": 2.8695,
+      "step": 2490
+    },
+    {
+      "epoch": 0.7765180928715639,
+      "grad_norm": 6.285493850708008,
+      "learning_rate": 0.0001692362129688263,
+      "loss": 2.9384,
+      "step": 2500
+    },
+    {
+      "epoch": 0.7765180928715639,
+      "eval_loss": 2.551605224609375,
+      "eval_runtime": 25.8773,
+      "eval_samples_per_second": 2.512,
+      "eval_steps_per_second": 2.512,
+      "step": 2500
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.986595250176e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null