Training in progress, step 3600, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7564ca0da7a36af4a49e6efe1b662d7b48552179d9139c4a0001f28fb52be0f7
 size 48552

 version https://git-lfs.github.com/spec/v1
+oid sha256:5cbdc2b9825166869a6fac2e63a5a97fa4b37b5b84f026246356242c3f1b4f3c
 size 48552

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0df3d50fec9eb8da6ca5775a03ebf1ccb07ed80efaff1d783e7ae0d06f549730
 size 107046

 version https://git-lfs.github.com/spec/v1
+oid sha256:f3ccbbe76b5289cfdf27cae32ff8667d16da79bd900a109b903abe92fb00891a
 size 107046

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96f94a9e66b44ef92a231133a85ce908f6da4522f199e6c8f8ae6ace2cb26f13
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:aebc4da58173c003f7aad6d162278501a1be585d313d2d5f53369f2e219408a6
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e1024000b4f413660ff937a139f75de92e5c98f66c23ae365cd7ddd5af918b5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0afe5275623b07807c5bf749b62f86b7a1ab63345ab37a5eef17970d4cb1036
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 11.898159980773926,
   "best_model_checkpoint": "miner_id_24/checkpoint-3450",
-  "epoch": 28.69022869022869,
   "eval_steps": 150,
-  "global_step": 3450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -682,6 +682,35 @@
       "eval_samples_per_second": 225.536,
       "eval_steps_per_second": 57.49,
       "step": 3450
     }
   ],
   "logging_steps": 50,
@@ -696,7 +725,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -710,7 +739,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4491195580416.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 11.898159980773926,
   "best_model_checkpoint": "miner_id_24/checkpoint-3450",
+  "epoch": 29.93762993762994,
   "eval_steps": 150,
+  "global_step": 3600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 225.536,
       "eval_steps_per_second": 57.49,
       "step": 3450
+    },
+    {
+      "epoch": 29.106029106029105,
+      "grad_norm": 0.05766765773296356,
+      "learning_rate": 4.2084942084942086e-05,
+      "loss": 12.0737,
+      "step": 3500
+    },
+    {
+      "epoch": 29.52182952182952,
+      "grad_norm": 0.046123795211315155,
+      "learning_rate": 4.124559341950647e-05,
+      "loss": 11.9,
+      "step": 3550
+    },
+    {
+      "epoch": 29.93762993762994,
+      "grad_norm": 0.09343673288822174,
+      "learning_rate": 4.0406244754070844e-05,
+      "loss": 11.901,
+      "step": 3600
+    },
+    {
+      "epoch": 29.93762993762994,
+      "eval_loss": 11.898192405700684,
+      "eval_runtime": 0.4526,
+      "eval_samples_per_second": 225.366,
+      "eval_steps_per_second": 57.446,
+      "step": 3600
     }
   ],
   "logging_steps": 50,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 4686500364288.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null