Training in progress, step 2050, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e0865878ab8c84da9c28b5beeffc5d1ca448d0538c6a414a4c98f17773bd3041
 size 70430032

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4b9ebb7d0a4d9ba058b3b8554694658447acc65ca78331821e03c4bb71ef4f5
 size 70430032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ffcb646fe5e9bd792eaca38e55b80b62221173cee7784bced5245bb7ef86302
 size 141058579

 version https://git-lfs.github.com/spec/v1
+oid sha256:66eb74738fb84d8442383b2e6539dfaee12bcb030b7b71e7b396f48ae1ca113c
 size 141058579

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61c0f56c5d7a9ebe68efe0b2374595948fa35446985c60cf71092048ae4657e3
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e3a9215814521eedf613c295d42129542ebe5c44e1f5bf54c74a569a258fb7a
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:331c76566887bc46d0c9e202654ba3a432bd85731f1fc4302d3f95c92b195cb9
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:88387d21c5e510a789d0d790ff99b950be09036cfa7a0c5c786c5bf5cf53ad60
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.4352,
   "eval_steps": 500,
-  "global_step": 2040,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2058,6 +2058,16 @@
       "mean_token_accuracy": 0.7496299520134926,
       "num_tokens": 9449045.0,
       "step": 2040
     }
   ],
   "logging_steps": 10,
@@ -2077,7 +2087,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.483463241466368e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.43733333333333335,
   "eval_steps": 500,
+  "global_step": 2050,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "mean_token_accuracy": 0.7496299520134926,
       "num_tokens": 9449045.0,
       "step": 2040
+    },
+    {
+      "entropy": 0.919689030200243,
+      "epoch": 0.43733333333333335,
+      "grad_norm": 0.33956724405288696,
+      "learning_rate": 9.111361614629022e-05,
+      "loss": 0.9860305786132812,
+      "mean_token_accuracy": 0.7708889573812485,
+      "num_tokens": 9498718.0,
+      "step": 2050
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 4.507850259279667e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null