Training in progress, step 3630, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e91fd52db8bb9783452c0d5dd7c0af8a657867e66c06b6e3c2761fd5dc918673
 size 70430032

 version https://git-lfs.github.com/spec/v1
+oid sha256:d44aede43cb1a41b83e1247683ea557b147107a67f306ff62d1398bbdad92c1b
 size 70430032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9208e903089f9a7f3069e3625af4cafbe4549e30ac6b829aa123976463344dde
 size 141058579

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b31aaba1fda764e711b455094d3ead4cbaea7041b9472d183caf81a23b9747a
 size 141058579

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eeb52f222e24a9a9e42fa6918b67db8ffa8ef2f4f4ec3281838b0693b65f0a25
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:28be791632a2ce933cf99b51e2bfeaac4f925036c002da006e45360cf39931f0
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8422d6bece112fd75d3c138f96db21f37100a7fc66a031ae0f6c8cffc187aaa0
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:f3875b65aa45ddb86163d814c0192e0142bb08111de8324f09ea0668c6f3cca4
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7722666666666667,
   "eval_steps": 500,
-  "global_step": 3620,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3638,6 +3638,16 @@
       "mean_token_accuracy": 0.7547581911087036,
       "num_tokens": 16850703.0,
       "step": 3620
     }
   ],
   "logging_steps": 10,
@@ -3657,7 +3667,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.983529744959283e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7744,
   "eval_steps": 500,
+  "global_step": 3630,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "mean_token_accuracy": 0.7547581911087036,
       "num_tokens": 16850703.0,
       "step": 3620
+    },
+    {
+      "entropy": 0.8650934003293514,
+      "epoch": 0.7744,
+      "grad_norm": 0.23581688106060028,
+      "learning_rate": 7.031891161226608e-05,
+      "loss": 0.9123600959777832,
+      "mean_token_accuracy": 0.7830170378088951,
+      "num_tokens": 16894959.0,
+      "step": 3630
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 8.003783018612736e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null