Training in progress, step 330, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3abf244d469473fa52891e3f31f2d5d1c5ff073f4d2b5bb817dc5d30dfa82332
 size 70430032

 version https://git-lfs.github.com/spec/v1
+oid sha256:080e280080559f2791d55f3e9b530866ef137afce957188c20fa52869278185a
 size 70430032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bfa5d8a7157df0a5eacdcc6bd5ccb31f0477ca14700a2671c6f678d1f94dac93
 size 141058579

 version https://git-lfs.github.com/spec/v1
+oid sha256:9817785732336e82575cd690ac86eee871b24cb8b95746ad574d47c0f1ba7156
 size 141058579

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0bec828ee85f5f35df8d0034d7d501451bd46f134d0d13918434bfd847feba5
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:872fe76418f47a93b19f7178149bfab3a4c567f9bf8f19fe875e06de31f34354
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c727c5faaaf0e8dbfe94f897b9183e692e39806ea1639c82f3d79733c504fc7
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:f133a959bfddbf7e52765c340dc6f5b0914229e18f54079bc6ff8b34af89bb5f
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.06826666666666667,
   "eval_steps": 500,
-  "global_step": 320,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -338,6 +338,16 @@
       "mean_token_accuracy": 0.7420963421463966,
       "num_tokens": 1472539.0,
       "step": 320
     }
   ],
   "logging_steps": 10,
@@ -357,7 +367,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6994408668005376.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0704,
   "eval_steps": 500,
+  "global_step": 330,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "mean_token_accuracy": 0.7420963421463966,
       "num_tokens": 1472539.0,
       "step": 320
+    },
+    {
+      "entropy": 0.9518789499998093,
+      "epoch": 0.0704,
+      "grad_norm": 0.25352534651756287,
+      "learning_rate": 9.999748091322068e-05,
+      "loss": 0.9646738052368165,
+      "mean_token_accuracy": 0.7610545977950096,
+      "num_tokens": 1518725.0,
+      "step": 330
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 7212522925473792.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null