Training in progress, step 320, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e194f8a417bfeb0d7fe313f3e3ce3e9281c81355a752e272d4a9f86904ff4e3
 size 70430032

 version https://git-lfs.github.com/spec/v1
+oid sha256:3abf244d469473fa52891e3f31f2d5d1c5ff073f4d2b5bb817dc5d30dfa82332
 size 70430032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f482ae30d2e8525a1e39ed20a11830ddc2d28d3bf9119c743442c9a38dcfabcc
 size 141058579

 version https://git-lfs.github.com/spec/v1
+oid sha256:bfa5d8a7157df0a5eacdcc6bd5ccb31f0477ca14700a2671c6f678d1f94dac93
 size 141058579

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d3197e14fd71b872050bfcdf47b8047e40bb95283d15d709ca7cdc3d8dc2a56
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0bec828ee85f5f35df8d0034d7d501451bd46f134d0d13918434bfd847feba5
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:afd6965293e39960c24ab2eb262951e9e03f24a87a65c4526c3d160cd3f52c1f
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c727c5faaaf0e8dbfe94f897b9183e692e39806ea1639c82f3d79733c504fc7
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.06613333333333334,
   "eval_steps": 500,
-  "global_step": 310,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -328,6 +328,16 @@
       "mean_token_accuracy": 0.7540638357400894,
       "num_tokens": 1421027.0,
       "step": 310
     }
   ],
   "logging_steps": 10,
@@ -347,7 +357,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6755386875217920.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.06826666666666667,
   "eval_steps": 500,
+  "global_step": 320,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "mean_token_accuracy": 0.7540638357400894,
       "num_tokens": 1421027.0,
       "step": 310
+    },
+    {
+      "entropy": 1.0486552365124227,
+      "epoch": 0.06826666666666667,
+      "grad_norm": 0.2840607762336731,
+      "learning_rate": 9.999891867457112e-05,
+      "loss": 1.1424532890319825,
+      "mean_token_accuracy": 0.7420963421463966,
+      "num_tokens": 1472539.0,
+      "step": 320
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 6994408668005376.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null