Training in progress, step 310, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af425da52570a98f34d1a384e039e2e483ed4a5b53a2df4e79aaf16ceaac5d1a
 size 70430032

 version https://git-lfs.github.com/spec/v1
+oid sha256:0e194f8a417bfeb0d7fe313f3e3ce3e9281c81355a752e272d4a9f86904ff4e3
 size 70430032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:277e32b2650dca6189545354afd8579d4fd2e96eca617720f6f93ba03fb51750
 size 141058579

 version https://git-lfs.github.com/spec/v1
+oid sha256:f482ae30d2e8525a1e39ed20a11830ddc2d28d3bf9119c743442c9a38dcfabcc
 size 141058579

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf93f0953cf38059935f8a861883443194c60319e053dc70b90e0b5d5053d6a0
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:0d3197e14fd71b872050bfcdf47b8047e40bb95283d15d709ca7cdc3d8dc2a56
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e5c970667d2882cfe99c3bf0a16d854cd2e60422a1ab22dee6d08a0bcb0952b
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:afd6965293e39960c24ab2eb262951e9e03f24a87a65c4526c3d160cd3f52c1f
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.064,
   "eval_steps": 500,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -318,6 +318,16 @@
       "mean_token_accuracy": 0.7730094477534294,
       "num_tokens": 1375777.0,
       "step": 300
     }
   ],
   "logging_steps": 10,
@@ -337,7 +347,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6542364323549184.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.06613333333333334,
   "eval_steps": 500,
+  "global_step": 310,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "mean_token_accuracy": 0.7730094477534294,
       "num_tokens": 1375777.0,
       "step": 300
+    },
+    {
+      "entropy": 1.0266294315457345,
+      "epoch": 0.06613333333333334,
+      "grad_norm": 0.23917347192764282,
+      "learning_rate": 9.999975737505649e-05,
+      "loss": 1.1334312438964844,
+      "mean_token_accuracy": 0.7540638357400894,
+      "num_tokens": 1421027.0,
+      "step": 310
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 6755386875217920.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null