Training in progress, epoch 4, checkpoint

Files changed (6) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b7e62e925af24c3c46e451bdff1f8741c5f27e678dd9e4e1d76a94a086d496a
 size 1476713628

 version https://git-lfs.github.com/spec/v1
+oid sha256:00248ee9684c65a32615da2b0cccb3ee6b6ac475e02b1cc704d4a10d03c53364
 size 1476713628

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d12eabbc14436e317cdf37d9dba3b756242f5d198e74c991dbb74857301a737f
 size 2953659629

 version https://git-lfs.github.com/spec/v1
+oid sha256:69f8175b7a9c4f54cd8420af1fc659bf71ac2b603ae7671b17572d14702dc17b
 size 2953659629

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45479556d4ba632600b7dd85f430f0b6fcb738ecd5454ea101816125db2d33f0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0c230ad29373d00210d983f3964cf3edad2620215c1c6091bf41cb911b3b787
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fb9c5f460012b3238729ee5b4cd8719008838840199b21ad75fcef7c04d58970
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:56a642fd3959d962a047731abaf128bced517e3db0e7e9f18577cfe93e2f6753
 size 1064

last-checkpoint/sentencepiece.bpe.model ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.719619326500732,
-  "best_model_checkpoint": "modelParams/TrainingArguments_output\\checkpoint-149",
-  "epoch": 2.9949748743718594,
   "eval_steps": 500,
-  "global_step": 149,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -34,6 +34,15 @@
       "eval_samples_per_second": 27.684,
       "eval_steps_per_second": 3.466,
       "step": 149
     }
   ],
   "logging_steps": 500,
@@ -53,7 +62,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8867065653362880.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7291361639824304,
+  "best_model_checkpoint": "modelParams/TrainingArguments_output\\checkpoint-199",
+  "epoch": 4.0,
   "eval_steps": 500,
+  "global_step": 199,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 27.684,
       "eval_steps_per_second": 3.466,
       "step": 149
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.7291361639824304,
+      "eval_loss": 0.6263195872306824,
+      "eval_runtime": 45.4245,
+      "eval_samples_per_second": 30.072,
+      "eval_steps_per_second": 3.764,
+      "step": 199
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 1.182275420448384e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null