Training in progress, step 200, checkpoint

Files changed (8) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c2c0a790064775147df67e3ddcfd62e11584f5aea3f1a9de0417622cbf9c7d4
 size 2471645608

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4f5638d836aa3ffef71be2453a8c99d617b9361fd7588274657fe1b2e3130ae
 size 2471645608

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:06e4829f9e6629891612b81adb97f878de5fd94c2d2fe978b940f5f80ac0f305
 size 2510806010

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbcee9a8632ccff09aef16820056e4f6bbffefc993a32d01db80cce5411b40e1
 size 2510806010

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5de519c5e2e0ec73ae4a10a32ec71c2d0d5d3982d1fbb16434177a93460b8139
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1764bce30ae924d42be701f6a6fde2a47876fe58d108091bf07605901b85118
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e120a86fc85c4349ce6a6d226cf9080af7941a4f32f9239b536f491d778e55d1
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:a20e1ceeebaec131fbf112149e98ceb26f3faad1c6d86c821b7308537a9fa757
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:420919d56d937c6944ec2c4a3a5402e03840612d53e7914b441877be496558d6
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:69a394598e7585bd50940447fd643972e17adc14b60bb2586431a45172756fec
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2fbf96f9883e64cb2ab53f35f353ba99bb51455ec52fb6e4f630fe95a589598d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:d34674b4588a137965b9f5125492bded00ff258d9b6bcc4e9120fd8057887841
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f30eaafe84a25fe4a9dc5723bc034c2757e284325f5eef16f6d75d1c5a09576
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:abcd078b1186023c3032f4652f953246e76b5f62233ca3f894e881ea9feb17b7
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 10.588235294117647,
   "eval_steps": 20,
-  "global_step": 180,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -213,6 +213,28 @@
       "eval_samples_per_second": 97.23,
       "eval_steps_per_second": 2.267,
       "step": 180
     }
   ],
   "logging_steps": 10,
@@ -232,7 +254,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.389437170692915e+16,
   "train_batch_size": 11,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 11.764705882352942,
   "eval_steps": 20,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 97.23,
       "eval_steps_per_second": 2.267,
       "step": 180
+    },
+    {
+      "epoch": 11.176470588235293,
+      "grad_norm": 0.134765625,
+      "learning_rate": 0.00011645945902807341,
+      "loss": 0.0121,
+      "step": 190
+    },
+    {
+      "epoch": 11.764705882352942,
+      "grad_norm": 0.1884765625,
+      "learning_rate": 0.00010825793454723325,
+      "loss": 0.0051,
+      "step": 200
+    },
+    {
+      "epoch": 11.764705882352942,
+      "eval_loss": 4.041390419006348,
+      "eval_runtime": 14.6874,
+      "eval_samples_per_second": 102.197,
+      "eval_steps_per_second": 2.383,
+      "step": 200
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.0433372297468314e+17,
   "train_batch_size": 11,
   "trial_name": null,
   "trial_params": null