Training in progress, step 80, checkpoint

Files changed (10) hide show

last-checkpoint/model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:399339e7e9729cadf83e4291e8d1f52cdf87d221e960a2bc5c0c3d73bc165c5a
 size 4976947640

 version https://git-lfs.github.com/spec/v1
+oid sha256:fad688aa4cd7c281ac69904318599eb36efc8a2d31ceff356bfb837049ac2448
 size 4976947640

last-checkpoint/model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38f4cc1a24d0b85ffbfae769b334b74f7fbae4b87186977fe17c5497ad180fa9
 size 4966790216

 version https://git-lfs.github.com/spec/v1
+oid sha256:9dc04b843b1afea8ccda6ffe1ef6c6b87917f4f5fb6f30c2d0c708a0e20c4574
 size 4966790216

last-checkpoint/model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7198e05c748d588cce776633ff44d0fc7ad90dab772de10c72446c856932c08
 size 2158075194

 version https://git-lfs.github.com/spec/v1
+oid sha256:fb07680d625dd45fbbddadf89625d2156bdb1038d76c129648ca78a74711cd52
 size 2158075194

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cfb3e4512e55f025985c0d6c5f29042f27479ab5b5a2f9637cddc12386d4925
 size 12291216748

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b822eaa8bd74c1deab3ca5a01acaa34c22773b0a367ce6e50ef2a34fe26ec94
 size 12291216748

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3586098707d2957df6ef3e483057ea3562f7346e6119a129cb2cf6003c7f89b
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:4de625c1f1d2b13bc455a5c9ac91a5410dc5d8bd1f3408dde82d07d045a46f90
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0164d72205b9e51777b19957f1b47571930d2781984e660ccfae2b5af3fc748e
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:995528e674600af77a00417696d339d5e638d4bbdf33e157e2253a8583531200
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a1b22b9cc881f5982527af53e598bb5ac64ac0a314645d7d2c1a629460cd671
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:69a88bb0ffea7486980c2da18fda2e0c02dd389c0203de8a960915dbc0a2cf70
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e897f16517849a06894ae8867d75800c12bffee7e87cd6c8e2f792826e904ada
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:7653cea2d766b8ca1ba3fbaf4fb45a4729136b0a3eda4b34769e4708680d4d9b
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:740031d4f57d6c88f6a7fb938e78d9afddeb77e3969b2145157d2e65123b372c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed051585a93ab592517e46872815f40c0e3b41050eca4ba6345891a988101280
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.066006600660066,
   "eval_steps": 40,
-  "global_step": 40,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -51,6 +51,42 @@
       "eval_samples_per_second": 46.566,
       "eval_steps_per_second": 3.882,
       "step": 40
     }
   ],
   "logging_steps": 10,
@@ -70,7 +106,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.4471959420993536e+16,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.132013201320132,
   "eval_steps": 40,
+  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 46.566,
       "eval_steps_per_second": 3.882,
       "step": 40
+    },
+    {
+      "epoch": 0.08250825082508251,
+      "grad_norm": 16.5,
+      "learning_rate": 3.571428571428572e-05,
+      "loss": 4.6939,
+      "step": 50
+    },
+    {
+      "epoch": 0.09900990099009901,
+      "grad_norm": 16.0,
+      "learning_rate": 4.2857142857142856e-05,
+      "loss": 4.3269,
+      "step": 60
+    },
+    {
+      "epoch": 0.11551155115511551,
+      "grad_norm": 11.75,
+      "learning_rate": 5e-05,
+      "loss": 4.0728,
+      "step": 70
+    },
+    {
+      "epoch": 0.132013201320132,
+      "grad_norm": 5.5625,
+      "learning_rate": 5.714285714285714e-05,
+      "loss": 3.9329,
+      "step": 80
+    },
+    {
+      "epoch": 0.132013201320132,
+      "eval_loss": 3.323232412338257,
+      "eval_runtime": 215.3024,
+      "eval_samples_per_second": 46.353,
+      "eval_steps_per_second": 3.864,
+      "step": 80
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 6.894391884198707e+16,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null