Training in progress, step 43200, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1379,6 +1379,10 @@ You can finetune this model on your own dataset.
 | 0.7581 | 42900 | 0.2956        |
 | 0.7590 | 42950 | 0.3038        |
 | 0.7598 | 43000 | 0.2195        |
 </details>

 | 0.7581 | 42900 | 0.2956        |
 | 0.7590 | 42950 | 0.3038        |
 | 0.7598 | 43000 | 0.2195        |
+| 0.7607 | 43050 | 0.2588        |
+| 0.7616 | 43100 | 0.3065        |
+| 0.7625 | 43150 | 0.3252        |
+| 0.7634 | 43200 | 0.2418        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75caeec9f53b9b891200013c0a2fd18951ed97d80436096583e4cb1480de71e5
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:8c0c9a0b1262140e82733d51fe5efff8b0e4a0984fb8df6ff90e910c2afd4ab8
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b1f4e8d78cc6baa416d9c6a87aa0b3025829063989d02f0b2a8b88b559ded826
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:b83e7c31a2c3e589b653cbf5d4ef5a6592e9733967f1d138904e5d044c5d5848
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4be9a690b209a02eb17ce8e3f85da6cd14d4dc8a1275cbf619841edc0fbfa444
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9579fa0231ee8c937aa4751dd1de3d5d49af35716658dc3ccf0f0e2d1f420d45
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6cac2489bb213d35fe01f779a670cf1cba6053f86765e6ddff3ab2ce908e2308
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e689eacce1d8ee34e87120437a30d68097f39c37749fc244a0885289f69d33c
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:beb8449b565ba883e76cbff731d0f062ded7c2721b6cab570d00dd75a772df6d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0456f284ad1b7efca7712b00a843898c6fb37f599b5c1b8ffce60a83dcfa662
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7598381368062059,
   "eval_steps": 500,
-  "global_step": 43000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6028,6 +6028,34 @@
       "learning_rate": 1.335925075101608e-05,
       "loss": 0.2195,
       "step": 43000
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.763372267675072,
   "eval_steps": 500,
+  "global_step": 43200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.335925075101608e-05,
       "loss": 0.2195,
       "step": 43000
+    },
+    {
+      "epoch": 0.7607216695234225,
+      "grad_norm": 1.4077396392822266,
+      "learning_rate": 1.3310164732677543e-05,
+      "loss": 0.2588,
+      "step": 43050
+    },
+    {
+      "epoch": 0.761605202240639,
+      "grad_norm": 2.881322145462036,
+      "learning_rate": 1.326107871433901e-05,
+      "loss": 0.3065,
+      "step": 43100
+    },
+    {
+      "epoch": 0.7624887349578555,
+      "grad_norm": 1.5936981439590454,
+      "learning_rate": 1.3211992696000472e-05,
+      "loss": 0.3252,
+      "step": 43150
+    },
+    {
+      "epoch": 0.763372267675072,
+      "grad_norm": 1.4670791625976562,
+      "learning_rate": 1.3162906677661935e-05,
+      "loss": 0.2418,
+      "step": 43200
     }
   ],
   "logging_steps": 50,