Training in progress, step 12800, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1277,6 +1277,10 @@ You can finetune this model on your own dataset.
 | 0.2209 | 12500 | 0.3775        |
 | 0.2218 | 12550 | 0.3695        |
 | 0.2227 | 12600 | 0.3545        |
 ### Framework Versions

 | 0.2209 | 12500 | 0.3775        |
 | 0.2218 | 12550 | 0.3695        |
 | 0.2227 | 12600 | 0.3545        |
+| 0.2235 | 12650 | 0.3548        |
+| 0.2244 | 12700 | 0.4847        |
+| 0.2253 | 12750 | 0.4           |
+| 0.2262 | 12800 | 0.4755        |
 ### Framework Versions

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80936dfc8900c8ac8c5575eef651ee2d03a5cd89aa29046749ab69fc98c04269
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:aaffa127a39fd901b451a02082dd73e610a44a04250335e841bd1c868ed76e7e
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ae8d02c618bd157f065f641eb6849f4a49c9cf111a69fc22cd74fafb816bffc
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:3099f71f55b4d64c3a23404f7faf834041dd15d4ee0a70f3ac8b4ad68054749c
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2810d275875e84011ae759c1d7b23d09c29defe9da62dd6f19a6663a8f613d62
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:17530db209b84526ccd898abb8b9457fb6227f11278068f45b5d61574f612471
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ce2d7b7dd5e5f39af67a8ead71654a635306c138883914bf7f8972e42520e3f
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:3ad444446e22ff06cf0b10a8954472ffaf9e2730e13a164edba0cc9cb5b081cd
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9386fe2e7b7d4f410bc4b63dcc7461a70d3aea6cca8295dc5a10ef7582b0f51
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ef5f6a8cf979ba7b777c43842eed460a7fa788b746409fc99d6b1fe7576d8044
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.22265024473856268,
   "eval_steps": 500,
-  "global_step": 12600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1772,6 +1772,34 @@
       "learning_rate": 4.318882409534468e-05,
       "loss": 0.3545,
       "step": 12600
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.22618437560742874,
   "eval_steps": 500,
+  "global_step": 12800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.318882409534468e-05,
       "loss": 0.3545,
       "step": 12600
+    },
+    {
+      "epoch": 0.22353377745577918,
+      "grad_norm": 1.8885284662246704,
+      "learning_rate": 4.313973807700615e-05,
+      "loss": 0.3548,
+      "step": 12650
+    },
+    {
+      "epoch": 0.2244173101729957,
+      "grad_norm": 1.8508330583572388,
+      "learning_rate": 4.3090652058667615e-05,
+      "loss": 0.4847,
+      "step": 12700
+    },
+    {
+      "epoch": 0.22530084289021224,
+      "grad_norm": 2.1445882320404053,
+      "learning_rate": 4.304156604032907e-05,
+      "loss": 0.4,
+      "step": 12750
+    },
+    {
+      "epoch": 0.22618437560742874,
+      "grad_norm": 1.721024990081787,
+      "learning_rate": 4.299248002199054e-05,
+      "loss": 0.4755,
+      "step": 12800
     }
   ],
   "logging_steps": 50,