Training in progress, step 18800, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1399,6 +1399,10 @@ You can finetune this model on your own dataset.
 | 0.3269 | 18500 | 0.3856        |
 | 0.3278 | 18550 | 0.3575        |
 | 0.3287 | 18600 | 0.347         |
 </details>

 | 0.3269 | 18500 | 0.3856        |
 | 0.3278 | 18550 | 0.3575        |
 | 0.3287 | 18600 | 0.347         |
+| 0.3296 | 18650 | 0.3195        |
+| 0.3304 | 18700 | 0.3543        |
+| 0.3313 | 18750 | 0.3642        |
+| 0.3322 | 18800 | 0.3415        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c659f7470485136b016f37853007297ca9974233845b53c7106a151f1185c5ff
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:d6283a957a66c184eb3662df7aa262cd154fb1dcee802f75857776d94240bc13
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5134fd47d1ddef6b8fafbfb0e5b5ac6fce8d4885f9aa6d68d2550fe5fe73399a
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:78292edbaee380ac230c50b2f8b68c5dd37e7df2ddfa15b2c43a017c23edfbeb
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8876eb9659d218313ffcb704a4ab5032bff1b5e2e75c2dbc7a464331691d0ba1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:83eb2f002d2d9da0dc4755fcda636e68d5b4a41e8c2e5a2b8c82d014a65d58ff
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:346a0b991711eadeb589f86e15866b208715d0ef237bad5b888484a4e3892901
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c934cf056ab14aad09651705c56e19636a65538c7a4a6a0401dc0dfa92a2b37
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d4a93a47d6ad046cc4b29ddb3b4c48d6d603705a4f414700fa29b5fa5270c50
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0b05eb17181bcecd1e03b83742ebacffcd8b0f6bb6d68e2982a23c3f445018e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.3286741708045449,
   "eval_steps": 500,
-  "global_step": 18600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2612,6 +2612,34 @@
       "learning_rate": 3.730144705582062e-05,
       "loss": 0.347,
       "step": 18600
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.332208301673411,
   "eval_steps": 500,
+  "global_step": 18800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.730144705582062e-05,
       "loss": 0.347,
       "step": 18600
+    },
+    {
+      "epoch": 0.3295577035217614,
+      "grad_norm": 1.8076531887054443,
+      "learning_rate": 3.7252361037482085e-05,
+      "loss": 0.3195,
+      "step": 18650
+    },
+    {
+      "epoch": 0.3304412362389779,
+      "grad_norm": 1.8082791566848755,
+      "learning_rate": 3.720327501914355e-05,
+      "loss": 0.3543,
+      "step": 18700
+    },
+    {
+      "epoch": 0.33132476895619445,
+      "grad_norm": 1.3712306022644043,
+      "learning_rate": 3.7154189000805014e-05,
+      "loss": 0.3642,
+      "step": 18750
+    },
+    {
+      "epoch": 0.332208301673411,
+      "grad_norm": 1.5654476881027222,
+      "learning_rate": 3.710510298246648e-05,
+      "loss": 0.3415,
+      "step": 18800
     }
   ],
   "logging_steps": 50,