Training in progress, epoch 5, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -50,7 +50,7 @@ model-index:
       type: unknown
     metrics:
     - type: cosine_accuracy
-      value: 0.9375065565109253
       name: Cosine Accuracy
 ---
@@ -115,9 +115,9 @@ print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[1.0000, 0.6993, 0.3841],
-#         [0.6993, 1.0000, 0.3711],
-#         [0.3841, 0.3711, 1.0000]])
 ```
 <!--
@@ -154,7 +154,7 @@ You can finetune this model on your own dataset.
 | Metric              | Value      |
 |:--------------------|:-----------|
-| **cosine_accuracy** | **0.9375** |
 <!--
 ## Bias, Risks and Limitations
@@ -364,9 +364,10 @@ You can finetune this model on your own dataset.
 </details>
 ### Training Logs
-| Epoch | Step | Training Loss | Validation Loss | cosine_accuracy |
-|:-----:|:----:|:-------------:|:---------------:|:---------------:|
-| 4.0   | 9112 | 1.4316        | 0.7736          | 0.9375          |
 ### Framework Versions

       type: unknown
     metrics:
     - type: cosine_accuracy
+      value: 0.9435034394264221
       name: Cosine Accuracy
 ---
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
+# tensor([[1.0000, 0.6910, 0.3222],
+#         [0.6910, 1.0000, 0.3099],
+#         [0.3222, 0.3099, 1.0000]])
 ```
 <!--
 | Metric              | Value      |
 |:--------------------|:-----------|
+| **cosine_accuracy** | **0.9435** |
 <!--
 ## Bias, Risks and Limitations
 </details>
 ### Training Logs
+| Epoch | Step  | Training Loss | Validation Loss | cosine_accuracy |
+|:-----:|:-----:|:-------------:|:---------------:|:---------------:|
+| 4.0   | 9112  | 1.4316        | 0.7736          | 0.9375          |
+| 5.0   | 11390 | 1.3415        | 0.7541          | 0.9435          |
 ### Framework Versions

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49d47e67fd64444d1bef9079ac3e87fe40f99c1e431014e043dadc9c1c6fcdd1
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:d9f3402611e3b3c3f41f95210941eb470fba6ed05452b3849a27bd4dd433f91e
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8371d259eab4397e20808c5f3707bcb677999ede71ca90832bb56e58cfdb3428
 size 180607738

 version https://git-lfs.github.com/spec/v1
+oid sha256:76893f6607e3ebbfe6341fa0102ea836a998daa1b3f6024dda6df1ae75e07865
 size 180607738

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae9a3cbcca6bf743673d6e3a369dedc99ea1f47c1765d50c994934bd3af201c9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:37aac35e3c58c9053e71545ace22af7302bb0c360af070fa4f0ab6abc890f773
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5428823afa033ffc8f182c048fb98e8b38691e01883f6e183389a94595d29dfd
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:fdd86fc34d6a0f3e0667119a9de9c78b3da8a344f393071c4b8da7ec715e0886
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17fc7dcbf4e82e93b77a6ea394c88d4c3b907333ba1aa74d5f235a8d4390a6b1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1bb1ca70040fe2933c9ba4528086551bb0337a289b19dabf99f736ffc96fdbab
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.0,
   "eval_steps": 5000,
-  "global_step": 9112,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -47,6 +47,22 @@
       "eval_samples_per_second": 295.608,
       "eval_steps_per_second": 2.333,
       "step": 9112
     }
   ],
   "logging_steps": 5000,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 5.0,
   "eval_steps": 5000,
+  "global_step": 11390,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 295.608,
       "eval_steps_per_second": 2.333,
       "step": 9112
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 10.955339431762695,
+      "learning_rate": 1.0434385002286237e-05,
+      "loss": 1.3415,
+      "step": 11390
+    },
+    {
+      "epoch": 5.0,
+      "eval_cosine_accuracy": 0.9435034394264221,
+      "eval_loss": 0.7540939450263977,
+      "eval_runtime": 31.893,
+      "eval_samples_per_second": 298.028,
+      "eval_steps_per_second": 2.352,
+      "step": 11390
     }
   ],
   "logging_steps": 5000,