Training in progress, epoch 1, checkpoint

Browse files

Files changed (6) hide show

checkpoint-2071/README.md +13 -12
checkpoint-2071/model.safetensors +1 -1
checkpoint-2071/optimizer.pt +1 -1
checkpoint-2071/scheduler.pt +1 -1
checkpoint-2071/trainer_state.json +18 -18
checkpoint-2071/training_args.bin +1 -1

checkpoint-2071/README.md CHANGED Viewed

@@ -49,7 +49,7 @@ model-index:
       type: unknown
     metrics:
     - type: cosine_accuracy
-      value: 0.9583552479743958
       name: Cosine Accuracy
 ---
@@ -114,9 +114,9 @@ print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[1.0000, 0.9723, 0.2011],
-#         [0.9723, 1.0000, 0.1684],
-#         [0.2011, 0.1684, 1.0000]])
 ```
 <!--
@@ -153,7 +153,7 @@ You can finetune this model on your own dataset.
 | Metric              | Value      |
 |:--------------------|:-----------|
-| **cosine_accuracy** | **0.9584** |
 <!--
 ## Bias, Risks and Limitations
@@ -227,9 +227,10 @@ You can finetune this model on your own dataset.
 - `eval_strategy`: steps
 - `per_device_train_batch_size`: 256
 - `per_device_eval_batch_size`: 256
 - `weight_decay`: 0.001
-- `num_train_epochs`: 6
-- `warmup_ratio`: 0.1
 - `fp16`: True
 - `dataloader_num_workers`: 1
 - `dataloader_prefetch_factor`: 2
@@ -252,17 +253,17 @@ You can finetune this model on your own dataset.
 - `gradient_accumulation_steps`: 1
 - `eval_accumulation_steps`: None
 - `torch_empty_cache_steps`: None
-- `learning_rate`: 5e-05
 - `weight_decay`: 0.001
 - `adam_beta1`: 0.9
 - `adam_beta2`: 0.999
 - `adam_epsilon`: 1e-08
 - `max_grad_norm`: 1.0
-- `num_train_epochs`: 6
 - `max_steps`: -1
 - `lr_scheduler_type`: linear
 - `lr_scheduler_kwargs`: {}
-- `warmup_ratio`: 0.1
 - `warmup_steps`: 0
 - `log_level`: passive
 - `log_level_replica`: warning
@@ -365,8 +366,8 @@ You can finetune this model on your own dataset.
 | Epoch  | Step | Training Loss | Validation Loss | cosine_accuracy |
 |:------:|:----:|:-------------:|:---------------:|:---------------:|
 | 0.0005 | 1    | 4.1585        | -               | -               |
-| 0.4829 | 1000 | 2.6877        | 0.5079          | 0.9513          |
-| 0.9657 | 2000 | 1.3275        | 0.4753          | 0.9584          |
 ### Framework Versions

       type: unknown
     metrics:
     - type: cosine_accuracy
+      value: 0.9479440450668335
       name: Cosine Accuracy
 ---
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
+# tensor([[1.0000, 0.9667, 0.2278],
+#         [0.9667, 1.0000, 0.2161],
+#         [0.2278, 0.2161, 1.0000]])
 ```
 <!--
 | Metric              | Value      |
 |:--------------------|:-----------|
+| **cosine_accuracy** | **0.9479** |
 <!--
 ## Bias, Risks and Limitations
 - `eval_strategy`: steps
 - `per_device_train_batch_size`: 256
 - `per_device_eval_batch_size`: 256
+- `learning_rate`: 2e-05
 - `weight_decay`: 0.001
+- `num_train_epochs`: 8
+- `warmup_ratio`: 0.2
 - `fp16`: True
 - `dataloader_num_workers`: 1
 - `dataloader_prefetch_factor`: 2
 - `gradient_accumulation_steps`: 1
 - `eval_accumulation_steps`: None
 - `torch_empty_cache_steps`: None
+- `learning_rate`: 2e-05
 - `weight_decay`: 0.001
 - `adam_beta1`: 0.9
 - `adam_beta2`: 0.999
 - `adam_epsilon`: 1e-08
 - `max_grad_norm`: 1.0
+- `num_train_epochs`: 8
 - `max_steps`: -1
 - `lr_scheduler_type`: linear
 - `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.2
 - `warmup_steps`: 0
 - `log_level`: passive
 - `log_level_replica`: warning
 | Epoch  | Step | Training Loss | Validation Loss | cosine_accuracy |
 |:------:|:----:|:-------------:|:---------------:|:---------------:|
 | 0.0005 | 1    | 4.1585        | -               | -               |
+| 0.4829 | 1000 | 3.2055        | 0.5676          | 0.9401          |
+| 0.9657 | 2000 | 2.0069        | 0.5089          | 0.9479          |
 ### Framework Versions

checkpoint-2071/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3de5dca97f8efca7db244e5ecf375694116c51d56eacdf645845e5ff842a967d
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:eb8c69769dbd05c7072f34ea2f05925262d6c023d92e9f985b4dc449c8405505
 size 90864192

checkpoint-2071/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75dbae26dbbb59787e87d3df2b4327fa5b4cdd3a12193e4f6a22a276171741c9
 size 180607738

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e5da75eb39176d4a290a09f3f7bf1add65552f6dc6d63eaf67c1b5cb81f0edd
 size 180607738

checkpoint-2071/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57bbbe1983a8a9a6b14440da65bfa44f6e62bc74ad4d459b2d737491017f0d91
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:091e83d21e287330b10fb74cc1330244dba58d03818725e35e0230804e5f3346
 size 1064

checkpoint-2071/trainer_state.json CHANGED Viewed

@@ -18,41 +18,41 @@
     },
     {
       "epoch": 0.48285852245292127,
-      "grad_norm": 6.439189910888672,
-      "learning_rate": 4.0185036202735324e-05,
-      "loss": 2.6877,
       "step": 1000
     },
     {
       "epoch": 0.48285852245292127,
-      "eval_cosine_accuracy": 0.9513092637062073,
-      "eval_loss": 0.5078648924827576,
-      "eval_runtime": 35.5752,
-      "eval_samples_per_second": 267.293,
-      "eval_steps_per_second": 1.068,
       "step": 1000
     },
     {
       "epoch": 0.9657170449058425,
-      "grad_norm": 7.472978591918945,
-      "learning_rate": 4.661986944469284e-05,
-      "loss": 1.3275,
       "step": 2000
     },
     {
       "epoch": 0.9657170449058425,
-      "eval_cosine_accuracy": 0.9583552479743958,
-      "eval_loss": 0.4752802550792694,
-      "eval_runtime": 34.5575,
-      "eval_samples_per_second": 275.164,
-      "eval_steps_per_second": 1.1,
       "step": 2000
     }
   ],
   "logging_steps": 1000,
-  "max_steps": 12426,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 6,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {

     },
     {
       "epoch": 0.48285852245292127,
+      "grad_norm": 6.372687339782715,
+      "learning_rate": 6.028968014484008e-06,
+      "loss": 3.2055,
       "step": 1000
     },
     {
       "epoch": 0.48285852245292127,
+      "eval_cosine_accuracy": 0.9400568008422852,
+      "eval_loss": 0.5675864219665527,
+      "eval_runtime": 35.3676,
+      "eval_samples_per_second": 268.862,
+      "eval_steps_per_second": 1.074,
       "step": 1000
     },
     {
       "epoch": 0.9657170449058425,
+      "grad_norm": 8.225760459899902,
+      "learning_rate": 1.2063971031985518e-05,
+      "loss": 2.0069,
       "step": 2000
     },
     {
       "epoch": 0.9657170449058425,
+      "eval_cosine_accuracy": 0.9479440450668335,
+      "eval_loss": 0.5088897347450256,
+      "eval_runtime": 35.33,
+      "eval_samples_per_second": 269.148,
+      "eval_steps_per_second": 1.076,
       "step": 2000
     }
   ],
   "logging_steps": 1000,
+  "max_steps": 16568,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 8,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {

checkpoint-2071/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cce4819fc7434b230bbe88b6e1443de74fe11baa80cd2b8ebf29cee376218c0d
 size 5752

 version https://git-lfs.github.com/spec/v1
+oid sha256:21cb488b39046dd5929796463136d527fa7f4b248e28c84eb80348f28dc5da8a
 size 5752