LamaDiab committed on
Commit
d60b880
·
verified ·
1 Parent(s): 4c0127d

Training in progress, epoch 5, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -50,7 +50,7 @@ model-index:
50
  type: unknown
51
  metrics:
52
  - type: cosine_accuracy
53
- value: 0.9375065565109253
54
  name: Cosine Accuracy
55
  ---
56
 
@@ -115,9 +115,9 @@ print(embeddings.shape)
115
  # Get the similarity scores for the embeddings
116
  similarities = model.similarity(embeddings, embeddings)
117
  print(similarities)
118
- # tensor([[1.0000, 0.6993, 0.3841],
119
- # [0.6993, 1.0000, 0.3711],
120
- # [0.3841, 0.3711, 1.0000]])
121
  ```
122
 
123
  <!--
@@ -154,7 +154,7 @@ You can finetune this model on your own dataset.
154
 
155
  | Metric | Value |
156
  |:--------------------|:-----------|
157
- | **cosine_accuracy** | **0.9375** |
158
 
159
  <!--
160
  ## Bias, Risks and Limitations
@@ -364,9 +364,10 @@ You can finetune this model on your own dataset.
364
  </details>
365
 
366
  ### Training Logs
367
- | Epoch | Step | Training Loss | Validation Loss | cosine_accuracy |
368
- |:-----:|:----:|:-------------:|:---------------:|:---------------:|
369
- | 4.0 | 9112 | 1.4316 | 0.7736 | 0.9375 |
 
370
 
371
 
372
  ### Framework Versions
 
50
  type: unknown
51
  metrics:
52
  - type: cosine_accuracy
53
+ value: 0.9435034394264221
54
  name: Cosine Accuracy
55
  ---
56
 
 
115
  # Get the similarity scores for the embeddings
116
  similarities = model.similarity(embeddings, embeddings)
117
  print(similarities)
118
+ # tensor([[1.0000, 0.6910, 0.3222],
119
+ # [0.6910, 1.0000, 0.3099],
120
+ # [0.3222, 0.3099, 1.0000]])
121
  ```
122
 
123
  <!--
 
154
 
155
  | Metric | Value |
156
  |:--------------------|:-----------|
157
+ | **cosine_accuracy** | **0.9435** |
158
 
159
  <!--
160
  ## Bias, Risks and Limitations
 
364
  </details>
365
 
366
  ### Training Logs
367
+ | Epoch | Step | Training Loss | Validation Loss | cosine_accuracy |
368
+ |:-----:|:-----:|:-------------:|:---------------:|:---------------:|
369
+ | 4.0 | 9112 | 1.4316 | 0.7736 | 0.9375 |
370
+ | 5.0 | 11390 | 1.3415 | 0.7541 | 0.9435 |
371
 
372
 
373
  ### Framework Versions
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49d47e67fd64444d1bef9079ac3e87fe40f99c1e431014e043dadc9c1c6fcdd1
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9f3402611e3b3c3f41f95210941eb470fba6ed05452b3849a27bd4dd433f91e
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8371d259eab4397e20808c5f3707bcb677999ede71ca90832bb56e58cfdb3428
3
  size 180607738
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76893f6607e3ebbfe6341fa0102ea836a998daa1b3f6024dda6df1ae75e07865
3
  size 180607738
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae9a3cbcca6bf743673d6e3a369dedc99ea1f47c1765d50c994934bd3af201c9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37aac35e3c58c9053e71545ace22af7302bb0c360af070fa4f0ab6abc890f773
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5428823afa033ffc8f182c048fb98e8b38691e01883f6e183389a94595d29dfd
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdd86fc34d6a0f3e0667119a9de9c78b3da8a344f393071c4b8da7ec715e0886
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17fc7dcbf4e82e93b77a6ea394c88d4c3b907333ba1aa74d5f235a8d4390a6b1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bb1ca70040fe2933c9ba4528086551bb0337a289b19dabf99f736ffc96fdbab
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 4.0,
6
  "eval_steps": 5000,
7
- "global_step": 9112,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -47,6 +47,22 @@
47
  "eval_samples_per_second": 295.608,
48
  "eval_steps_per_second": 2.333,
49
  "step": 9112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
  ],
52
  "logging_steps": 5000,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 5.0,
6
  "eval_steps": 5000,
7
+ "global_step": 11390,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
47
  "eval_samples_per_second": 295.608,
48
  "eval_steps_per_second": 2.333,
49
  "step": 9112
50
+ },
51
+ {
52
+ "epoch": 5.0,
53
+ "grad_norm": 10.955339431762695,
54
+ "learning_rate": 1.0434385002286237e-05,
55
+ "loss": 1.3415,
56
+ "step": 11390
57
+ },
58
+ {
59
+ "epoch": 5.0,
60
+ "eval_cosine_accuracy": 0.9435034394264221,
61
+ "eval_loss": 0.7540939450263977,
62
+ "eval_runtime": 31.893,
63
+ "eval_samples_per_second": 298.028,
64
+ "eval_steps_per_second": 2.352,
65
+ "step": 11390
66
  }
67
  ],
68
  "logging_steps": 5000,