thivy commited on
Commit
2d2f12e
·
verified ·
1 Parent(s): 0d7db45

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -406,49 +406,49 @@ model-index:
406
  type: group_b_retrieval
407
  metrics:
408
  - type: cosine_accuracy@1
409
- value: 0.8017153924190722
410
  name: Cosine Accuracy@1
411
  - type: cosine_accuracy@3
412
- value: 0.877524670294199
413
  name: Cosine Accuracy@3
414
  - type: cosine_accuracy@5
415
- value: 0.8977220326477912
416
  name: Cosine Accuracy@5
417
  - type: cosine_accuracy@10
418
- value: 0.9156137600295121
419
  name: Cosine Accuracy@10
420
  - type: cosine_precision@1
421
- value: 0.8017153924190722
422
  name: Cosine Precision@1
423
  - type: cosine_precision@3
424
- value: 0.2925082234313997
425
  name: Cosine Precision@3
426
  - type: cosine_precision@5
427
- value: 0.17954440652955822
428
  name: Cosine Precision@5
429
  - type: cosine_precision@10
430
- value: 0.09156137600295121
431
  name: Cosine Precision@10
432
  - type: cosine_recall@1
433
- value: 0.8017153924190722
434
  name: Cosine Recall@1
435
  - type: cosine_recall@3
436
- value: 0.877524670294199
437
  name: Cosine Recall@3
438
  - type: cosine_recall@5
439
- value: 0.8977220326477912
440
  name: Cosine Recall@5
441
  - type: cosine_recall@10
442
- value: 0.9156137600295121
443
  name: Cosine Recall@10
444
  - type: cosine_ndcg@10
445
- value: 0.8612148814875222
446
  name: Cosine Ndcg@10
447
  - type: cosine_mrr@10
448
- value: 0.8434970480552897
449
  name: Cosine Mrr@10
450
  - type: cosine_map@100
451
- value: 0.8456366871531872
452
  name: Cosine Map@100
453
  ---
454
 
@@ -516,7 +516,7 @@ print(query_embeddings.shape, document_embeddings.shape)
516
  # Get the similarity scores for the embeddings
517
  similarities = model.similarity(query_embeddings, document_embeddings)
518
  print(similarities)
519
- # tensor([[ 0.7469, -0.0002, 0.0699]])
520
  ```
521
 
522
  <!--
@@ -552,23 +552,23 @@ You can finetune this model on your own dataset.
552
  * Dataset: `group_b_retrieval`
553
  * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
554
 
555
- | Metric | Value |
556
- |:--------------------|:-----------|
557
- | cosine_accuracy@1 | 0.8017 |
558
- | cosine_accuracy@3 | 0.8775 |
559
- | cosine_accuracy@5 | 0.8977 |
560
- | cosine_accuracy@10 | 0.9156 |
561
- | cosine_precision@1 | 0.8017 |
562
- | cosine_precision@3 | 0.2925 |
563
- | cosine_precision@5 | 0.1795 |
564
- | cosine_precision@10 | 0.0916 |
565
- | cosine_recall@1 | 0.8017 |
566
- | cosine_recall@3 | 0.8775 |
567
- | cosine_recall@5 | 0.8977 |
568
- | cosine_recall@10 | 0.9156 |
569
- | **cosine_ndcg@10** | **0.8612** |
570
- | cosine_mrr@10 | 0.8435 |
571
- | cosine_map@100 | 0.8456 |
572
 
573
  <!--
574
  ## Bias, Risks and Limitations
@@ -798,6 +798,11 @@ You can finetune this model on your own dataset.
798
  | 0.4035 | 1300 | 0.1267 | - | - |
799
  | 0.4345 | 1400 | 0.1089 | - | - |
800
  | 0.4655 | 1500 | 0.1069 | 0.1850 | 0.8612 |
 
 
 
 
 
801
 
802
 
803
  ### Framework Versions
 
406
  type: group_b_retrieval
407
  metrics:
408
  - type: cosine_accuracy@1
409
+ value: 0.7896338651664668
410
  name: Cosine Accuracy@1
411
  - type: cosine_accuracy@3
412
+ value: 0.8709766669740847
413
  name: Cosine Accuracy@3
414
  - type: cosine_accuracy@5
415
+ value: 0.8913584801254265
416
  name: Cosine Accuracy@5
417
  - type: cosine_accuracy@10
418
+ value: 0.9114636170801439
419
  name: Cosine Accuracy@10
420
  - type: cosine_precision@1
421
+ value: 0.7896338651664668
422
  name: Cosine Precision@1
423
  - type: cosine_precision@3
424
+ value: 0.2903255556580282
425
  name: Cosine Precision@3
426
  - type: cosine_precision@5
427
+ value: 0.17827169602508527
428
  name: Cosine Precision@5
429
  - type: cosine_precision@10
430
+ value: 0.09114636170801438
431
  name: Cosine Precision@10
432
  - type: cosine_recall@1
433
+ value: 0.7896338651664668
434
  name: Cosine Recall@1
435
  - type: cosine_recall@3
436
+ value: 0.8709766669740847
437
  name: Cosine Recall@3
438
  - type: cosine_recall@5
439
+ value: 0.8913584801254265
440
  name: Cosine Recall@5
441
  - type: cosine_recall@10
442
+ value: 0.9114636170801439
443
  name: Cosine Recall@10
444
  - type: cosine_ndcg@10
445
+ value: 0.85301140683731
446
  name: Cosine Ndcg@10
447
  - type: cosine_mrr@10
448
+ value: 0.8340159110771488
449
  name: Cosine Mrr@10
450
  - type: cosine_map@100
451
+ value: 0.8361969274027575
452
  name: Cosine Map@100
453
  ---
454
 
 
516
  # Get the similarity scores for the embeddings
517
  similarities = model.similarity(query_embeddings, document_embeddings)
518
  print(similarities)
519
+ # tensor([[ 0.8043, -0.0173, 0.0371]])
520
  ```
521
 
522
  <!--
 
552
  * Dataset: `group_b_retrieval`
553
  * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
554
 
555
+ | Metric | Value |
556
+ |:--------------------|:----------|
557
+ | cosine_accuracy@1 | 0.7896 |
558
+ | cosine_accuracy@3 | 0.871 |
559
+ | cosine_accuracy@5 | 0.8914 |
560
+ | cosine_accuracy@10 | 0.9115 |
561
+ | cosine_precision@1 | 0.7896 |
562
+ | cosine_precision@3 | 0.2903 |
563
+ | cosine_precision@5 | 0.1783 |
564
+ | cosine_precision@10 | 0.0911 |
565
+ | cosine_recall@1 | 0.7896 |
566
+ | cosine_recall@3 | 0.871 |
567
+ | cosine_recall@5 | 0.8914 |
568
+ | cosine_recall@10 | 0.9115 |
569
+ | **cosine_ndcg@10** | **0.853** |
570
+ | cosine_mrr@10 | 0.834 |
571
+ | cosine_map@100 | 0.8362 |
572
 
573
  <!--
574
  ## Bias, Risks and Limitations
 
798
  | 0.4035 | 1300 | 0.1267 | - | - |
799
  | 0.4345 | 1400 | 0.1089 | - | - |
800
  | 0.4655 | 1500 | 0.1069 | 0.1850 | 0.8612 |
801
+ | 0.4966 | 1600 | 0.1144 | - | - |
802
+ | 0.5276 | 1700 | 0.1059 | - | - |
803
+ | 0.5587 | 1800 | 0.0966 | - | - |
804
+ | 0.5897 | 1900 | 0.1191 | - | - |
805
+ | 0.6207 | 2000 | 0.0964 | 0.1964 | 0.8530 |
806
 
807
 
808
  ### Framework Versions
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38b461d393342010280e75a27699c7e1fe222c1f9d612de3c659d7a9dab63596
3
  size 595640976
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76978de5f2e075e8aa79b5afc05c75d738cbe21cabbfdaed1a48a4269a4866f3
3
  size 595640976
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7dd30a9d85da2724d630510ec0808cdee92ad416551bfd14e6bcc1e2220c13c
3
  size 1191508006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a8115e8405542064ec7230c4c48017e022fb3b37ea793f0cf2c45a84b1bcfed
3
  size 1191508006
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02cd2bf499625e845bf2c22f35fde877cabef4a1d7cd9152e02bddf16ac17428
3
  size 14391
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b62e751c4256ced4de30a5b4b7f0c7deba173870a9142388b36fe30d1438a332
3
  size 14391
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b249816ee3fb553b903ac6355be3c046e74b47b6d09e070158ac72d91b74e658
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f446ffc28f743e9353c6eb0384dda3f80dababeb4062e70b3a2f57d5ddafd96
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 500,
3
  "best_metric": 0.8750381759774116,
4
  "best_model_checkpoint": "models/norbert4-v6-stage2-group-b/checkpoint-500",
5
- "epoch": 0.4655493482309125,
6
  "eval_steps": 500,
7
- "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -182,6 +182,64 @@
182
  "eval_samples_per_second": 63.731,
183
  "eval_steps_per_second": 0.999,
184
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  }
186
  ],
187
  "logging_steps": 100,
@@ -196,7 +254,7 @@
196
  "early_stopping_threshold": 0.0
197
  },
198
  "attributes": {
199
- "early_stopping_patience_counter": 2
200
  }
201
  },
202
  "TrainerControl": {
@@ -205,7 +263,7 @@
205
  "should_evaluate": false,
206
  "should_log": false,
207
  "should_save": true,
208
- "should_training_stop": false
209
  },
210
  "attributes": {}
211
  }
 
2
  "best_global_step": 500,
3
  "best_metric": 0.8750381759774116,
4
  "best_model_checkpoint": "models/norbert4-v6-stage2-group-b/checkpoint-500",
5
+ "epoch": 0.6207324643078833,
6
  "eval_steps": 500,
7
+ "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
182
  "eval_samples_per_second": 63.731,
183
  "eval_steps_per_second": 0.999,
184
  "step": 1500
185
+ },
186
+ {
187
+ "epoch": 0.4965859714463066,
188
+ "grad_norm": 2.73714542388916,
189
+ "learning_rate": 1.1869129192350805e-05,
190
+ "loss": 0.1144,
191
+ "step": 1600
192
+ },
193
+ {
194
+ "epoch": 0.5276225946617008,
195
+ "grad_norm": 1.8794718980789185,
196
+ "learning_rate": 1.0795663964861532e-05,
197
+ "loss": 0.1059,
198
+ "step": 1700
199
+ },
200
+ {
201
+ "epoch": 0.5586592178770949,
202
+ "grad_norm": 1.8753403425216675,
203
+ "learning_rate": 9.712863874436337e-06,
204
+ "loss": 0.0966,
205
+ "step": 1800
206
+ },
207
+ {
208
+ "epoch": 0.5896958410924892,
209
+ "grad_norm": 0.4708488881587982,
210
+ "learning_rate": 8.633432513141098e-06,
211
+ "loss": 0.1191,
212
+ "step": 1900
213
+ },
214
+ {
215
+ "epoch": 0.6207324643078833,
216
+ "grad_norm": 0.937002956867218,
217
+ "learning_rate": 7.570033950547176e-06,
218
+ "loss": 0.0964,
219
+ "step": 2000
220
+ },
221
+ {
222
+ "epoch": 0.6207324643078833,
223
+ "eval_group_b_retrieval_cosine_accuracy@1": 0.7896338651664668,
224
+ "eval_group_b_retrieval_cosine_accuracy@10": 0.9114636170801439,
225
+ "eval_group_b_retrieval_cosine_accuracy@3": 0.8709766669740847,
226
+ "eval_group_b_retrieval_cosine_accuracy@5": 0.8913584801254265,
227
+ "eval_group_b_retrieval_cosine_map@100": 0.8361969274027575,
228
+ "eval_group_b_retrieval_cosine_mrr@10": 0.8340159110771488,
229
+ "eval_group_b_retrieval_cosine_ndcg@10": 0.85301140683731,
230
+ "eval_group_b_retrieval_cosine_precision@1": 0.7896338651664668,
231
+ "eval_group_b_retrieval_cosine_precision@10": 0.09114636170801438,
232
+ "eval_group_b_retrieval_cosine_precision@3": 0.2903255556580282,
233
+ "eval_group_b_retrieval_cosine_precision@5": 0.17827169602508527,
234
+ "eval_group_b_retrieval_cosine_recall@1": 0.7896338651664668,
235
+ "eval_group_b_retrieval_cosine_recall@10": 0.9114636170801439,
236
+ "eval_group_b_retrieval_cosine_recall@3": 0.8709766669740847,
237
+ "eval_group_b_retrieval_cosine_recall@5": 0.8913584801254265,
238
+ "eval_loss": 0.19638657569885254,
239
+ "eval_runtime": 167.4057,
240
+ "eval_samples_per_second": 64.771,
241
+ "eval_steps_per_second": 1.015,
242
+ "step": 2000
243
  }
244
  ],
245
  "logging_steps": 100,
 
254
  "early_stopping_threshold": 0.0
255
  },
256
  "attributes": {
257
+ "early_stopping_patience_counter": 3
258
  }
259
  },
260
  "TrainerControl": {
 
263
  "should_evaluate": false,
264
  "should_log": false,
265
  "should_save": true,
266
+ "should_training_stop": true
267
  },
268
  "attributes": {}
269
  }