Training in progress, step 4000, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -349,49 +349,49 @@ model-index:
|
|
| 349 |
type: NanoNFCorpus
|
| 350 |
metrics:
|
| 351 |
- type: dot_accuracy@1
|
| 352 |
-
value: 0.
|
| 353 |
name: Dot Accuracy@1
|
| 354 |
- type: dot_accuracy@3
|
| 355 |
-
value: 0.
|
| 356 |
name: Dot Accuracy@3
|
| 357 |
- type: dot_accuracy@5
|
| 358 |
-
value: 0.
|
| 359 |
name: Dot Accuracy@5
|
| 360 |
- type: dot_accuracy@10
|
| 361 |
-
value: 0.
|
| 362 |
name: Dot Accuracy@10
|
| 363 |
- type: dot_precision@1
|
| 364 |
-
value: 0.
|
| 365 |
name: Dot Precision@1
|
| 366 |
- type: dot_precision@3
|
| 367 |
-
value: 0.
|
| 368 |
name: Dot Precision@3
|
| 369 |
- type: dot_precision@5
|
| 370 |
-
value: 0.
|
| 371 |
name: Dot Precision@5
|
| 372 |
- type: dot_precision@10
|
| 373 |
-
value: 0.
|
| 374 |
name: Dot Precision@10
|
| 375 |
- type: dot_recall@1
|
| 376 |
-
value: 0.
|
| 377 |
name: Dot Recall@1
|
| 378 |
- type: dot_recall@3
|
| 379 |
-
value: 0.
|
| 380 |
name: Dot Recall@3
|
| 381 |
- type: dot_recall@5
|
| 382 |
-
value: 0.
|
| 383 |
name: Dot Recall@5
|
| 384 |
- type: dot_recall@10
|
| 385 |
-
value: 0.
|
| 386 |
name: Dot Recall@10
|
| 387 |
- type: dot_ndcg@10
|
| 388 |
-
value: 0.
|
| 389 |
name: Dot Ndcg@10
|
| 390 |
- type: dot_mrr@10
|
| 391 |
-
value: 0.
|
| 392 |
name: Dot Mrr@10
|
| 393 |
- type: dot_map@100
|
| 394 |
-
value: 0.
|
| 395 |
name: Dot Map@100
|
| 396 |
- type: query_active_dims
|
| 397 |
value: 51200.0
|
|
@@ -471,9 +471,9 @@ print(embeddings.shape)
|
|
| 471 |
# Get the similarity scores for the embeddings
|
| 472 |
similarities = model.similarity(embeddings, embeddings)
|
| 473 |
print(similarities)
|
| 474 |
-
# tensor([[
|
| 475 |
-
# [
|
| 476 |
-
# [
|
| 477 |
```
|
| 478 |
|
| 479 |
<!--
|
|
@@ -511,21 +511,21 @@ You can finetune this model on your own dataset.
|
|
| 511 |
|
| 512 |
| Metric | Value |
|
| 513 |
|:----------------------|:-----------|
|
| 514 |
-
| dot_accuracy@1 | 0.
|
| 515 |
-
| dot_accuracy@3 | 0.
|
| 516 |
-
| dot_accuracy@5 | 0.
|
| 517 |
-
| dot_accuracy@10 | 0.
|
| 518 |
-
| dot_precision@1 | 0.
|
| 519 |
-
| dot_precision@3 | 0.
|
| 520 |
-
| dot_precision@5 | 0.
|
| 521 |
-
| dot_precision@10 | 0.
|
| 522 |
-
| dot_recall@1 | 0.
|
| 523 |
-
| dot_recall@3 | 0.
|
| 524 |
-
| dot_recall@5 | 0.
|
| 525 |
-
| dot_recall@10 | 0.
|
| 526 |
-
| **dot_ndcg@10** | **0.
|
| 527 |
-
| dot_mrr@10 | 0.
|
| 528 |
-
| dot_map@100 | 0.
|
| 529 |
| query_active_dims | 51200.0 |
|
| 530 |
| query_sparsity_ratio | 0.0 |
|
| 531 |
| corpus_active_dims | 51200.0 |
|
|
@@ -818,6 +818,16 @@ You can finetune this model on your own dataset.
|
|
| 818 |
| 1.0038 | 3400 | 0.2065 | - | - |
|
| 819 |
| 1.0186 | 3450 | 0.17 | - | - |
|
| 820 |
| 1.0334 | 3500 | 0.179 | 0.7303 | 0.2596 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 821 |
|
| 822 |
|
| 823 |
### Framework Versions
|
|
|
|
| 349 |
type: NanoNFCorpus
|
| 350 |
metrics:
|
| 351 |
- type: dot_accuracy@1
|
| 352 |
+
value: 0.32
|
| 353 |
name: Dot Accuracy@1
|
| 354 |
- type: dot_accuracy@3
|
| 355 |
+
value: 0.4
|
| 356 |
name: Dot Accuracy@3
|
| 357 |
- type: dot_accuracy@5
|
| 358 |
+
value: 0.52
|
| 359 |
name: Dot Accuracy@5
|
| 360 |
- type: dot_accuracy@10
|
| 361 |
+
value: 0.62
|
| 362 |
name: Dot Accuracy@10
|
| 363 |
- type: dot_precision@1
|
| 364 |
+
value: 0.32
|
| 365 |
name: Dot Precision@1
|
| 366 |
- type: dot_precision@3
|
| 367 |
+
value: 0.2866666666666666
|
| 368 |
name: Dot Precision@3
|
| 369 |
- type: dot_precision@5
|
| 370 |
+
value: 0.276
|
| 371 |
name: Dot Precision@5
|
| 372 |
- type: dot_precision@10
|
| 373 |
+
value: 0.23800000000000002
|
| 374 |
name: Dot Precision@10
|
| 375 |
- type: dot_recall@1
|
| 376 |
+
value: 0.020779419687305747
|
| 377 |
name: Dot Recall@1
|
| 378 |
- type: dot_recall@3
|
| 379 |
+
value: 0.05318433469893364
|
| 380 |
name: Dot Recall@3
|
| 381 |
- type: dot_recall@5
|
| 382 |
+
value: 0.08697326661296835
|
| 383 |
name: Dot Recall@5
|
| 384 |
- type: dot_recall@10
|
| 385 |
+
value: 0.11024946650778294
|
| 386 |
name: Dot Recall@10
|
| 387 |
- type: dot_ndcg@10
|
| 388 |
+
value: 0.2731246838532083
|
| 389 |
name: Dot Ndcg@10
|
| 390 |
- type: dot_mrr@10
|
| 391 |
+
value: 0.3876349206349206
|
| 392 |
name: Dot Mrr@10
|
| 393 |
- type: dot_map@100
|
| 394 |
+
value: 0.11211568501359778
|
| 395 |
name: Dot Map@100
|
| 396 |
- type: query_active_dims
|
| 397 |
value: 51200.0
|
|
|
|
| 471 |
# Get the similarity scores for the embeddings
|
| 472 |
similarities = model.similarity(embeddings, embeddings)
|
| 473 |
print(similarities)
|
| 474 |
+
# tensor([[148.3932, 17.6451, 3.8017],
|
| 475 |
+
# [ 17.6451, 23.1705, 2.1552],
|
| 476 |
+
# [ 3.8017, 2.1552, 18.1152]])
|
| 477 |
```
|
| 478 |
|
| 479 |
<!--
|
|
|
|
| 511 |
|
| 512 |
| Metric | Value |
|
| 513 |
|:----------------------|:-----------|
|
| 514 |
+
| dot_accuracy@1 | 0.32 |
|
| 515 |
+
| dot_accuracy@3 | 0.4 |
|
| 516 |
+
| dot_accuracy@5 | 0.52 |
|
| 517 |
+
| dot_accuracy@10 | 0.62 |
|
| 518 |
+
| dot_precision@1 | 0.32 |
|
| 519 |
+
| dot_precision@3 | 0.2867 |
|
| 520 |
+
| dot_precision@5 | 0.276 |
|
| 521 |
+
| dot_precision@10 | 0.238 |
|
| 522 |
+
| dot_recall@1 | 0.0208 |
|
| 523 |
+
| dot_recall@3 | 0.0532 |
|
| 524 |
+
| dot_recall@5 | 0.087 |
|
| 525 |
+
| dot_recall@10 | 0.1102 |
|
| 526 |
+
| **dot_ndcg@10** | **0.2731** |
|
| 527 |
+
| dot_mrr@10 | 0.3876 |
|
| 528 |
+
| dot_map@100 | 0.1121 |
|
| 529 |
| query_active_dims | 51200.0 |
|
| 530 |
| query_sparsity_ratio | 0.0 |
|
| 531 |
| corpus_active_dims | 51200.0 |
|
|
|
|
| 818 |
| 1.0038 | 3400 | 0.2065 | - | - |
|
| 819 |
| 1.0186 | 3450 | 0.17 | - | - |
|
| 820 |
| 1.0334 | 3500 | 0.179 | 0.7303 | 0.2596 |
|
| 821 |
+
| 1.0481 | 3550 | 0.1848 | - | - |
|
| 822 |
+
| 1.0629 | 3600 | 0.1935 | - | - |
|
| 823 |
+
| 1.0776 | 3650 | 0.1795 | - | - |
|
| 824 |
+
| 1.0924 | 3700 | 0.1524 | - | - |
|
| 825 |
+
| 1.1072 | 3750 | 0.1542 | - | - |
|
| 826 |
+
| 1.1219 | 3800 | 0.1845 | - | - |
|
| 827 |
+
| 1.1367 | 3850 | 0.1568 | - | - |
|
| 828 |
+
| 1.1515 | 3900 | 0.1584 | - | - |
|
| 829 |
+
| 1.1662 | 3950 | 0.1484 | - | - |
|
| 830 |
+
| 1.1810 | 4000 | 0.1249 | 0.6119 | 0.2731 |
|
| 831 |
|
| 832 |
|
| 833 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 728561776
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bcd4e6a8ab56a0db807ca31054934e51802d2eeaa5a038d39f551cacff14347
|
| 3 |
size 728561776
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1457369077
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21da268aff2f83d8014837d41fbcfa1beee4ef6ef72e3199bf45ce5ced8a2c48
|
| 3 |
size 1457369077
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14917
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cd1ea480bf72a4e862c1b766d1db801ba2b3f5895e34f5947476735dcd46078
|
| 3 |
size 14917
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14917
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80fd94e2538428fa98737736853b585ed501ddd45dd103736afef286e04732c6
|
| 3 |
size 14917
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c5cd7e5a8b9d1b2f988217fde63eff5a8de41264597b7828a91c46e81ed2ca5
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 1500,
|
| 3 |
"best_metric": 0.2777906849504241,
|
| 4 |
"best_model_checkpoint": "models/splade-norbert4-base-eti/checkpoint-1500",
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1044,6 +1044,154 @@
|
|
| 1044 |
"eval_eti_samples_per_second": 8.192,
|
| 1045 |
"eval_eti_steps_per_second": 0.259,
|
| 1046 |
"step": 3500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1047 |
}
|
| 1048 |
],
|
| 1049 |
"logging_steps": 50,
|
|
@@ -1058,7 +1206,7 @@
|
|
| 1058 |
"early_stopping_threshold": 0.001
|
| 1059 |
},
|
| 1060 |
"attributes": {
|
| 1061 |
-
"early_stopping_patience_counter":
|
| 1062 |
}
|
| 1063 |
},
|
| 1064 |
"TrainerControl": {
|
|
@@ -1067,7 +1215,7 @@
|
|
| 1067 |
"should_evaluate": false,
|
| 1068 |
"should_log": false,
|
| 1069 |
"should_save": true,
|
| 1070 |
-
"should_training_stop":
|
| 1071 |
},
|
| 1072 |
"attributes": {}
|
| 1073 |
}
|
|
|
|
| 2 |
"best_global_step": 1500,
|
| 3 |
"best_metric": 0.2777906849504241,
|
| 4 |
"best_model_checkpoint": "models/splade-norbert4-base-eti/checkpoint-1500",
|
| 5 |
+
"epoch": 1.1809861234130499,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 4000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1044 |
"eval_eti_samples_per_second": 8.192,
|
| 1045 |
"eval_eti_steps_per_second": 0.259,
|
| 1046 |
"step": 3500
|
| 1047 |
+
},
|
| 1048 |
+
{
|
| 1049 |
+
"base_loss": 0.1651,
|
| 1050 |
+
"document_regularizer_loss": 0.0161,
|
| 1051 |
+
"epoch": 1.0481251845290818,
|
| 1052 |
+
"grad_norm": 1.5739084482192993,
|
| 1053 |
+
"learning_rate": 7.2309711286089245e-06,
|
| 1054 |
+
"loss": 0.1848,
|
| 1055 |
+
"query_regularizer_loss": 0.0036,
|
| 1056 |
+
"step": 3550
|
| 1057 |
+
},
|
| 1058 |
+
{
|
| 1059 |
+
"base_loss": 0.1741,
|
| 1060 |
+
"document_regularizer_loss": 0.015,
|
| 1061 |
+
"epoch": 1.0628875110717448,
|
| 1062 |
+
"grad_norm": 2.5850167274475098,
|
| 1063 |
+
"learning_rate": 7.1762904636920395e-06,
|
| 1064 |
+
"loss": 0.1935,
|
| 1065 |
+
"query_regularizer_loss": 0.0044,
|
| 1066 |
+
"step": 3600
|
| 1067 |
+
},
|
| 1068 |
+
{
|
| 1069 |
+
"base_loss": 0.1598,
|
| 1070 |
+
"document_regularizer_loss": 0.0158,
|
| 1071 |
+
"epoch": 1.0776498376144081,
|
| 1072 |
+
"grad_norm": 4.595969200134277,
|
| 1073 |
+
"learning_rate": 7.1216097987751545e-06,
|
| 1074 |
+
"loss": 0.1795,
|
| 1075 |
+
"query_regularizer_loss": 0.0039,
|
| 1076 |
+
"step": 3650
|
| 1077 |
+
},
|
| 1078 |
+
{
|
| 1079 |
+
"base_loss": 0.1322,
|
| 1080 |
+
"document_regularizer_loss": 0.0165,
|
| 1081 |
+
"epoch": 1.0924121641570712,
|
| 1082 |
+
"grad_norm": 2.4553780555725098,
|
| 1083 |
+
"learning_rate": 7.066929133858268e-06,
|
| 1084 |
+
"loss": 0.1524,
|
| 1085 |
+
"query_regularizer_loss": 0.0038,
|
| 1086 |
+
"step": 3700
|
| 1087 |
+
},
|
| 1088 |
+
{
|
| 1089 |
+
"base_loss": 0.1353,
|
| 1090 |
+
"document_regularizer_loss": 0.0149,
|
| 1091 |
+
"epoch": 1.1071744906997343,
|
| 1092 |
+
"grad_norm": 3.1925532817840576,
|
| 1093 |
+
"learning_rate": 7.012248468941383e-06,
|
| 1094 |
+
"loss": 0.1542,
|
| 1095 |
+
"query_regularizer_loss": 0.0039,
|
| 1096 |
+
"step": 3750
|
| 1097 |
+
},
|
| 1098 |
+
{
|
| 1099 |
+
"base_loss": 0.1653,
|
| 1100 |
+
"document_regularizer_loss": 0.0158,
|
| 1101 |
+
"epoch": 1.1219368172423974,
|
| 1102 |
+
"grad_norm": 7.258732795715332,
|
| 1103 |
+
"learning_rate": 6.957567804024498e-06,
|
| 1104 |
+
"loss": 0.1845,
|
| 1105 |
+
"query_regularizer_loss": 0.0035,
|
| 1106 |
+
"step": 3800
|
| 1107 |
+
},
|
| 1108 |
+
{
|
| 1109 |
+
"base_loss": 0.1357,
|
| 1110 |
+
"document_regularizer_loss": 0.0172,
|
| 1111 |
+
"epoch": 1.1366991437850604,
|
| 1112 |
+
"grad_norm": 2.0559401512145996,
|
| 1113 |
+
"learning_rate": 6.902887139107613e-06,
|
| 1114 |
+
"loss": 0.1568,
|
| 1115 |
+
"query_regularizer_loss": 0.0039,
|
| 1116 |
+
"step": 3850
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"base_loss": 0.1363,
|
| 1120 |
+
"document_regularizer_loss": 0.0177,
|
| 1121 |
+
"epoch": 1.1514614703277237,
|
| 1122 |
+
"grad_norm": 11.927323341369629,
|
| 1123 |
+
"learning_rate": 6.848206474190728e-06,
|
| 1124 |
+
"loss": 0.1584,
|
| 1125 |
+
"query_regularizer_loss": 0.0045,
|
| 1126 |
+
"step": 3900
|
| 1127 |
+
},
|
| 1128 |
+
{
|
| 1129 |
+
"base_loss": 0.1271,
|
| 1130 |
+
"document_regularizer_loss": 0.0171,
|
| 1131 |
+
"epoch": 1.1662237968703868,
|
| 1132 |
+
"grad_norm": 4.851423263549805,
|
| 1133 |
+
"learning_rate": 6.793525809273841e-06,
|
| 1134 |
+
"loss": 0.1484,
|
| 1135 |
+
"query_regularizer_loss": 0.0041,
|
| 1136 |
+
"step": 3950
|
| 1137 |
+
},
|
| 1138 |
+
{
|
| 1139 |
+
"base_loss": 0.105,
|
| 1140 |
+
"document_regularizer_loss": 0.0164,
|
| 1141 |
+
"epoch": 1.1809861234130499,
|
| 1142 |
+
"grad_norm": 4.975285530090332,
|
| 1143 |
+
"learning_rate": 6.738845144356956e-06,
|
| 1144 |
+
"loss": 0.1249,
|
| 1145 |
+
"query_regularizer_loss": 0.0035,
|
| 1146 |
+
"step": 4000
|
| 1147 |
+
},
|
| 1148 |
+
{
|
| 1149 |
+
"epoch": 1.1809861234130499,
|
| 1150 |
+
"eval_NanoBEIR_mean_avg_flops": 51200.0,
|
| 1151 |
+
"eval_NanoBEIR_mean_corpus_active_dims": 51200.0,
|
| 1152 |
+
"eval_NanoBEIR_mean_corpus_sparsity_ratio": 0.0,
|
| 1153 |
+
"eval_NanoBEIR_mean_dot_accuracy@1": 0.32,
|
| 1154 |
+
"eval_NanoBEIR_mean_dot_accuracy@10": 0.62,
|
| 1155 |
+
"eval_NanoBEIR_mean_dot_accuracy@3": 0.4,
|
| 1156 |
+
"eval_NanoBEIR_mean_dot_accuracy@5": 0.52,
|
| 1157 |
+
"eval_NanoBEIR_mean_dot_map@100": 0.11211568501359778,
|
| 1158 |
+
"eval_NanoBEIR_mean_dot_mrr@10": 0.3876349206349206,
|
| 1159 |
+
"eval_NanoBEIR_mean_dot_ndcg@10": 0.2731246838532083,
|
| 1160 |
+
"eval_NanoBEIR_mean_dot_precision@1": 0.32,
|
| 1161 |
+
"eval_NanoBEIR_mean_dot_precision@10": 0.23800000000000002,
|
| 1162 |
+
"eval_NanoBEIR_mean_dot_precision@3": 0.2866666666666666,
|
| 1163 |
+
"eval_NanoBEIR_mean_dot_precision@5": 0.276,
|
| 1164 |
+
"eval_NanoBEIR_mean_dot_recall@1": 0.020779419687305747,
|
| 1165 |
+
"eval_NanoBEIR_mean_dot_recall@10": 0.11024946650778294,
|
| 1166 |
+
"eval_NanoBEIR_mean_dot_recall@3": 0.05318433469893364,
|
| 1167 |
+
"eval_NanoBEIR_mean_dot_recall@5": 0.08697326661296835,
|
| 1168 |
+
"eval_NanoBEIR_mean_query_active_dims": 51200.0,
|
| 1169 |
+
"eval_NanoBEIR_mean_query_sparsity_ratio": 0.0,
|
| 1170 |
+
"eval_NanoNFCorpus_avg_flops": 51200.0,
|
| 1171 |
+
"eval_NanoNFCorpus_corpus_active_dims": 51200.0,
|
| 1172 |
+
"eval_NanoNFCorpus_corpus_sparsity_ratio": 0.0,
|
| 1173 |
+
"eval_NanoNFCorpus_dot_accuracy@1": 0.32,
|
| 1174 |
+
"eval_NanoNFCorpus_dot_accuracy@10": 0.62,
|
| 1175 |
+
"eval_NanoNFCorpus_dot_accuracy@3": 0.4,
|
| 1176 |
+
"eval_NanoNFCorpus_dot_accuracy@5": 0.52,
|
| 1177 |
+
"eval_NanoNFCorpus_dot_map@100": 0.11211568501359778,
|
| 1178 |
+
"eval_NanoNFCorpus_dot_mrr@10": 0.3876349206349206,
|
| 1179 |
+
"eval_NanoNFCorpus_dot_ndcg@10": 0.2731246838532083,
|
| 1180 |
+
"eval_NanoNFCorpus_dot_precision@1": 0.32,
|
| 1181 |
+
"eval_NanoNFCorpus_dot_precision@10": 0.23800000000000002,
|
| 1182 |
+
"eval_NanoNFCorpus_dot_precision@3": 0.2866666666666666,
|
| 1183 |
+
"eval_NanoNFCorpus_dot_precision@5": 0.276,
|
| 1184 |
+
"eval_NanoNFCorpus_dot_recall@1": 0.020779419687305747,
|
| 1185 |
+
"eval_NanoNFCorpus_dot_recall@10": 0.11024946650778294,
|
| 1186 |
+
"eval_NanoNFCorpus_dot_recall@3": 0.05318433469893364,
|
| 1187 |
+
"eval_NanoNFCorpus_dot_recall@5": 0.08697326661296835,
|
| 1188 |
+
"eval_NanoNFCorpus_query_active_dims": 51200.0,
|
| 1189 |
+
"eval_NanoNFCorpus_query_sparsity_ratio": 0.0,
|
| 1190 |
+
"eval_eti_loss": 0.6119212508201599,
|
| 1191 |
+
"eval_eti_runtime": 88.0723,
|
| 1192 |
+
"eval_eti_samples_per_second": 8.629,
|
| 1193 |
+
"eval_eti_steps_per_second": 0.273,
|
| 1194 |
+
"step": 4000
|
| 1195 |
}
|
| 1196 |
],
|
| 1197 |
"logging_steps": 50,
|
|
|
|
| 1206 |
"early_stopping_threshold": 0.001
|
| 1207 |
},
|
| 1208 |
"attributes": {
|
| 1209 |
+
"early_stopping_patience_counter": 5
|
| 1210 |
}
|
| 1211 |
},
|
| 1212 |
"TrainerControl": {
|
|
|
|
| 1215 |
"should_evaluate": false,
|
| 1216 |
"should_log": false,
|
| 1217 |
"should_save": true,
|
| 1218 |
+
"should_training_stop": true
|
| 1219 |
},
|
| 1220 |
"attributes": {}
|
| 1221 |
}
|