thivy committed on
Commit
7371102
·
verified ·
1 Parent(s): e20be57

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -349,49 +349,49 @@ model-index:
349
  type: NanoNFCorpus
350
  metrics:
351
  - type: dot_accuracy@1
352
- value: 0.28
353
  name: Dot Accuracy@1
354
  - type: dot_accuracy@3
355
- value: 0.38
356
  name: Dot Accuracy@3
357
  - type: dot_accuracy@5
358
- value: 0.5
359
  name: Dot Accuracy@5
360
  - type: dot_accuracy@10
361
- value: 0.6
362
  name: Dot Accuracy@10
363
  - type: dot_precision@1
364
- value: 0.28
365
  name: Dot Precision@1
366
  - type: dot_precision@3
367
- value: 0.26666666666666666
368
  name: Dot Precision@3
369
  - type: dot_precision@5
370
- value: 0.268
371
  name: Dot Precision@5
372
  - type: dot_precision@10
373
- value: 0.22799999999999998
374
  name: Dot Precision@10
375
  - type: dot_recall@1
376
- value: 0.020245923941945597
377
  name: Dot Recall@1
378
  - type: dot_recall@3
379
- value: 0.05035545265818719
380
  name: Dot Recall@3
381
  - type: dot_recall@5
382
- value: 0.06636231438770367
383
  name: Dot Recall@5
384
  - type: dot_recall@10
385
- value: 0.10965738342516877
386
  name: Dot Recall@10
387
  - type: dot_ndcg@10
388
- value: 0.25962417542013744
389
  name: Dot Ndcg@10
390
  - type: dot_mrr@10
391
- value: 0.3617460317460317
392
  name: Dot Mrr@10
393
  - type: dot_map@100
394
- value: 0.10860788103772646
395
  name: Dot Map@100
396
  - type: query_active_dims
397
  value: 51200.0
@@ -471,9 +471,9 @@ print(embeddings.shape)
471
  # Get the similarity scores for the embeddings
472
  similarities = model.similarity(embeddings, embeddings)
473
  print(similarities)
474
- # tensor([[201.5240, 24.5641, 5.0840],
475
- # [ 24.5641, 34.0186, 3.3158],
476
- # [ 5.0840, 3.3158, 29.2518]])
477
  ```
478
 
479
  <!--
@@ -511,21 +511,21 @@ You can finetune this model on your own dataset.
511
 
512
  | Metric | Value |
513
  |:----------------------|:-----------|
514
- | dot_accuracy@1 | 0.28 |
515
- | dot_accuracy@3 | 0.38 |
516
- | dot_accuracy@5 | 0.5 |
517
- | dot_accuracy@10 | 0.6 |
518
- | dot_precision@1 | 0.28 |
519
- | dot_precision@3 | 0.2667 |
520
- | dot_precision@5 | 0.268 |
521
- | dot_precision@10 | 0.228 |
522
- | dot_recall@1 | 0.0202 |
523
- | dot_recall@3 | 0.0504 |
524
- | dot_recall@5 | 0.0664 |
525
- | dot_recall@10 | 0.1097 |
526
- | **dot_ndcg@10** | **0.2596** |
527
- | dot_mrr@10 | 0.3617 |
528
- | dot_map@100 | 0.1086 |
529
  | query_active_dims | 51200.0 |
530
  | query_sparsity_ratio | 0.0 |
531
  | corpus_active_dims | 51200.0 |
@@ -818,6 +818,16 @@ You can finetune this model on your own dataset.
818
  | 1.0038 | 3400 | 0.2065 | - | - |
819
  | 1.0186 | 3450 | 0.17 | - | - |
820
  | 1.0334 | 3500 | 0.179 | 0.7303 | 0.2596 |
 
 
 
 
 
 
 
 
 
 
821
 
822
 
823
  ### Framework Versions
 
349
  type: NanoNFCorpus
350
  metrics:
351
  - type: dot_accuracy@1
352
+ value: 0.32
353
  name: Dot Accuracy@1
354
  - type: dot_accuracy@3
355
+ value: 0.4
356
  name: Dot Accuracy@3
357
  - type: dot_accuracy@5
358
+ value: 0.52
359
  name: Dot Accuracy@5
360
  - type: dot_accuracy@10
361
+ value: 0.62
362
  name: Dot Accuracy@10
363
  - type: dot_precision@1
364
+ value: 0.32
365
  name: Dot Precision@1
366
  - type: dot_precision@3
367
+ value: 0.2866666666666666
368
  name: Dot Precision@3
369
  - type: dot_precision@5
370
+ value: 0.276
371
  name: Dot Precision@5
372
  - type: dot_precision@10
373
+ value: 0.23800000000000002
374
  name: Dot Precision@10
375
  - type: dot_recall@1
376
+ value: 0.020779419687305747
377
  name: Dot Recall@1
378
  - type: dot_recall@3
379
+ value: 0.05318433469893364
380
  name: Dot Recall@3
381
  - type: dot_recall@5
382
+ value: 0.08697326661296835
383
  name: Dot Recall@5
384
  - type: dot_recall@10
385
+ value: 0.11024946650778294
386
  name: Dot Recall@10
387
  - type: dot_ndcg@10
388
+ value: 0.2731246838532083
389
  name: Dot Ndcg@10
390
  - type: dot_mrr@10
391
+ value: 0.3876349206349206
392
  name: Dot Mrr@10
393
  - type: dot_map@100
394
+ value: 0.11211568501359778
395
  name: Dot Map@100
396
  - type: query_active_dims
397
  value: 51200.0
 
471
  # Get the similarity scores for the embeddings
472
  similarities = model.similarity(embeddings, embeddings)
473
  print(similarities)
474
+ # tensor([[148.3932, 17.6451, 3.8017],
475
+ # [ 17.6451, 23.1705, 2.1552],
476
+ # [ 3.8017, 2.1552, 18.1152]])
477
  ```
478
 
479
  <!--
 
511
 
512
  | Metric | Value |
513
  |:----------------------|:-----------|
514
+ | dot_accuracy@1 | 0.32 |
515
+ | dot_accuracy@3 | 0.4 |
516
+ | dot_accuracy@5 | 0.52 |
517
+ | dot_accuracy@10 | 0.62 |
518
+ | dot_precision@1 | 0.32 |
519
+ | dot_precision@3 | 0.2867 |
520
+ | dot_precision@5 | 0.276 |
521
+ | dot_precision@10 | 0.238 |
522
+ | dot_recall@1 | 0.0208 |
523
+ | dot_recall@3 | 0.0532 |
524
+ | dot_recall@5 | 0.087 |
525
+ | dot_recall@10 | 0.1102 |
526
+ | **dot_ndcg@10** | **0.2731** |
527
+ | dot_mrr@10 | 0.3876 |
528
+ | dot_map@100 | 0.1121 |
529
  | query_active_dims | 51200.0 |
530
  | query_sparsity_ratio | 0.0 |
531
  | corpus_active_dims | 51200.0 |
 
818
  | 1.0038 | 3400 | 0.2065 | - | - |
819
  | 1.0186 | 3450 | 0.17 | - | - |
820
  | 1.0334 | 3500 | 0.179 | 0.7303 | 0.2596 |
821
+ | 1.0481 | 3550 | 0.1848 | - | - |
822
+ | 1.0629 | 3600 | 0.1935 | - | - |
823
+ | 1.0776 | 3650 | 0.1795 | - | - |
824
+ | 1.0924 | 3700 | 0.1524 | - | - |
825
+ | 1.1072 | 3750 | 0.1542 | - | - |
826
+ | 1.1219 | 3800 | 0.1845 | - | - |
827
+ | 1.1367 | 3850 | 0.1568 | - | - |
828
+ | 1.1515 | 3900 | 0.1584 | - | - |
829
+ | 1.1662 | 3950 | 0.1484 | - | - |
830
+ | 1.1810 | 4000 | 0.1249 | 0.6119 | 0.2731 |
831
 
832
 
833
  ### Framework Versions
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b65e0f1ba2ea666d4d83b82d443b2ef255d3451253ac391b703b3cff507ac744
3
  size 728561776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bcd4e6a8ab56a0db807ca31054934e51802d2eeaa5a038d39f551cacff14347
3
  size 728561776
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b816a8fc9da81a604d92d18ab9c0fa78933d78143e853332cde1327f2d45484
3
  size 1457369077
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21da268aff2f83d8014837d41fbcfa1beee4ef6ef72e3199bf45ce5ced8a2c48
3
  size 1457369077
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:636a367cda4d50c8924d561ea7f35a51e3ec83088ad3e160d2a1c3155e15cfa2
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cd1ea480bf72a4e862c1b766d1db801ba2b3f5895e34f5947476735dcd46078
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:544c6732f714f6e1e189df798d020c312f287dedcac7dc7b9ea594b76375666d
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80fd94e2538428fa98737736853b585ed501ddd45dd103736afef286e04732c6
3
  size 14917
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:209eecd533b0372305e2eb37b43597d1a5a586610dd414759fd9d46c438f6af6
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c5cd7e5a8b9d1b2f988217fde63eff5a8de41264597b7828a91c46e81ed2ca5
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1500,
3
  "best_metric": 0.2777906849504241,
4
  "best_model_checkpoint": "models/splade-norbert4-base-eti/checkpoint-1500",
5
- "epoch": 1.0333628579864187,
6
  "eval_steps": 500,
7
- "global_step": 3500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1044,6 +1044,154 @@
1044
  "eval_eti_samples_per_second": 8.192,
1045
  "eval_eti_steps_per_second": 0.259,
1046
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1047
  }
1048
  ],
1049
  "logging_steps": 50,
@@ -1058,7 +1206,7 @@
1058
  "early_stopping_threshold": 0.001
1059
  },
1060
  "attributes": {
1061
- "early_stopping_patience_counter": 4
1062
  }
1063
  },
1064
  "TrainerControl": {
@@ -1067,7 +1215,7 @@
1067
  "should_evaluate": false,
1068
  "should_log": false,
1069
  "should_save": true,
1070
- "should_training_stop": false
1071
  },
1072
  "attributes": {}
1073
  }
 
2
  "best_global_step": 1500,
3
  "best_metric": 0.2777906849504241,
4
  "best_model_checkpoint": "models/splade-norbert4-base-eti/checkpoint-1500",
5
+ "epoch": 1.1809861234130499,
6
  "eval_steps": 500,
7
+ "global_step": 4000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1044
  "eval_eti_samples_per_second": 8.192,
1045
  "eval_eti_steps_per_second": 0.259,
1046
  "step": 3500
1047
+ },
1048
+ {
1049
+ "base_loss": 0.1651,
1050
+ "document_regularizer_loss": 0.0161,
1051
+ "epoch": 1.0481251845290818,
1052
+ "grad_norm": 1.5739084482192993,
1053
+ "learning_rate": 7.2309711286089245e-06,
1054
+ "loss": 0.1848,
1055
+ "query_regularizer_loss": 0.0036,
1056
+ "step": 3550
1057
+ },
1058
+ {
1059
+ "base_loss": 0.1741,
1060
+ "document_regularizer_loss": 0.015,
1061
+ "epoch": 1.0628875110717448,
1062
+ "grad_norm": 2.5850167274475098,
1063
+ "learning_rate": 7.1762904636920395e-06,
1064
+ "loss": 0.1935,
1065
+ "query_regularizer_loss": 0.0044,
1066
+ "step": 3600
1067
+ },
1068
+ {
1069
+ "base_loss": 0.1598,
1070
+ "document_regularizer_loss": 0.0158,
1071
+ "epoch": 1.0776498376144081,
1072
+ "grad_norm": 4.595969200134277,
1073
+ "learning_rate": 7.1216097987751545e-06,
1074
+ "loss": 0.1795,
1075
+ "query_regularizer_loss": 0.0039,
1076
+ "step": 3650
1077
+ },
1078
+ {
1079
+ "base_loss": 0.1322,
1080
+ "document_regularizer_loss": 0.0165,
1081
+ "epoch": 1.0924121641570712,
1082
+ "grad_norm": 2.4553780555725098,
1083
+ "learning_rate": 7.066929133858268e-06,
1084
+ "loss": 0.1524,
1085
+ "query_regularizer_loss": 0.0038,
1086
+ "step": 3700
1087
+ },
1088
+ {
1089
+ "base_loss": 0.1353,
1090
+ "document_regularizer_loss": 0.0149,
1091
+ "epoch": 1.1071744906997343,
1092
+ "grad_norm": 3.1925532817840576,
1093
+ "learning_rate": 7.012248468941383e-06,
1094
+ "loss": 0.1542,
1095
+ "query_regularizer_loss": 0.0039,
1096
+ "step": 3750
1097
+ },
1098
+ {
1099
+ "base_loss": 0.1653,
1100
+ "document_regularizer_loss": 0.0158,
1101
+ "epoch": 1.1219368172423974,
1102
+ "grad_norm": 7.258732795715332,
1103
+ "learning_rate": 6.957567804024498e-06,
1104
+ "loss": 0.1845,
1105
+ "query_regularizer_loss": 0.0035,
1106
+ "step": 3800
1107
+ },
1108
+ {
1109
+ "base_loss": 0.1357,
1110
+ "document_regularizer_loss": 0.0172,
1111
+ "epoch": 1.1366991437850604,
1112
+ "grad_norm": 2.0559401512145996,
1113
+ "learning_rate": 6.902887139107613e-06,
1114
+ "loss": 0.1568,
1115
+ "query_regularizer_loss": 0.0039,
1116
+ "step": 3850
1117
+ },
1118
+ {
1119
+ "base_loss": 0.1363,
1120
+ "document_regularizer_loss": 0.0177,
1121
+ "epoch": 1.1514614703277237,
1122
+ "grad_norm": 11.927323341369629,
1123
+ "learning_rate": 6.848206474190728e-06,
1124
+ "loss": 0.1584,
1125
+ "query_regularizer_loss": 0.0045,
1126
+ "step": 3900
1127
+ },
1128
+ {
1129
+ "base_loss": 0.1271,
1130
+ "document_regularizer_loss": 0.0171,
1131
+ "epoch": 1.1662237968703868,
1132
+ "grad_norm": 4.851423263549805,
1133
+ "learning_rate": 6.793525809273841e-06,
1134
+ "loss": 0.1484,
1135
+ "query_regularizer_loss": 0.0041,
1136
+ "step": 3950
1137
+ },
1138
+ {
1139
+ "base_loss": 0.105,
1140
+ "document_regularizer_loss": 0.0164,
1141
+ "epoch": 1.1809861234130499,
1142
+ "grad_norm": 4.975285530090332,
1143
+ "learning_rate": 6.738845144356956e-06,
1144
+ "loss": 0.1249,
1145
+ "query_regularizer_loss": 0.0035,
1146
+ "step": 4000
1147
+ },
1148
+ {
1149
+ "epoch": 1.1809861234130499,
1150
+ "eval_NanoBEIR_mean_avg_flops": 51200.0,
1151
+ "eval_NanoBEIR_mean_corpus_active_dims": 51200.0,
1152
+ "eval_NanoBEIR_mean_corpus_sparsity_ratio": 0.0,
1153
+ "eval_NanoBEIR_mean_dot_accuracy@1": 0.32,
1154
+ "eval_NanoBEIR_mean_dot_accuracy@10": 0.62,
1155
+ "eval_NanoBEIR_mean_dot_accuracy@3": 0.4,
1156
+ "eval_NanoBEIR_mean_dot_accuracy@5": 0.52,
1157
+ "eval_NanoBEIR_mean_dot_map@100": 0.11211568501359778,
1158
+ "eval_NanoBEIR_mean_dot_mrr@10": 0.3876349206349206,
1159
+ "eval_NanoBEIR_mean_dot_ndcg@10": 0.2731246838532083,
1160
+ "eval_NanoBEIR_mean_dot_precision@1": 0.32,
1161
+ "eval_NanoBEIR_mean_dot_precision@10": 0.23800000000000002,
1162
+ "eval_NanoBEIR_mean_dot_precision@3": 0.2866666666666666,
1163
+ "eval_NanoBEIR_mean_dot_precision@5": 0.276,
1164
+ "eval_NanoBEIR_mean_dot_recall@1": 0.020779419687305747,
1165
+ "eval_NanoBEIR_mean_dot_recall@10": 0.11024946650778294,
1166
+ "eval_NanoBEIR_mean_dot_recall@3": 0.05318433469893364,
1167
+ "eval_NanoBEIR_mean_dot_recall@5": 0.08697326661296835,
1168
+ "eval_NanoBEIR_mean_query_active_dims": 51200.0,
1169
+ "eval_NanoBEIR_mean_query_sparsity_ratio": 0.0,
1170
+ "eval_NanoNFCorpus_avg_flops": 51200.0,
1171
+ "eval_NanoNFCorpus_corpus_active_dims": 51200.0,
1172
+ "eval_NanoNFCorpus_corpus_sparsity_ratio": 0.0,
1173
+ "eval_NanoNFCorpus_dot_accuracy@1": 0.32,
1174
+ "eval_NanoNFCorpus_dot_accuracy@10": 0.62,
1175
+ "eval_NanoNFCorpus_dot_accuracy@3": 0.4,
1176
+ "eval_NanoNFCorpus_dot_accuracy@5": 0.52,
1177
+ "eval_NanoNFCorpus_dot_map@100": 0.11211568501359778,
1178
+ "eval_NanoNFCorpus_dot_mrr@10": 0.3876349206349206,
1179
+ "eval_NanoNFCorpus_dot_ndcg@10": 0.2731246838532083,
1180
+ "eval_NanoNFCorpus_dot_precision@1": 0.32,
1181
+ "eval_NanoNFCorpus_dot_precision@10": 0.23800000000000002,
1182
+ "eval_NanoNFCorpus_dot_precision@3": 0.2866666666666666,
1183
+ "eval_NanoNFCorpus_dot_precision@5": 0.276,
1184
+ "eval_NanoNFCorpus_dot_recall@1": 0.020779419687305747,
1185
+ "eval_NanoNFCorpus_dot_recall@10": 0.11024946650778294,
1186
+ "eval_NanoNFCorpus_dot_recall@3": 0.05318433469893364,
1187
+ "eval_NanoNFCorpus_dot_recall@5": 0.08697326661296835,
1188
+ "eval_NanoNFCorpus_query_active_dims": 51200.0,
1189
+ "eval_NanoNFCorpus_query_sparsity_ratio": 0.0,
1190
+ "eval_eti_loss": 0.6119212508201599,
1191
+ "eval_eti_runtime": 88.0723,
1192
+ "eval_eti_samples_per_second": 8.629,
1193
+ "eval_eti_steps_per_second": 0.273,
1194
+ "step": 4000
1195
  }
1196
  ],
1197
  "logging_steps": 50,
 
1206
  "early_stopping_threshold": 0.001
1207
  },
1208
  "attributes": {
1209
+ "early_stopping_patience_counter": 5
1210
  }
1211
  },
1212
  "TrainerControl": {
 
1215
  "should_evaluate": false,
1216
  "should_log": false,
1217
  "should_save": true,
1218
+ "should_training_stop": true
1219
  },
1220
  "attributes": {}
1221
  }