amanwithaplan committed on
Commit
af7490e
·
verified ·
1 Parent(s): 81431ac

Add new CrossEncoder model

Browse files
Files changed (2)
  1. README.md +79 -35
  2. model.safetensors +1 -1
README.md CHANGED
@@ -26,13 +26,13 @@ model-index:
26
  type: NanoMSMARCO_R100
27
  metrics:
28
  - type: map
29
- value: 0.6172
30
  name: Map
31
  - type: mrr@10
32
- value: 0.6095
33
  name: Mrr@10
34
  - type: ndcg@10
35
- value: 0.6701
36
  name: Ndcg@10
37
  - task:
38
  type: cross-encoder-reranking
@@ -42,13 +42,13 @@ model-index:
42
  type: NanoNFCorpus_R100
43
  metrics:
44
  - type: map
45
- value: 0.4064
46
  name: Map
47
  - type: mrr@10
48
- value: 0.6548
49
  name: Mrr@10
50
  - type: ndcg@10
51
- value: 0.4547
52
  name: Ndcg@10
53
  - task:
54
  type: cross-encoder-reranking
@@ -58,13 +58,13 @@ model-index:
58
  type: NanoNQ_R100
59
  metrics:
60
  - type: map
61
- value: 0.6965
62
  name: Map
63
  - type: mrr@10
64
- value: 0.7159
65
  name: Mrr@10
66
  - type: ndcg@10
67
- value: 0.7507
68
  name: Ndcg@10
69
  - task:
70
  type: cross-encoder-nano-beir
@@ -74,13 +74,13 @@ model-index:
74
  type: NanoBEIR_R100_mean
75
  metrics:
76
  - type: map
77
- value: 0.5734
78
  name: Map
79
  - type: mrr@10
80
- value: 0.6601
81
  name: Mrr@10
82
  - type: ndcg@10
83
- value: 0.6252
84
  name: Ndcg@10
85
  ---
86
 
@@ -190,9 +190,9 @@ You can finetune this model on your own dataset.
190
 
191
  | Metric | NanoMSMARCO_R100 | NanoNFCorpus_R100 | NanoNQ_R100 |
192
  |:------------|:---------------------|:---------------------|:---------------------|
193
- | map | 0.6172 (+0.1276) | 0.4064 (+0.1454) | 0.6965 (+0.2769) |
194
- | mrr@10 | 0.6095 (+0.1320) | 0.6548 (+0.1549) | 0.7159 (+0.2892) |
195
- | **ndcg@10** | **0.6701 (+0.1297)** | **0.4547 (+0.1297)** | **0.7507 (+0.2501)** |
196
 
197
  #### Cross Encoder Nano BEIR
198
 
@@ -214,9 +214,9 @@ You can finetune this model on your own dataset.
214
 
215
  | Metric | Value |
216
  |:------------|:---------------------|
217
- | map | 0.5734 (+0.1833) |
218
- | mrr@10 | 0.6601 (+0.1921) |
219
- | **ndcg@10** | **0.6252 (+0.1698)** |
220
 
221
  <!--
222
  ## Bias, Risks and Limitations
@@ -288,7 +288,8 @@ You can finetune this model on your own dataset.
288
  #### Non-Default Hyperparameters
289
 
290
  - `per_device_train_batch_size`: 16
291
- - `learning_rate`: 1e-05
 
292
  - `warmup_steps`: 0.1
293
  - `bf16`: True
294
  - `eval_strategy`: steps
@@ -301,9 +302,9 @@ You can finetune this model on your own dataset.
301
  <details><summary>Click to expand</summary>
302
 
303
  - `per_device_train_batch_size`: 16
304
- - `num_train_epochs`: 3
305
  - `max_steps`: -1
306
- - `learning_rate`: 1e-05
307
  - `lr_scheduler_type`: linear
308
  - `lr_scheduler_kwargs`: None
309
  - `warmup_steps`: 0.1
@@ -404,20 +405,63 @@ You can finetune this model on your own dataset.
404
  | Epoch | Step | Training Loss | Validation Loss | NanoMSMARCO_R100_ndcg@10 | NanoNFCorpus_R100_ndcg@10 | NanoNQ_R100_ndcg@10 | NanoBEIR_R100_mean_ndcg@10 |
405
  |:----------:|:-------:|:-------------:|:---------------:|:------------------------:|:-------------------------:|:--------------------:|:--------------------------:|
406
  | 0.0070 | 1 | 0.9177 | - | - | - | - | - |
407
- | 0.1748 | 25 | 0.7344 | 0.6563 | 0.6886 (+0.1482) | 0.4553 (+0.1303) | 0.7583 (+0.2577) | 0.6341 (+0.1787) |
408
- | 0.3497 | 50 | 0.6174 | 0.5951 | 0.6760 (+0.1356) | 0.4425 (+0.1174) | 0.7654 (+0.2648) | 0.6280 (+0.1726) |
409
- | 0.5245 | 75 | 0.5743 | 0.5906 | 0.6804 (+0.1400) | 0.4428 (+0.1178) | 0.7669 (+0.2663) | 0.6300 (+0.1747) |
410
- | 0.6993 | 100 | 0.5721 | 0.5720 | 0.6742 (+0.1338) | 0.4505 (+0.1254) | 0.7664 (+0.2658) | 0.6304 (+0.1750) |
411
- | 0.8741 | 125 | 0.5445 | 0.5648 | 0.6707 (+0.1303) | 0.4541 (+0.1291) | 0.7593 (+0.2586) | 0.6280 (+0.1727) |
412
- | 1.0490 | 150 | 0.5479 | 0.5814 | 0.6686 (+0.1281) | 0.4463 (+0.1213) | 0.7597 (+0.2590) | 0.6249 (+0.1695) |
413
- | 1.2238 | 175 | 0.5260 | 0.5713 | 0.6686 (+0.1281) | 0.4570 (+0.1320) | 0.7595 (+0.2589) | 0.6284 (+0.1730) |
414
- | 1.3986 | 200 | 0.5033 | 0.5503 | 0.6681 (+0.1277) | 0.4588 (+0.1337) | 0.7567 (+0.2560) | 0.6279 (+0.1725) |
415
- | 1.5734 | 225 | 0.5010 | 0.5512 | 0.6707 (+0.1303) | 0.4641 (+0.1390) | 0.7589 (+0.2582) | 0.6312 (+0.1759) |
416
- | 1.7483 | 250 | 0.5030 | 0.5452 | 0.6701 (+0.1297) | 0.4570 (+0.1320) | 0.7582 (+0.2576) | 0.6284 (+0.1731) |
417
- | **1.9231** | **275** | **0.4834** | **0.5428** | **0.6707 (+0.1303)** | **0.4587 (+0.1337)** | **0.7571 (+0.2565)** | **0.6289 (+0.1735)** |
418
- | 2.0979 | 300 | 0.4585 | 0.5437 | 0.6705 (+0.1301) | 0.4600 (+0.1349) | 0.7577 (+0.2570) | 0.6294 (+0.1740) |
419
- | 2.2727 | 325 | 0.4483 | 0.5500 | 0.6630 (+0.1226) | 0.4524 (+0.1274) | 0.7543 (+0.2536) | 0.6232 (+0.1679) |
420
- | 2.4476 | 350 | 0.4578 | 0.5567 | 0.6701 (+0.1297) | 0.4547 (+0.1297) | 0.7507 (+0.2501) | 0.6252 (+0.1698) |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
 
422
  * The bold row denotes the saved checkpoint.
423
 
 
26
  type: NanoMSMARCO_R100
27
  metrics:
28
  - type: map
29
+ value: 0.5851
30
  name: Map
31
  - type: mrr@10
32
+ value: 0.5771
33
  name: Mrr@10
34
  - type: ndcg@10
35
+ value: 0.6458
36
  name: Ndcg@10
37
  - task:
38
  type: cross-encoder-reranking
 
42
  type: NanoNFCorpus_R100
43
  metrics:
44
  - type: map
45
+ value: 0.3857
46
  name: Map
47
  - type: mrr@10
48
+ value: 0.6234
49
  name: Mrr@10
50
  - type: ndcg@10
51
+ value: 0.4198
52
  name: Ndcg@10
53
  - task:
54
  type: cross-encoder-reranking
 
58
  type: NanoNQ_R100
59
  metrics:
60
  - type: map
61
+ value: 0.6845
62
  name: Map
63
  - type: mrr@10
64
+ value: 0.7
65
  name: Mrr@10
66
  - type: ndcg@10
67
+ value: 0.7309
68
  name: Ndcg@10
69
  - task:
70
  type: cross-encoder-nano-beir
 
74
  type: NanoBEIR_R100_mean
75
  metrics:
76
  - type: map
77
+ value: 0.5518
78
  name: Map
79
  - type: mrr@10
80
+ value: 0.6335
81
  name: Mrr@10
82
  - type: ndcg@10
83
+ value: 0.5988
84
  name: Ndcg@10
85
  ---
86
 
 
190
 
191
  | Metric | NanoMSMARCO_R100 | NanoNFCorpus_R100 | NanoNQ_R100 |
192
  |:------------|:---------------------|:---------------------|:---------------------|
193
+ | map | 0.5851 (+0.0955) | 0.3857 (+0.1247) | 0.6845 (+0.2649) |
194
+ | mrr@10 | 0.5771 (+0.0996) | 0.6234 (+0.1235) | 0.7000 (+0.2733) |
195
+ | **ndcg@10** | **0.6458 (+0.1053)** | **0.4198 (+0.0948)** | **0.7309 (+0.2303)** |
196
 
197
  #### Cross Encoder Nano BEIR
198
 
 
214
 
215
  | Metric | Value |
216
  |:------------|:---------------------|
217
+ | map | 0.5518 (+0.1617) |
218
+ | mrr@10 | 0.6335 (+0.1655) |
219
+ | **ndcg@10** | **0.5988 (+0.1435)** |
220
 
221
  <!--
222
  ## Bias, Risks and Limitations
 
288
  #### Non-Default Hyperparameters
289
 
290
  - `per_device_train_batch_size`: 16
291
+ - `num_train_epochs`: 10
292
+ - `learning_rate`: 2e-05
293
  - `warmup_steps`: 0.1
294
  - `bf16`: True
295
  - `eval_strategy`: steps
 
302
  <details><summary>Click to expand</summary>
303
 
304
  - `per_device_train_batch_size`: 16
305
+ - `num_train_epochs`: 10
306
  - `max_steps`: -1
307
+ - `learning_rate`: 2e-05
308
  - `lr_scheduler_type`: linear
309
  - `lr_scheduler_kwargs`: None
310
  - `warmup_steps`: 0.1
 
405
  | Epoch | Step | Training Loss | Validation Loss | NanoMSMARCO_R100_ndcg@10 | NanoNFCorpus_R100_ndcg@10 | NanoNQ_R100_ndcg@10 | NanoBEIR_R100_mean_ndcg@10 |
406
  |:----------:|:-------:|:-------------:|:---------------:|:------------------------:|:-------------------------:|:--------------------:|:--------------------------:|
407
  | 0.0070 | 1 | 0.9177 | - | - | - | - | - |
408
+ | 0.1748 | 25 | 0.7426 | 0.6592 | 0.6901 (+0.1496) | 0.4552 (+0.1302) | 0.7652 (+0.2645) | 0.6368 (+0.1814) |
409
+ | 0.3497 | 50 | 0.6246 | 0.5985 | 0.6856 (+0.1452) | 0.4370 (+0.1119) | 0.7658 (+0.2651) | 0.6295 (+0.1741) |
410
+ | 0.5245 | 75 | 0.5825 | 0.5924 | 0.6799 (+0.1395) | 0.4391 (+0.1140) | 0.7689 (+0.2682) | 0.6293 (+0.1739) |
411
+ | 0.6993 | 100 | 0.5749 | 0.5717 | 0.6743 (+0.1339) | 0.4488 (+0.1237) | 0.7634 (+0.2628) | 0.6288 (+0.1735) |
412
+ | 0.8741 | 125 | 0.5438 | 0.5726 | 0.6810 (+0.1405) | 0.4516 (+0.1266) | 0.7768 (+0.2761) | 0.6365 (+0.1811) |
413
+ | 1.0490 | 150 | 0.5430 | 0.5515 | 0.6674 (+0.1270) | 0.4421 (+0.1170) | 0.7694 (+0.2688) | 0.6263 (+0.1709) |
414
+ | 1.2238 | 175 | 0.5111 | 0.5671 | 0.6574 (+0.1169) | 0.4450 (+0.1199) | 0.7621 (+0.2615) | 0.6215 (+0.1661) |
415
+ | 1.3986 | 200 | 0.5118 | 0.5482 | 0.6580 (+0.1176) | 0.4500 (+0.1250) | 0.7433 (+0.2426) | 0.6171 (+0.1617) |
416
+ | 1.5734 | 225 | 0.5162 | 0.5539 | 0.6593 (+0.1189) | 0.4478 (+0.1227) | 0.7553 (+0.2547) | 0.6208 (+0.1654) |
417
+ | 1.7483 | 250 | 0.5052 | 0.5444 | 0.6606 (+0.1202) | 0.4498 (+0.1247) | 0.7695 (+0.2688) | 0.6266 (+0.1712) |
418
+ | 1.9231 | 275 | 0.4921 | 0.5383 | 0.6549 (+0.1144) | 0.4332 (+0.1081) | 0.7569 (+0.2562) | 0.6150 (+0.1596) |
419
+ | 2.0979 | 300 | 0.4638 | 0.5680 | 0.6601 (+0.1197) | 0.4495 (+0.1244) | 0.7582 (+0.2575) | 0.6226 (+0.1672) |
420
+ | 2.2727 | 325 | 0.4440 | 0.5592 | 0.6387 (+0.0982) | 0.4399 (+0.1148) | 0.7517 (+0.2511) | 0.6101 (+0.1547) |
421
+ | 2.4476 | 350 | 0.4740 | 0.5798 | 0.6597 (+0.1193) | 0.4379 (+0.1129) | 0.7467 (+0.2460) | 0.6148 (+0.1594) |
422
+ | 2.6224 | 375 | 0.4414 | 0.5420 | 0.6484 (+0.1080) | 0.4352 (+0.1101) | 0.7320 (+0.2314) | 0.6052 (+0.1498) |
423
+ | 2.7972 | 400 | 0.4443 | 0.5458 | 0.6543 (+0.1139) | 0.4371 (+0.1121) | 0.7327 (+0.2320) | 0.6080 (+0.1527) |
424
+ | 2.9720 | 425 | 0.4459 | 0.5625 | 0.6574 (+0.1170) | 0.4399 (+0.1148) | 0.7603 (+0.2597) | 0.6192 (+0.1638) |
425
+ | 3.1469 | 450 | 0.3961 | 0.5779 | 0.6640 (+0.1236) | 0.4345 (+0.1095) | 0.7411 (+0.2404) | 0.6132 (+0.1578) |
426
+ | 3.3217 | 475 | 0.4088 | 0.5492 | 0.6557 (+0.1152) | 0.4383 (+0.1133) | 0.7413 (+0.2406) | 0.6117 (+0.1564) |
427
+ | **3.4965** | **500** | **0.4219** | **0.5349** | **0.6504 (+0.1100)** | **0.4385 (+0.1135)** | **0.7263 (+0.2257)** | **0.6051 (+0.1497)** |
428
+ | 3.6713 | 525 | 0.4024 | 0.5885 | 0.6575 (+0.1170) | 0.4327 (+0.1076) | 0.7326 (+0.2320) | 0.6076 (+0.1522) |
429
+ | 3.8462 | 550 | 0.4180 | 0.5795 | 0.6504 (+0.1100) | 0.4323 (+0.1073) | 0.7256 (+0.2250) | 0.6028 (+0.1474) |
430
+ | 4.0210 | 575 | 0.3951 | 0.5594 | 0.6534 (+0.1130) | 0.4312 (+0.1062) | 0.7268 (+0.2262) | 0.6038 (+0.1484) |
431
+ | 4.1958 | 600 | 0.3958 | 0.5825 | 0.6482 (+0.1077) | 0.4323 (+0.1072) | 0.7342 (+0.2335) | 0.6049 (+0.1495) |
432
+ | 4.3706 | 625 | 0.4124 | 0.5635 | 0.6455 (+0.1051) | 0.4241 (+0.0990) | 0.7349 (+0.2343) | 0.6015 (+0.1461) |
433
+ | 4.5455 | 650 | 0.3802 | 0.5721 | 0.6583 (+0.1179) | 0.4300 (+0.1050) | 0.7244 (+0.2238) | 0.6043 (+0.1489) |
434
+ | 4.7203 | 675 | 0.3712 | 0.5446 | 0.6484 (+0.1079) | 0.4237 (+0.0986) | 0.7248 (+0.2242) | 0.5990 (+0.1436) |
435
+ | 4.8951 | 700 | 0.3730 | 0.5759 | 0.6578 (+0.1174) | 0.4370 (+0.1120) | 0.7466 (+0.2460) | 0.6138 (+0.1584) |
436
+ | 5.0699 | 725 | 0.3743 | 0.5644 | 0.6629 (+0.1225) | 0.4373 (+0.1122) | 0.7245 (+0.2238) | 0.6082 (+0.1529) |
437
+ | 5.2448 | 750 | 0.3398 | 0.5932 | 0.6518 (+0.1113) | 0.4234 (+0.0984) | 0.7292 (+0.2286) | 0.6015 (+0.1461) |
438
+ | 5.4196 | 775 | 0.3748 | 0.5749 | 0.6518 (+0.1113) | 0.4203 (+0.0952) | 0.7318 (+0.2312) | 0.6013 (+0.1459) |
439
+ | 5.5944 | 800 | 0.3585 | 0.5888 | 0.6480 (+0.1076) | 0.4038 (+0.0788) | 0.7268 (+0.2261) | 0.5929 (+0.1375) |
440
+ | 5.7692 | 825 | 0.3598 | 0.5709 | 0.6375 (+0.0971) | 0.4110 (+0.0860) | 0.7289 (+0.2283) | 0.5925 (+0.1371) |
441
+ | 5.9441 | 850 | 0.3743 | 0.5938 | 0.6415 (+0.1011) | 0.4244 (+0.0994) | 0.7268 (+0.2261) | 0.5976 (+0.1422) |
442
+ | 6.1189 | 875 | 0.3408 | 0.6177 | 0.6413 (+0.1009) | 0.4212 (+0.0962) | 0.7268 (+0.2261) | 0.5964 (+0.1411) |
443
+ | 6.2937 | 900 | 0.3300 | 0.5780 | 0.6410 (+0.1006) | 0.4278 (+0.1028) | 0.7268 (+0.2261) | 0.5985 (+0.1432) |
444
+ | 6.4685 | 925 | 0.3669 | 0.5930 | 0.6458 (+0.1053) | 0.4337 (+0.1087) | 0.7437 (+0.2431) | 0.6077 (+0.1524) |
445
+ | 6.6434 | 950 | 0.3542 | 0.5881 | 0.6458 (+0.1053) | 0.4355 (+0.1104) | 0.7300 (+0.2294) | 0.6037 (+0.1484) |
446
+ | 6.8182 | 975 | 0.3557 | 0.5747 | 0.6458 (+0.1053) | 0.4176 (+0.0925) | 0.7279 (+0.2272) | 0.5971 (+0.1417) |
447
+ | 6.9930 | 1000 | 0.3621 | 0.5544 | 0.6406 (+0.1002) | 0.4185 (+0.0934) | 0.7268 (+0.2261) | 0.5953 (+0.1399) |
448
+ | 7.1678 | 1025 | 0.3274 | 0.5899 | 0.6406 (+0.1002) | 0.4162 (+0.0912) | 0.7308 (+0.2301) | 0.5959 (+0.1405) |
449
+ | 7.3427 | 1050 | 0.3346 | 0.5836 | 0.6406 (+0.1002) | 0.4148 (+0.0898) | 0.7272 (+0.2266) | 0.5942 (+0.1389) |
450
+ | 7.5175 | 1075 | 0.3307 | 0.5834 | 0.6480 (+0.1076) | 0.4169 (+0.0918) | 0.7361 (+0.2355) | 0.6003 (+0.1450) |
451
+ | 7.6923 | 1100 | 0.3376 | 0.5779 | 0.6480 (+0.1076) | 0.4149 (+0.0899) | 0.7299 (+0.2292) | 0.5976 (+0.1422) |
452
+ | 7.8671 | 1125 | 0.3580 | 0.5928 | 0.6484 (+0.1079) | 0.4210 (+0.0960) | 0.7335 (+0.2329) | 0.6010 (+0.1456) |
453
+ | 8.0420 | 1150 | 0.3421 | 0.6072 | 0.6458 (+0.1053) | 0.4286 (+0.1035) | 0.7299 (+0.2293) | 0.6014 (+0.1460) |
454
+ | 8.2168 | 1175 | 0.3434 | 0.5995 | 0.6454 (+0.1050) | 0.4264 (+0.1014) | 0.7309 (+0.2303) | 0.6009 (+0.1455) |
455
+ | 8.3916 | 1200 | 0.3359 | 0.5879 | 0.6484 (+0.1079) | 0.4200 (+0.0950) | 0.7295 (+0.2289) | 0.5993 (+0.1439) |
456
+ | 8.5664 | 1225 | 0.3286 | 0.5959 | 0.6458 (+0.1053) | 0.4165 (+0.0914) | 0.7299 (+0.2292) | 0.5974 (+0.1420) |
457
+ | 8.7413 | 1250 | 0.3390 | 0.6043 | 0.6396 (+0.0992) | 0.4199 (+0.0949) | 0.7340 (+0.2334) | 0.5978 (+0.1425) |
458
+ | 8.9161 | 1275 | 0.3481 | 0.5999 | 0.6400 (+0.0995) | 0.4170 (+0.0919) | 0.7308 (+0.2301) | 0.5959 (+0.1405) |
459
+ | 9.0909 | 1300 | 0.3316 | 0.6179 | 0.6458 (+0.1053) | 0.4237 (+0.0987) | 0.7309 (+0.2303) | 0.6001 (+0.1448) |
460
+ | 9.2657 | 1325 | 0.3398 | 0.6044 | 0.6458 (+0.1053) | 0.4235 (+0.0985) | 0.7308 (+0.2301) | 0.6000 (+0.1447) |
461
+ | 9.4406 | 1350 | 0.3414 | 0.6164 | 0.6458 (+0.1053) | 0.4211 (+0.0961) | 0.7309 (+0.2303) | 0.5993 (+0.1439) |
462
+ | 9.6154 | 1375 | 0.3400 | 0.6056 | 0.6458 (+0.1053) | 0.4212 (+0.0962) | 0.7308 (+0.2301) | 0.5993 (+0.1439) |
463
+ | 9.7902 | 1400 | 0.3101 | 0.6042 | 0.6458 (+0.1053) | 0.4191 (+0.0941) | 0.7309 (+0.2303) | 0.5986 (+0.1432) |
464
+ | 9.9650 | 1425 | 0.3065 | 0.6096 | 0.6458 (+0.1053) | 0.4198 (+0.0948) | 0.7309 (+0.2303) | 0.5988 (+0.1435) |
465
 
466
  * The bold row denotes the saved checkpoint.
467
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71c4fe668cdcb0db3e2f3b286b4bd9f3dc86909ee7c9d13a91566bc714bdc64f
3
  size 598436708
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b0ea459b943b638996ed8be31b732785d92615475f541565a40a6de2c80250e
3
  size 598436708