Update auto_leaderboard_scores.json
Browse files- auto_leaderboard_scores.json +0 -416
auto_leaderboard_scores.json
CHANGED
|
@@ -26,58 +26,6 @@
|
|
| 26 |
"contradiction": 21.208054807166327
|
| 27 |
}
|
| 28 |
},
|
| 29 |
-
"Titan Text G1 Express v1": {
|
| 30 |
-
"nq": {
|
| 31 |
-
"abstain": 0.0,
|
| 32 |
-
"entailment": 23.616403565474773,
|
| 33 |
-
"neutral": 33.73074579359409,
|
| 34 |
-
"contradiction": 42.652850640931135
|
| 35 |
-
},
|
| 36 |
-
"msmarco": {
|
| 37 |
-
"abstain": 0.0,
|
| 38 |
-
"entailment": 80.10782412032412,
|
| 39 |
-
"neutral": 13.232652070152067,
|
| 40 |
-
"contradiction": 6.65952380952381
|
| 41 |
-
},
|
| 42 |
-
"dolly": {
|
| 43 |
-
"abstain": 6.0,
|
| 44 |
-
"entailment": 81.74334530717509,
|
| 45 |
-
"neutral": 11.292083976946202,
|
| 46 |
-
"contradiction": 6.964570715878704
|
| 47 |
-
},
|
| 48 |
-
"avg": {
|
| 49 |
-
"abstain": 2.0,
|
| 50 |
-
"entailment": 61.41597696413044,
|
| 51 |
-
"neutral": 19.58433904832503,
|
| 52 |
-
"contradiction": 18.999683987544532
|
| 53 |
-
}
|
| 54 |
-
},
|
| 55 |
-
"Titan Text G1 Lite v1": {
|
| 56 |
-
"nq": {
|
| 57 |
-
"abstain": 6.0,
|
| 58 |
-
"entailment": 14.60152436216266,
|
| 59 |
-
"neutral": 25.686193239384725,
|
| 60 |
-
"contradiction": 59.71228239845261
|
| 61 |
-
},
|
| 62 |
-
"msmarco": {
|
| 63 |
-
"abstain": 1.0,
|
| 64 |
-
"entailment": 71.78647951375224,
|
| 65 |
-
"neutral": 12.484804757532029,
|
| 66 |
-
"contradiction": 15.72871572871573
|
| 67 |
-
},
|
| 68 |
-
"dolly": {
|
| 69 |
-
"abstain": 2.0,
|
| 70 |
-
"entailment": 81.6472303206997,
|
| 71 |
-
"neutral": 10.782312925170068,
|
| 72 |
-
"contradiction": 7.570456754130224
|
| 73 |
-
},
|
| 74 |
-
"avg": {
|
| 75 |
-
"abstain": 3.0,
|
| 76 |
-
"entailment": 56.63516609392898,
|
| 77 |
-
"neutral": 16.175823031493135,
|
| 78 |
-
"contradiction": 27.189010874577885
|
| 79 |
-
}
|
| 80 |
-
},
|
| 81 |
"Baichuan 2 13B Chat": {
|
| 82 |
"nq": {
|
| 83 |
"abstain": 0.0,
|
|
@@ -548,58 +496,6 @@
|
|
| 548 |
"contradiction": 26.213064820601584
|
| 549 |
}
|
| 550 |
},
|
| 551 |
-
"Titan Text G1 Express v1": {
|
| 552 |
-
"nq": {
|
| 553 |
-
"abstain": 0.0,
|
| 554 |
-
"entailment": 23.64833917372617,
|
| 555 |
-
"neutral": 22.780768483322664,
|
| 556 |
-
"contradiction": 53.57089234295117
|
| 557 |
-
},
|
| 558 |
-
"msmarco": {
|
| 559 |
-
"abstain": 0.0,
|
| 560 |
-
"entailment": 70.3556956931957,
|
| 561 |
-
"neutral": 10.942121767121767,
|
| 562 |
-
"contradiction": 18.70218253968254
|
| 563 |
-
},
|
| 564 |
-
"dolly": {
|
| 565 |
-
"abstain": 6.0,
|
| 566 |
-
"entailment": 81.150924586469,
|
| 567 |
-
"neutral": 11.030085021293294,
|
| 568 |
-
"contradiction": 7.818990392237691
|
| 569 |
-
},
|
| 570 |
-
"avg": {
|
| 571 |
-
"abstain": 2.0,
|
| 572 |
-
"entailment": 57.92037550279006,
|
| 573 |
-
"neutral": 14.99699665661909,
|
| 574 |
-
"contradiction": 27.082627840590863
|
| 575 |
-
}
|
| 576 |
-
},
|
| 577 |
-
"Titan Text G1 Lite v1": {
|
| 578 |
-
"nq": {
|
| 579 |
-
"abstain": 6.0,
|
| 580 |
-
"entailment": 18.492735101777654,
|
| 581 |
-
"neutral": 16.707538914985722,
|
| 582 |
-
"contradiction": 64.79972598323661
|
| 583 |
-
},
|
| 584 |
-
"msmarco": {
|
| 585 |
-
"abstain": 1.0,
|
| 586 |
-
"entailment": 69.10369933097206,
|
| 587 |
-
"neutral": 6.52400629673357,
|
| 588 |
-
"contradiction": 24.37229437229437
|
| 589 |
-
},
|
| 590 |
-
"dolly": {
|
| 591 |
-
"abstain": 2.0,
|
| 592 |
-
"entailment": 80.44804826692582,
|
| 593 |
-
"neutral": 11.110301263362489,
|
| 594 |
-
"contradiction": 8.441650469711695
|
| 595 |
-
},
|
| 596 |
-
"avg": {
|
| 597 |
-
"abstain": 3.0,
|
| 598 |
-
"entailment": 56.57557410134729,
|
| 599 |
-
"neutral": 11.358057749810326,
|
| 600 |
-
"contradiction": 32.06636814884237
|
| 601 |
-
}
|
| 602 |
-
},
|
| 603 |
"Baichuan 2 13B Chat": {
|
| 604 |
"nq": {
|
| 605 |
"abstain": 0.0,
|
|
@@ -1070,58 +966,6 @@
|
|
| 1070 |
"contradiction": 13.131321123049064
|
| 1071 |
}
|
| 1072 |
},
|
| 1073 |
-
"Titan Text G1 Express v1": {
|
| 1074 |
-
"nq": {
|
| 1075 |
-
"abstain": 0.0,
|
| 1076 |
-
"entailment": 38.18513460223987,
|
| 1077 |
-
"neutral": 38.709908512540096,
|
| 1078 |
-
"contradiction": 23.10495688522004
|
| 1079 |
-
},
|
| 1080 |
-
"msmarco": {
|
| 1081 |
-
"abstain": 0.0,
|
| 1082 |
-
"entailment": 93.364942002442,
|
| 1083 |
-
"neutral": 2.330296092796093,
|
| 1084 |
-
"contradiction": 4.304761904761905
|
| 1085 |
-
},
|
| 1086 |
-
"dolly": {
|
| 1087 |
-
"abstain": 6.0,
|
| 1088 |
-
"entailment": 90.88652482269502,
|
| 1089 |
-
"neutral": 5.5673758865248235,
|
| 1090 |
-
"contradiction": 3.546099290780141
|
| 1091 |
-
},
|
| 1092 |
-
"avg": {
|
| 1093 |
-
"abstain": 2.0,
|
| 1094 |
-
"entailment": 73.80388093129768,
|
| 1095 |
-
"neutral": 15.739298618595074,
|
| 1096 |
-
"contradiction": 10.456820450107237
|
| 1097 |
-
}
|
| 1098 |
-
},
|
| 1099 |
-
"Titan Text G1 Lite v1": {
|
| 1100 |
-
"nq": {
|
| 1101 |
-
"abstain": 6.0,
|
| 1102 |
-
"entailment": 35.155487243253205,
|
| 1103 |
-
"neutral": 35.65182601086857,
|
| 1104 |
-
"contradiction": 29.192686745878234
|
| 1105 |
-
},
|
| 1106 |
-
"msmarco": {
|
| 1107 |
-
"abstain": 1.0,
|
| 1108 |
-
"entailment": 91.78166950894224,
|
| 1109 |
-
"neutral": 2.8792251519524243,
|
| 1110 |
-
"contradiction": 5.339105339105339
|
| 1111 |
-
},
|
| 1112 |
-
"dolly": {
|
| 1113 |
-
"abstain": 2.0,
|
| 1114 |
-
"entailment": 90.41241496598639,
|
| 1115 |
-
"neutral": 4.591836734693878,
|
| 1116 |
-
"contradiction": 4.995748299319728
|
| 1117 |
-
},
|
| 1118 |
-
"avg": {
|
| 1119 |
-
"abstain": 3.0,
|
| 1120 |
-
"entailment": 73.02892697222593,
|
| 1121 |
-
"neutral": 14.042319364484312,
|
| 1122 |
-
"contradiction": 12.928753663289747
|
| 1123 |
-
}
|
| 1124 |
-
},
|
| 1125 |
"Baichuan 2 13B Chat": {
|
| 1126 |
"nq": {
|
| 1127 |
"abstain": 0.0,
|
|
@@ -1592,58 +1436,6 @@
|
|
| 1592 |
"contradiction": 19.547659968855623
|
| 1593 |
}
|
| 1594 |
},
|
| 1595 |
-
"Titan Text G1 Express v1": {
|
| 1596 |
-
"nq": {
|
| 1597 |
-
"abstain": 4.0,
|
| 1598 |
-
"entailment": 25.518216124833774,
|
| 1599 |
-
"neutral": 32.236181404379934,
|
| 1600 |
-
"contradiction": 42.245602470786295
|
| 1601 |
-
},
|
| 1602 |
-
"msmarco": {
|
| 1603 |
-
"abstain": 8.0,
|
| 1604 |
-
"entailment": 85.18049488015163,
|
| 1605 |
-
"neutral": 11.810062871275685,
|
| 1606 |
-
"contradiction": 3.009442248572683
|
| 1607 |
-
},
|
| 1608 |
-
"dolly": {
|
| 1609 |
-
"abstain": 10.0,
|
| 1610 |
-
"entailment": 83.16915880683996,
|
| 1611 |
-
"neutral": 8.4975078598267,
|
| 1612 |
-
"contradiction": 8.333333333333332
|
| 1613 |
-
},
|
| 1614 |
-
"avg": {
|
| 1615 |
-
"abstain": 7.333333333333333,
|
| 1616 |
-
"entailment": 63.92654161717118,
|
| 1617 |
-
"neutral": 17.791276641590787,
|
| 1618 |
-
"contradiction": 18.28218174123802
|
| 1619 |
-
}
|
| 1620 |
-
},
|
| 1621 |
-
"Titan Text G1 Lite v1": {
|
| 1622 |
-
"nq": {
|
| 1623 |
-
"abstain": 10.0,
|
| 1624 |
-
"entailment": 22.08774250440917,
|
| 1625 |
-
"neutral": 24.67680776014109,
|
| 1626 |
-
"contradiction": 53.235449735449734
|
| 1627 |
-
},
|
| 1628 |
-
"msmarco": {
|
| 1629 |
-
"abstain": 17.0,
|
| 1630 |
-
"entailment": 74.75170523363295,
|
| 1631 |
-
"neutral": 11.381717345572765,
|
| 1632 |
-
"contradiction": 13.866577420794288
|
| 1633 |
-
},
|
| 1634 |
-
"dolly": {
|
| 1635 |
-
"abstain": 11.0,
|
| 1636 |
-
"entailment": 86.57580981110392,
|
| 1637 |
-
"neutral": 5.223758763084606,
|
| 1638 |
-
"contradiction": 8.200431425811464
|
| 1639 |
-
},
|
| 1640 |
-
"avg": {
|
| 1641 |
-
"abstain": 12.666666666666668,
|
| 1642 |
-
"entailment": 60.677616156399274,
|
| 1643 |
-
"neutral": 13.856907511487664,
|
| 1644 |
-
"contradiction": 25.465476332113063
|
| 1645 |
-
}
|
| 1646 |
-
},
|
| 1647 |
"Baichuan 2 13B Chat": {
|
| 1648 |
"nq": {
|
| 1649 |
"abstain": 0.0,
|
|
@@ -2114,58 +1906,6 @@
|
|
| 2114 |
"contradiction": 25.516629695018445
|
| 2115 |
}
|
| 2116 |
},
|
| 2117 |
-
"Titan Text G1 Express v1": {
|
| 2118 |
-
"nq": {
|
| 2119 |
-
"abstain": 4.0,
|
| 2120 |
-
"entailment": 28.406915796621675,
|
| 2121 |
-
"neutral": 20.81505929391959,
|
| 2122 |
-
"contradiction": 50.77802490945874
|
| 2123 |
-
},
|
| 2124 |
-
"msmarco": {
|
| 2125 |
-
"abstain": 8.0,
|
| 2126 |
-
"entailment": 77.88617588010172,
|
| 2127 |
-
"neutral": 5.7130932082978125,
|
| 2128 |
-
"contradiction": 16.400730911600476
|
| 2129 |
-
},
|
| 2130 |
-
"dolly": {
|
| 2131 |
-
"abstain": 10.0,
|
| 2132 |
-
"entailment": 86.35307875162948,
|
| 2133 |
-
"neutral": 7.355915957365234,
|
| 2134 |
-
"contradiction": 6.29100529100529
|
| 2135 |
-
},
|
| 2136 |
-
"avg": {
|
| 2137 |
-
"abstain": 7.333333333333333,
|
| 2138 |
-
"entailment": 63.540896349250694,
|
| 2139 |
-
"neutral": 11.460009724973203,
|
| 2140 |
-
"contradiction": 24.99909392577611
|
| 2141 |
-
}
|
| 2142 |
-
},
|
| 2143 |
-
"Titan Text G1 Lite v1": {
|
| 2144 |
-
"nq": {
|
| 2145 |
-
"abstain": 10.0,
|
| 2146 |
-
"entailment": 23.430335097001763,
|
| 2147 |
-
"neutral": 21.258898508898508,
|
| 2148 |
-
"contradiction": 55.31076639409973
|
| 2149 |
-
},
|
| 2150 |
-
"msmarco": {
|
| 2151 |
-
"abstain": 17.0,
|
| 2152 |
-
"entailment": 70.50138733715144,
|
| 2153 |
-
"neutral": 6.116512834965594,
|
| 2154 |
-
"contradiction": 23.382099827882964
|
| 2155 |
-
},
|
| 2156 |
-
"dolly": {
|
| 2157 |
-
"abstain": 11.0,
|
| 2158 |
-
"entailment": 88.25299544400667,
|
| 2159 |
-
"neutral": 5.309378511625702,
|
| 2160 |
-
"contradiction": 6.437626044367617
|
| 2161 |
-
},
|
| 2162 |
-
"avg": {
|
| 2163 |
-
"abstain": 12.666666666666668,
|
| 2164 |
-
"entailment": 60.36206832912336,
|
| 2165 |
-
"neutral": 11.043916483349992,
|
| 2166 |
-
"contradiction": 28.59401518752664
|
| 2167 |
-
}
|
| 2168 |
-
},
|
| 2169 |
"Baichuan 2 13B Chat": {
|
| 2170 |
"nq": {
|
| 2171 |
"abstain": 0.0,
|
|
@@ -2636,58 +2376,6 @@
|
|
| 2636 |
"contradiction": 10.995866050213877
|
| 2637 |
}
|
| 2638 |
},
|
| 2639 |
-
"Titan Text G1 Express v1": {
|
| 2640 |
-
"nq": {
|
| 2641 |
-
"abstain": 4.0,
|
| 2642 |
-
"entailment": 46.030861839685365,
|
| 2643 |
-
"neutral": 32.79050793297117,
|
| 2644 |
-
"contradiction": 21.178630227343465
|
| 2645 |
-
},
|
| 2646 |
-
"msmarco": {
|
| 2647 |
-
"abstain": 8.0,
|
| 2648 |
-
"entailment": 96.63619894598156,
|
| 2649 |
-
"neutral": 1.8568840579710144,
|
| 2650 |
-
"contradiction": 1.5069169960474307
|
| 2651 |
-
},
|
| 2652 |
-
"dolly": {
|
| 2653 |
-
"abstain": 10.0,
|
| 2654 |
-
"entailment": 90.24074074074075,
|
| 2655 |
-
"neutral": 5.462962962962963,
|
| 2656 |
-
"contradiction": 4.296296296296297
|
| 2657 |
-
},
|
| 2658 |
-
"avg": {
|
| 2659 |
-
"abstain": 7.333333333333333,
|
| 2660 |
-
"entailment": 77.0905025406718,
|
| 2661 |
-
"neutral": 13.70643439411954,
|
| 2662 |
-
"contradiction": 9.203063065208644
|
| 2663 |
-
}
|
| 2664 |
-
},
|
| 2665 |
-
"Titan Text G1 Lite v1": {
|
| 2666 |
-
"nq": {
|
| 2667 |
-
"abstain": 10.0,
|
| 2668 |
-
"entailment": 45.43614718614718,
|
| 2669 |
-
"neutral": 29.498356581689915,
|
| 2670 |
-
"contradiction": 25.0654962321629
|
| 2671 |
-
},
|
| 2672 |
-
"msmarco": {
|
| 2673 |
-
"abstain": 17.0,
|
| 2674 |
-
"entailment": 94.87951807228916,
|
| 2675 |
-
"neutral": 2.4096385542168677,
|
| 2676 |
-
"contradiction": 2.710843373493976
|
| 2677 |
-
},
|
| 2678 |
-
"dolly": {
|
| 2679 |
-
"abstain": 11.0,
|
| 2680 |
-
"entailment": 92.71089709291957,
|
| 2681 |
-
"neutral": 3.8202247191011236,
|
| 2682 |
-
"contradiction": 3.4688781879793114
|
| 2683 |
-
},
|
| 2684 |
-
"avg": {
|
| 2685 |
-
"abstain": 12.666666666666668,
|
| 2686 |
-
"entailment": 77.15848506879041,
|
| 2687 |
-
"neutral": 12.194091955542337,
|
| 2688 |
-
"contradiction": 10.647422975667249
|
| 2689 |
-
}
|
| 2690 |
-
},
|
| 2691 |
"Baichuan 2 13B Chat": {
|
| 2692 |
"nq": {
|
| 2693 |
"abstain": 0.0,
|
|
@@ -3158,58 +2846,6 @@
|
|
| 3158 |
"contradiction": 19.72008626343798
|
| 3159 |
}
|
| 3160 |
},
|
| 3161 |
-
"Titan Text G1 Express v1": {
|
| 3162 |
-
"nq": {
|
| 3163 |
-
"abstain": 0.0,
|
| 3164 |
-
"entailment": 24.58781446451725,
|
| 3165 |
-
"neutral": 32.03157099178771,
|
| 3166 |
-
"contradiction": 43.38061454369504
|
| 3167 |
-
},
|
| 3168 |
-
"msmarco": {
|
| 3169 |
-
"abstain": 0.0,
|
| 3170 |
-
"entailment": 83.52148962148962,
|
| 3171 |
-
"neutral": 11.197557997557997,
|
| 3172 |
-
"contradiction": 5.280952380952381
|
| 3173 |
-
},
|
| 3174 |
-
"dolly": {
|
| 3175 |
-
"abstain": 6.0,
|
| 3176 |
-
"entailment": 85.29957631021462,
|
| 3177 |
-
"neutral": 9.932690550932614,
|
| 3178 |
-
"contradiction": 4.767733138852776
|
| 3179 |
-
},
|
| 3180 |
-
"avg": {
|
| 3181 |
-
"abstain": 2.0,
|
| 3182 |
-
"entailment": 64.04452578830224,
|
| 3183 |
-
"neutral": 17.879543573885158,
|
| 3184 |
-
"contradiction": 18.075930637812597
|
| 3185 |
-
}
|
| 3186 |
-
},
|
| 3187 |
-
"Titan Text G1 Lite v1": {
|
| 3188 |
-
"nq": {
|
| 3189 |
-
"abstain": 6.0,
|
| 3190 |
-
"entailment": 16.618368333793864,
|
| 3191 |
-
"neutral": 23.308291885419543,
|
| 3192 |
-
"contradiction": 60.0733397807866
|
| 3193 |
-
},
|
| 3194 |
-
"msmarco": {
|
| 3195 |
-
"abstain": 1.0,
|
| 3196 |
-
"entailment": 78.77301150028423,
|
| 3197 |
-
"neutral": 9.707026979754252,
|
| 3198 |
-
"contradiction": 11.519961519961521
|
| 3199 |
-
},
|
| 3200 |
-
"dolly": {
|
| 3201 |
-
"abstain": 2.0,
|
| 3202 |
-
"entailment": 84.82932458697765,
|
| 3203 |
-
"neutral": 9.591836734693878,
|
| 3204 |
-
"contradiction": 5.578838678328474
|
| 3205 |
-
},
|
| 3206 |
-
"avg": {
|
| 3207 |
-
"abstain": 3.0,
|
| 3208 |
-
"entailment": 60.735149729995086,
|
| 3209 |
-
"neutral": 14.06177013135776,
|
| 3210 |
-
"contradiction": 25.20308013864715
|
| 3211 |
-
}
|
| 3212 |
-
},
|
| 3213 |
"Baichuan 2 13B Chat": {
|
| 3214 |
"nq": {
|
| 3215 |
"abstain": 0.0,
|
|
@@ -3680,58 +3316,6 @@
|
|
| 3680 |
"contradiction": 19.50263755970278
|
| 3681 |
}
|
| 3682 |
},
|
| 3683 |
-
"Titan Text G1 Express v1": {
|
| 3684 |
-
"nq": {
|
| 3685 |
-
"abstain": 4.0,
|
| 3686 |
-
"entailment": 27.965951532128003,
|
| 3687 |
-
"neutral": 29.802913589678298,
|
| 3688 |
-
"contradiction": 42.2311348781937
|
| 3689 |
-
},
|
| 3690 |
-
"msmarco": {
|
| 3691 |
-
"abstain": 8.0,
|
| 3692 |
-
"entailment": 89.74829819938516,
|
| 3693 |
-
"neutral": 7.498902064119455,
|
| 3694 |
-
"contradiction": 2.7527997364953887
|
| 3695 |
-
},
|
| 3696 |
-
"dolly": {
|
| 3697 |
-
"abstain": 10.0,
|
| 3698 |
-
"entailment": 89.23491296679703,
|
| 3699 |
-
"neutral": 4.997891266007208,
|
| 3700 |
-
"contradiction": 5.7671957671957665
|
| 3701 |
-
},
|
| 3702 |
-
"avg": {
|
| 3703 |
-
"abstain": 7.333333333333333,
|
| 3704 |
-
"entailment": 68.24718326776782,
|
| 3705 |
-
"neutral": 14.391327008808469,
|
| 3706 |
-
"contradiction": 17.361489723423706
|
| 3707 |
-
}
|
| 3708 |
-
},
|
| 3709 |
-
"Titan Text G1 Lite v1": {
|
| 3710 |
-
"nq": {
|
| 3711 |
-
"abstain": 10.0,
|
| 3712 |
-
"entailment": 23.19885361552028,
|
| 3713 |
-
"neutral": 23.70037678371012,
|
| 3714 |
-
"contradiction": 53.1007696007696
|
| 3715 |
-
},
|
| 3716 |
-
"msmarco": {
|
| 3717 |
-
"abstain": 17.0,
|
| 3718 |
-
"entailment": 80.72448524255755,
|
| 3719 |
-
"neutral": 8.119780710142155,
|
| 3720 |
-
"contradiction": 11.155734047300312
|
| 3721 |
-
},
|
| 3722 |
-
"dolly": {
|
| 3723 |
-
"abstain": 11.0,
|
| 3724 |
-
"entailment": 90.19227339452057,
|
| 3725 |
-
"neutral": 5.230000960338038,
|
| 3726 |
-
"contradiction": 4.577725645141375
|
| 3727 |
-
},
|
| 3728 |
-
"avg": {
|
| 3729 |
-
"abstain": 12.666666666666668,
|
| 3730 |
-
"entailment": 64.17992913221921,
|
| 3731 |
-
"neutral": 12.490251125747307,
|
| 3732 |
-
"contradiction": 23.329819742033482
|
| 3733 |
-
}
|
| 3734 |
-
},
|
| 3735 |
"Baichuan 2 13B Chat": {
|
| 3736 |
"nq": {
|
| 3737 |
"abstain": 0.0,
|
|
|
|
| 26 |
"contradiction": 21.208054807166327
|
| 27 |
}
|
| 28 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
"Baichuan 2 13B Chat": {
|
| 30 |
"nq": {
|
| 31 |
"abstain": 0.0,
|
|
|
|
| 496 |
"contradiction": 26.213064820601584
|
| 497 |
}
|
| 498 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 499 |
"Baichuan 2 13B Chat": {
|
| 500 |
"nq": {
|
| 501 |
"abstain": 0.0,
|
|
|
|
| 966 |
"contradiction": 13.131321123049064
|
| 967 |
}
|
| 968 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 969 |
"Baichuan 2 13B Chat": {
|
| 970 |
"nq": {
|
| 971 |
"abstain": 0.0,
|
|
|
|
| 1436 |
"contradiction": 19.547659968855623
|
| 1437 |
}
|
| 1438 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1439 |
"Baichuan 2 13B Chat": {
|
| 1440 |
"nq": {
|
| 1441 |
"abstain": 0.0,
|
|
|
|
| 1906 |
"contradiction": 25.516629695018445
|
| 1907 |
}
|
| 1908 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1909 |
"Baichuan 2 13B Chat": {
|
| 1910 |
"nq": {
|
| 1911 |
"abstain": 0.0,
|
|
|
|
| 2376 |
"contradiction": 10.995866050213877
|
| 2377 |
}
|
| 2378 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2379 |
"Baichuan 2 13B Chat": {
|
| 2380 |
"nq": {
|
| 2381 |
"abstain": 0.0,
|
|
|
|
| 2846 |
"contradiction": 19.72008626343798
|
| 2847 |
}
|
| 2848 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2849 |
"Baichuan 2 13B Chat": {
|
| 2850 |
"nq": {
|
| 2851 |
"abstain": 0.0,
|
|
|
|
| 3316 |
"contradiction": 19.50263755970278
|
| 3317 |
}
|
| 3318 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3319 |
"Baichuan 2 13B Chat": {
|
| 3320 |
"nq": {
|
| 3321 |
"abstain": 0.0,
|