File size: 5,381 Bytes
574b87a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 | {
"curves": {
"N5_global_ridge": {
"math": -0.010817307692307696,
"code": 0.026984126984126878,
"science": 1.5304878048780488
},
"N12_global_ridge": {
"math": -0.03425480769230772,
"code": 0.2539682539682539,
"science": 2.073170731707317
},
"N12_topk8_global_ridge": {
"math": -0.010817307692307696,
"code": 0.2738095238095238,
"science": 1.3353658536585367
},
"N12_topk12_global_ridge": {
"math": -0.018629807692307716,
"code": 0.23253968253968244,
"science": 1.829268292682927
},
"N16_global_ridge": {
"math": 0.027644230769230737,
"code": 0.24682539682539684,
"science": 1.5670731707317074
},
"N16_topk8_global_ridge": {
"math": 0.050480769230769204,
"code": 0.22539682539682537,
"science": 1.50609756097561
},
"N16_topk12_global_ridge": {
"math": 0.03906249999999999,
"code": 0.2396825396825396,
"science": 2.3170731707317076
}
},
"details": {
"N5_anchors": [
"gsm8k",
"svamp",
"multiarith",
"aqua_rat",
"math_algebra_easy"
],
"gsm_hard": {
"N5_global_ridge": {
"acc": 0.05333333333333334,
"gap_recovered": -0.11538461538461542
},
"N12_global_ridge": {
"acc": 0.05333333333333334,
"gap_recovered": -0.11538461538461542
},
"N12_topk8_global_ridge": {
"acc": 0.05333333333333334,
"gap_recovered": -0.11538461538461542
},
"N12_topk12_global_ridge": {
"acc": 0.05333333333333334,
"gap_recovered": -0.11538461538461542
},
"N16_topk12_global_ridge": {
"acc": 0.06333333333333334,
"gap_recovered": 0.0
}
},
"gsm8k_test_500": {
"N5_global_ridge": {
"acc": 0.1,
"gap_recovered": 0.09375000000000003
},
"N12_global_ridge": {
"acc": 0.09,
"gap_recovered": 0.04687499999999998
},
"N12_topk8_global_ridge": {
"acc": 0.1,
"gap_recovered": 0.09375000000000003
},
"N12_topk12_global_ridge": {
"acc": 0.09666666666666666,
"gap_recovered": 0.07812499999999999
},
"N16_topk12_global_ridge": {
"acc": 0.09666666666666666,
"gap_recovered": 0.07812499999999999
}
},
"mbpp_test_held": {
"N5_global_ridge": {
"acc": 0.24,
"gap_recovered": 0.11111111111111091
},
"N12_global_ridge": {
"acc": 0.25,
"gap_recovered": 0.22222222222222213
},
"N12_topk8_global_ridge": {
"acc": 0.26,
"gap_recovered": 0.3333333333333333
},
"N12_topk12_global_ridge": {
"acc": 0.25,
"gap_recovered": 0.22222222222222213
},
"N16_topk12_global_ridge": {
"acc": 0.25,
"gap_recovered": 0.22222222222222213
}
},
"mbpp_plus": {
"N5_global_ridge": {
"acc": 0.20333333333333334,
"gap_recovered": -0.057142857142857155
},
"N12_global_ridge": {
"acc": 0.2833333333333333,
"gap_recovered": 0.28571428571428564
},
"N12_topk8_global_ridge": {
"acc": 0.26666666666666666,
"gap_recovered": 0.21428571428571425
},
"N12_topk12_global_ridge": {
"acc": 0.2733333333333333,
"gap_recovered": 0.24285714285714274
},
"N16_topk12_global_ridge": {
"acc": 0.27666666666666667,
"gap_recovered": 0.2571428571428571
}
},
"arc_challenge": {
"N5_global_ridge": {
"acc": 0.7357859531772575,
"gap_recovered": 3.0
},
"N12_global_ridge": {
"acc": 0.7424749163879598,
"gap_recovered": 4.0
},
"N12_topk8_global_ridge": {
"acc": 0.7324414715719063,
"gap_recovered": 2.5
},
"N12_topk12_global_ridge": {
"acc": 0.7391304347826086,
"gap_recovered": 3.5
},
"N16_topk12_global_ridge": {
"acc": 0.745819397993311,
"gap_recovered": 4.5
}
},
"openbookqa_test": {
"N5_global_ridge": {
"acc": 0.7266666666666667,
"gap_recovered": 0.060975609756097754
},
"N12_global_ridge": {
"acc": 0.75,
"gap_recovered": 0.14634146341463428
},
"N12_topk8_global_ridge": {
"acc": 0.7566666666666667,
"gap_recovered": 0.17073170731707346
},
"N12_topk12_global_ridge": {
"acc": 0.7533333333333333,
"gap_recovered": 0.15853658536585366
},
"N16_topk12_global_ridge": {
"acc": 0.7466666666666667,
"gap_recovered": 0.1341463414634149
}
},
"N12_anchors": [
"gsm8k",
"svamp",
"multiarith",
"aqua_rat",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"humaneval",
"mbpp_sanitized",
"sciq",
"arc_easy",
"openbookqa"
],
"N16_anchors": [
"gsm8k",
"svamp",
"multiarith",
"aqua_rat",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"humaneval",
"mbpp_sanitized",
"sciq",
"arc_easy",
"openbookqa",
"medmcqa_easy",
"mmlu_elementary_math",
"mmlu_high_school_biology",
"mmlu_high_school_physics"
]
}
} |