| { |
| "curves": { |
| "N5_global_ridge": { |
| "math": -0.010817307692307696, |
| "code": 0.026984126984126878, |
| "science": 1.5304878048780488 |
| }, |
| "N12_global_ridge": { |
| "math": -0.03425480769230772, |
| "code": 0.2539682539682539, |
| "science": 2.073170731707317 |
| }, |
| "N12_topk8_global_ridge": { |
| "math": -0.010817307692307696, |
| "code": 0.2738095238095238, |
| "science": 1.3353658536585367 |
| }, |
| "N12_topk12_global_ridge": { |
| "math": -0.018629807692307716, |
| "code": 0.23253968253968244, |
| "science": 1.829268292682927 |
| }, |
| "N16_global_ridge": { |
| "math": 0.027644230769230737, |
| "code": 0.24682539682539684, |
| "science": 1.5670731707317074 |
| }, |
| "N16_topk8_global_ridge": { |
| "math": 0.050480769230769204, |
| "code": 0.22539682539682537, |
| "science": 1.50609756097561 |
| }, |
| "N16_topk12_global_ridge": { |
| "math": 0.03906249999999999, |
| "code": 0.2396825396825396, |
| "science": 2.3170731707317076 |
| } |
| }, |
| "details": { |
| "N5_anchors": [ |
| "gsm8k", |
| "svamp", |
| "multiarith", |
| "aqua_rat", |
| "math_algebra_easy" |
| ], |
| "gsm_hard": { |
| "N5_global_ridge": { |
| "acc": 0.05333333333333334, |
| "gap_recovered": -0.11538461538461542 |
| }, |
| "N12_global_ridge": { |
| "acc": 0.05333333333333334, |
| "gap_recovered": -0.11538461538461542 |
| }, |
| "N12_topk8_global_ridge": { |
| "acc": 0.05333333333333334, |
| "gap_recovered": -0.11538461538461542 |
| }, |
| "N12_topk12_global_ridge": { |
| "acc": 0.05333333333333334, |
| "gap_recovered": -0.11538461538461542 |
| }, |
| "N16_topk12_global_ridge": { |
| "acc": 0.06333333333333334, |
| "gap_recovered": 0.0 |
| } |
| }, |
| "gsm8k_test_500": { |
| "N5_global_ridge": { |
| "acc": 0.1, |
| "gap_recovered": 0.09375000000000003 |
| }, |
| "N12_global_ridge": { |
| "acc": 0.09, |
| "gap_recovered": 0.04687499999999998 |
| }, |
| "N12_topk8_global_ridge": { |
| "acc": 0.1, |
| "gap_recovered": 0.09375000000000003 |
| }, |
| "N12_topk12_global_ridge": { |
| "acc": 0.09666666666666666, |
| "gap_recovered": 0.07812499999999999 |
| }, |
| "N16_topk12_global_ridge": { |
| "acc": 0.09666666666666666, |
| "gap_recovered": 0.07812499999999999 |
| } |
| }, |
| "mbpp_test_held": { |
| "N5_global_ridge": { |
| "acc": 0.24, |
| "gap_recovered": 0.11111111111111091 |
| }, |
| "N12_global_ridge": { |
| "acc": 0.25, |
| "gap_recovered": 0.22222222222222213 |
| }, |
| "N12_topk8_global_ridge": { |
| "acc": 0.26, |
| "gap_recovered": 0.3333333333333333 |
| }, |
| "N12_topk12_global_ridge": { |
| "acc": 0.25, |
| "gap_recovered": 0.22222222222222213 |
| }, |
| "N16_topk12_global_ridge": { |
| "acc": 0.25, |
| "gap_recovered": 0.22222222222222213 |
| } |
| }, |
| "mbpp_plus": { |
| "N5_global_ridge": { |
| "acc": 0.20333333333333334, |
| "gap_recovered": -0.057142857142857155 |
| }, |
| "N12_global_ridge": { |
| "acc": 0.2833333333333333, |
| "gap_recovered": 0.28571428571428564 |
| }, |
| "N12_topk8_global_ridge": { |
| "acc": 0.26666666666666666, |
| "gap_recovered": 0.21428571428571425 |
| }, |
| "N12_topk12_global_ridge": { |
| "acc": 0.2733333333333333, |
| "gap_recovered": 0.24285714285714274 |
| }, |
| "N16_topk12_global_ridge": { |
| "acc": 0.27666666666666667, |
| "gap_recovered": 0.2571428571428571 |
| } |
| }, |
| "arc_challenge": { |
| "N5_global_ridge": { |
| "acc": 0.7357859531772575, |
| "gap_recovered": 3.0 |
| }, |
| "N12_global_ridge": { |
| "acc": 0.7424749163879598, |
| "gap_recovered": 4.0 |
| }, |
| "N12_topk8_global_ridge": { |
| "acc": 0.7324414715719063, |
| "gap_recovered": 2.5 |
| }, |
| "N12_topk12_global_ridge": { |
| "acc": 0.7391304347826086, |
| "gap_recovered": 3.5 |
| }, |
| "N16_topk12_global_ridge": { |
| "acc": 0.745819397993311, |
| "gap_recovered": 4.5 |
| } |
| }, |
| "openbookqa_test": { |
| "N5_global_ridge": { |
| "acc": 0.7266666666666667, |
| "gap_recovered": 0.060975609756097754 |
| }, |
| "N12_global_ridge": { |
| "acc": 0.75, |
| "gap_recovered": 0.14634146341463428 |
| }, |
| "N12_topk8_global_ridge": { |
| "acc": 0.7566666666666667, |
| "gap_recovered": 0.17073170731707346 |
| }, |
| "N12_topk12_global_ridge": { |
| "acc": 0.7533333333333333, |
| "gap_recovered": 0.15853658536585366 |
| }, |
| "N16_topk12_global_ridge": { |
| "acc": 0.7466666666666667, |
| "gap_recovered": 0.1341463414634149 |
| } |
| }, |
| "N12_anchors": [ |
| "gsm8k", |
| "svamp", |
| "multiarith", |
| "aqua_rat", |
| "math_algebra_easy", |
| "math_counting_easy", |
| "mbpp", |
| "humaneval", |
| "mbpp_sanitized", |
| "sciq", |
| "arc_easy", |
| "openbookqa" |
| ], |
| "N16_anchors": [ |
| "gsm8k", |
| "svamp", |
| "multiarith", |
| "aqua_rat", |
| "math_algebra_easy", |
| "math_counting_easy", |
| "mbpp", |
| "humaneval", |
| "mbpp_sanitized", |
| "sciq", |
| "arc_easy", |
| "openbookqa", |
| "medmcqa_easy", |
| "mmlu_elementary_math", |
| "mmlu_high_school_biology", |
| "mmlu_high_school_physics" |
| ] |
| } |
| } |