{ "curves": { "N5_global_ridge": { "math": -0.010817307692307696, "code": 0.026984126984126878, "science": 1.5304878048780488 }, "N12_global_ridge": { "math": -0.03425480769230772, "code": 0.2539682539682539, "science": 2.073170731707317 }, "N12_topk8_global_ridge": { "math": -0.010817307692307696, "code": 0.2738095238095238, "science": 1.3353658536585367 }, "N12_topk12_global_ridge": { "math": -0.018629807692307716, "code": 0.23253968253968244, "science": 1.829268292682927 }, "N16_global_ridge": { "math": 0.027644230769230737, "code": 0.24682539682539684, "science": 1.5670731707317074 }, "N16_topk8_global_ridge": { "math": 0.050480769230769204, "code": 0.22539682539682537, "science": 1.50609756097561 }, "N16_topk12_global_ridge": { "math": 0.03906249999999999, "code": 0.2396825396825396, "science": 2.3170731707317076 } }, "details": { "N5_anchors": [ "gsm8k", "svamp", "multiarith", "aqua_rat", "math_algebra_easy" ], "gsm_hard": { "N5_global_ridge": { "acc": 0.05333333333333334, "gap_recovered": -0.11538461538461542 }, "N12_global_ridge": { "acc": 0.05333333333333334, "gap_recovered": -0.11538461538461542 }, "N12_topk8_global_ridge": { "acc": 0.05333333333333334, "gap_recovered": -0.11538461538461542 }, "N12_topk12_global_ridge": { "acc": 0.05333333333333334, "gap_recovered": -0.11538461538461542 }, "N16_topk12_global_ridge": { "acc": 0.06333333333333334, "gap_recovered": 0.0 } }, "gsm8k_test_500": { "N5_global_ridge": { "acc": 0.1, "gap_recovered": 0.09375000000000003 }, "N12_global_ridge": { "acc": 0.09, "gap_recovered": 0.04687499999999998 }, "N12_topk8_global_ridge": { "acc": 0.1, "gap_recovered": 0.09375000000000003 }, "N12_topk12_global_ridge": { "acc": 0.09666666666666666, "gap_recovered": 0.07812499999999999 }, "N16_topk12_global_ridge": { "acc": 0.09666666666666666, "gap_recovered": 0.07812499999999999 } }, "mbpp_test_held": { "N5_global_ridge": { "acc": 0.24, "gap_recovered": 0.11111111111111091 }, "N12_global_ridge": { "acc": 0.25, "gap_recovered": 0.22222222222222213 }, "N12_topk8_global_ridge": { "acc": 0.26, "gap_recovered": 0.3333333333333333 }, "N12_topk12_global_ridge": { "acc": 0.25, "gap_recovered": 0.22222222222222213 }, "N16_topk12_global_ridge": { "acc": 0.25, "gap_recovered": 0.22222222222222213 } }, "mbpp_plus": { "N5_global_ridge": { "acc": 0.20333333333333334, "gap_recovered": -0.057142857142857155 }, "N12_global_ridge": { "acc": 0.2833333333333333, "gap_recovered": 0.28571428571428564 }, "N12_topk8_global_ridge": { "acc": 0.26666666666666666, "gap_recovered": 0.21428571428571425 }, "N12_topk12_global_ridge": { "acc": 0.2733333333333333, "gap_recovered": 0.24285714285714274 }, "N16_topk12_global_ridge": { "acc": 0.27666666666666667, "gap_recovered": 0.2571428571428571 } }, "arc_challenge": { "N5_global_ridge": { "acc": 0.7357859531772575, "gap_recovered": 3.0 }, "N12_global_ridge": { "acc": 0.7424749163879598, "gap_recovered": 4.0 }, "N12_topk8_global_ridge": { "acc": 0.7324414715719063, "gap_recovered": 2.5 }, "N12_topk12_global_ridge": { "acc": 0.7391304347826086, "gap_recovered": 3.5 }, "N16_topk12_global_ridge": { "acc": 0.745819397993311, "gap_recovered": 4.5 } }, "openbookqa_test": { "N5_global_ridge": { "acc": 0.7266666666666667, "gap_recovered": 0.060975609756097754 }, "N12_global_ridge": { "acc": 0.75, "gap_recovered": 0.14634146341463428 }, "N12_topk8_global_ridge": { "acc": 0.7566666666666667, "gap_recovered": 0.17073170731707346 }, "N12_topk12_global_ridge": { "acc": 0.7533333333333333, "gap_recovered": 0.15853658536585366 }, "N16_topk12_global_ridge": { "acc": 0.7466666666666667, "gap_recovered": 0.1341463414634149 } }, "N12_anchors": [ "gsm8k", "svamp", "multiarith", "aqua_rat", "math_algebra_easy", "math_counting_easy", "mbpp", "humaneval", "mbpp_sanitized", "sciq", "arc_easy", "openbookqa" ], "N16_anchors": [ "gsm8k", "svamp", "multiarith", "aqua_rat", "math_algebra_easy", "math_counting_easy", "mbpp", "humaneval", "mbpp_sanitized", "sciq", "arc_easy", "openbookqa", "medmcqa_easy", "mmlu_elementary_math", "mmlu_high_school_biology", "mmlu_high_school_physics" ] } }