cross-model-lora-prediction-3b / experiment2_round4.json
CK0607's picture
Round 4 oracle-fix results
574b87a verified
{
"curves": {
"N5_global_ridge": {
"math": -0.010817307692307696,
"code": 0.026984126984126878,
"science": 1.5304878048780488
},
"N12_global_ridge": {
"math": -0.03425480769230772,
"code": 0.2539682539682539,
"science": 2.073170731707317
},
"N12_topk8_global_ridge": {
"math": -0.010817307692307696,
"code": 0.2738095238095238,
"science": 1.3353658536585367
},
"N12_topk12_global_ridge": {
"math": -0.018629807692307716,
"code": 0.23253968253968244,
"science": 1.829268292682927
},
"N16_global_ridge": {
"math": 0.027644230769230737,
"code": 0.24682539682539684,
"science": 1.5670731707317074
},
"N16_topk8_global_ridge": {
"math": 0.050480769230769204,
"code": 0.22539682539682537,
"science": 1.50609756097561
},
"N16_topk12_global_ridge": {
"math": 0.03906249999999999,
"code": 0.2396825396825396,
"science": 2.3170731707317076
}
},
"details": {
"N5_anchors": [
"gsm8k",
"svamp",
"multiarith",
"aqua_rat",
"math_algebra_easy"
],
"gsm_hard": {
"N5_global_ridge": {
"acc": 0.05333333333333334,
"gap_recovered": -0.11538461538461542
},
"N12_global_ridge": {
"acc": 0.05333333333333334,
"gap_recovered": -0.11538461538461542
},
"N12_topk8_global_ridge": {
"acc": 0.05333333333333334,
"gap_recovered": -0.11538461538461542
},
"N12_topk12_global_ridge": {
"acc": 0.05333333333333334,
"gap_recovered": -0.11538461538461542
},
"N16_topk12_global_ridge": {
"acc": 0.06333333333333334,
"gap_recovered": 0.0
}
},
"gsm8k_test_500": {
"N5_global_ridge": {
"acc": 0.1,
"gap_recovered": 0.09375000000000003
},
"N12_global_ridge": {
"acc": 0.09,
"gap_recovered": 0.04687499999999998
},
"N12_topk8_global_ridge": {
"acc": 0.1,
"gap_recovered": 0.09375000000000003
},
"N12_topk12_global_ridge": {
"acc": 0.09666666666666666,
"gap_recovered": 0.07812499999999999
},
"N16_topk12_global_ridge": {
"acc": 0.09666666666666666,
"gap_recovered": 0.07812499999999999
}
},
"mbpp_test_held": {
"N5_global_ridge": {
"acc": 0.24,
"gap_recovered": 0.11111111111111091
},
"N12_global_ridge": {
"acc": 0.25,
"gap_recovered": 0.22222222222222213
},
"N12_topk8_global_ridge": {
"acc": 0.26,
"gap_recovered": 0.3333333333333333
},
"N12_topk12_global_ridge": {
"acc": 0.25,
"gap_recovered": 0.22222222222222213
},
"N16_topk12_global_ridge": {
"acc": 0.25,
"gap_recovered": 0.22222222222222213
}
},
"mbpp_plus": {
"N5_global_ridge": {
"acc": 0.20333333333333334,
"gap_recovered": -0.057142857142857155
},
"N12_global_ridge": {
"acc": 0.2833333333333333,
"gap_recovered": 0.28571428571428564
},
"N12_topk8_global_ridge": {
"acc": 0.26666666666666666,
"gap_recovered": 0.21428571428571425
},
"N12_topk12_global_ridge": {
"acc": 0.2733333333333333,
"gap_recovered": 0.24285714285714274
},
"N16_topk12_global_ridge": {
"acc": 0.27666666666666667,
"gap_recovered": 0.2571428571428571
}
},
"arc_challenge": {
"N5_global_ridge": {
"acc": 0.7357859531772575,
"gap_recovered": 3.0
},
"N12_global_ridge": {
"acc": 0.7424749163879598,
"gap_recovered": 4.0
},
"N12_topk8_global_ridge": {
"acc": 0.7324414715719063,
"gap_recovered": 2.5
},
"N12_topk12_global_ridge": {
"acc": 0.7391304347826086,
"gap_recovered": 3.5
},
"N16_topk12_global_ridge": {
"acc": 0.745819397993311,
"gap_recovered": 4.5
}
},
"openbookqa_test": {
"N5_global_ridge": {
"acc": 0.7266666666666667,
"gap_recovered": 0.060975609756097754
},
"N12_global_ridge": {
"acc": 0.75,
"gap_recovered": 0.14634146341463428
},
"N12_topk8_global_ridge": {
"acc": 0.7566666666666667,
"gap_recovered": 0.17073170731707346
},
"N12_topk12_global_ridge": {
"acc": 0.7533333333333333,
"gap_recovered": 0.15853658536585366
},
"N16_topk12_global_ridge": {
"acc": 0.7466666666666667,
"gap_recovered": 0.1341463414634149
}
},
"N12_anchors": [
"gsm8k",
"svamp",
"multiarith",
"aqua_rat",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"humaneval",
"mbpp_sanitized",
"sciq",
"arc_easy",
"openbookqa"
],
"N16_anchors": [
"gsm8k",
"svamp",
"multiarith",
"aqua_rat",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"humaneval",
"mbpp_sanitized",
"sciq",
"arc_easy",
"openbookqa",
"medmcqa_easy",
"mmlu_elementary_math",
"mmlu_high_school_biology",
"mmlu_high_school_physics"
]
}
}