cross-model-lora-prediction-3b / experiment2_round3.json
CK0607's picture
Round 3 3B domain expansion results
4838960 verified
{
"curves": {
"N5_global_ridge": {
"math": -0.3999999999999998,
"code": 0.5000000000000002,
"science": 1.1666666666666667
},
"N12_global_ridge": {
"math": 0.3000000000000002,
"code": -0.5000000000000042,
"science": 0.5
},
"N12_topk8_global_ridge": {
"math": 1.8000000000000043,
"code": -0.5000000000000042,
"science": 0.3333333333333333
},
"N12_topk12_global_ridge": {
"math": -0.29999999999999977,
"code": -0.5000000000000042,
"science": 0.3333333333333333
},
"N22_global_ridge": {
"math": -0.0999999999999998,
"code": 0.5,
"science": 0.0
},
"N22_topk8_global_ridge": {
"math": 1.3000000000000023,
"code": 0.0,
"science": 0.0
},
"N22_topk12_global_ridge": {
"math": 0.20000000000000023,
"code": 0.5,
"science": 0.5
}
},
"details": {
"N5_anchors": [
"gsm8k",
"svamp",
"asdiv",
"mawps",
"multiarith"
],
"gsm_hard": {
"N5_global_ridge": {
"acc": 0.043333333333333335,
"gap_recovered": -0.7999999999999996
},
"N12_global_ridge": {
"acc": 0.05,
"gap_recovered": -0.3999999999999996
},
"N12_topk8_global_ridge": {
"acc": 0.05,
"gap_recovered": -0.3999999999999996
},
"N12_topk12_global_ridge": {
"acc": 0.04666666666666667,
"gap_recovered": -0.5999999999999995
},
"N22_topk12_global_ridge": {
"acc": 0.04666666666666667,
"gap_recovered": -0.5999999999999995
}
},
"math_algebra_medium": {
"N5_global_ridge": {
"acc": 0.09333333333333334,
"gap_recovered": 0.0
},
"N12_global_ridge": {
"acc": 0.09666666666666666,
"gap_recovered": 1.0
},
"N12_topk8_global_ridge": {
"acc": 0.10666666666666667,
"gap_recovered": 4.000000000000008
},
"N12_topk12_global_ridge": {
"acc": 0.09333333333333334,
"gap_recovered": 0.0
},
"N22_topk12_global_ridge": {
"acc": 0.09666666666666666,
"gap_recovered": 1.0
}
},
"humaneval_plus": {
"N5_global_ridge": {
"acc": 0.054878048780487805,
"gap_recovered": 2.0000000000000004
},
"N12_global_ridge": {
"acc": 0.06707317073170732,
"gap_recovered": 1.0
},
"N12_topk8_global_ridge": {
"acc": 0.06707317073170732,
"gap_recovered": 1.0
},
"N12_topk12_global_ridge": {
"acc": 0.06707317073170732,
"gap_recovered": 1.0
},
"N22_topk12_global_ridge": {
"acc": 0.06707317073170732,
"gap_recovered": 1.0
}
},
"mbpp_plus": {
"N5_global_ridge": {
"acc": 0.21333333333333335,
"gap_recovered": -1.0
},
"N12_global_ridge": {
"acc": 0.21,
"gap_recovered": -2.0000000000000084
},
"N12_topk8_global_ridge": {
"acc": 0.21,
"gap_recovered": -2.0000000000000084
},
"N12_topk12_global_ridge": {
"acc": 0.21,
"gap_recovered": -2.0000000000000084
},
"N22_topk12_global_ridge": {
"acc": 0.21666666666666667,
"gap_recovered": 0.0
}
},
"arc_challenge": {
"N5_global_ridge": {
"acc": 0.7290969899665551,
"gap_recovered": 1.1666666666666667
},
"N12_global_ridge": {
"acc": 0.7157190635451505,
"gap_recovered": 0.5
},
"N12_topk8_global_ridge": {
"acc": 0.7123745819397993,
"gap_recovered": 0.3333333333333333
},
"N12_topk12_global_ridge": {
"acc": 0.7123745819397993,
"gap_recovered": 0.3333333333333333
},
"N22_topk12_global_ridge": {
"acc": 0.7157190635451505,
"gap_recovered": 0.5
}
},
"mmlu_college_chemistry": {
"N5_global_ridge": {
"acc": 0.125,
"gap_recovered": null
},
"N12_global_ridge": {
"acc": 0.375,
"gap_recovered": null
},
"N12_topk8_global_ridge": {
"acc": 0.375,
"gap_recovered": null
},
"N12_topk12_global_ridge": {
"acc": 0.375,
"gap_recovered": null
},
"N22_topk12_global_ridge": {
"acc": 0.375,
"gap_recovered": null
}
},
"N12_anchors": [
"gsm8k",
"svamp",
"asdiv",
"mawps",
"multiarith",
"aqua_rat",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"humaneval",
"codealpaca_mini",
"mbpp_sanitized"
],
"N22_anchors": [
"gsm8k",
"svamp",
"asdiv",
"mawps",
"multiarith",
"aqua_rat",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"humaneval",
"codealpaca_mini",
"mbpp_sanitized",
"conala_curated",
"livecodebench_easy",
"sciq",
"arc_easy",
"openbookqa",
"medmcqa_easy",
"mmlu_elementary_math",
"mmlu_high_school_biology",
"mmlu_high_school_physics",
"pubmedqa_pqal"
]
}
}