cross-model-lora-prediction-3b / experiment3_round3.json
CK0607's picture
Round 3 3B domain expansion results
4838960 verified
{
"pools": {
"math-only": [
"gsm8k",
"svamp",
"asdiv",
"mawps",
"multiarith",
"aqua_rat",
"math_algebra_easy",
"math_counting_easy"
],
"code-only": [
"mbpp",
"humaneval",
"codealpaca_mini",
"mbpp_sanitized",
"conala_curated",
"livecodebench_easy"
],
"science-only": [
"sciq",
"arc_easy",
"openbookqa",
"medmcqa_easy",
"mmlu_elementary_math",
"mmlu_high_school_biology",
"mmlu_high_school_physics",
"pubmedqa_pqal"
],
"math+code": [
"gsm8k",
"svamp",
"asdiv",
"mawps",
"multiarith",
"aqua_rat",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"humaneval",
"codealpaca_mini",
"mbpp_sanitized",
"conala_curated",
"livecodebench_easy"
],
"all": [
"gsm8k",
"svamp",
"asdiv",
"mawps",
"multiarith",
"aqua_rat",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"humaneval",
"codealpaca_mini",
"mbpp_sanitized",
"conala_curated",
"livecodebench_easy",
"sciq",
"arc_easy",
"openbookqa",
"medmcqa_easy",
"mmlu_elementary_math",
"mmlu_high_school_biology",
"mmlu_high_school_physics",
"pubmedqa_pqal"
]
},
"heatmap": {
"math-only": {
"gsm_hard": -0.3999999999999996,
"math_algebra_medium": 2.000000000000004,
"humaneval_plus": 0.5000000000000006,
"mbpp_plus": 1.0,
"arc_challenge": 0.5,
"mmlu_college_chemistry": null
},
"code-only": {
"gsm_hard": 1.1999999999999995,
"math_algebra_medium": 8.000000000000012,
"humaneval_plus": 1.0,
"mbpp_plus": -3.0000000000000084,
"arc_challenge": 1.1666666666666667,
"mmlu_college_chemistry": null
},
"science-only": {
"gsm_hard": 0.4000000000000004,
"math_algebra_medium": 6.000000000000008,
"humaneval_plus": 0.5000000000000006,
"mbpp_plus": -1.0,
"arc_challenge": 0.5,
"mmlu_college_chemistry": null
},
"math+code": {
"gsm_hard": -0.3999999999999996,
"math_algebra_medium": 0.0,
"humaneval_plus": 1.0,
"mbpp_plus": -2.0000000000000084,
"arc_challenge": 0.3333333333333333,
"mmlu_college_chemistry": null
},
"all": {
"gsm_hard": -0.3999999999999996,
"math_algebra_medium": 3.000000000000004,
"humaneval_plus": 1.0,
"mbpp_plus": -1.0,
"arc_challenge": 0.0,
"mmlu_college_chemistry": null
}
},
"selected_top3": {
"math-only": {
"gsm_hard": [
"multiarith",
"math_algebra_easy",
"math_counting_easy"
],
"math_algebra_medium": [
"math_algebra_easy",
"multiarith",
"math_counting_easy"
],
"humaneval_plus": [
"math_counting_easy",
"multiarith",
"math_algebra_easy"
],
"mbpp_plus": [
"math_counting_easy",
"multiarith",
"math_algebra_easy"
],
"arc_challenge": [
"aqua_rat",
"multiarith",
"math_algebra_easy"
],
"mmlu_college_chemistry": [
"math_counting_easy",
"multiarith",
"math_algebra_easy"
]
},
"code-only": {
"gsm_hard": [
"humaneval",
"mbpp_sanitized",
"mbpp"
],
"math_algebra_medium": [
"humaneval",
"mbpp_sanitized",
"mbpp"
],
"humaneval_plus": [
"humaneval",
"mbpp_sanitized",
"mbpp"
],
"mbpp_plus": [
"mbpp",
"mbpp_sanitized",
"humaneval"
],
"arc_challenge": [
"humaneval",
"mbpp_sanitized",
"mbpp"
],
"mmlu_college_chemistry": [
"mbpp",
"mbpp_sanitized",
"humaneval"
]
},
"science-only": {
"gsm_hard": [
"mmlu_elementary_math",
"mmlu_high_school_biology",
"mmlu_high_school_physics"
],
"math_algebra_medium": [
"mmlu_high_school_biology",
"mmlu_elementary_math",
"mmlu_high_school_physics"
],
"humaneval_plus": [
"mmlu_high_school_physics",
"mmlu_high_school_biology",
"mmlu_elementary_math"
],
"mbpp_plus": [
"mmlu_high_school_physics",
"mmlu_high_school_biology",
"mmlu_elementary_math"
],
"arc_challenge": [
"arc_easy",
"sciq",
"openbookqa"
],
"mmlu_college_chemistry": [
"mmlu_high_school_physics",
"mmlu_high_school_biology",
"mmlu_elementary_math"
]
},
"math+code": {
"gsm_hard": [
"multiarith",
"math_algebra_easy",
"math_counting_easy"
],
"math_algebra_medium": [
"math_algebra_easy",
"multiarith",
"math_counting_easy"
],
"humaneval_plus": [
"humaneval",
"mbpp_sanitized",
"mbpp"
],
"mbpp_plus": [
"mbpp_sanitized",
"mbpp",
"humaneval"
],
"arc_challenge": [
"aqua_rat",
"multiarith",
"math_algebra_easy"
],
"mmlu_college_chemistry": [
"mbpp_sanitized",
"mbpp",
"math_counting_easy"
]
},
"all": {
"gsm_hard": [
"multiarith",
"math_algebra_easy",
"math_counting_easy"
],
"math_algebra_medium": [
"math_algebra_easy",
"multiarith",
"math_counting_easy"
],
"humaneval_plus": [
"humaneval",
"mbpp_sanitized",
"mbpp"
],
"mbpp_plus": [
"mbpp_sanitized",
"mbpp",
"humaneval"
],
"arc_challenge": [
"arc_easy",
"sciq",
"openbookqa"
],
"mmlu_college_chemistry": [
"mmlu_high_school_physics",
"mbpp_sanitized",
"mbpp"
]
}
},
"best_pool_by_domain": {
"math": {
"best_anchor_pool": "code-only",
"score": 4.600000000000006,
"top3_selections": {
"gsm_hard": [
"humaneval",
"mbpp_sanitized",
"mbpp"
],
"math_algebra_medium": [
"humaneval",
"mbpp_sanitized",
"mbpp"
]
}
},
"code": {
"best_anchor_pool": "math-only",
"score": 0.7500000000000002,
"top3_selections": {
"humaneval_plus": [
"math_counting_easy",
"multiarith",
"math_algebra_easy"
],
"mbpp_plus": [
"math_counting_easy",
"multiarith",
"math_algebra_easy"
]
}
},
"science": {
"best_anchor_pool": "code-only",
"score": 1.1666666666666667,
"top3_selections": {
"arc_challenge": [
"humaneval",
"mbpp_sanitized",
"mbpp"
],
"mmlu_college_chemistry": [
"mbpp",
"mbpp_sanitized",
"humaneval"
]
}
}
}
}