cross-model-lora-prediction-3b / experiment3_round4.json
CK0607's picture
Round 4 oracle-fix results
574b87a verified
{
"pools": {
"math-only": [
"gsm8k",
"svamp",
"multiarith",
"aqua_rat",
"math_algebra_easy",
"math_counting_easy"
],
"code-only": [
"mbpp",
"humaneval",
"mbpp_sanitized"
],
"science-only": [
"sciq",
"arc_easy",
"openbookqa",
"medmcqa_easy",
"mmlu_elementary_math",
"mmlu_high_school_biology",
"mmlu_high_school_physics"
],
"math+code": [
"gsm8k",
"svamp",
"multiarith",
"aqua_rat",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"humaneval",
"mbpp_sanitized"
],
"all": [
"gsm8k",
"svamp",
"multiarith",
"aqua_rat",
"math_algebra_easy",
"math_counting_easy",
"mbpp",
"humaneval",
"mbpp_sanitized",
"sciq",
"arc_easy",
"openbookqa",
"medmcqa_easy",
"mmlu_elementary_math",
"mmlu_high_school_biology",
"mmlu_high_school_physics"
]
},
"heatmap": {
"math-only": {
"gsm_hard": -0.038461538461538554,
"gsm8k_test_500": 0.07812499999999999,
"mbpp_test_held": 0.0,
"mbpp_plus": -0.04285714285714289,
"arc_challenge": 2.0,
"openbookqa_test": 0.10975609756097571
},
"code-only": {
"gsm_hard": -0.15384615384615388,
"gsm8k_test_500": -0.03125000000000001,
"mbpp_test_held": 0.44444444444444453,
"mbpp_plus": 0.24285714285714274,
"arc_challenge": -1.5,
"openbookqa_test": 0.012195121951219795
},
"science-only": {
"gsm_hard": -0.038461538461538554,
"gsm8k_test_500": 0.171875,
"mbpp_test_held": 0.11111111111111091,
"mbpp_plus": -0.01428571428571426,
"arc_challenge": 3.5,
"openbookqa_test": 0.10975609756097571
},
"math+code": {
"gsm_hard": -0.15384615384615388,
"gsm8k_test_500": 0.09375000000000003,
"mbpp_test_held": 0.22222222222222213,
"mbpp_plus": 0.21428571428571425,
"arc_challenge": 0.5,
"openbookqa_test": 0.09756097560975632
},
"all": {
"gsm_hard": 0.038461538461538394,
"gsm8k_test_500": 0.06250000000000001,
"mbpp_test_held": 0.22222222222222213,
"mbpp_plus": 0.22857142857142862,
"arc_challenge": 3.0,
"openbookqa_test": 0.012195121951219795
}
},
"selected_top3": {
"math-only": {
"gsm_hard": [
"math_counting_easy",
"multiarith",
"math_algebra_easy"
],
"gsm8k_test_500": [
"math_counting_easy",
"multiarith",
"math_algebra_easy"
],
"mbpp_test_held": [
"math_counting_easy",
"multiarith",
"math_algebra_easy"
],
"mbpp_plus": [
"math_counting_easy",
"multiarith",
"math_algebra_easy"
],
"arc_challenge": [
"math_counting_easy",
"multiarith",
"math_algebra_easy"
],
"openbookqa_test": [
"math_counting_easy",
"multiarith",
"math_algebra_easy"
]
},
"code-only": {
"gsm_hard": [
"mbpp_sanitized",
"humaneval",
"mbpp"
],
"gsm8k_test_500": [
"mbpp_sanitized",
"humaneval",
"mbpp"
],
"mbpp_test_held": [
"mbpp_sanitized",
"humaneval",
"mbpp"
],
"mbpp_plus": [
"mbpp_sanitized",
"humaneval",
"mbpp"
],
"arc_challenge": [
"mbpp_sanitized",
"humaneval",
"mbpp"
],
"openbookqa_test": [
"mbpp_sanitized",
"humaneval",
"mbpp"
]
},
"science-only": {
"gsm_hard": [
"mmlu_high_school_physics",
"mmlu_elementary_math",
"mmlu_high_school_biology"
],
"gsm8k_test_500": [
"mmlu_high_school_physics",
"mmlu_elementary_math",
"mmlu_high_school_biology"
],
"mbpp_test_held": [
"mmlu_high_school_physics",
"mmlu_high_school_biology",
"mmlu_elementary_math"
],
"mbpp_plus": [
"mmlu_high_school_physics",
"mmlu_high_school_biology",
"mmlu_elementary_math"
],
"arc_challenge": [
"mmlu_high_school_physics",
"mmlu_high_school_biology",
"mmlu_elementary_math"
],
"openbookqa_test": [
"mmlu_high_school_physics",
"mmlu_high_school_biology",
"mmlu_elementary_math"
]
},
"math+code": {
"gsm_hard": [
"math_counting_easy",
"mbpp_sanitized",
"humaneval"
],
"gsm8k_test_500": [
"math_counting_easy",
"mbpp_sanitized",
"humaneval"
],
"mbpp_test_held": [
"mbpp_sanitized",
"math_counting_easy",
"humaneval"
],
"mbpp_plus": [
"mbpp_sanitized",
"humaneval",
"math_counting_easy"
],
"arc_challenge": [
"math_counting_easy",
"mbpp_sanitized",
"humaneval"
],
"openbookqa_test": [
"mbpp_sanitized",
"math_counting_easy",
"humaneval"
]
},
"all": {
"gsm_hard": [
"math_counting_easy",
"mbpp_sanitized",
"mmlu_high_school_physics"
],
"gsm8k_test_500": [
"math_counting_easy",
"mbpp_sanitized",
"mmlu_high_school_physics"
],
"mbpp_test_held": [
"mbpp_sanitized",
"math_counting_easy",
"humaneval"
],
"mbpp_plus": [
"mbpp_sanitized",
"humaneval",
"math_counting_easy"
],
"arc_challenge": [
"mmlu_high_school_physics",
"mmlu_high_school_biology",
"mmlu_elementary_math"
],
"openbookqa_test": [
"mmlu_high_school_physics",
"mmlu_high_school_biology",
"mbpp_sanitized"
]
}
},
"best_pool_by_domain": {
"math": {
"best_anchor_pool": "science-only",
"score": 0.06670673076923073,
"top3_selections": {
"gsm_hard": [
"mmlu_high_school_physics",
"mmlu_elementary_math",
"mmlu_high_school_biology"
],
"gsm8k_test_500": [
"mmlu_high_school_physics",
"mmlu_elementary_math",
"mmlu_high_school_biology"
]
}
},
"code": {
"best_anchor_pool": "code-only",
"score": 0.34365079365079365,
"top3_selections": {
"mbpp_test_held": [
"mbpp_sanitized",
"humaneval",
"mbpp"
],
"mbpp_plus": [
"mbpp_sanitized",
"humaneval",
"mbpp"
]
}
},
"science": {
"best_anchor_pool": "science-only",
"score": 1.8048780487804879,
"top3_selections": {
"arc_challenge": [
"mmlu_high_school_physics",
"mmlu_high_school_biology",
"mmlu_elementary_math"
],
"openbookqa_test": [
"mmlu_high_school_physics",
"mmlu_high_school_biology",
"mmlu_elementary_math"
]
}
}
},
"heldout_names": [
"gsm_hard",
"gsm8k_test_500",
"mbpp_test_held",
"mbpp_plus",
"arc_challenge",
"openbookqa_test"
]
}