{ "pools": { "math-only": [ "gsm8k", "svamp", "multiarith", "aqua_rat", "math_algebra_easy", "math_counting_easy" ], "code-only": [ "mbpp", "humaneval", "mbpp_sanitized" ], "science-only": [ "sciq", "arc_easy", "openbookqa", "medmcqa_easy", "mmlu_elementary_math", "mmlu_high_school_biology", "mmlu_high_school_physics" ], "math+code": [ "gsm8k", "svamp", "multiarith", "aqua_rat", "math_algebra_easy", "math_counting_easy", "mbpp", "humaneval", "mbpp_sanitized" ], "all": [ "gsm8k", "svamp", "multiarith", "aqua_rat", "math_algebra_easy", "math_counting_easy", "mbpp", "humaneval", "mbpp_sanitized", "sciq", "arc_easy", "openbookqa", "medmcqa_easy", "mmlu_elementary_math", "mmlu_high_school_biology", "mmlu_high_school_physics" ] }, "heatmap": { "math-only": { "gsm_hard": -0.038461538461538554, "gsm8k_test_500": 0.07812499999999999, "mbpp_test_held": 0.0, "mbpp_plus": -0.04285714285714289, "arc_challenge": 2.0, "openbookqa_test": 0.10975609756097571 }, "code-only": { "gsm_hard": -0.15384615384615388, "gsm8k_test_500": -0.03125000000000001, "mbpp_test_held": 0.44444444444444453, "mbpp_plus": 0.24285714285714274, "arc_challenge": -1.5, "openbookqa_test": 0.012195121951219795 }, "science-only": { "gsm_hard": -0.038461538461538554, "gsm8k_test_500": 0.171875, "mbpp_test_held": 0.11111111111111091, "mbpp_plus": -0.01428571428571426, "arc_challenge": 3.5, "openbookqa_test": 0.10975609756097571 }, "math+code": { "gsm_hard": -0.15384615384615388, "gsm8k_test_500": 0.09375000000000003, "mbpp_test_held": 0.22222222222222213, "mbpp_plus": 0.21428571428571425, "arc_challenge": 0.5, "openbookqa_test": 0.09756097560975632 }, "all": { "gsm_hard": 0.038461538461538394, "gsm8k_test_500": 0.06250000000000001, "mbpp_test_held": 0.22222222222222213, "mbpp_plus": 0.22857142857142862, "arc_challenge": 3.0, "openbookqa_test": 0.012195121951219795 } }, "selected_top3": { "math-only": { "gsm_hard": [ "math_counting_easy", "multiarith", "math_algebra_easy" ], "gsm8k_test_500": [ "math_counting_easy", "multiarith", "math_algebra_easy" ], "mbpp_test_held": [ "math_counting_easy", "multiarith", "math_algebra_easy" ], "mbpp_plus": [ "math_counting_easy", "multiarith", "math_algebra_easy" ], "arc_challenge": [ "math_counting_easy", "multiarith", "math_algebra_easy" ], "openbookqa_test": [ "math_counting_easy", "multiarith", "math_algebra_easy" ] }, "code-only": { "gsm_hard": [ "mbpp_sanitized", "humaneval", "mbpp" ], "gsm8k_test_500": [ "mbpp_sanitized", "humaneval", "mbpp" ], "mbpp_test_held": [ "mbpp_sanitized", "humaneval", "mbpp" ], "mbpp_plus": [ "mbpp_sanitized", "humaneval", "mbpp" ], "arc_challenge": [ "mbpp_sanitized", "humaneval", "mbpp" ], "openbookqa_test": [ "mbpp_sanitized", "humaneval", "mbpp" ] }, "science-only": { "gsm_hard": [ "mmlu_high_school_physics", "mmlu_elementary_math", "mmlu_high_school_biology" ], "gsm8k_test_500": [ "mmlu_high_school_physics", "mmlu_elementary_math", "mmlu_high_school_biology" ], "mbpp_test_held": [ "mmlu_high_school_physics", "mmlu_high_school_biology", "mmlu_elementary_math" ], "mbpp_plus": [ "mmlu_high_school_physics", "mmlu_high_school_biology", "mmlu_elementary_math" ], "arc_challenge": [ "mmlu_high_school_physics", "mmlu_high_school_biology", "mmlu_elementary_math" ], "openbookqa_test": [ "mmlu_high_school_physics", "mmlu_high_school_biology", "mmlu_elementary_math" ] }, "math+code": { "gsm_hard": [ "math_counting_easy", "mbpp_sanitized", "humaneval" ], "gsm8k_test_500": [ "math_counting_easy", "mbpp_sanitized", "humaneval" ], "mbpp_test_held": [ "mbpp_sanitized", "math_counting_easy", "humaneval" ], "mbpp_plus": [ "mbpp_sanitized", "humaneval", "math_counting_easy" ], "arc_challenge": [ "math_counting_easy", "mbpp_sanitized", "humaneval" ], "openbookqa_test": [ "mbpp_sanitized", "math_counting_easy", "humaneval" ] }, "all": { "gsm_hard": [ "math_counting_easy", "mbpp_sanitized", "mmlu_high_school_physics" ], "gsm8k_test_500": [ "math_counting_easy", "mbpp_sanitized", "mmlu_high_school_physics" ], "mbpp_test_held": [ "mbpp_sanitized", "math_counting_easy", "humaneval" ], "mbpp_plus": [ "mbpp_sanitized", "humaneval", "math_counting_easy" ], "arc_challenge": [ "mmlu_high_school_physics", "mmlu_high_school_biology", "mmlu_elementary_math" ], "openbookqa_test": [ "mmlu_high_school_physics", "mmlu_high_school_biology", "mbpp_sanitized" ] } }, "best_pool_by_domain": { "math": { "best_anchor_pool": "science-only", "score": 0.06670673076923073, "top3_selections": { "gsm_hard": [ "mmlu_high_school_physics", "mmlu_elementary_math", "mmlu_high_school_biology" ], "gsm8k_test_500": [ "mmlu_high_school_physics", "mmlu_elementary_math", "mmlu_high_school_biology" ] } }, "code": { "best_anchor_pool": "code-only", "score": 0.34365079365079365, "top3_selections": { "mbpp_test_held": [ "mbpp_sanitized", "humaneval", "mbpp" ], "mbpp_plus": [ "mbpp_sanitized", "humaneval", "mbpp" ] } }, "science": { "best_anchor_pool": "science-only", "score": 1.8048780487804879, "top3_selections": { "arc_challenge": [ "mmlu_high_school_physics", "mmlu_high_school_biology", "mmlu_elementary_math" ], "openbookqa_test": [ "mmlu_high_school_physics", "mmlu_high_school_biology", "mmlu_elementary_math" ] } } }, "heldout_names": [ "gsm_hard", "gsm8k_test_500", "mbpp_test_held", "mbpp_plus", "arc_challenge", "openbookqa_test" ] }