| { |
| "pools": { |
| "math-only": [ |
| "gsm8k", |
| "svamp", |
| "multiarith", |
| "aqua_rat", |
| "math_algebra_easy", |
| "math_counting_easy" |
| ], |
| "code-only": [ |
| "mbpp", |
| "humaneval", |
| "mbpp_sanitized" |
| ], |
| "science-only": [ |
| "sciq", |
| "arc_easy", |
| "openbookqa", |
| "medmcqa_easy", |
| "mmlu_elementary_math", |
| "mmlu_high_school_biology", |
| "mmlu_high_school_physics" |
| ], |
| "math+code": [ |
| "gsm8k", |
| "svamp", |
| "multiarith", |
| "aqua_rat", |
| "math_algebra_easy", |
| "math_counting_easy", |
| "mbpp", |
| "humaneval", |
| "mbpp_sanitized" |
| ], |
| "all": [ |
| "gsm8k", |
| "svamp", |
| "multiarith", |
| "aqua_rat", |
| "math_algebra_easy", |
| "math_counting_easy", |
| "mbpp", |
| "humaneval", |
| "mbpp_sanitized", |
| "sciq", |
| "arc_easy", |
| "openbookqa", |
| "medmcqa_easy", |
| "mmlu_elementary_math", |
| "mmlu_high_school_biology", |
| "mmlu_high_school_physics" |
| ] |
| }, |
| "heatmap": { |
| "math-only": { |
| "gsm_hard": -0.038461538461538554, |
| "gsm8k_test_500": 0.07812499999999999, |
| "mbpp_test_held": 0.0, |
| "mbpp_plus": -0.04285714285714289, |
| "arc_challenge": 2.0, |
| "openbookqa_test": 0.10975609756097571 |
| }, |
| "code-only": { |
| "gsm_hard": -0.15384615384615388, |
| "gsm8k_test_500": -0.03125000000000001, |
| "mbpp_test_held": 0.44444444444444453, |
| "mbpp_plus": 0.24285714285714274, |
| "arc_challenge": -1.5, |
| "openbookqa_test": 0.012195121951219795 |
| }, |
| "science-only": { |
| "gsm_hard": -0.038461538461538554, |
| "gsm8k_test_500": 0.171875, |
| "mbpp_test_held": 0.11111111111111091, |
| "mbpp_plus": -0.01428571428571426, |
| "arc_challenge": 3.5, |
| "openbookqa_test": 0.10975609756097571 |
| }, |
| "math+code": { |
| "gsm_hard": -0.15384615384615388, |
| "gsm8k_test_500": 0.09375000000000003, |
| "mbpp_test_held": 0.22222222222222213, |
| "mbpp_plus": 0.21428571428571425, |
| "arc_challenge": 0.5, |
| "openbookqa_test": 0.09756097560975632 |
| }, |
| "all": { |
| "gsm_hard": 0.038461538461538394, |
| "gsm8k_test_500": 0.06250000000000001, |
| "mbpp_test_held": 0.22222222222222213, |
| "mbpp_plus": 0.22857142857142862, |
| "arc_challenge": 3.0, |
| "openbookqa_test": 0.012195121951219795 |
| } |
| }, |
| "selected_top3": { |
| "math-only": { |
| "gsm_hard": [ |
| "math_counting_easy", |
| "multiarith", |
| "math_algebra_easy" |
| ], |
| "gsm8k_test_500": [ |
| "math_counting_easy", |
| "multiarith", |
| "math_algebra_easy" |
| ], |
| "mbpp_test_held": [ |
| "math_counting_easy", |
| "multiarith", |
| "math_algebra_easy" |
| ], |
| "mbpp_plus": [ |
| "math_counting_easy", |
| "multiarith", |
| "math_algebra_easy" |
| ], |
| "arc_challenge": [ |
| "math_counting_easy", |
| "multiarith", |
| "math_algebra_easy" |
| ], |
| "openbookqa_test": [ |
| "math_counting_easy", |
| "multiarith", |
| "math_algebra_easy" |
| ] |
| }, |
| "code-only": { |
| "gsm_hard": [ |
| "mbpp_sanitized", |
| "humaneval", |
| "mbpp" |
| ], |
| "gsm8k_test_500": [ |
| "mbpp_sanitized", |
| "humaneval", |
| "mbpp" |
| ], |
| "mbpp_test_held": [ |
| "mbpp_sanitized", |
| "humaneval", |
| "mbpp" |
| ], |
| "mbpp_plus": [ |
| "mbpp_sanitized", |
| "humaneval", |
| "mbpp" |
| ], |
| "arc_challenge": [ |
| "mbpp_sanitized", |
| "humaneval", |
| "mbpp" |
| ], |
| "openbookqa_test": [ |
| "mbpp_sanitized", |
| "humaneval", |
| "mbpp" |
| ] |
| }, |
| "science-only": { |
| "gsm_hard": [ |
| "mmlu_high_school_physics", |
| "mmlu_elementary_math", |
| "mmlu_high_school_biology" |
| ], |
| "gsm8k_test_500": [ |
| "mmlu_high_school_physics", |
| "mmlu_elementary_math", |
| "mmlu_high_school_biology" |
| ], |
| "mbpp_test_held": [ |
| "mmlu_high_school_physics", |
| "mmlu_high_school_biology", |
| "mmlu_elementary_math" |
| ], |
| "mbpp_plus": [ |
| "mmlu_high_school_physics", |
| "mmlu_high_school_biology", |
| "mmlu_elementary_math" |
| ], |
| "arc_challenge": [ |
| "mmlu_high_school_physics", |
| "mmlu_high_school_biology", |
| "mmlu_elementary_math" |
| ], |
| "openbookqa_test": [ |
| "mmlu_high_school_physics", |
| "mmlu_high_school_biology", |
| "mmlu_elementary_math" |
| ] |
| }, |
| "math+code": { |
| "gsm_hard": [ |
| "math_counting_easy", |
| "mbpp_sanitized", |
| "humaneval" |
| ], |
| "gsm8k_test_500": [ |
| "math_counting_easy", |
| "mbpp_sanitized", |
| "humaneval" |
| ], |
| "mbpp_test_held": [ |
| "mbpp_sanitized", |
| "math_counting_easy", |
| "humaneval" |
| ], |
| "mbpp_plus": [ |
| "mbpp_sanitized", |
| "humaneval", |
| "math_counting_easy" |
| ], |
| "arc_challenge": [ |
| "math_counting_easy", |
| "mbpp_sanitized", |
| "humaneval" |
| ], |
| "openbookqa_test": [ |
| "mbpp_sanitized", |
| "math_counting_easy", |
| "humaneval" |
| ] |
| }, |
| "all": { |
| "gsm_hard": [ |
| "math_counting_easy", |
| "mbpp_sanitized", |
| "mmlu_high_school_physics" |
| ], |
| "gsm8k_test_500": [ |
| "math_counting_easy", |
| "mbpp_sanitized", |
| "mmlu_high_school_physics" |
| ], |
| "mbpp_test_held": [ |
| "mbpp_sanitized", |
| "math_counting_easy", |
| "humaneval" |
| ], |
| "mbpp_plus": [ |
| "mbpp_sanitized", |
| "humaneval", |
| "math_counting_easy" |
| ], |
| "arc_challenge": [ |
| "mmlu_high_school_physics", |
| "mmlu_high_school_biology", |
| "mmlu_elementary_math" |
| ], |
| "openbookqa_test": [ |
| "mmlu_high_school_physics", |
| "mmlu_high_school_biology", |
| "mbpp_sanitized" |
| ] |
| } |
| }, |
| "best_pool_by_domain": { |
| "math": { |
| "best_anchor_pool": "science-only", |
| "score": 0.06670673076923073, |
| "top3_selections": { |
| "gsm_hard": [ |
| "mmlu_high_school_physics", |
| "mmlu_elementary_math", |
| "mmlu_high_school_biology" |
| ], |
| "gsm8k_test_500": [ |
| "mmlu_high_school_physics", |
| "mmlu_elementary_math", |
| "mmlu_high_school_biology" |
| ] |
| } |
| }, |
| "code": { |
| "best_anchor_pool": "code-only", |
| "score": 0.34365079365079365, |
| "top3_selections": { |
| "mbpp_test_held": [ |
| "mbpp_sanitized", |
| "humaneval", |
| "mbpp" |
| ], |
| "mbpp_plus": [ |
| "mbpp_sanitized", |
| "humaneval", |
| "mbpp" |
| ] |
| } |
| }, |
| "science": { |
| "best_anchor_pool": "science-only", |
| "score": 1.8048780487804879, |
| "top3_selections": { |
| "arc_challenge": [ |
| "mmlu_high_school_physics", |
| "mmlu_high_school_biology", |
| "mmlu_elementary_math" |
| ], |
| "openbookqa_test": [ |
| "mmlu_high_school_physics", |
| "mmlu_high_school_biology", |
| "mmlu_elementary_math" |
| ] |
| } |
| } |
| }, |
| "heldout_names": [ |
| "gsm_hard", |
| "gsm8k_test_500", |
| "mbpp_test_held", |
| "mbpp_plus", |
| "arc_challenge", |
| "openbookqa_test" |
| ] |
| } |