| { |
| "config": { |
| "hub_repo": "CK0607/cross-model-lora-prediction-3b", |
| "model_Y": "meta-llama/Llama-3.2-3B-Instruct", |
| "method": "global_ridge", |
| "no_surrogate": true, |
| "heldouts": [ |
| "gsm_hard", |
| "gsm8k_test_500", |
| "mbpp_test_held", |
| "mbpp_plus", |
| "openbookqa_test" |
| ] |
| }, |
| "pools": { |
| "math_only": [ |
| "r4:gsm8k", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mawps" |
| ], |
| "code_only": [ |
| "r4:mbpp", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated" |
| ], |
| "science_only": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_high_school_physics", |
| "r4:mmlu_elementary_math", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "math_plus_code": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated" |
| ], |
| "all": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ] |
| }, |
| "baselines": { |
| "gsm_hard": { |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15 |
| }, |
| "gsm8k_test_500": { |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333 |
| }, |
| "mbpp_test_held": { |
| "base_Y": 0.23, |
| "oracle": 0.32 |
| }, |
| "mbpp_plus": { |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45 |
| }, |
| "openbookqa_test": { |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333 |
| } |
| }, |
| "summary": { |
| "by_pool": { |
| "math_only": { |
| "mean_gap_recovered": -0.0013892053068882359, |
| "n": 5 |
| }, |
| "code_only": { |
| "mean_gap_recovered": -1.2570036332231456, |
| "n": 5 |
| }, |
| "science_only": { |
| "mean_gap_recovered": 0.05831325009678664, |
| "n": 5 |
| }, |
| "math_plus_code": { |
| "mean_gap_recovered": 0.10617504615980225, |
| "n": 5 |
| }, |
| "all": { |
| "mean_gap_recovered": 0.12118442702284167, |
| "n": 5 |
| } |
| }, |
| "matched_vs_mismatched": { |
| "matched_domain": { |
| "mean_gap_recovered": -0.33363405451820094, |
| "n": 9 |
| }, |
| "mismatched_domain": { |
| "mean_gap_recovered": -0.22425602006394688, |
| "n": 11 |
| }, |
| "all_control": { |
| "mean_gap_recovered": 0.12118442702284167, |
| "n": 5 |
| } |
| }, |
| "winners_by_task": { |
| "gsm_hard": { |
| "pool": "science_only", |
| "gap_recovered": 0.0, |
| "match_type": "mismatched_domain" |
| }, |
| "gsm8k_test_500": { |
| "pool": "science_only", |
| "gap_recovered": 0.109375, |
| "match_type": "mismatched_domain" |
| }, |
| "mbpp_test_held": { |
| "pool": "math_plus_code", |
| "gap_recovered": 0.22222222222222213, |
| "match_type": "matched_domain" |
| }, |
| "mbpp_plus": { |
| "pool": "math_plus_code", |
| "gap_recovered": 0.17142857142857135, |
| "match_type": "matched_domain" |
| }, |
| "openbookqa_test": { |
| "pool": "all", |
| "gap_recovered": 0.1341463414634149, |
| "match_type": "all_control" |
| } |
| }, |
| "decision": "mixed; more anchors and curation both matter" |
| }, |
| "records": [ |
| { |
| "cell_id": "B::all::gsm8k_test_500", |
| "stage": "pool_transfer", |
| "pool": "all", |
| "pool_size": 24, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "pool_domains": [ |
| "code", |
| "math", |
| "science" |
| ], |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "method": "global_ridge", |
| "match_type": "all_control", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm8k_test_500_global_ridge_all_N24", |
| "accuracy": 0.09666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 7.885, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.07812499999999999 |
| }, |
| { |
| "cell_id": "B::all::gsm_hard", |
| "stage": "pool_transfer", |
| "pool": "all", |
| "pool_size": 24, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "pool_domains": [ |
| "code", |
| "math", |
| "science" |
| ], |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "method": "global_ridge", |
| "match_type": "all_control", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm_hard_global_ridge_all_N24", |
| "accuracy": 0.06333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 22.563, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.0 |
| }, |
| { |
| "cell_id": "B::all::mbpp_plus", |
| "stage": "pool_transfer", |
| "pool": "all", |
| "pool_size": 24, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "pool_domains": [ |
| "code", |
| "math", |
| "science" |
| ], |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "method": "global_ridge", |
| "match_type": "all_control", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_plus_global_ridge_all_N24", |
| "accuracy": 0.25666666666666665, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 150.049, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.17142857142857135 |
| }, |
| { |
| "cell_id": "B::all::mbpp_test_held", |
| "stage": "pool_transfer", |
| "pool": "all", |
| "pool_size": 24, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "pool_domains": [ |
| "code", |
| "math", |
| "science" |
| ], |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "method": "global_ridge", |
| "match_type": "all_control", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_test_held_global_ridge_all_N24", |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 6, |
| "eval_seconds": 50.906, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213 |
| }, |
| { |
| "cell_id": "B::all::openbookqa_test", |
| "stage": "pool_transfer", |
| "pool": "all", |
| "pool_size": 24, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:mmlu_high_school_biology", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mmlu_high_school_physics", |
| "r4:mbpp_sanitized", |
| "r4:mmlu_elementary_math", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r4:medmcqa_easy", |
| "r5:aqua_rat_numeric", |
| "r5:math_counting_easy", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "pool_domains": [ |
| "code", |
| "math", |
| "science" |
| ], |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "method": "global_ridge", |
| "match_type": "all_control", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/openbookqa_test_global_ridge_all_N24", |
| "accuracy": 0.7466666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 22.64, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.1341463414634149 |
| }, |
| { |
| "cell_id": "B::code_only::gsm8k_test_500", |
| "stage": "pool_transfer", |
| "pool": "code_only", |
| "pool_size": 6, |
| "pool_anchor_names": [ |
| "r4:mbpp", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated" |
| ], |
| "pool_domains": [ |
| "code" |
| ], |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "method": "global_ridge", |
| "match_type": "mismatched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm8k_test_500_global_ridge_code_only_N6", |
| "accuracy": 0.0, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 29.342, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": -0.37500000000000006 |
| }, |
| { |
| "cell_id": "B::code_only::gsm_hard", |
| "stage": "pool_transfer", |
| "pool": "code_only", |
| "pool_size": 6, |
| "pool_anchor_names": [ |
| "r4:mbpp", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated" |
| ], |
| "pool_domains": [ |
| "code" |
| ], |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "method": "global_ridge", |
| "match_type": "mismatched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm_hard_global_ridge_code_only_N6", |
| "accuracy": 0.0, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 29.625, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": -0.730769230769231 |
| }, |
| { |
| "cell_id": "B::code_only::mbpp_plus", |
| "stage": "pool_transfer", |
| "pool": "code_only", |
| "pool_size": 6, |
| "pool_anchor_names": [ |
| "r4:mbpp", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated" |
| ], |
| "pool_domains": [ |
| "code" |
| ], |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "method": "global_ridge", |
| "match_type": "matched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_plus_global_ridge_code_only_N6", |
| "accuracy": 0.0, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 225.873, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": -0.9285714285714286 |
| }, |
| { |
| "cell_id": "B::code_only::mbpp_test_held", |
| "stage": "pool_transfer", |
| "pool": "code_only", |
| "pool_size": 6, |
| "pool_anchor_names": [ |
| "r4:mbpp", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated" |
| ], |
| "pool_domains": [ |
| "code" |
| ], |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "method": "global_ridge", |
| "match_type": "matched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_test_held_global_ridge_code_only_N6", |
| "accuracy": 0.0, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 7, |
| "eval_seconds": 74.141, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": -2.555555555555556 |
| }, |
| { |
| "cell_id": "B::code_only::openbookqa_test", |
| "stage": "pool_transfer", |
| "pool": "code_only", |
| "pool_size": 6, |
| "pool_anchor_names": [ |
| "r4:mbpp", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated" |
| ], |
| "pool_domains": [ |
| "code" |
| ], |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "method": "global_ridge", |
| "match_type": "mismatched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/openbookqa_test_global_ridge_code_only_N6", |
| "accuracy": 0.24666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 28.739, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": -1.6951219512195121 |
| }, |
| { |
| "cell_id": "B::math_only::gsm8k_test_500", |
| "stage": "pool_transfer", |
| "pool": "math_only", |
| "pool_size": 8, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mawps" |
| ], |
| "pool_domains": [ |
| "math" |
| ], |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "method": "global_ridge", |
| "match_type": "matched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm8k_test_500_global_ridge_math_only_N8", |
| "accuracy": 0.09666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 1, |
| "eval_seconds": 7.423, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.07812499999999999 |
| }, |
| { |
| "cell_id": "B::math_only::gsm_hard", |
| "stage": "pool_transfer", |
| "pool": "math_only", |
| "pool_size": 8, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mawps" |
| ], |
| "pool_domains": [ |
| "math" |
| ], |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "method": "global_ridge", |
| "match_type": "matched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm_hard_global_ridge_math_only_N8", |
| "accuracy": 0.05333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 20.802, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": -0.11538461538461542 |
| }, |
| { |
| "cell_id": "B::math_only::mbpp_plus", |
| "stage": "pool_transfer", |
| "pool": "math_only", |
| "pool_size": 8, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mawps" |
| ], |
| "pool_domains": [ |
| "math" |
| ], |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "method": "global_ridge", |
| "match_type": "mismatched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_plus_global_ridge_math_only_N8", |
| "accuracy": 0.20666666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 156.482, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": -0.04285714285714289 |
| }, |
| { |
| "cell_id": "B::math_only::mbpp_test_held", |
| "stage": "pool_transfer", |
| "pool": "math_only", |
| "pool_size": 8, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mawps" |
| ], |
| "pool_domains": [ |
| "math" |
| ], |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "method": "global_ridge", |
| "match_type": "mismatched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_test_held_global_ridge_math_only_N8", |
| "accuracy": 0.23, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 2, |
| "eval_seconds": 50.775, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.0 |
| }, |
| { |
| "cell_id": "B::math_only::openbookqa_test", |
| "stage": "pool_transfer", |
| "pool": "math_only", |
| "pool_size": 8, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mawps" |
| ], |
| "pool_domains": [ |
| "math" |
| ], |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "method": "global_ridge", |
| "match_type": "mismatched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/openbookqa_test_global_ridge_math_only_N8", |
| "accuracy": 0.73, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 4, |
| "eval_seconds": 4.616, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.07317073170731714 |
| }, |
| { |
| "cell_id": "B::math_plus_code::gsm8k_test_500", |
| "stage": "pool_transfer", |
| "pool": "math_plus_code", |
| "pool_size": 14, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated" |
| ], |
| "pool_domains": [ |
| "code", |
| "math" |
| ], |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "method": "global_ridge", |
| "match_type": "matched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm8k_test_500_global_ridge_math_plus_code_N14", |
| "accuracy": 0.09666666666666666, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 0, |
| "eval_seconds": 7.802, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.07812499999999999 |
| }, |
| { |
| "cell_id": "B::math_plus_code::gsm_hard", |
| "stage": "pool_transfer", |
| "pool": "math_plus_code", |
| "pool_size": 14, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated" |
| ], |
| "pool_domains": [ |
| "code", |
| "math" |
| ], |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "method": "global_ridge", |
| "match_type": "matched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm_hard_global_ridge_math_plus_code_N14", |
| "accuracy": 0.06, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 7, |
| "eval_seconds": 20.838, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": -0.038461538461538554 |
| }, |
| { |
| "cell_id": "B::math_plus_code::mbpp_plus", |
| "stage": "pool_transfer", |
| "pool": "math_plus_code", |
| "pool_size": 14, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated" |
| ], |
| "pool_domains": [ |
| "code", |
| "math" |
| ], |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "method": "global_ridge", |
| "match_type": "matched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_plus_global_ridge_math_plus_code_N14", |
| "accuracy": 0.25666666666666665, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 147.368, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": 0.17142857142857135 |
| }, |
| { |
| "cell_id": "B::math_plus_code::mbpp_test_held", |
| "stage": "pool_transfer", |
| "pool": "math_plus_code", |
| "pool_size": 14, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated" |
| ], |
| "pool_domains": [ |
| "code", |
| "math" |
| ], |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "method": "global_ridge", |
| "match_type": "matched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_test_held_global_ridge_math_plus_code_N14", |
| "accuracy": 0.25, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 1, |
| "eval_seconds": 50.993, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.22222222222222213 |
| }, |
| { |
| "cell_id": "B::math_plus_code::openbookqa_test", |
| "stage": "pool_transfer", |
| "pool": "math_plus_code", |
| "pool_size": 14, |
| "pool_anchor_names": [ |
| "r4:gsm8k", |
| "r4:mbpp", |
| "r4:svamp", |
| "r4:multiarith", |
| "r4:math_counting_easy", |
| "r4:humaneval", |
| "r4:mbpp_sanitized", |
| "r4:math_algebra_easy", |
| "r4:aqua_rat", |
| "r5:aqua_rat_numeric", |
| "r5:mawps", |
| "r5:mbpp_sanitized", |
| "r5:humaneval", |
| "r5:conala_curated" |
| ], |
| "pool_domains": [ |
| "code", |
| "math" |
| ], |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "method": "global_ridge", |
| "match_type": "mismatched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/openbookqa_test_global_ridge_math_plus_code_N14", |
| "accuracy": 0.7366666666666667, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 4.391, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.09756097560975632 |
| }, |
| { |
| "cell_id": "B::science_only::gsm8k_test_500", |
| "stage": "pool_transfer", |
| "pool": "science_only", |
| "pool_size": 8, |
| "pool_anchor_names": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_high_school_physics", |
| "r4:mmlu_elementary_math", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "pool_domains": [ |
| "science" |
| ], |
| "task": "gsm8k_test_500", |
| "target_domain": "math", |
| "method": "global_ridge", |
| "match_type": "mismatched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm8k_test_500_global_ridge_science_only_N8", |
| "accuracy": 0.10333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 3, |
| "eval_seconds": 27.498, |
| "base_Y": 0.08, |
| "oracle": 0.29333333333333333, |
| "gap_recovered": 0.109375 |
| }, |
| { |
| "cell_id": "B::science_only::gsm_hard", |
| "stage": "pool_transfer", |
| "pool": "science_only", |
| "pool_size": 8, |
| "pool_anchor_names": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_high_school_physics", |
| "r4:mmlu_elementary_math", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "pool_domains": [ |
| "science" |
| ], |
| "task": "gsm_hard", |
| "target_domain": "math", |
| "method": "global_ridge", |
| "match_type": "mismatched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm_hard_global_ridge_science_only_N8", |
| "accuracy": 0.06333333333333334, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 2, |
| "eval_seconds": 29.001, |
| "base_Y": 0.06333333333333334, |
| "oracle": 0.15, |
| "gap_recovered": 0.0 |
| }, |
| { |
| "cell_id": "B::science_only::mbpp_plus", |
| "stage": "pool_transfer", |
| "pool": "science_only", |
| "pool_size": 8, |
| "pool_anchor_names": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_high_school_physics", |
| "r4:mmlu_elementary_math", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "pool_domains": [ |
| "science" |
| ], |
| "task": "mbpp_plus", |
| "target_domain": "code", |
| "method": "global_ridge", |
| "match_type": "mismatched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_plus_global_ridge_science_only_N8", |
| "accuracy": 0.21333333333333335, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 5, |
| "eval_seconds": 164.28, |
| "base_Y": 0.21666666666666667, |
| "oracle": 0.45, |
| "gap_recovered": -0.01428571428571426 |
| }, |
| { |
| "cell_id": "B::science_only::mbpp_test_held", |
| "stage": "pool_transfer", |
| "pool": "science_only", |
| "pool_size": 8, |
| "pool_anchor_names": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_high_school_physics", |
| "r4:mmlu_elementary_math", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "pool_domains": [ |
| "science" |
| ], |
| "task": "mbpp_test_held", |
| "target_domain": "code", |
| "method": "global_ridge", |
| "match_type": "mismatched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_test_held_global_ridge_science_only_N8", |
| "accuracy": 0.24, |
| "real_generation_eval": true, |
| "eval_examples": 100, |
| "gpu": 4, |
| "eval_seconds": 50.504, |
| "base_Y": 0.23, |
| "oracle": 0.32, |
| "gap_recovered": 0.11111111111111091 |
| }, |
| { |
| "cell_id": "B::science_only::openbookqa_test", |
| "stage": "pool_transfer", |
| "pool": "science_only", |
| "pool_size": 8, |
| "pool_anchor_names": [ |
| "r4:sciq", |
| "r4:arc_easy", |
| "r4:openbookqa", |
| "r4:mmlu_high_school_biology", |
| "r4:mmlu_high_school_physics", |
| "r4:mmlu_elementary_math", |
| "r5:medmcqa_easy", |
| "r5:pubmedqa_pqal" |
| ], |
| "pool_domains": [ |
| "science" |
| ], |
| "task": "openbookqa_test", |
| "target_domain": "science", |
| "method": "global_ridge", |
| "match_type": "matched_domain", |
| "adapter_dir": "/workspace/round3_out/round_final/Y_pred/openbookqa_test_global_ridge_science_only_N8", |
| "accuracy": 0.7333333333333333, |
| "real_generation_eval": true, |
| "eval_examples": 300, |
| "gpu": 6, |
| "eval_seconds": 28.651, |
| "base_Y": 0.71, |
| "oracle": 0.9833333333333333, |
| "gap_recovered": 0.08536585365853654 |
| } |
| ] |
| } |