{ "config": { "hub_repo": "CK0607/cross-model-lora-prediction-3b", "model_Y": "meta-llama/Llama-3.2-3B-Instruct", "method": "global_ridge", "no_surrogate": true, "heldouts": [ "gsm_hard", "gsm8k_test_500", "mbpp_test_held", "mbpp_plus", "openbookqa_test" ] }, "pools": { "math_only": [ "r4:gsm8k", "r4:svamp", "r4:multiarith", "r4:math_counting_easy", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mawps" ], "code_only": [ "r4:mbpp", "r4:humaneval", "r4:mbpp_sanitized", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated" ], "science_only": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:mmlu_high_school_biology", "r4:mmlu_high_school_physics", "r4:mmlu_elementary_math", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "math_plus_code": [ "r4:gsm8k", "r4:mbpp", "r4:svamp", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated" ], "all": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ] }, "baselines": { "gsm_hard": { "base_Y": 0.06333333333333334, "oracle": 0.15 }, "gsm8k_test_500": { "base_Y": 0.08, "oracle": 0.29333333333333333 }, "mbpp_test_held": { "base_Y": 0.23, "oracle": 0.32 }, "mbpp_plus": { "base_Y": 0.21666666666666667, "oracle": 0.45 }, "openbookqa_test": { "base_Y": 0.71, "oracle": 0.9833333333333333 } }, "summary": { "by_pool": { "math_only": { "mean_gap_recovered": -0.0013892053068882359, "n": 5 }, "code_only": { "mean_gap_recovered": -1.2570036332231456, "n": 5 }, "science_only": { "mean_gap_recovered": 0.05831325009678664, "n": 5 }, "math_plus_code": { "mean_gap_recovered": 0.10617504615980225, "n": 5 }, "all": { "mean_gap_recovered": 0.12118442702284167, "n": 5 } }, "matched_vs_mismatched": { "matched_domain": { "mean_gap_recovered": -0.33363405451820094, "n": 9 }, "mismatched_domain": { "mean_gap_recovered": -0.22425602006394688, "n": 11 }, "all_control": { "mean_gap_recovered": 0.12118442702284167, "n": 5 } }, "winners_by_task": { "gsm_hard": { "pool": "science_only", "gap_recovered": 0.0, "match_type": "mismatched_domain" }, "gsm8k_test_500": { "pool": "science_only", "gap_recovered": 0.109375, "match_type": "mismatched_domain" }, "mbpp_test_held": { "pool": "math_plus_code", "gap_recovered": 0.22222222222222213, "match_type": "matched_domain" }, "mbpp_plus": { "pool": "math_plus_code", "gap_recovered": 0.17142857142857135, "match_type": "matched_domain" }, "openbookqa_test": { "pool": "all", "gap_recovered": 0.1341463414634149, "match_type": "all_control" } }, "decision": "mixed; more anchors and curation both matter" }, "records": [ { "cell_id": "B::all::gsm8k_test_500", "stage": "pool_transfer", "pool": "all", "pool_size": 24, "pool_anchor_names": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "pool_domains": [ "code", "math", "science" ], "task": "gsm8k_test_500", "target_domain": "math", "method": "global_ridge", "match_type": "all_control", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm8k_test_500_global_ridge_all_N24", "accuracy": 0.09666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 7.885, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.07812499999999999 }, { "cell_id": "B::all::gsm_hard", "stage": "pool_transfer", "pool": "all", "pool_size": 24, "pool_anchor_names": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "pool_domains": [ "code", "math", "science" ], "task": "gsm_hard", "target_domain": "math", "method": "global_ridge", "match_type": "all_control", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm_hard_global_ridge_all_N24", "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 22.563, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0 }, { "cell_id": "B::all::mbpp_plus", "stage": "pool_transfer", "pool": "all", "pool_size": 24, "pool_anchor_names": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "pool_domains": [ "code", "math", "science" ], "task": "mbpp_plus", "target_domain": "code", "method": "global_ridge", "match_type": "all_control", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_plus_global_ridge_all_N24", "accuracy": 0.25666666666666665, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 150.049, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.17142857142857135 }, { "cell_id": "B::all::mbpp_test_held", "stage": "pool_transfer", "pool": "all", "pool_size": 24, "pool_anchor_names": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "pool_domains": [ "code", "math", "science" ], "task": "mbpp_test_held", "target_domain": "code", "method": "global_ridge", "match_type": "all_control", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_test_held_global_ridge_all_N24", "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 6, "eval_seconds": 50.906, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213 }, { "cell_id": "B::all::openbookqa_test", "stage": "pool_transfer", "pool": "all", "pool_size": 24, "pool_anchor_names": [ "r4:gsm8k", "r4:mbpp", "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:svamp", "r4:multiarith", "r4:mmlu_high_school_biology", "r4:math_counting_easy", "r4:humaneval", "r4:mmlu_high_school_physics", "r4:mbpp_sanitized", "r4:mmlu_elementary_math", "r4:math_algebra_easy", "r4:aqua_rat", "r4:medmcqa_easy", "r5:aqua_rat_numeric", "r5:math_counting_easy", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "pool_domains": [ "code", "math", "science" ], "task": "openbookqa_test", "target_domain": "science", "method": "global_ridge", "match_type": "all_control", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/openbookqa_test_global_ridge_all_N24", "accuracy": 0.7466666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 22.64, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.1341463414634149 }, { "cell_id": "B::code_only::gsm8k_test_500", "stage": "pool_transfer", "pool": "code_only", "pool_size": 6, "pool_anchor_names": [ "r4:mbpp", "r4:humaneval", "r4:mbpp_sanitized", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated" ], "pool_domains": [ "code" ], "task": "gsm8k_test_500", "target_domain": "math", "method": "global_ridge", "match_type": "mismatched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm8k_test_500_global_ridge_code_only_N6", "accuracy": 0.0, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 29.342, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": -0.37500000000000006 }, { "cell_id": "B::code_only::gsm_hard", "stage": "pool_transfer", "pool": "code_only", "pool_size": 6, "pool_anchor_names": [ "r4:mbpp", "r4:humaneval", "r4:mbpp_sanitized", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated" ], "pool_domains": [ "code" ], "task": "gsm_hard", "target_domain": "math", "method": "global_ridge", "match_type": "mismatched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm_hard_global_ridge_code_only_N6", "accuracy": 0.0, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 29.625, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.730769230769231 }, { "cell_id": "B::code_only::mbpp_plus", "stage": "pool_transfer", "pool": "code_only", "pool_size": 6, "pool_anchor_names": [ "r4:mbpp", "r4:humaneval", "r4:mbpp_sanitized", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated" ], "pool_domains": [ "code" ], "task": "mbpp_plus", "target_domain": "code", "method": "global_ridge", "match_type": "matched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_plus_global_ridge_code_only_N6", "accuracy": 0.0, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 225.873, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.9285714285714286 }, { "cell_id": "B::code_only::mbpp_test_held", "stage": "pool_transfer", "pool": "code_only", "pool_size": 6, "pool_anchor_names": [ "r4:mbpp", "r4:humaneval", "r4:mbpp_sanitized", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated" ], "pool_domains": [ "code" ], "task": "mbpp_test_held", "target_domain": "code", "method": "global_ridge", "match_type": "matched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_test_held_global_ridge_code_only_N6", "accuracy": 0.0, "real_generation_eval": true, "eval_examples": 100, "gpu": 7, "eval_seconds": 74.141, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": -2.555555555555556 }, { "cell_id": "B::code_only::openbookqa_test", "stage": "pool_transfer", "pool": "code_only", "pool_size": 6, "pool_anchor_names": [ "r4:mbpp", "r4:humaneval", "r4:mbpp_sanitized", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated" ], "pool_domains": [ "code" ], "task": "openbookqa_test", "target_domain": "science", "method": "global_ridge", "match_type": "mismatched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/openbookqa_test_global_ridge_code_only_N6", "accuracy": 0.24666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 28.739, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": -1.6951219512195121 }, { "cell_id": "B::math_only::gsm8k_test_500", "stage": "pool_transfer", "pool": "math_only", "pool_size": 8, "pool_anchor_names": [ "r4:gsm8k", "r4:svamp", "r4:multiarith", "r4:math_counting_easy", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mawps" ], "pool_domains": [ "math" ], "task": "gsm8k_test_500", "target_domain": "math", "method": "global_ridge", "match_type": "matched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm8k_test_500_global_ridge_math_only_N8", "accuracy": 0.09666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 1, "eval_seconds": 7.423, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.07812499999999999 }, { "cell_id": "B::math_only::gsm_hard", "stage": "pool_transfer", "pool": "math_only", "pool_size": 8, "pool_anchor_names": [ "r4:gsm8k", "r4:svamp", "r4:multiarith", "r4:math_counting_easy", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mawps" ], "pool_domains": [ "math" ], "task": "gsm_hard", "target_domain": "math", "method": "global_ridge", "match_type": "matched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm_hard_global_ridge_math_only_N8", "accuracy": 0.05333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 20.802, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.11538461538461542 }, { "cell_id": "B::math_only::mbpp_plus", "stage": "pool_transfer", "pool": "math_only", "pool_size": 8, "pool_anchor_names": [ "r4:gsm8k", "r4:svamp", "r4:multiarith", "r4:math_counting_easy", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mawps" ], "pool_domains": [ "math" ], "task": "mbpp_plus", "target_domain": "code", "method": "global_ridge", "match_type": "mismatched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_plus_global_ridge_math_only_N8", "accuracy": 0.20666666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 156.482, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.04285714285714289 }, { "cell_id": "B::math_only::mbpp_test_held", "stage": "pool_transfer", "pool": "math_only", "pool_size": 8, "pool_anchor_names": [ "r4:gsm8k", "r4:svamp", "r4:multiarith", "r4:math_counting_easy", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mawps" ], "pool_domains": [ "math" ], "task": "mbpp_test_held", "target_domain": "code", "method": "global_ridge", "match_type": "mismatched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_test_held_global_ridge_math_only_N8", "accuracy": 0.23, "real_generation_eval": true, "eval_examples": 100, "gpu": 2, "eval_seconds": 50.775, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.0 }, { "cell_id": "B::math_only::openbookqa_test", "stage": "pool_transfer", "pool": "math_only", "pool_size": 8, "pool_anchor_names": [ "r4:gsm8k", "r4:svamp", "r4:multiarith", "r4:math_counting_easy", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mawps" ], "pool_domains": [ "math" ], "task": "openbookqa_test", "target_domain": "science", "method": "global_ridge", "match_type": "mismatched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/openbookqa_test_global_ridge_math_only_N8", "accuracy": 0.73, "real_generation_eval": true, "eval_examples": 300, "gpu": 4, "eval_seconds": 4.616, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.07317073170731714 }, { "cell_id": "B::math_plus_code::gsm8k_test_500", "stage": "pool_transfer", "pool": "math_plus_code", "pool_size": 14, "pool_anchor_names": [ "r4:gsm8k", "r4:mbpp", "r4:svamp", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated" ], "pool_domains": [ "code", "math" ], "task": "gsm8k_test_500", "target_domain": "math", "method": "global_ridge", "match_type": "matched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm8k_test_500_global_ridge_math_plus_code_N14", "accuracy": 0.09666666666666666, "real_generation_eval": true, "eval_examples": 300, "gpu": 0, "eval_seconds": 7.802, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.07812499999999999 }, { "cell_id": "B::math_plus_code::gsm_hard", "stage": "pool_transfer", "pool": "math_plus_code", "pool_size": 14, "pool_anchor_names": [ "r4:gsm8k", "r4:mbpp", "r4:svamp", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated" ], "pool_domains": [ "code", "math" ], "task": "gsm_hard", "target_domain": "math", "method": "global_ridge", "match_type": "matched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm_hard_global_ridge_math_plus_code_N14", "accuracy": 0.06, "real_generation_eval": true, "eval_examples": 300, "gpu": 7, "eval_seconds": 20.838, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": -0.038461538461538554 }, { "cell_id": "B::math_plus_code::mbpp_plus", "stage": "pool_transfer", "pool": "math_plus_code", "pool_size": 14, "pool_anchor_names": [ "r4:gsm8k", "r4:mbpp", "r4:svamp", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated" ], "pool_domains": [ "code", "math" ], "task": "mbpp_plus", "target_domain": "code", "method": "global_ridge", "match_type": "matched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_plus_global_ridge_math_plus_code_N14", "accuracy": 0.25666666666666665, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 147.368, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": 0.17142857142857135 }, { "cell_id": "B::math_plus_code::mbpp_test_held", "stage": "pool_transfer", "pool": "math_plus_code", "pool_size": 14, "pool_anchor_names": [ "r4:gsm8k", "r4:mbpp", "r4:svamp", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated" ], "pool_domains": [ "code", "math" ], "task": "mbpp_test_held", "target_domain": "code", "method": "global_ridge", "match_type": "matched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_test_held_global_ridge_math_plus_code_N14", "accuracy": 0.25, "real_generation_eval": true, "eval_examples": 100, "gpu": 1, "eval_seconds": 50.993, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.22222222222222213 }, { "cell_id": "B::math_plus_code::openbookqa_test", "stage": "pool_transfer", "pool": "math_plus_code", "pool_size": 14, "pool_anchor_names": [ "r4:gsm8k", "r4:mbpp", "r4:svamp", "r4:multiarith", "r4:math_counting_easy", "r4:humaneval", "r4:mbpp_sanitized", "r4:math_algebra_easy", "r4:aqua_rat", "r5:aqua_rat_numeric", "r5:mawps", "r5:mbpp_sanitized", "r5:humaneval", "r5:conala_curated" ], "pool_domains": [ "code", "math" ], "task": "openbookqa_test", "target_domain": "science", "method": "global_ridge", "match_type": "mismatched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/openbookqa_test_global_ridge_math_plus_code_N14", "accuracy": 0.7366666666666667, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 4.391, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.09756097560975632 }, { "cell_id": "B::science_only::gsm8k_test_500", "stage": "pool_transfer", "pool": "science_only", "pool_size": 8, "pool_anchor_names": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:mmlu_high_school_biology", "r4:mmlu_high_school_physics", "r4:mmlu_elementary_math", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "pool_domains": [ "science" ], "task": "gsm8k_test_500", "target_domain": "math", "method": "global_ridge", "match_type": "mismatched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm8k_test_500_global_ridge_science_only_N8", "accuracy": 0.10333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 3, "eval_seconds": 27.498, "base_Y": 0.08, "oracle": 0.29333333333333333, "gap_recovered": 0.109375 }, { "cell_id": "B::science_only::gsm_hard", "stage": "pool_transfer", "pool": "science_only", "pool_size": 8, "pool_anchor_names": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:mmlu_high_school_biology", "r4:mmlu_high_school_physics", "r4:mmlu_elementary_math", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "pool_domains": [ "science" ], "task": "gsm_hard", "target_domain": "math", "method": "global_ridge", "match_type": "mismatched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm_hard_global_ridge_science_only_N8", "accuracy": 0.06333333333333334, "real_generation_eval": true, "eval_examples": 300, "gpu": 2, "eval_seconds": 29.001, "base_Y": 0.06333333333333334, "oracle": 0.15, "gap_recovered": 0.0 }, { "cell_id": "B::science_only::mbpp_plus", "stage": "pool_transfer", "pool": "science_only", "pool_size": 8, "pool_anchor_names": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:mmlu_high_school_biology", "r4:mmlu_high_school_physics", "r4:mmlu_elementary_math", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "pool_domains": [ "science" ], "task": "mbpp_plus", "target_domain": "code", "method": "global_ridge", "match_type": "mismatched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_plus_global_ridge_science_only_N8", "accuracy": 0.21333333333333335, "real_generation_eval": true, "eval_examples": 300, "gpu": 5, "eval_seconds": 164.28, "base_Y": 0.21666666666666667, "oracle": 0.45, "gap_recovered": -0.01428571428571426 }, { "cell_id": "B::science_only::mbpp_test_held", "stage": "pool_transfer", "pool": "science_only", "pool_size": 8, "pool_anchor_names": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:mmlu_high_school_biology", "r4:mmlu_high_school_physics", "r4:mmlu_elementary_math", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "pool_domains": [ "science" ], "task": "mbpp_test_held", "target_domain": "code", "method": "global_ridge", "match_type": "mismatched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_test_held_global_ridge_science_only_N8", "accuracy": 0.24, "real_generation_eval": true, "eval_examples": 100, "gpu": 4, "eval_seconds": 50.504, "base_Y": 0.23, "oracle": 0.32, "gap_recovered": 0.11111111111111091 }, { "cell_id": "B::science_only::openbookqa_test", "stage": "pool_transfer", "pool": "science_only", "pool_size": 8, "pool_anchor_names": [ "r4:sciq", "r4:arc_easy", "r4:openbookqa", "r4:mmlu_high_school_biology", "r4:mmlu_high_school_physics", "r4:mmlu_elementary_math", "r5:medmcqa_easy", "r5:pubmedqa_pqal" ], "pool_domains": [ "science" ], "task": "openbookqa_test", "target_domain": "science", "method": "global_ridge", "match_type": "matched_domain", "adapter_dir": "/workspace/round3_out/round_final/Y_pred/openbookqa_test_global_ridge_science_only_N8", "accuracy": 0.7333333333333333, "real_generation_eval": true, "eval_examples": 300, "gpu": 6, "eval_seconds": 28.651, "base_Y": 0.71, "oracle": 0.9833333333333333, "gap_recovered": 0.08536585365853654 } ] }