{ "meta": { "model": "meta-llama/Llama-2-7b-chat-hf", "device": "cuda", "dtype": "fp32", "layer": 10, "layers_path": "model.layers", "seed": 123, "task": "humaneval", "eval_mode": "gen_code_compile", "eval_meta": { "hf_id": "openai_humaneval", "split": "test", "n_total": 164 }, "n_eval_loaded": 164, "n_scanned": 164, "base_acc_scan": 0.3475609756097561, "ablt_acc_scan": 0.31097560975609756, "flips_total": 38, "flips_used": 38, "patch_steps": [ 0, 1, 2, 3 ], "patch_n_steps": 4, "Qs_path": "Q_shared_layer10.npy", "Qs_shape": [ 4096, 97 ], "gold_text_prefix": " ", "dist_text_prefix": " ", "gold_max_tokens": 0, "distractor_mode": "next_gold", "answer_prefix_effective": "\nFinal answer:", "max_new_tokens_effective": 256, "run_coeff_controls": false, "use_benchmark_loader": false, "hf_id": "openai_humaneval", "hf_split": "test" }, "summary_on_flips": { "patched_self": { "n": 38, "rescued": 8, "rescued_pct": 21.05263157894737 }, "control_time_shuffled": { "n": 38, "rescued": 8, "rescued_pct": 21.05263157894737 }, "control_shared_randvec": { "n": 38, "rescued": 10, "rescued_pct": 26.31578947368421 }, "control_rand_subspace": { "n": 38, "rescued": 7, "rescued_pct": 18.42105263157895 }, "control_patch_nonshared": { "n": 38, "rescued": 2, "rescued_pct": 5.2631578947368425 } }, "scan_rows": [ { "ex_id": "openai_humaneval-test-18", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-31", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-158", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-43", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-39", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-15", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-151", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-101", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-34", "baseline": { "compile_ok": true, "n_gen_tokens": 232 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-52", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-113", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-119", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-83", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-116", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-56", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-131", "baseline": { "compile_ok": true, "n_gen_tokens": 191 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-1", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-159", "baseline": { "compile_ok": true, "n_gen_tokens": 163 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-123", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-23", "baseline": { "compile_ok": false, "n_gen_tokens": 227 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-124", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-38", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-84", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-41", "baseline": { "compile_ok": false, "n_gen_tokens": 174 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-134", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-2", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-80", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-74", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-162", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-138", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-87", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-145", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-54", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-109", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-102", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-62", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-129", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-110", "baseline": { "compile_ok": true, "n_gen_tokens": 208 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-4", "baseline": { "compile_ok": false, "n_gen_tokens": 222 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-8", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-97", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-137", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-96", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-65", "baseline": { "compile_ok": false, "n_gen_tokens": 237 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-114", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-117", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-155", "baseline": { "compile_ok": false, "n_gen_tokens": 233 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-37", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-115", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-86", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-85", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-53", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-130", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-139", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-81", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-19", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-69", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-122", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-108", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-48", "baseline": { "compile_ok": false, "n_gen_tokens": 244 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-17", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-95", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-91", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-79", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-14", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-77", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-3", "baseline": { "compile_ok": true, "n_gen_tokens": 156 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-146", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-57", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-143", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-0", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-106", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-50", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-58", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-147", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-160", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-67", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-55", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-118", "baseline": { "compile_ok": true, "n_gen_tokens": 204 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-154", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-25", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-36", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-63", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-132", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-21", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-9", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-72", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-128", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-126", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-70", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-40", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-66", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-71", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-107", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-32", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-11", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-47", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-22", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-140", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-30", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-10", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-112", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-111", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-28", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-135", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-94", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-13", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-7", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-157", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-49", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-120", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-89", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-98", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-100", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-64", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-103", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-125", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-51", "baseline": { "compile_ok": false, "n_gen_tokens": 229 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-90", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-152", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-24", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-20", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-148", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-92", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-26", "baseline": { "compile_ok": false, "n_gen_tokens": 235 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-16", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-142", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-99", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-127", "baseline": { "compile_ok": true, "n_gen_tokens": 171 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-156", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-141", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-78", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-68", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-93", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-60", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-82", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-59", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-149", "baseline": { "compile_ok": true, "n_gen_tokens": 205 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-42", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-163", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-133", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-161", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-29", "baseline": { "compile_ok": false, "n_gen_tokens": 239 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-27", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-61", "baseline": { "compile_ok": true, "n_gen_tokens": 253 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-104", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-45", "baseline": { "compile_ok": false, "n_gen_tokens": 213 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-75", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-6", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-136", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-33", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-44", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-88", "baseline": { "compile_ok": false, "n_gen_tokens": 233 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-12", "baseline": { "compile_ok": true, "n_gen_tokens": 177 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-105", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-121", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-144", "baseline": { "compile_ok": true, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-73", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-76", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-5", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": true, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-46", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-150", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-35", "baseline": { "compile_ok": false, "n_gen_tokens": 256 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } }, { "ex_id": "openai_humaneval-test-153", "baseline": { "compile_ok": true, "n_gen_tokens": 242 }, "ablated": { "compile_ok": false, "n_gen_tokens": 256 } } ], "flip_rows": [ { "ex_id": "openai_humaneval-test-31", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-158", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-39", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-101", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": true }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-56", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-1", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": true }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": true }, "control_rand_subspace": { "compile_ok": true }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-159", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": true }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-123", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": true }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-38", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": true }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-84", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": true }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": true }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-134", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": true }, "control_time_shuffled": { "compile_ok": true }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": true } }, { "ex_id": "openai_humaneval-test-54", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-110", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-86", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": true }, "control_rand_subspace": { "compile_ok": true }, "control_patch_nonshared": { "compile_ok": true } }, { "ex_id": "openai_humaneval-test-53", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-81", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-122", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-17", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": true }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-95", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": true }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-3", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": true }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-58", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": true }, "control_shared_randvec": { "compile_ok": true }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-147", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": true }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-55", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-118", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-30", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-112", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": true }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": true }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-49", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-64", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-92", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": true }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": true }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-127", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": true }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-68", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-93", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-133", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": true }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-61", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": true }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-104", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": true }, "control_time_shuffled": { "compile_ok": true }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-12", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": true }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-105", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": false }, "control_time_shuffled": { "compile_ok": true }, "control_shared_randvec": { "compile_ok": true }, "control_rand_subspace": { "compile_ok": true }, "control_patch_nonshared": { "compile_ok": false } }, { "ex_id": "openai_humaneval-test-153", "baseline": { "compile_ok": true }, "ablated": { "compile_ok": false }, "patched_self": { "compile_ok": true }, "control_time_shuffled": { "compile_ok": false }, "control_shared_randvec": { "compile_ok": false }, "control_rand_subspace": { "compile_ok": false }, "control_patch_nonshared": { "compile_ok": false } } ] }