{ "model": "microsoft/Phi-4-mini-instruct", "study_checkpoint_dir": "/common/users/vp752/iconoclast_ilabs/checkpoints/phi4-mini-seq", "base_metrics": { "refusals": 20, "overrefusals": 1, "harmful_marker_hits": 61, "harmful_compliance_score": 0.1103125, "objective_regime": "refusal_reduction" }, "pareto_trials": [ { "index": 28, "refusals": 2, "overrefusals": 1, "harmful_marker_hits": 2, "harmful_compliance_score": 0.8972916666666666, "objective_regime": "refusal_reduction", "merge_penalty": 0.0, "kl_divergence": 0.02042904868721962, "direction_method": "variance", "direction_scope": "global", "direction_index": 16.45222352347153, "direction_blend": 0.3728440320243102, "parameters": { "attn.o_proj": { "max_weight": 1.882400232860422, "max_weight_position": 13.328367131271403, "min_weight": 0.6652997220745103, "min_weight_distance": 8.31304092962315 }, "mlp.down_proj": { "max_weight": 1.5015368946798242, "max_weight_position": 13.674168714713092, "min_weight": 0.11981929558782818, "min_weight_distance": 11.52494746677227 } }, "harmful_axis_metrics": {} }, { "index": 35, "refusals": 3, "overrefusals": 1, "harmful_marker_hits": 3, "harmful_compliance_score": 0.8765624999999998, "objective_regime": "refusal_reduction", "merge_penalty": 0.0, "kl_divergence": 0.01764761470258236, "direction_method": "variance", "direction_scope": "global", "direction_index": 14.391700130129191, "direction_blend": 0.48119901424426287, "parameters": { "attn.o_proj": { "max_weight": 1.8153179845721898, "max_weight_position": 14.096237144989855, "min_weight": 0.1952412632909053, "min_weight_distance": 7.563651219512723 }, "mlp.down_proj": { "max_weight": 1.6339436954317168, "max_weight_position": 16.614695316501514, "min_weight": 0.14139150079012267, "min_weight_distance": 10.933304280225325 } }, "harmful_axis_metrics": {} }, { "index": 21, "refusals": 4, "overrefusals": 1, "harmful_marker_hits": 5, "harmful_compliance_score": 0.8979166666666665, "objective_regime": "refusal_reduction", "merge_penalty": 0.0, "kl_divergence": 0.014471019618213177, "direction_method": "mean", "direction_scope": "global", "direction_index": 14.498629309192385, "direction_blend": 0.8300558396943958, "parameters": { "attn.o_proj": { "max_weight": 1.1983582967647113, "max_weight_position": 13.868190570832633, "min_weight": 0.8900674738884767, "min_weight_distance": 11.933617812175987 }, "mlp.down_proj": { "max_weight": 1.1356281861124395, "max_weight_position": 16.564984175000383, "min_weight": 0.3297443238531411, "min_weight_distance": 18.394843487603442 } }, "harmful_axis_metrics": {} }, { "index": 30, "refusals": 10, "overrefusals": 1, "harmful_marker_hits": 11, "harmful_compliance_score": 0.8746875, "objective_regime": "refusal_reduction", "merge_penalty": 0.0, "kl_divergence": 0.010015908628702164, "direction_method": "median", "direction_scope": "global", "direction_index": 17.66484981854466, "direction_blend": 0.21628527164515216, "parameters": { "attn.o_proj": { "max_weight": 1.774744142249135, "max_weight_position": 14.257925722400284, "min_weight": 0.6188083561998832, "min_weight_distance": 9.469596075803267 }, "mlp.down_proj": { "max_weight": 1.3468990075749931, "max_weight_position": 18.293592163081765, "min_weight": 0.10765475308238112, "min_weight_distance": 8.26487191097985 } }, "harmful_axis_metrics": {} } ] }