| { | |
| "trial_index": 21, | |
| "refusals": 9, | |
| "total_eval_prompts": 100, | |
| "kl_divergence": 0.03438706323504448, | |
| "base_refusals": 46, | |
| "n_trials": 30, | |
| "n_startup_trials": 10, | |
| "parameters": { | |
| "direction_index": "per layer", | |
| "attn.o_proj.max_weight": "1.49", | |
| "attn.o_proj.max_weight_position": "24.83", | |
| "attn.o_proj.min_weight": "0.73", | |
| "attn.o_proj.min_weight_distance": "8.80", | |
| "mlp.down_proj.max_weight": "1.11", | |
| "mlp.down_proj.max_weight_position": "35.18", | |
| "mlp.down_proj.min_weight": "0.48", | |
| "mlp.down_proj.min_weight_distance": "3.09" | |
| } | |
| } |