Instructions to use HaadesX/Iconoclast with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use HaadesX/Iconoclast with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("HaadesX/Iconoclast", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "model": "microsoft/Phi-4-mini-instruct", | |
| "study_checkpoint_dir": "/common/users/vp752/iconoclast_ilabs/checkpoints/phi4-mini-seq", | |
| "base_metrics": { | |
| "refusals": 20, | |
| "overrefusals": 1, | |
| "harmful_marker_hits": 61, | |
| "harmful_compliance_score": 0.1103125, | |
| "objective_regime": "refusal_reduction" | |
| }, | |
| "pareto_trials": [ | |
| { | |
| "index": 28, | |
| "refusals": 2, | |
| "overrefusals": 1, | |
| "harmful_marker_hits": 2, | |
| "harmful_compliance_score": 0.8972916666666666, | |
| "objective_regime": "refusal_reduction", | |
| "merge_penalty": 0.0, | |
| "kl_divergence": 0.02042904868721962, | |
| "direction_method": "variance", | |
| "direction_scope": "global", | |
| "direction_index": 16.45222352347153, | |
| "direction_blend": 0.3728440320243102, | |
| "parameters": { | |
| "attn.o_proj": { | |
| "max_weight": 1.882400232860422, | |
| "max_weight_position": 13.328367131271403, | |
| "min_weight": 0.6652997220745103, | |
| "min_weight_distance": 8.31304092962315 | |
| }, | |
| "mlp.down_proj": { | |
| "max_weight": 1.5015368946798242, | |
| "max_weight_position": 13.674168714713092, | |
| "min_weight": 0.11981929558782818, | |
| "min_weight_distance": 11.52494746677227 | |
| } | |
| }, | |
| "harmful_axis_metrics": {} | |
| }, | |
| { | |
| "index": 35, | |
| "refusals": 3, | |
| "overrefusals": 1, | |
| "harmful_marker_hits": 3, | |
| "harmful_compliance_score": 0.8765624999999998, | |
| "objective_regime": "refusal_reduction", | |
| "merge_penalty": 0.0, | |
| "kl_divergence": 0.01764761470258236, | |
| "direction_method": "variance", | |
| "direction_scope": "global", | |
| "direction_index": 14.391700130129191, | |
| "direction_blend": 0.48119901424426287, | |
| "parameters": { | |
| "attn.o_proj": { | |
| "max_weight": 1.8153179845721898, | |
| "max_weight_position": 14.096237144989855, | |
| "min_weight": 0.1952412632909053, | |
| "min_weight_distance": 7.563651219512723 | |
| }, | |
| "mlp.down_proj": { | |
| "max_weight": 1.6339436954317168, | |
| "max_weight_position": 16.614695316501514, | |
| "min_weight": 0.14139150079012267, | |
| "min_weight_distance": 10.933304280225325 | |
| } | |
| }, | |
| "harmful_axis_metrics": {} | |
| }, | |
| { | |
| "index": 21, | |
| "refusals": 4, | |
| "overrefusals": 1, | |
| "harmful_marker_hits": 5, | |
| "harmful_compliance_score": 0.8979166666666665, | |
| "objective_regime": "refusal_reduction", | |
| "merge_penalty": 0.0, | |
| "kl_divergence": 0.014471019618213177, | |
| "direction_method": "mean", | |
| "direction_scope": "global", | |
| "direction_index": 14.498629309192385, | |
| "direction_blend": 0.8300558396943958, | |
| "parameters": { | |
| "attn.o_proj": { | |
| "max_weight": 1.1983582967647113, | |
| "max_weight_position": 13.868190570832633, | |
| "min_weight": 0.8900674738884767, | |
| "min_weight_distance": 11.933617812175987 | |
| }, | |
| "mlp.down_proj": { | |
| "max_weight": 1.1356281861124395, | |
| "max_weight_position": 16.564984175000383, | |
| "min_weight": 0.3297443238531411, | |
| "min_weight_distance": 18.394843487603442 | |
| } | |
| }, | |
| "harmful_axis_metrics": {} | |
| }, | |
| { | |
| "index": 30, | |
| "refusals": 10, | |
| "overrefusals": 1, | |
| "harmful_marker_hits": 11, | |
| "harmful_compliance_score": 0.8746875, | |
| "objective_regime": "refusal_reduction", | |
| "merge_penalty": 0.0, | |
| "kl_divergence": 0.010015908628702164, | |
| "direction_method": "median", | |
| "direction_scope": "global", | |
| "direction_index": 17.66484981854466, | |
| "direction_blend": 0.21628527164515216, | |
| "parameters": { | |
| "attn.o_proj": { | |
| "max_weight": 1.774744142249135, | |
| "max_weight_position": 14.257925722400284, | |
| "min_weight": 0.6188083561998832, | |
| "min_weight_distance": 9.469596075803267 | |
| }, | |
| "mlp.down_proj": { | |
| "max_weight": 1.3468990075749931, | |
| "max_weight_position": 18.293592163081765, | |
| "min_weight": 0.10765475308238112, | |
| "min_weight_distance": 8.26487191097985 | |
| } | |
| }, | |
| "harmful_axis_metrics": {} | |
| } | |
| ] | |
| } |