{ "backbone_id": "Qwen/Qwen2.5-7B", "backbone_dtype": "bfloat16", "mah_layer_indices": [ 7, 14, 21 ], "rrm_inject_indices": [ 14, 21 ], "community_layer_idx": 4, "num_mah_layers": 3, "mah": { "d_sub": 512, "d_divergence": 256, "num_heads": 4, "dropout": 0.1 }, "rrm": { "d_meta": 512, "inject_scale": 1.0 }, "ben": { "d_hidden": 256 }, "community": { "num_prototypes": 32, "d_community": 64, "temperature": 1.0, "use_prototypes": false }, "loss": { "ce_weight": 1.0, "chain_weight": 0.5, "bif_weight": 1.0, "regime_weight": 5.0, "div_alive_weight": 0.1, "inject_reg_weight": 0.0, "inject_target_norm": 1.0, "community_entropy_weight": 0.01, "community_supcon_weight": 2.0, "community_supcon_temperature": 0.1, "divergence_supcon_weight": 0.3, "divergence_supcon_temperature": 0.1, "listnet_weight": 0.5, "listnet_temperature": 1.0, "chain_residual_aux_weight": 0.05, "chain_residual_aux_target": 0.5 } }