jacobcd52
/

ss_bridges_d3072_f0.005

jacobcd52 committed on Dec 22, 2025

Commit

c696036

verified ·

1 Parent(s): 1fbedb1

Upload training_config.yaml with huggingface_hub

Files changed (1) hide show

training_config.yaml CHANGED Viewed

@@ -18,7 +18,13 @@ sparse_model:
   use_bias: true
   use_flash_attention: true
 bridges:
   encoder_afrac: 0.25
   coef_nmse: 1.0
   coef_kl_d2s: 1.0
   coef_kl_s2d: 1.0
@@ -33,7 +39,7 @@ sparsity:
   min_weights_per_neuron: 4
   enable_activation_sparsity: true
   activation_topk_fraction: 0.25
-  activation_sparsity_locations: attn_in,attn_out,mlp_in,mlp_out,mlp_neuron,attn_v,attn_k,attn_q
 optimizer:
   optimizer_type: adamw
   learning_rate: 0.001

   use_bias: true
   use_flash_attention: true
 bridges:
+  bridge_act_fn: abstopk
   encoder_afrac: 0.25
+  threshold_sharpness_init: 1.0
+  threshold_sharpness_final: 100.0
+  threshold_anneal_start_fraction: 0.0
+  threshold_anneal_end_fraction: 0.5
+  threshold_init_log_eps: -1.0
   coef_nmse: 1.0
   coef_kl_d2s: 1.0
   coef_kl_s2d: 1.0
   min_weights_per_neuron: 4
   enable_activation_sparsity: true
   activation_topk_fraction: 0.25
+  activation_sparsity_locations: attn_in,attn_out,mlp_in,mlp_out,mlp_neuron,attn_v,attn_k,attn_q,resid_mid,resid_pre
 optimizer:
   optimizer_type: adamw
   learning_rate: 0.001