jacobcd52 committed on
Commit
c696036
·
verified ·
1 Parent(s): 1fbedb1

Upload training_config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_config.yaml +7 -1
training_config.yaml CHANGED
@@ -18,7 +18,13 @@ sparse_model:
18
  use_bias: true
19
  use_flash_attention: true
20
  bridges:
 
21
  encoder_afrac: 0.25
 
 
 
 
 
22
  coef_nmse: 1.0
23
  coef_kl_d2s: 1.0
24
  coef_kl_s2d: 1.0
@@ -33,7 +39,7 @@ sparsity:
33
  min_weights_per_neuron: 4
34
  enable_activation_sparsity: true
35
  activation_topk_fraction: 0.25
36
- activation_sparsity_locations: attn_in,attn_out,mlp_in,mlp_out,mlp_neuron,attn_v,attn_k,attn_q
37
  optimizer:
38
  optimizer_type: adamw
39
  learning_rate: 0.001
 
18
  use_bias: true
19
  use_flash_attention: true
20
  bridges:
21
+ bridge_act_fn: abstopk
22
  encoder_afrac: 0.25
23
+ threshold_sharpness_init: 1.0
24
+ threshold_sharpness_final: 100.0
25
+ threshold_anneal_start_fraction: 0.0
26
+ threshold_anneal_end_fraction: 0.5
27
+ threshold_init_log_eps: -1.0
28
  coef_nmse: 1.0
29
  coef_kl_d2s: 1.0
30
  coef_kl_s2d: 1.0
 
39
  min_weights_per_neuron: 4
40
  enable_activation_sparsity: true
41
  activation_topk_fraction: 0.25
42
+ activation_sparsity_locations: attn_in,attn_out,mlp_in,mlp_out,mlp_neuron,attn_v,attn_k,attn_q,resid_mid,resid_pre
43
  optimizer:
44
  optimizer_type: adamw
45
  learning_rate: 0.001