XiangPan commited on
Commit
35b92f9
·
verified ·
1 Parent(s): 248be27

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/config.json +46 -0
  2. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json +26 -0
  3. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/standard_eval_results.json +1 -0
  4. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/config.json +46 -0
  5. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json +26 -0
  6. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/standard_eval_results.json +1 -0
  7. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/config.json +46 -0
  8. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json +26 -0
  9. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/standard_eval_results.json +1 -0
  10. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/config.json +46 -0
  11. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json +26 -0
  12. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/standard_eval_results.json +1 -0
  13. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/config.json +46 -0
  14. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json +26 -0
  15. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/standard_eval_results.json +1 -0
  16. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/config.json +46 -0
  17. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json +26 -0
  18. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/standard_eval_results.json +1 -0
  19. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/config.json +46 -0
  20. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json +26 -0
  21. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/standard_eval_results.json +1 -0
  22. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/config.json +46 -0
  23. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json +26 -0
  24. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/standard_eval_results.json +1 -0
  25. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/config.json +46 -0
  26. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json +26 -0
  27. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/standard_eval_results.json +1 -0
  28. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/config.json +46 -0
  29. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json +26 -0
  30. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/standard_eval_results.json +1 -0
  31. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/config.json +46 -0
  32. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json +26 -0
  33. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/standard_eval_results.json +14 -0
  34. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/config.json +46 -0
  35. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json +26 -0
  36. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/standard_eval_results.json +14 -0
  37. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/config.json +46 -0
  38. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json +26 -0
  39. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/standard_eval_results.json +14 -0
  40. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/config.json +46 -0
  41. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json +26 -0
  42. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/standard_eval_results.json +14 -0
  43. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/config.json +46 -0
  44. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json +26 -0
  45. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/standard_eval_results.json +14 -0
  46. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_0/config.json +34 -0
  47. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_1/config.json +34 -0
  48. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_2/config.json +34 -0
  49. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_3/config.json +34 -0
  50. TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_4/config.json +34 -0
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/seed-0_trainer_0",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 0,
16
+ "activation_dim": 512,
17
+ "dict_size": 2048,
18
+ "k": 128,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0,
24
+ "random_mask_bit_ratio_force_topk": false,
25
+ "random_mask_bit_original_scale": 1.0,
26
+ "random_mask_bit_pos": "input",
27
+ "random_mask_bit_reconstruction_target": "original",
28
+ "nonlinear_sae_loss_scale": 0.0,
29
+ "nonlinear_sae_input": "original",
30
+ "nonlinear_sae_target": "residual",
31
+ "nonlinear_use_encoder_mlp": true,
32
+ "nonlinear_use_decoder_mlp": false,
33
+ "nonlinear_topk_k": -1,
34
+ "nonlinear_block_gradients": false,
35
+ "meta_sae_loss_scale": 0.0
36
+ },
37
+ "buffer": {
38
+ "d_submodule": 512,
39
+ "io": "out",
40
+ "n_ctxs": 2048,
41
+ "ctx_len": 128,
42
+ "refresh_batch_size": 24,
43
+ "out_batch_size": 2048,
44
+ "device": "cuda:0"
45
+ }
46
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intrinsic_dim_0.8": 980,
3
+ "intrinsic_dim_0.9": 1364,
4
+ "intrinsic_dim_0.95": 1619,
5
+ "effective_rank": 1223.4310302734375,
6
+ "feature_sparsity": 0.9375,
7
+ "mean_correlation": 0.03588181361556053,
8
+ "max_correlation": 1.000002145767212,
9
+ "correlation_std": 0.06462709605693817,
10
+ "decoder_coactive_mean_sim": 0.0015496726846322417,
11
+ "decoder_coactive_max_sim": 0.4138309955596924,
12
+ "decoder_coactive_std_sim": 0.01725614070892334,
13
+ "encoder_coactive_mean_sim": 0.002769735874608159,
14
+ "encoder_coactive_max_sim": 0.3684731125831604,
15
+ "encoder_coactive_std_sim": 0.017463568598031998,
16
+ "decoder_per_sample_mean_sim": 0.0015496726846322417,
17
+ "decoder_per_sample_max_sim": 0.3042750656604767,
18
+ "encoder_per_sample_mean_sim": 0.002769735874608159,
19
+ "encoder_per_sample_max_sim": 0.2725919187068939,
20
+ "encoder_mean_correlation": 0.0021128507796674967,
21
+ "encoder_max_correlation": 1.000000238418579,
22
+ "encoder_correlation_std": 0.05275088548660278,
23
+ "decoder_mean_correlation": 0.0006922338507138193,
24
+ "decoder_max_correlation": 1.000001072883606,
25
+ "decoder_correlation_std": 0.05089571326971054
26
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_0/standard_eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.7783051311969755, "l1_loss": 94.76761198043823, "l0": 128.0, "frac_variance_explained": 0.942155122756958, "cossim": 0.9667428568005562, "l2_ratio": 0.9664503745734692, "relative_reconstruction_bias": 0.9999962501227856, "loss_original": 5.4671875, "loss_reconstructed": 5.718359375, "loss_zero": 10.740625, "frac_recovered": 0.9515625, "frac_alive": 1.0}
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/seed-1_trainer_1",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 1,
16
+ "activation_dim": 512,
17
+ "dict_size": 2048,
18
+ "k": 128,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0,
24
+ "random_mask_bit_ratio_force_topk": false,
25
+ "random_mask_bit_original_scale": 1.0,
26
+ "random_mask_bit_pos": "input",
27
+ "random_mask_bit_reconstruction_target": "original",
28
+ "nonlinear_sae_loss_scale": 0.0,
29
+ "nonlinear_sae_input": "original",
30
+ "nonlinear_sae_target": "residual",
31
+ "nonlinear_use_encoder_mlp": true,
32
+ "nonlinear_use_decoder_mlp": false,
33
+ "nonlinear_topk_k": -1,
34
+ "nonlinear_block_gradients": false,
35
+ "meta_sae_loss_scale": 0.0
36
+ },
37
+ "buffer": {
38
+ "d_submodule": 512,
39
+ "io": "out",
40
+ "n_ctxs": 2048,
41
+ "ctx_len": 128,
42
+ "refresh_batch_size": 24,
43
+ "out_batch_size": 2048,
44
+ "device": "cuda:0"
45
+ }
46
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intrinsic_dim_0.8": 979,
3
+ "intrinsic_dim_0.9": 1363,
4
+ "intrinsic_dim_0.95": 1619,
5
+ "effective_rank": 1231.33984375,
6
+ "feature_sparsity": 0.9375,
7
+ "mean_correlation": 0.03564154729247093,
8
+ "max_correlation": 1.0000020265579224,
9
+ "correlation_std": 0.06366480886936188,
10
+ "decoder_coactive_mean_sim": 0.0015298674115911126,
11
+ "decoder_coactive_max_sim": 0.3695344626903534,
12
+ "decoder_coactive_std_sim": 0.0173921138048172,
13
+ "encoder_coactive_mean_sim": 0.0028206498827785254,
14
+ "encoder_coactive_max_sim": 0.42045122385025024,
15
+ "encoder_coactive_std_sim": 0.017060182988643646,
16
+ "decoder_per_sample_mean_sim": 0.0015298674115911126,
17
+ "decoder_per_sample_max_sim": 0.2891332805156708,
18
+ "encoder_per_sample_mean_sim": 0.0028206498827785254,
19
+ "encoder_per_sample_max_sim": 0.24817219376564026,
20
+ "encoder_mean_correlation": 0.002107219770550728,
21
+ "encoder_max_correlation": 1.000000238418579,
22
+ "encoder_correlation_std": 0.052732452750205994,
23
+ "decoder_mean_correlation": 0.000679917458910495,
24
+ "decoder_max_correlation": 1.000001072883606,
25
+ "decoder_correlation_std": 0.05092431232333183
26
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_1/standard_eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.784668281674385, "l1_loss": 94.24630346298218, "l0": 128.0, "frac_variance_explained": 0.9389505088329315, "cossim": 0.9668197274208069, "l2_ratio": 0.9664119355380535, "relative_reconstruction_bias": 0.99992056787014, "loss_original": 5.4671875, "loss_reconstructed": 5.71640625, "loss_zero": 10.740625, "frac_recovered": 0.95185546875, "frac_alive": 1.0}
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/seed-2_trainer_2",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 2,
16
+ "activation_dim": 512,
17
+ "dict_size": 2048,
18
+ "k": 128,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0,
24
+ "random_mask_bit_ratio_force_topk": false,
25
+ "random_mask_bit_original_scale": 1.0,
26
+ "random_mask_bit_pos": "input",
27
+ "random_mask_bit_reconstruction_target": "original",
28
+ "nonlinear_sae_loss_scale": 0.0,
29
+ "nonlinear_sae_input": "original",
30
+ "nonlinear_sae_target": "residual",
31
+ "nonlinear_use_encoder_mlp": true,
32
+ "nonlinear_use_decoder_mlp": false,
33
+ "nonlinear_topk_k": -1,
34
+ "nonlinear_block_gradients": false,
35
+ "meta_sae_loss_scale": 0.0
36
+ },
37
+ "buffer": {
38
+ "d_submodule": 512,
39
+ "io": "out",
40
+ "n_ctxs": 2048,
41
+ "ctx_len": 128,
42
+ "refresh_batch_size": 24,
43
+ "out_batch_size": 2048,
44
+ "device": "cuda:0"
45
+ }
46
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intrinsic_dim_0.8": 968,
3
+ "intrinsic_dim_0.9": 1353,
4
+ "intrinsic_dim_0.95": 1611,
5
+ "effective_rank": 1223.5145263671875,
6
+ "feature_sparsity": 0.9375,
7
+ "mean_correlation": 0.035781797021627426,
8
+ "max_correlation": 1.000002145767212,
9
+ "correlation_std": 0.06626083701848984,
10
+ "decoder_coactive_mean_sim": 0.0015025029424577951,
11
+ "decoder_coactive_max_sim": 0.44835346937179565,
12
+ "decoder_coactive_std_sim": 0.017151132225990295,
13
+ "encoder_coactive_mean_sim": 0.002768551465123892,
14
+ "encoder_coactive_max_sim": 0.4386450946331024,
15
+ "encoder_coactive_std_sim": 0.017231818288564682,
16
+ "decoder_per_sample_mean_sim": 0.0015025028260424733,
17
+ "decoder_per_sample_max_sim": 0.2751252353191376,
18
+ "encoder_per_sample_mean_sim": 0.002768551232293248,
19
+ "encoder_per_sample_max_sim": 0.2641173303127289,
20
+ "encoder_mean_correlation": 0.0015855191741138697,
21
+ "encoder_max_correlation": 1.0000003576278687,
22
+ "encoder_correlation_std": 0.05263438820838928,
23
+ "decoder_mean_correlation": 0.0006925205234438181,
24
+ "decoder_max_correlation": 1.0000011920928955,
25
+ "decoder_correlation_std": 0.05096922814846039
26
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_2/standard_eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.7807009369134903, "l1_loss": 95.33445606231689, "l0": 128.0, "frac_variance_explained": 0.9413482956588268, "cossim": 0.9669262684881688, "l2_ratio": 0.9666900806128979, "relative_reconstruction_bias": 0.9999219357967377, "loss_original": 5.4671875, "loss_reconstructed": 5.70546875, "loss_zero": 10.740625, "frac_recovered": 0.953857421875, "frac_alive": 1.0}
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/seed-3_trainer_3",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 3,
16
+ "activation_dim": 512,
17
+ "dict_size": 2048,
18
+ "k": 128,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0,
24
+ "random_mask_bit_ratio_force_topk": false,
25
+ "random_mask_bit_original_scale": 1.0,
26
+ "random_mask_bit_pos": "input",
27
+ "random_mask_bit_reconstruction_target": "original",
28
+ "nonlinear_sae_loss_scale": 0.0,
29
+ "nonlinear_sae_input": "original",
30
+ "nonlinear_sae_target": "residual",
31
+ "nonlinear_use_encoder_mlp": true,
32
+ "nonlinear_use_decoder_mlp": false,
33
+ "nonlinear_topk_k": -1,
34
+ "nonlinear_block_gradients": false,
35
+ "meta_sae_loss_scale": 0.0
36
+ },
37
+ "buffer": {
38
+ "d_submodule": 512,
39
+ "io": "out",
40
+ "n_ctxs": 2048,
41
+ "ctx_len": 128,
42
+ "refresh_batch_size": 24,
43
+ "out_batch_size": 2048,
44
+ "device": "cuda:0"
45
+ }
46
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intrinsic_dim_0.8": 972,
3
+ "intrinsic_dim_0.9": 1358,
4
+ "intrinsic_dim_0.95": 1616,
5
+ "effective_rank": 1217.7279052734375,
6
+ "feature_sparsity": 0.9375,
7
+ "mean_correlation": 0.036073531955480576,
8
+ "max_correlation": 1.0000025033950806,
9
+ "correlation_std": 0.06570165604352951,
10
+ "decoder_coactive_mean_sim": 0.0015081887831911445,
11
+ "decoder_coactive_max_sim": 0.30734848976135254,
12
+ "decoder_coactive_std_sim": 0.016810396686196327,
13
+ "encoder_coactive_mean_sim": 0.002697763964533806,
14
+ "encoder_coactive_max_sim": 0.36042797565460205,
15
+ "encoder_coactive_std_sim": 0.016821540892124176,
16
+ "decoder_per_sample_mean_sim": 0.0015081887831911445,
17
+ "decoder_per_sample_max_sim": 0.24087123572826385,
18
+ "encoder_per_sample_mean_sim": 0.0026977641973644495,
19
+ "encoder_per_sample_max_sim": 0.22100086510181427,
20
+ "encoder_mean_correlation": 0.001659764559008181,
21
+ "encoder_max_correlation": 1.0000003576278687,
22
+ "encoder_correlation_std": 0.05284586176276207,
23
+ "decoder_mean_correlation": 0.0006780978292226791,
24
+ "decoder_max_correlation": 1.000001072883606,
25
+ "decoder_correlation_std": 0.05088222399353981
26
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_3/standard_eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.792155793309212, "l1_loss": 95.12483205795289, "l0": 128.0, "frac_variance_explained": 0.9385048314929009, "cossim": 0.9666961587965488, "l2_ratio": 0.9664709158241749, "relative_reconstruction_bias": 1.0001889944076539, "loss_original": 5.4671875, "loss_reconstructed": 5.7171875, "loss_zero": 10.740625, "frac_recovered": 0.95166015625, "frac_alive": 0.99951171875}
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/seed-4_trainer_4",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 4,
16
+ "activation_dim": 512,
17
+ "dict_size": 2048,
18
+ "k": 128,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0,
24
+ "random_mask_bit_ratio_force_topk": false,
25
+ "random_mask_bit_original_scale": 1.0,
26
+ "random_mask_bit_pos": "input",
27
+ "random_mask_bit_reconstruction_target": "original",
28
+ "nonlinear_sae_loss_scale": 0.0,
29
+ "nonlinear_sae_input": "original",
30
+ "nonlinear_sae_target": "residual",
31
+ "nonlinear_use_encoder_mlp": true,
32
+ "nonlinear_use_decoder_mlp": false,
33
+ "nonlinear_topk_k": -1,
34
+ "nonlinear_block_gradients": false,
35
+ "meta_sae_loss_scale": 0.0
36
+ },
37
+ "buffer": {
38
+ "d_submodule": 512,
39
+ "io": "out",
40
+ "n_ctxs": 2048,
41
+ "ctx_len": 128,
42
+ "refresh_batch_size": 24,
43
+ "out_batch_size": 2048,
44
+ "device": "cuda:0"
45
+ }
46
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intrinsic_dim_0.8": 971,
3
+ "intrinsic_dim_0.9": 1355,
4
+ "intrinsic_dim_0.95": 1612,
5
+ "effective_rank": 1222.1011962890625,
6
+ "feature_sparsity": 0.9375,
7
+ "mean_correlation": 0.03547367453575134,
8
+ "max_correlation": 1.0000027418136597,
9
+ "correlation_std": 0.06494747847318649,
10
+ "decoder_coactive_mean_sim": 0.001463062479160726,
11
+ "decoder_coactive_max_sim": 0.3568807542324066,
12
+ "decoder_coactive_std_sim": 0.016684627160429955,
13
+ "encoder_coactive_mean_sim": 0.002751479623839259,
14
+ "encoder_coactive_max_sim": 0.47617244720458984,
15
+ "encoder_coactive_std_sim": 0.016827696934342384,
16
+ "decoder_per_sample_mean_sim": 0.0014630623627454042,
17
+ "decoder_per_sample_max_sim": 0.2273091822862625,
18
+ "encoder_per_sample_mean_sim": 0.002751479623839259,
19
+ "encoder_per_sample_max_sim": 0.2101120501756668,
20
+ "encoder_mean_correlation": 0.001956398133188486,
21
+ "encoder_max_correlation": 1.000000238418579,
22
+ "encoder_correlation_std": 0.053541772067546844,
23
+ "decoder_mean_correlation": 0.0007174276979640126,
24
+ "decoder_max_correlation": 1.0000011920928955,
25
+ "decoder_correlation_std": 0.050983842462301254
26
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-128/dict_size-2048/trainer_4/standard_eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.795499560236931, "l1_loss": 95.14364604949951, "l0": 128.0, "frac_variance_explained": 0.9360299751162529, "cossim": 0.9666673980653286, "l2_ratio": 0.9663952246308327, "relative_reconstruction_bias": 0.9999688774347305, "loss_original": 5.4671875, "loss_reconstructed": 5.721484375, "loss_zero": 10.740625, "frac_recovered": 0.9509765625, "frac_alive": 1.0}
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/seed-0_trainer_0",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 0,
16
+ "activation_dim": 512,
17
+ "dict_size": 2048,
18
+ "k": 256,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0,
24
+ "random_mask_bit_ratio_force_topk": false,
25
+ "random_mask_bit_original_scale": 1.0,
26
+ "random_mask_bit_pos": "input",
27
+ "random_mask_bit_reconstruction_target": "original",
28
+ "nonlinear_sae_loss_scale": 0.0,
29
+ "nonlinear_sae_input": "original",
30
+ "nonlinear_sae_target": "residual",
31
+ "nonlinear_use_encoder_mlp": true,
32
+ "nonlinear_use_decoder_mlp": false,
33
+ "nonlinear_topk_k": -1,
34
+ "nonlinear_block_gradients": false,
35
+ "meta_sae_loss_scale": 0.0
36
+ },
37
+ "buffer": {
38
+ "d_submodule": 512,
39
+ "io": "out",
40
+ "n_ctxs": 2048,
41
+ "ctx_len": 128,
42
+ "refresh_batch_size": 24,
43
+ "out_batch_size": 2048,
44
+ "device": "cuda:0"
45
+ }
46
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intrinsic_dim_0.8": 480,
3
+ "intrinsic_dim_0.9": 696,
4
+ "intrinsic_dim_0.95": 998,
5
+ "effective_rank": 654.4144287109375,
6
+ "feature_sparsity": 0.875,
7
+ "mean_correlation": 0.04879017919301987,
8
+ "max_correlation": 1.0000032186508179,
9
+ "correlation_std": 0.1387787014245987,
10
+ "decoder_coactive_mean_sim": -0.000603697553742677,
11
+ "decoder_coactive_max_sim": 0.4850667119026184,
12
+ "decoder_coactive_std_sim": 0.011331773363053799,
13
+ "encoder_coactive_mean_sim": 0.004783302079886198,
14
+ "encoder_coactive_max_sim": 0.6346930861473083,
15
+ "encoder_coactive_std_sim": 0.020051371306180954,
16
+ "decoder_per_sample_mean_sim": -0.000603697553742677,
17
+ "decoder_per_sample_max_sim": 0.19871939718723297,
18
+ "encoder_per_sample_mean_sim": 0.004783302079886198,
19
+ "encoder_per_sample_max_sim": 0.6304284930229187,
20
+ "encoder_mean_correlation": 0.00369116198271513,
21
+ "encoder_max_correlation": 1.000000238418579,
22
+ "encoder_correlation_std": 0.06792980432510376,
23
+ "decoder_mean_correlation": 0.0015754885971546173,
24
+ "decoder_max_correlation": 1.0000011920928955,
25
+ "decoder_correlation_std": 0.0512159988284111
26
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_0/standard_eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.7219308167696, "l1_loss": 219.18922500610353, "l0": 256.0, "frac_variance_explained": 0.9695879392325878, "cossim": 0.9831567205488682, "l2_ratio": 0.983003368973732, "relative_reconstruction_bias": 0.9997936256229878, "loss_original": 5.4671875, "loss_reconstructed": 5.5921875, "loss_zero": 10.740625, "frac_recovered": 0.9759765625, "frac_alive": 0.97412109375}
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/seed-1_trainer_1",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 1,
16
+ "activation_dim": 512,
17
+ "dict_size": 2048,
18
+ "k": 256,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0,
24
+ "random_mask_bit_ratio_force_topk": false,
25
+ "random_mask_bit_original_scale": 1.0,
26
+ "random_mask_bit_pos": "input",
27
+ "random_mask_bit_reconstruction_target": "original",
28
+ "nonlinear_sae_loss_scale": 0.0,
29
+ "nonlinear_sae_input": "original",
30
+ "nonlinear_sae_target": "residual",
31
+ "nonlinear_use_encoder_mlp": true,
32
+ "nonlinear_use_decoder_mlp": false,
33
+ "nonlinear_topk_k": -1,
34
+ "nonlinear_block_gradients": false,
35
+ "meta_sae_loss_scale": 0.0
36
+ },
37
+ "buffer": {
38
+ "d_submodule": 512,
39
+ "io": "out",
40
+ "n_ctxs": 2048,
41
+ "ctx_len": 128,
42
+ "refresh_batch_size": 24,
43
+ "out_batch_size": 2048,
44
+ "device": "cuda:0"
45
+ }
46
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intrinsic_dim_0.8": 486,
3
+ "intrinsic_dim_0.9": 680,
4
+ "intrinsic_dim_0.95": 972,
5
+ "effective_rank": 676.2651977539062,
6
+ "feature_sparsity": 0.875,
7
+ "mean_correlation": 0.0487031452357769,
8
+ "max_correlation": 1.0000033378601074,
9
+ "correlation_std": 0.1352577954530716,
10
+ "decoder_coactive_mean_sim": -0.000500466616358608,
11
+ "decoder_coactive_max_sim": 0.42623040080070496,
12
+ "decoder_coactive_std_sim": 0.011117835529148579,
13
+ "encoder_coactive_mean_sim": 0.0036506024189293385,
14
+ "encoder_coactive_max_sim": 0.46891945600509644,
15
+ "encoder_coactive_std_sim": 0.016733255237340927,
16
+ "decoder_per_sample_mean_sim": -0.000500466616358608,
17
+ "decoder_per_sample_max_sim": 0.2542282044887543,
18
+ "encoder_per_sample_mean_sim": 0.0036506024189293385,
19
+ "encoder_per_sample_max_sim": 0.44621554017066956,
20
+ "encoder_mean_correlation": 0.003571811132133007,
21
+ "encoder_max_correlation": 1.000000238418579,
22
+ "encoder_correlation_std": 0.06752442568540573,
23
+ "decoder_mean_correlation": 0.001791062531992793,
24
+ "decoder_max_correlation": 1.0000009536743164,
25
+ "decoder_correlation_std": 0.051166798919439316
26
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_1/standard_eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.659628000855446, "l1_loss": 204.22452449798584, "l0": 256.0, "frac_variance_explained": 0.9706387490034103, "cossim": 0.9838522009551525, "l2_ratio": 0.9836978435516357, "relative_reconstruction_bias": 0.99996752217412, "loss_original": 5.4671875, "loss_reconstructed": 5.5828125, "loss_zero": 10.740625, "frac_recovered": 0.97783203125, "frac_alive": 0.96240234375}
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/seed-2_trainer_2",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 2,
16
+ "activation_dim": 512,
17
+ "dict_size": 2048,
18
+ "k": 256,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0,
24
+ "random_mask_bit_ratio_force_topk": false,
25
+ "random_mask_bit_original_scale": 1.0,
26
+ "random_mask_bit_pos": "input",
27
+ "random_mask_bit_reconstruction_target": "original",
28
+ "nonlinear_sae_loss_scale": 0.0,
29
+ "nonlinear_sae_input": "original",
30
+ "nonlinear_sae_target": "residual",
31
+ "nonlinear_use_encoder_mlp": true,
32
+ "nonlinear_use_decoder_mlp": false,
33
+ "nonlinear_topk_k": -1,
34
+ "nonlinear_block_gradients": false,
35
+ "meta_sae_loss_scale": 0.0
36
+ },
37
+ "buffer": {
38
+ "d_submodule": 512,
39
+ "io": "out",
40
+ "n_ctxs": 2048,
41
+ "ctx_len": 128,
42
+ "refresh_batch_size": 24,
43
+ "out_batch_size": 2048,
44
+ "device": "cuda:0"
45
+ }
46
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intrinsic_dim_0.8": 481,
3
+ "intrinsic_dim_0.9": 688,
4
+ "intrinsic_dim_0.95": 983,
5
+ "effective_rank": 670.652587890625,
6
+ "feature_sparsity": 0.8750005960464478,
7
+ "mean_correlation": 0.04843044653534889,
8
+ "max_correlation": 1.0000027418136597,
9
+ "correlation_std": 0.13766011595726013,
10
+ "decoder_coactive_mean_sim": -0.0005655006971210241,
11
+ "decoder_coactive_max_sim": 0.45535174012184143,
12
+ "decoder_coactive_std_sim": 0.011256729252636433,
13
+ "encoder_coactive_mean_sim": 0.004165450111031532,
14
+ "encoder_coactive_max_sim": 0.5755204558372498,
15
+ "encoder_coactive_std_sim": 0.018040597438812256,
16
+ "decoder_per_sample_mean_sim": -0.0005655006389133632,
17
+ "decoder_per_sample_max_sim": 0.24903523921966553,
18
+ "encoder_per_sample_mean_sim": 0.004165449645370245,
19
+ "encoder_per_sample_max_sim": 0.5712661147117615,
20
+ "encoder_mean_correlation": 0.0021907533518970013,
21
+ "encoder_max_correlation": 1.000000238418579,
22
+ "encoder_correlation_std": 0.06807170808315277,
23
+ "decoder_mean_correlation": 0.001756084617227316,
24
+ "decoder_max_correlation": 1.0000009536743164,
25
+ "decoder_correlation_std": 0.051152873784303665
26
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_2/standard_eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.702136534452438, "l1_loss": 209.18400897979737, "l0": 255.9986328125, "frac_variance_explained": 0.9689409710466862, "cossim": 0.9832561373710632, "l2_ratio": 0.9830743968486786, "relative_reconstruction_bias": 0.9999133288860321, "loss_original": 5.4671875, "loss_reconstructed": 5.591015625, "loss_zero": 10.740625, "frac_recovered": 0.97607421875, "frac_alive": 0.97119140625}
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/seed-3_trainer_3",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 3,
16
+ "activation_dim": 512,
17
+ "dict_size": 2048,
18
+ "k": 256,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0,
24
+ "random_mask_bit_ratio_force_topk": false,
25
+ "random_mask_bit_original_scale": 1.0,
26
+ "random_mask_bit_pos": "input",
27
+ "random_mask_bit_reconstruction_target": "original",
28
+ "nonlinear_sae_loss_scale": 0.0,
29
+ "nonlinear_sae_input": "original",
30
+ "nonlinear_sae_target": "residual",
31
+ "nonlinear_use_encoder_mlp": true,
32
+ "nonlinear_use_decoder_mlp": false,
33
+ "nonlinear_topk_k": -1,
34
+ "nonlinear_block_gradients": false,
35
+ "meta_sae_loss_scale": 0.0
36
+ },
37
+ "buffer": {
38
+ "d_submodule": 512,
39
+ "io": "out",
40
+ "n_ctxs": 2048,
41
+ "ctx_len": 128,
42
+ "refresh_batch_size": 24,
43
+ "out_batch_size": 2048,
44
+ "device": "cuda:0"
45
+ }
46
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intrinsic_dim_0.8": 472,
3
+ "intrinsic_dim_0.9": 677,
4
+ "intrinsic_dim_0.95": 968,
5
+ "effective_rank": 652.3516235351562,
6
+ "feature_sparsity": 0.875,
7
+ "mean_correlation": 0.04835033416748047,
8
+ "max_correlation": 1.0000038146972656,
9
+ "correlation_std": 0.13854721188545227,
10
+ "decoder_coactive_mean_sim": -0.0006331527838483453,
11
+ "decoder_coactive_max_sim": 0.47532302141189575,
12
+ "decoder_coactive_std_sim": 0.011337202973663807,
13
+ "encoder_coactive_mean_sim": 0.005108896177262068,
14
+ "encoder_coactive_max_sim": 0.5862697958946228,
15
+ "encoder_coactive_std_sim": 0.019558507949113846,
16
+ "decoder_per_sample_mean_sim": -0.0006331527838483453,
17
+ "decoder_per_sample_max_sim": 0.2255595475435257,
18
+ "encoder_per_sample_mean_sim": 0.005108896177262068,
19
+ "encoder_per_sample_max_sim": 0.5814019441604614,
20
+ "encoder_mean_correlation": 0.002764908829703927,
21
+ "encoder_max_correlation": 1.000000238418579,
22
+ "encoder_correlation_std": 0.07186086475849152,
23
+ "decoder_mean_correlation": 0.001473523210734129,
24
+ "decoder_max_correlation": 1.000001072883606,
25
+ "decoder_correlation_std": 0.05101215839385986
26
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_3/standard_eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.7153001874685287, "l1_loss": 214.051033782959, "l0": 256.0, "frac_variance_explained": 0.9711439780890941, "cossim": 0.9831305019557476, "l2_ratio": 0.9829454332590103, "relative_reconstruction_bias": 0.9999813109636306, "loss_original": 5.4671875, "loss_reconstructed": 5.588671875, "loss_zero": 10.740625, "frac_recovered": 0.976611328125, "frac_alive": 0.96630859375}
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/seed-4_trainer_4",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 4,
16
+ "activation_dim": 512,
17
+ "dict_size": 2048,
18
+ "k": 256,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0,
24
+ "random_mask_bit_ratio_force_topk": false,
25
+ "random_mask_bit_original_scale": 1.0,
26
+ "random_mask_bit_pos": "input",
27
+ "random_mask_bit_reconstruction_target": "original",
28
+ "nonlinear_sae_loss_scale": 0.0,
29
+ "nonlinear_sae_input": "original",
30
+ "nonlinear_sae_target": "residual",
31
+ "nonlinear_use_encoder_mlp": true,
32
+ "nonlinear_use_decoder_mlp": false,
33
+ "nonlinear_topk_k": -1,
34
+ "nonlinear_block_gradients": false,
35
+ "meta_sae_loss_scale": 0.0
36
+ },
37
+ "buffer": {
38
+ "d_submodule": 512,
39
+ "io": "out",
40
+ "n_ctxs": 2048,
41
+ "ctx_len": 128,
42
+ "refresh_batch_size": 24,
43
+ "out_batch_size": 2048,
44
+ "device": "cuda:0"
45
+ }
46
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intrinsic_dim_0.8": 481,
3
+ "intrinsic_dim_0.9": 691,
4
+ "intrinsic_dim_0.95": 987,
5
+ "effective_rank": 678.6663818359375,
6
+ "feature_sparsity": 0.8750003576278687,
7
+ "mean_correlation": 0.04904370754957199,
8
+ "max_correlation": 1.0000029802322388,
9
+ "correlation_std": 0.13809221982955933,
10
+ "decoder_coactive_mean_sim": -0.0006003740127198398,
11
+ "decoder_coactive_max_sim": 0.4896470904350281,
12
+ "decoder_coactive_std_sim": 0.01121507491916418,
13
+ "encoder_coactive_mean_sim": 0.004258359782397747,
14
+ "encoder_coactive_max_sim": 0.4985700845718384,
15
+ "encoder_coactive_std_sim": 0.01753879338502884,
16
+ "decoder_per_sample_mean_sim": -0.0006003740709275007,
17
+ "decoder_per_sample_max_sim": 0.2869977653026581,
18
+ "encoder_per_sample_mean_sim": 0.004258360248059034,
19
+ "encoder_per_sample_max_sim": 0.4956880509853363,
20
+ "encoder_mean_correlation": 0.0028513818979263306,
21
+ "encoder_max_correlation": 1.0000003576278687,
22
+ "encoder_correlation_std": 0.06897676736116409,
23
+ "decoder_mean_correlation": 0.0016533236484974623,
24
+ "decoder_max_correlation": 1.000001072883606,
25
+ "decoder_correlation_std": 0.05112037807703018
26
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-256/dict_size-2048/trainer_4/standard_eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.701796779036522, "l1_loss": 206.50695514678955, "l0": 255.9994140625, "frac_variance_explained": 0.9682310611009598, "cossim": 0.9833530187606812, "l2_ratio": 0.9831482082605362, "relative_reconstruction_bias": 0.9999046422541141, "loss_original": 5.4671875, "loss_reconstructed": 5.59140625, "loss_zero": 10.740625, "frac_recovered": 0.976123046875, "frac_alive": 0.97509765625}
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/seed-0_trainer_0",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 0,
16
+ "activation_dim": 512,
17
+ "dict_size": 2048,
18
+ "k": 32,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0,
24
+ "random_mask_bit_ratio_force_topk": false,
25
+ "random_mask_bit_original_scale": 1.0,
26
+ "random_mask_bit_pos": "input",
27
+ "random_mask_bit_reconstruction_target": "original",
28
+ "nonlinear_sae_loss_scale": 0.0,
29
+ "nonlinear_sae_input": "original",
30
+ "nonlinear_sae_target": "residual",
31
+ "nonlinear_use_encoder_mlp": true,
32
+ "nonlinear_use_decoder_mlp": false,
33
+ "nonlinear_topk_k": -1,
34
+ "nonlinear_block_gradients": false,
35
+ "meta_sae_loss_scale": 0.0
36
+ },
37
+ "buffer": {
38
+ "d_submodule": 512,
39
+ "io": "out",
40
+ "n_ctxs": 2048,
41
+ "ctx_len": 128,
42
+ "refresh_batch_size": 24,
43
+ "out_batch_size": 2048,
44
+ "device": "cuda:0"
45
+ }
46
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/self_feature_space_diversity_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intrinsic_dim_0.8": 1145,
3
+ "intrinsic_dim_0.9": 1494,
4
+ "intrinsic_dim_0.95": 1716,
5
+ "effective_rank": 1393.9248046875,
6
+ "feature_sparsity": 0.984375,
7
+ "mean_correlation": 0.009593969210982323,
8
+ "max_correlation": 1.0000017881393433,
9
+ "correlation_std": 0.034370556473731995,
10
+ "decoder_coactive_mean_sim": 0.012857094407081604,
11
+ "decoder_coactive_max_sim": 0.5831928253173828,
12
+ "decoder_coactive_std_sim": 0.04128008335828781,
13
+ "encoder_coactive_mean_sim": 0.013956918381154537,
14
+ "encoder_coactive_max_sim": 0.45717740058898926,
15
+ "encoder_coactive_std_sim": 0.03173547610640526,
16
+ "decoder_per_sample_mean_sim": 0.012857094407081604,
17
+ "decoder_per_sample_max_sim": 0.2939373552799225,
18
+ "encoder_per_sample_mean_sim": 0.013956919312477112,
19
+ "encoder_per_sample_max_sim": 0.19147595763206482,
20
+ "encoder_mean_correlation": 0.0025674644857645035,
21
+ "encoder_max_correlation": 1.0000003576278687,
22
+ "encoder_correlation_std": 0.059210386127233505,
23
+ "decoder_mean_correlation": 0.004608687479048967,
24
+ "decoder_max_correlation": 1.0000009536743164,
25
+ "decoder_correlation_std": 0.057089705020189285
26
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_0/standard_eval_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "l2_loss": 5.521578305959702,
3
+ "l1_loss": 44.27927327156067,
4
+ "l0": 32.0,
5
+ "frac_variance_explained": 0.8718812368810177,
6
+ "cossim": 0.9284569166600705,
7
+ "l2_ratio": 0.9278947010636329,
8
+ "relative_reconstruction_bias": 1.0000686429440975,
9
+ "loss_original": 5.4671875,
10
+ "loss_reconstructed": 6.01953125,
11
+ "loss_zero": 10.740625,
12
+ "frac_recovered": 0.89345703125,
13
+ "frac_alive": 1.0
14
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/seed-1_trainer_1",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 1,
16
+ "activation_dim": 512,
17
+ "dict_size": 2048,
18
+ "k": 32,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0,
24
+ "random_mask_bit_ratio_force_topk": false,
25
+ "random_mask_bit_original_scale": 1.0,
26
+ "random_mask_bit_pos": "input",
27
+ "random_mask_bit_reconstruction_target": "original",
28
+ "nonlinear_sae_loss_scale": 0.0,
29
+ "nonlinear_sae_input": "original",
30
+ "nonlinear_sae_target": "residual",
31
+ "nonlinear_use_encoder_mlp": true,
32
+ "nonlinear_use_decoder_mlp": false,
33
+ "nonlinear_topk_k": -1,
34
+ "nonlinear_block_gradients": false,
35
+ "meta_sae_loss_scale": 0.0
36
+ },
37
+ "buffer": {
38
+ "d_submodule": 512,
39
+ "io": "out",
40
+ "n_ctxs": 2048,
41
+ "ctx_len": 128,
42
+ "refresh_batch_size": 24,
43
+ "out_batch_size": 2048,
44
+ "device": "cuda:0"
45
+ }
46
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/self_feature_space_diversity_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intrinsic_dim_0.8": 1145,
3
+ "intrinsic_dim_0.9": 1493,
4
+ "intrinsic_dim_0.95": 1714,
5
+ "effective_rank": 1391.723876953125,
6
+ "feature_sparsity": 0.984375,
7
+ "mean_correlation": 0.009598391130566597,
8
+ "max_correlation": 1.0000014305114746,
9
+ "correlation_std": 0.03441373631358147,
10
+ "decoder_coactive_mean_sim": 0.013000169768929482,
11
+ "decoder_coactive_max_sim": 0.5189113616943359,
12
+ "decoder_coactive_std_sim": 0.04145849123597145,
13
+ "encoder_coactive_mean_sim": 0.013387808576226234,
14
+ "encoder_coactive_max_sim": 0.46838805079460144,
15
+ "encoder_coactive_std_sim": 0.03152437135577202,
16
+ "decoder_per_sample_mean_sim": 0.013000166974961758,
17
+ "decoder_per_sample_max_sim": 0.29648900032043457,
18
+ "encoder_per_sample_mean_sim": 0.013387808576226234,
19
+ "encoder_per_sample_max_sim": 0.1819629967212677,
20
+ "encoder_mean_correlation": 0.0035003535449504852,
21
+ "encoder_max_correlation": 1.000000238418579,
22
+ "encoder_correlation_std": 0.0581950768828392,
23
+ "decoder_mean_correlation": 0.004728738218545914,
24
+ "decoder_max_correlation": 1.0000011920928955,
25
+ "decoder_correlation_std": 0.05676833167672157
26
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_1/standard_eval_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "l2_loss": 5.493244290351868,
3
+ "l1_loss": 43.91451568603516,
4
+ "l0": 32.0,
5
+ "frac_variance_explained": 0.8692285768687725,
6
+ "cossim": 0.9287988729774952,
7
+ "l2_ratio": 0.9284325882792472,
8
+ "relative_reconstruction_bias": 1.0001945979893208,
9
+ "loss_original": 5.4671875,
10
+ "loss_reconstructed": 6.02265625,
11
+ "loss_zero": 10.740625,
12
+ "frac_recovered": 0.892724609375,
13
+ "frac_alive": 1.0
14
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/seed-2_trainer_2",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 2,
16
+ "activation_dim": 512,
17
+ "dict_size": 2048,
18
+ "k": 32,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0,
24
+ "random_mask_bit_ratio_force_topk": false,
25
+ "random_mask_bit_original_scale": 1.0,
26
+ "random_mask_bit_pos": "input",
27
+ "random_mask_bit_reconstruction_target": "original",
28
+ "nonlinear_sae_loss_scale": 0.0,
29
+ "nonlinear_sae_input": "original",
30
+ "nonlinear_sae_target": "residual",
31
+ "nonlinear_use_encoder_mlp": true,
32
+ "nonlinear_use_decoder_mlp": false,
33
+ "nonlinear_topk_k": -1,
34
+ "nonlinear_block_gradients": false,
35
+ "meta_sae_loss_scale": 0.0
36
+ },
37
+ "buffer": {
38
+ "d_submodule": 512,
39
+ "io": "out",
40
+ "n_ctxs": 2048,
41
+ "ctx_len": 128,
42
+ "refresh_batch_size": 24,
43
+ "out_batch_size": 2048,
44
+ "device": "cuda:0"
45
+ }
46
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/self_feature_space_diversity_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intrinsic_dim_0.8": 1147,
3
+ "intrinsic_dim_0.9": 1497,
4
+ "intrinsic_dim_0.95": 1720,
5
+ "effective_rank": 1386.2728271484375,
6
+ "feature_sparsity": 0.984375,
7
+ "mean_correlation": 0.009658349677920341,
8
+ "max_correlation": 1.0000011920928955,
9
+ "correlation_std": 0.034718144685029984,
10
+ "decoder_coactive_mean_sim": 0.013472042046487331,
11
+ "decoder_coactive_max_sim": 0.6248090863227844,
12
+ "decoder_coactive_std_sim": 0.042402926832437515,
13
+ "encoder_coactive_mean_sim": 0.013234490528702736,
14
+ "encoder_coactive_max_sim": 0.48452436923980713,
15
+ "encoder_coactive_std_sim": 0.03160259500145912,
16
+ "decoder_per_sample_mean_sim": 0.013472042977809906,
17
+ "decoder_per_sample_max_sim": 0.295926034450531,
18
+ "encoder_per_sample_mean_sim": 0.013234490528702736,
19
+ "encoder_per_sample_max_sim": 0.1905398666858673,
20
+ "encoder_mean_correlation": 0.003325967350974679,
21
+ "encoder_max_correlation": 1.0000003576278687,
22
+ "encoder_correlation_std": 0.057232871651649475,
23
+ "decoder_mean_correlation": 0.004680304788053036,
24
+ "decoder_max_correlation": 1.0000011920928955,
25
+ "decoder_correlation_std": 0.057123977690935135
26
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_2/standard_eval_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "l2_loss": 5.518608373403549,
3
+ "l1_loss": 43.638191032409665,
4
+ "l0": 32.0,
5
+ "frac_variance_explained": 0.8637508787214756,
6
+ "cossim": 0.9281533844769001,
7
+ "l2_ratio": 0.9278098031878471,
8
+ "relative_reconstruction_bias": 1.0003917694091797,
9
+ "loss_original": 5.4671875,
10
+ "loss_reconstructed": 6.020703125,
11
+ "loss_zero": 10.740625,
12
+ "frac_recovered": 0.892822265625,
13
+ "frac_alive": 1.0
14
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/seed-3_trainer_3",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 3,
16
+ "activation_dim": 512,
17
+ "dict_size": 2048,
18
+ "k": 32,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0,
24
+ "random_mask_bit_ratio_force_topk": false,
25
+ "random_mask_bit_original_scale": 1.0,
26
+ "random_mask_bit_pos": "input",
27
+ "random_mask_bit_reconstruction_target": "original",
28
+ "nonlinear_sae_loss_scale": 0.0,
29
+ "nonlinear_sae_input": "original",
30
+ "nonlinear_sae_target": "residual",
31
+ "nonlinear_use_encoder_mlp": true,
32
+ "nonlinear_use_decoder_mlp": false,
33
+ "nonlinear_topk_k": -1,
34
+ "nonlinear_block_gradients": false,
35
+ "meta_sae_loss_scale": 0.0
36
+ },
37
+ "buffer": {
38
+ "d_submodule": 512,
39
+ "io": "out",
40
+ "n_ctxs": 2048,
41
+ "ctx_len": 128,
42
+ "refresh_batch_size": 24,
43
+ "out_batch_size": 2048,
44
+ "device": "cuda:0"
45
+ }
46
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/self_feature_space_diversity_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intrinsic_dim_0.8": 1146,
3
+ "intrinsic_dim_0.9": 1496,
4
+ "intrinsic_dim_0.95": 1719,
5
+ "effective_rank": 1397.719970703125,
6
+ "feature_sparsity": 0.984375,
7
+ "mean_correlation": 0.009594940580427647,
8
+ "max_correlation": 1.0000015497207642,
9
+ "correlation_std": 0.034256912767887115,
10
+ "decoder_coactive_mean_sim": 0.013142098672688007,
11
+ "decoder_coactive_max_sim": 0.4960811734199524,
12
+ "decoder_coactive_std_sim": 0.0417519137263298,
13
+ "encoder_coactive_mean_sim": 0.013556867837905884,
14
+ "encoder_coactive_max_sim": 0.4828951954841614,
15
+ "encoder_coactive_std_sim": 0.0318557471036911,
16
+ "decoder_per_sample_mean_sim": 0.013142098672688007,
17
+ "decoder_per_sample_max_sim": 0.3047160804271698,
18
+ "encoder_per_sample_mean_sim": 0.013556867837905884,
19
+ "encoder_per_sample_max_sim": 0.1878674179315567,
20
+ "encoder_mean_correlation": 0.003432024270296097,
21
+ "encoder_max_correlation": 1.000000238418579,
22
+ "encoder_correlation_std": 0.058941394090652466,
23
+ "decoder_mean_correlation": 0.004725632257759571,
24
+ "decoder_max_correlation": 1.0000011920928955,
25
+ "decoder_correlation_std": 0.05638653412461281
26
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_3/standard_eval_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "l2_loss": 5.497262263298035,
3
+ "l1_loss": 43.95940580368042,
4
+ "l0": 32.0,
5
+ "frac_variance_explained": 0.873967283219099,
6
+ "cossim": 0.9286889567971229,
7
+ "l2_ratio": 0.9278851471841335,
8
+ "relative_reconstruction_bias": 0.99990846067667,
9
+ "loss_original": 5.4671875,
10
+ "loss_reconstructed": 6.01796875,
11
+ "loss_zero": 10.740625,
12
+ "frac_recovered": 0.893505859375,
13
+ "frac_alive": 1.0
14
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/seed-4_trainer_4",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 4,
16
+ "activation_dim": 512,
17
+ "dict_size": 2048,
18
+ "k": 32,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0,
24
+ "random_mask_bit_ratio_force_topk": false,
25
+ "random_mask_bit_original_scale": 1.0,
26
+ "random_mask_bit_pos": "input",
27
+ "random_mask_bit_reconstruction_target": "original",
28
+ "nonlinear_sae_loss_scale": 0.0,
29
+ "nonlinear_sae_input": "original",
30
+ "nonlinear_sae_target": "residual",
31
+ "nonlinear_use_encoder_mlp": true,
32
+ "nonlinear_use_decoder_mlp": false,
33
+ "nonlinear_topk_k": -1,
34
+ "nonlinear_block_gradients": false,
35
+ "meta_sae_loss_scale": 0.0
36
+ },
37
+ "buffer": {
38
+ "d_submodule": 512,
39
+ "io": "out",
40
+ "n_ctxs": 2048,
41
+ "ctx_len": 128,
42
+ "refresh_batch_size": 24,
43
+ "out_batch_size": 2048,
44
+ "device": "cuda:0"
45
+ }
46
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/self_feature_space_diversity_eval_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "intrinsic_dim_0.8": 1143,
3
+ "intrinsic_dim_0.9": 1494,
4
+ "intrinsic_dim_0.95": 1717,
5
+ "effective_rank": 1369.8779296875,
6
+ "feature_sparsity": 0.984375,
7
+ "mean_correlation": 0.009484217502176762,
8
+ "max_correlation": 1.0000014305114746,
9
+ "correlation_std": 0.0345107764005661,
10
+ "decoder_coactive_mean_sim": 0.012512738816440105,
11
+ "decoder_coactive_max_sim": 0.6438488960266113,
12
+ "decoder_coactive_std_sim": 0.04079408198595047,
13
+ "encoder_coactive_mean_sim": 0.012626885436475277,
14
+ "encoder_coactive_max_sim": 0.5188671946525574,
15
+ "encoder_coactive_std_sim": 0.031891077756881714,
16
+ "decoder_per_sample_mean_sim": 0.01251273788511753,
17
+ "decoder_per_sample_max_sim": 0.2933551073074341,
18
+ "encoder_per_sample_mean_sim": 0.012626885436475277,
19
+ "encoder_per_sample_max_sim": 0.19583337008953094,
20
+ "encoder_mean_correlation": 0.003148031421005726,
21
+ "encoder_max_correlation": 1.0000003576278687,
22
+ "encoder_correlation_std": 0.05692300200462341,
23
+ "decoder_mean_correlation": 0.004536854103207588,
24
+ "decoder_max_correlation": 1.0000011920928955,
25
+ "decoder_correlation_std": 0.05627986416220665
26
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-2048/trainer_4/standard_eval_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "l2_loss": 5.485749912261963,
3
+ "l1_loss": 44.28157253265381,
4
+ "l0": 32.0,
5
+ "frac_variance_explained": 0.867275919765234,
6
+ "cossim": 0.9295805610716343,
7
+ "l2_ratio": 0.9290113553404808,
8
+ "relative_reconstruction_bias": 1.0001047268509864,
9
+ "loss_original": 5.4671875,
10
+ "loss_reconstructed": 6.025390625,
11
+ "loss_zero": 10.740625,
12
+ "frac_recovered": 0.8919921875,
13
+ "frac_alive": 1.0
14
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_0/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/seed-0_trainer_0",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 0,
16
+ "activation_dim": 512,
17
+ "dict_size": 32768,
18
+ "k": 32,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0
24
+ },
25
+ "buffer": {
26
+ "d_submodule": 512,
27
+ "io": "out",
28
+ "n_ctxs": 2048,
29
+ "ctx_len": 128,
30
+ "refresh_batch_size": 24,
31
+ "out_batch_size": 2048,
32
+ "device": "cuda:0"
33
+ }
34
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_1/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/seed-1_trainer_1",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 1,
16
+ "activation_dim": 512,
17
+ "dict_size": 32768,
18
+ "k": 32,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0
24
+ },
25
+ "buffer": {
26
+ "d_submodule": 512,
27
+ "io": "out",
28
+ "n_ctxs": 2048,
29
+ "ctx_len": 128,
30
+ "refresh_batch_size": 24,
31
+ "out_batch_size": 2048,
32
+ "device": "cuda:0"
33
+ }
34
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_2/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/seed-2_trainer_2",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 2,
16
+ "activation_dim": 512,
17
+ "dict_size": 32768,
18
+ "k": 32,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0
24
+ },
25
+ "buffer": {
26
+ "d_submodule": 512,
27
+ "io": "out",
28
+ "n_ctxs": 2048,
29
+ "ctx_len": 128,
30
+ "refresh_batch_size": 24,
31
+ "out_batch_size": 2048,
32
+ "device": "cuda:0"
33
+ }
34
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_3/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/seed-3_trainer_3",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 3,
16
+ "activation_dim": 512,
17
+ "dict_size": 32768,
18
+ "k": 32,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0
24
+ },
25
+ "buffer": {
26
+ "d_submodule": 512,
27
+ "io": "out",
28
+ "n_ctxs": 2048,
29
+ "ctx_len": 128,
30
+ "refresh_batch_size": 24,
31
+ "out_batch_size": 2048,
32
+ "device": "cuda:0"
33
+ }
34
+ }
TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/trainer_4/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "wandb_name": "TopKTrainer/EleutherAI__pythia-70m-deduped/resid_post_layer_4/TopK-32/dict_size-32768/seed-4_trainer_4",
4
+ "trainer_class": "TopKTrainer",
5
+ "dict_class": "AutoEncoderTopK",
6
+ "lr": 0.0003,
7
+ "steps": 14648,
8
+ "auxk_alpha": 0.03125,
9
+ "diversity_scale": 0.0,
10
+ "diversity_type": null,
11
+ "warmup_steps": 1000,
12
+ "decay_start": 1200,
13
+ "threshold_beta": 0.999,
14
+ "threshold_start_step": 1000,
15
+ "seed": 4,
16
+ "activation_dim": 512,
17
+ "dict_size": 32768,
18
+ "k": 32,
19
+ "device": "cuda:0",
20
+ "layer": 4,
21
+ "lm_name": "EleutherAI/pythia-70m-deduped",
22
+ "submodule_name": "resid_post_layer_4",
23
+ "random_mask_bit_ratio": 0.0
24
+ },
25
+ "buffer": {
26
+ "d_submodule": 512,
27
+ "io": "out",
28
+ "n_ctxs": 2048,
29
+ "ctx_len": 128,
30
+ "refresh_batch_size": 24,
31
+ "out_batch_size": 2048,
32
+ "device": "cuda:0"
33
+ }
34
+ }