adamkarvonen committed on
Commit
925a41a
·
verified ·
1 Parent(s): 6e06106

Upload folder using huggingface_hub

Browse files
Files changed (16) hide show
  1. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_0/ae.pt +3 -0
  2. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_0/eval_results.json +1 -0
  3. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_1/ae.pt +3 -0
  4. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_1/eval_results.json +1 -0
  5. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_2/ae.pt +3 -0
  6. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_2/eval_results.json +1 -0
  7. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_3/ae.pt +3 -0
  8. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_3/eval_results.json +1 -0
  9. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_0/eval_results.json +1 -1
  10. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_1/eval_results.json +1 -1
  11. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_2/eval_results.json +1 -1
  12. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_3/eval_results.json +1 -1
  13. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_0/eval_results.json +1 -1
  14. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_1/eval_results.json +1 -1
  15. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_2/eval_results.json +1 -1
  16. ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_3/eval_results.json +1 -1
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5a94a2d6dec84165d57928239e40b9cbdf2a91ebeb7c7de60f54e76b29db328
3
+ size 536954902
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 4.41109375, "l1_loss": 41.82125, "l0": 79.8651611328125, "frac_variance_explained": 0.898984375, "cossim": 0.8560546875, "l2_ratio": 0.8562890625, "relative_reconstruction_bias": 1.00046875, "loss_original": 2.13728515625, "loss_reconstructed": 2.3126171875, "loss_zero": 11.8125, "frac_recovered": 0.981953125, "frac_alive": 0.99542236328125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e44176411331836232fcd0181acc760d8d1afc36ef77697db7557c35beff34f9
3
+ size 536954902
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 4.0209375, "l1_loss": 62.89, "l0": 161.96763671875, "frac_variance_explained": 0.91451171875, "cossim": 0.88205078125, "l2_ratio": 0.88236328125, "relative_reconstruction_bias": 1.0012890625, "loss_original": 2.13728515625, "loss_reconstructed": 2.24765625, "loss_zero": 11.8125, "frac_recovered": 0.9882421875, "frac_alive": 0.99993896484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1c01e561595654381b63ce648f563880ba1c48c8f10f70cb238a1c38acd1f75
3
+ size 2147764246
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 4.121875, "l1_loss": 40.935, "l0": 80.9107861328125, "frac_variance_explained": 0.909609375, "cossim": 0.875703125, "l2_ratio": 0.873671875, "relative_reconstruction_bias": 1.00015625, "loss_original": 2.13728515625, "loss_reconstructed": 2.2828125, "loss_zero": 11.8125, "frac_recovered": 0.98490234375, "frac_alive": 0.841888427734375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96cde24541510b6c5d2543a5627e07f7bc3bdec9c0a330d4a62dd610b2bfa18f
3
+ size 2147764246
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.695703125, "l1_loss": 57.2225, "l0": 160.3888671875, "frac_variance_explained": 0.9287890625, "cossim": 0.90158203125, "l2_ratio": 0.89947265625, "relative_reconstruction_bias": 0.9999609375, "loss_original": 2.13728515625, "loss_reconstructed": 2.223046875, "loss_zero": 11.8125, "frac_recovered": 0.99078125, "frac_alive": 0.8576812744140625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_0/eval_results.json CHANGED
@@ -1 +1 @@
1
- {"l2_loss": 64.5, "l1_loss": 158.305, "l0": 3.51408203125, "frac_variance_explained": 0.3057421875, "cossim": 0.1778662109375, "l2_ratio": 3.720390625, "relative_reconstruction_bias": 12.22859375, "loss_original": 2.13728515625, "loss_reconstructed": 10.9125, "loss_zero": 11.8125, "frac_recovered": 0.09535858154296875, "frac_alive": 0.01300048828125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
 
1
+ {"l2_loss": 64.5, "l1_loss": 158.48, "l0": 3.5237890625, "frac_variance_explained": 0.3183203125, "cossim": 0.1781396484375, "l2_ratio": 3.717890625, "relative_reconstruction_bias": 11.90765625, "loss_original": 2.13728515625, "loss_reconstructed": 10.9125, "loss_zero": 11.8125, "frac_recovered": 0.09535858154296875, "frac_alive": 0.0123291015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_1/eval_results.json CHANGED
@@ -1 +1 @@
1
- {"l2_loss": 56.02375, "l1_loss": 130.32, "l0": 6.826220703125, "frac_variance_explained": 0.33234375, "cossim": 0.1442919921875, "l2_ratio": 3.186640625, "relative_reconstruction_bias": 11.3984375, "loss_original": 2.13728515625, "loss_reconstructed": 10.41625, "loss_zero": 11.8125, "frac_recovered": 0.1462725830078125, "frac_alive": 0.04119873046875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
 
1
+ {"l2_loss": 56.015, "l1_loss": 130.39, "l0": 6.8289013671875, "frac_variance_explained": 0.3350390625, "cossim": 0.1444677734375, "l2_ratio": 3.185, "relative_reconstruction_bias": 11.29890625, "loss_original": 2.13728515625, "loss_reconstructed": 10.41625, "loss_zero": 11.8125, "frac_recovered": 0.1462725830078125, "frac_alive": 0.04193115234375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_2/eval_results.json CHANGED
@@ -1 +1 @@
1
- {"l2_loss": 76.5375, "l1_loss": 495.95, "l0": 32.3491943359375, "frac_variance_explained": 0.27076171875, "cossim": 0.14060546875, "l2_ratio": 4.40671875, "relative_reconstruction_bias": 17.7328125, "loss_original": 2.13728515625, "loss_reconstructed": 11.72625, "loss_zero": 11.8125, "frac_recovered": 0.0098712158203125, "frac_alive": 0.00164794921875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
 
1
+ {"l2_loss": 76.535, "l1_loss": 496.14, "l0": 32.3553515625, "frac_variance_explained": 0.286796875, "cossim": 0.1408740234375, "l2_ratio": 4.40625, "relative_reconstruction_bias": 17.151875, "loss_original": 2.13728515625, "loss_reconstructed": 11.72625, "loss_zero": 11.8125, "frac_recovered": 0.0098712158203125, "frac_alive": 0.0015411376953125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_3/eval_results.json CHANGED
@@ -1 +1 @@
1
- {"l2_loss": 64.48, "l1_loss": 201.635, "l0": 8.8580419921875, "frac_variance_explained": 0.32404296875, "cossim": 0.1606640625, "l2_ratio": 3.694765625, "relative_reconstruction_bias": 13.12578125, "loss_original": 2.13728515625, "loss_reconstructed": 10.1690625, "loss_zero": 11.8125, "frac_recovered": 0.17206039428710937, "frac_alive": 0.0052032470703125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
 
1
+ {"l2_loss": 64.4725, "l1_loss": 201.79, "l0": 8.8722314453125, "frac_variance_explained": 0.32947265625, "cossim": 0.16078125, "l2_ratio": 3.696640625, "relative_reconstruction_bias": 13.00953125, "loss_original": 2.13728515625, "loss_reconstructed": 10.1690625, "loss_zero": 11.8125, "frac_recovered": 0.17206039428710937, "frac_alive": 0.005279541015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_0/eval_results.json CHANGED
@@ -1 +1 @@
1
- {"l2_loss": 2.59359375, "l1_loss": 25.0725, "l0": 83.3653076171875, "frac_variance_explained": 0.94828125, "cossim": 0.82373046875, "l2_ratio": 0.8217578125, "relative_reconstruction_bias": 1.00052734375, "loss_original": 2.13728515625, "loss_reconstructed": 2.25451171875, "loss_zero": 11.8125, "frac_recovered": 0.987578125, "frac_alive": 0.9998779296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
 
1
+ {"l2_loss": 2.593515625, "l1_loss": 24.898125, "l0": 83.339609375, "frac_variance_explained": 0.9429296875, "cossim": 0.82373046875, "l2_ratio": 0.82177734375, "relative_reconstruction_bias": 1.0005078125, "loss_original": 2.13728515625, "loss_reconstructed": 2.25451171875, "loss_zero": 11.8125, "frac_recovered": 0.987578125, "frac_alive": 0.99969482421875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_1/eval_results.json CHANGED
@@ -1 +1 @@
1
- {"l2_loss": 2.366015625, "l1_loss": 34.444375, "l0": 169.13447265625, "frac_variance_explained": 0.951484375, "cossim": 0.8555859375, "l2_ratio": 0.85482421875, "relative_reconstruction_bias": 1.00044921875, "loss_original": 2.13728515625, "loss_reconstructed": 2.20716796875, "loss_zero": 11.8125, "frac_recovered": 0.99220703125, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
 
1
+ {"l2_loss": 2.365703125, "l1_loss": 34.48625, "l0": 169.05658203125, "frac_variance_explained": 0.9555078125, "cossim": 0.85583984375, "l2_ratio": 0.8549609375, "relative_reconstruction_bias": 1.0008203125, "loss_original": 2.13728515625, "loss_reconstructed": 2.20716796875, "loss_zero": 11.8125, "frac_recovered": 0.99220703125, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_2/eval_results.json CHANGED
@@ -1 +1 @@
1
- {"l2_loss": 2.44046875, "l1_loss": 23.774375, "l0": 83.2398828125, "frac_variance_explained": 0.95125, "cossim": 0.845078125, "l2_ratio": 0.84109375, "relative_reconstruction_bias": 0.9971875, "loss_original": 2.13728515625, "loss_reconstructed": 2.23462890625, "loss_zero": 11.8125, "frac_recovered": 0.9896875, "frac_alive": 0.9406280517578125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
 
1
+ {"l2_loss": 2.440546875, "l1_loss": 23.644375, "l0": 83.163466796875, "frac_variance_explained": 0.94892578125, "cossim": 0.84505859375, "l2_ratio": 0.8411328125, "relative_reconstruction_bias": 0.9966796875, "loss_original": 2.13728515625, "loss_reconstructed": 2.23462890625, "loss_zero": 11.8125, "frac_recovered": 0.9896875, "frac_alive": 0.940155029296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_3/eval_results.json CHANGED
@@ -1 +1 @@
1
- {"l2_loss": 2.2103125, "l1_loss": 31.881875, "l0": 167.9657568359375, "frac_variance_explained": 0.957421875, "cossim": 0.87513671875, "l2_ratio": 0.87203125, "relative_reconstruction_bias": 0.9966796875, "loss_original": 2.13728515625, "loss_reconstructed": 2.19716796875, "loss_zero": 11.8125, "frac_recovered": 0.99306640625, "frac_alive": 0.9580078125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
 
1
+ {"l2_loss": 2.210234375, "l1_loss": 31.965625, "l0": 168.0044189453125, "frac_variance_explained": 0.958984375, "cossim": 0.875078125, "l2_ratio": 0.87197265625, "relative_reconstruction_bias": 0.99671875, "loss_original": 2.13728515625, "loss_reconstructed": 2.19716796875, "loss_zero": 11.8125, "frac_recovered": 0.99306640625, "frac_alive": 0.957611083984375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}