adamkarvonen committed on Mar 31, 2025

Commit

925a41a

verified ·

1 Parent(s): 6e06106

Upload folder using huggingface_hub

Browse files

Files changed (16) hide show

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_0/ae.pt +3 -0
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_0/eval_results.json +1 -0
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_1/ae.pt +3 -0
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_1/eval_results.json +1 -0
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_2/ae.pt +3 -0
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_2/eval_results.json +1 -0
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_3/ae.pt +3 -0
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_3/eval_results.json +1 -0
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_0/eval_results.json +1 -1
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_1/eval_results.json +1 -1
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_2/eval_results.json +1 -1
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_3/eval_results.json +1 -1
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_0/eval_results.json +1 -1
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_1/eval_results.json +1 -1
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_2/eval_results.json +1 -1
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_3/eval_results.json +1 -1

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_0/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5a94a2d6dec84165d57928239e40b9cbdf2a91ebeb7c7de60f54e76b29db328
+size 536954902

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_0/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 4.41109375, "l1_loss": 41.82125, "l0": 79.8651611328125, "frac_variance_explained": 0.898984375, "cossim": 0.8560546875, "l2_ratio": 0.8562890625, "relative_reconstruction_bias": 1.00046875, "loss_original": 2.13728515625, "loss_reconstructed": 2.3126171875, "loss_zero": 11.8125, "frac_recovered": 0.981953125, "frac_alive": 0.99542236328125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_1/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e44176411331836232fcd0181acc760d8d1afc36ef77697db7557c35beff34f9
+size 536954902

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_1/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 4.0209375, "l1_loss": 62.89, "l0": 161.96763671875, "frac_variance_explained": 0.91451171875, "cossim": 0.88205078125, "l2_ratio": 0.88236328125, "relative_reconstruction_bias": 1.0012890625, "loss_original": 2.13728515625, "loss_reconstructed": 2.24765625, "loss_zero": 11.8125, "frac_recovered": 0.9882421875, "frac_alive": 0.99993896484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_2/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1c01e561595654381b63ce648f563880ba1c48c8f10f70cb238a1c38acd1f75
+size 2147764246

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_2/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 4.121875, "l1_loss": 40.935, "l0": 80.9107861328125, "frac_variance_explained": 0.909609375, "cossim": 0.875703125, "l2_ratio": 0.873671875, "relative_reconstruction_bias": 1.00015625, "loss_original": 2.13728515625, "loss_reconstructed": 2.2828125, "loss_zero": 11.8125, "frac_recovered": 0.98490234375, "frac_alive": 0.841888427734375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_3/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:96cde24541510b6c5d2543a5627e07f7bc3bdec9c0a330d4a62dd610b2bfa18f
+size 2147764246

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_3/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 3.695703125, "l1_loss": 57.2225, "l0": 160.3888671875, "frac_variance_explained": 0.9287890625, "cossim": 0.90158203125, "l2_ratio": 0.89947265625, "relative_reconstruction_bias": 0.9999609375, "loss_original": 2.13728515625, "loss_reconstructed": 2.223046875, "loss_zero": 11.8125, "frac_recovered": 0.99078125, "frac_alive": 0.8576812744140625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_0/eval_results.json CHANGED Viewed

@@ -1 +1 @@

- {"l2_loss": 64.5, "l1_loss": 158.305, "l0": 3.51408203125, "frac_variance_explained": 0.3057421875, "cossim": 0.1778662109375, "l2_ratio": 3.720390625, "relative_reconstruction_bias": 12.22859375, "loss_original": 2.13728515625, "loss_reconstructed": 10.9125, "loss_zero": 11.8125, "frac_recovered": 0.09535858154296875, "frac_alive": 0.01300048828125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

+ {"l2_loss": 64.5, "l1_loss": 158.48, "l0": 3.5237890625, "frac_variance_explained": 0.3183203125, "cossim": 0.1781396484375, "l2_ratio": 3.717890625, "relative_reconstruction_bias": 11.90765625, "loss_original": 2.13728515625, "loss_reconstructed": 10.9125, "loss_zero": 11.8125, "frac_recovered": 0.09535858154296875, "frac_alive": 0.0123291015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_1/eval_results.json CHANGED Viewed

@@ -1 +1 @@

- {"l2_loss": 56.02375, "l1_loss": 130.32, "l0": 6.826220703125, "frac_variance_explained": 0.33234375, "cossim": 0.1442919921875, "l2_ratio": 3.186640625, "relative_reconstruction_bias": 11.3984375, "loss_original": 2.13728515625, "loss_reconstructed": 10.41625, "loss_zero": 11.8125, "frac_recovered": 0.1462725830078125, "frac_alive": 0.04119873046875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

+ {"l2_loss": 56.015, "l1_loss": 130.39, "l0": 6.8289013671875, "frac_variance_explained": 0.3350390625, "cossim": 0.1444677734375, "l2_ratio": 3.185, "relative_reconstruction_bias": 11.29890625, "loss_original": 2.13728515625, "loss_reconstructed": 10.41625, "loss_zero": 11.8125, "frac_recovered": 0.1462725830078125, "frac_alive": 0.04193115234375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_2/eval_results.json CHANGED Viewed

@@ -1 +1 @@

- {"l2_loss": 76.5375, "l1_loss": 495.95, "l0": 32.3491943359375, "frac_variance_explained": 0.27076171875, "cossim": 0.14060546875, "l2_ratio": 4.40671875, "relative_reconstruction_bias": 17.7328125, "loss_original": 2.13728515625, "loss_reconstructed": 11.72625, "loss_zero": 11.8125, "frac_recovered": 0.0098712158203125, "frac_alive": 0.00164794921875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

+ {"l2_loss": 76.535, "l1_loss": 496.14, "l0": 32.3553515625, "frac_variance_explained": 0.286796875, "cossim": 0.1408740234375, "l2_ratio": 4.40625, "relative_reconstruction_bias": 17.151875, "loss_original": 2.13728515625, "loss_reconstructed": 11.72625, "loss_zero": 11.8125, "frac_recovered": 0.0098712158203125, "frac_alive": 0.0015411376953125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_3/eval_results.json CHANGED Viewed

@@ -1 +1 @@

- {"l2_loss": 64.48, "l1_loss": 201.635, "l0": 8.8580419921875, "frac_variance_explained": 0.32404296875, "cossim": 0.1606640625, "l2_ratio": 3.694765625, "relative_reconstruction_bias": 13.12578125, "loss_original": 2.13728515625, "loss_reconstructed": 10.1690625, "loss_zero": 11.8125, "frac_recovered": 0.17206039428710937, "frac_alive": 0.0052032470703125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

+ {"l2_loss": 64.4725, "l1_loss": 201.79, "l0": 8.8722314453125, "frac_variance_explained": 0.32947265625, "cossim": 0.16078125, "l2_ratio": 3.696640625, "relative_reconstruction_bias": 13.00953125, "loss_original": 2.13728515625, "loss_reconstructed": 10.1690625, "loss_zero": 11.8125, "frac_recovered": 0.17206039428710937, "frac_alive": 0.005279541015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_0/eval_results.json CHANGED Viewed

@@ -1 +1 @@

- {"l2_loss": 2.59359375, "l1_loss": 25.0725, "l0": 83.3653076171875, "frac_variance_explained": 0.94828125, "cossim": 0.82373046875, "l2_ratio": 0.8217578125, "relative_reconstruction_bias": 1.00052734375, "loss_original": 2.13728515625, "loss_reconstructed": 2.25451171875, "loss_zero": 11.8125, "frac_recovered": 0.987578125, "frac_alive": 0.9998779296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

+ {"l2_loss": 2.593515625, "l1_loss": 24.898125, "l0": 83.339609375, "frac_variance_explained": 0.9429296875, "cossim": 0.82373046875, "l2_ratio": 0.82177734375, "relative_reconstruction_bias": 1.0005078125, "loss_original": 2.13728515625, "loss_reconstructed": 2.25451171875, "loss_zero": 11.8125, "frac_recovered": 0.987578125, "frac_alive": 0.99969482421875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_1/eval_results.json CHANGED Viewed

@@ -1 +1 @@

- {"l2_loss": 2.366015625, "l1_loss": 34.444375, "l0": 169.13447265625, "frac_variance_explained": 0.951484375, "cossim": 0.8555859375, "l2_ratio": 0.85482421875, "relative_reconstruction_bias": 1.00044921875, "loss_original": 2.13728515625, "loss_reconstructed": 2.20716796875, "loss_zero": 11.8125, "frac_recovered": 0.99220703125, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

+ {"l2_loss": 2.365703125, "l1_loss": 34.48625, "l0": 169.05658203125, "frac_variance_explained": 0.9555078125, "cossim": 0.85583984375, "l2_ratio": 0.8549609375, "relative_reconstruction_bias": 1.0008203125, "loss_original": 2.13728515625, "loss_reconstructed": 2.20716796875, "loss_zero": 11.8125, "frac_recovered": 0.99220703125, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_2/eval_results.json CHANGED Viewed

@@ -1 +1 @@

- {"l2_loss": 2.44046875, "l1_loss": 23.774375, "l0": 83.2398828125, "frac_variance_explained": 0.95125, "cossim": 0.845078125, "l2_ratio": 0.84109375, "relative_reconstruction_bias": 0.9971875, "loss_original": 2.13728515625, "loss_reconstructed": 2.23462890625, "loss_zero": 11.8125, "frac_recovered": 0.9896875, "frac_alive": 0.9406280517578125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

+ {"l2_loss": 2.440546875, "l1_loss": 23.644375, "l0": 83.163466796875, "frac_variance_explained": 0.94892578125, "cossim": 0.84505859375, "l2_ratio": 0.8411328125, "relative_reconstruction_bias": 0.9966796875, "loss_original": 2.13728515625, "loss_reconstructed": 2.23462890625, "loss_zero": 11.8125, "frac_recovered": 0.9896875, "frac_alive": 0.940155029296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_3/eval_results.json CHANGED Viewed

@@ -1 +1 @@

- {"l2_loss": 2.2103125, "l1_loss": 31.881875, "l0": 167.9657568359375, "frac_variance_explained": 0.957421875, "cossim": 0.87513671875, "l2_ratio": 0.87203125, "relative_reconstruction_bias": 0.9966796875, "loss_original": 2.13728515625, "loss_reconstructed": 2.19716796875, "loss_zero": 11.8125, "frac_recovered": 0.99306640625, "frac_alive": 0.9580078125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

+ {"l2_loss": 2.210234375, "l1_loss": 31.965625, "l0": 168.0044189453125, "frac_variance_explained": 0.958984375, "cossim": 0.875078125, "l2_ratio": 0.87197265625, "relative_reconstruction_bias": 0.99671875, "loss_original": 2.13728515625, "loss_reconstructed": 2.19716796875, "loss_zero": 11.8125, "frac_recovered": 0.99306640625, "frac_alive": 0.957611083984375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}