luciaquirke committed on
Commit
eecd4b5
·
verified ·
1 Parent(s): 06d02ca

Upload depth-scaled transformer probes

Browse files
layer_12.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f3f5efe333ba714404f30034cbedc8f8560e55a63ff19568bdab6a949f533f6
3
+ size 6137752
layer_12_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_idx": 12,
3
+ "model_dim": 4096,
4
+ "probe_dim": 64,
5
+ "num_layers": 20,
6
+ "num_heads": 4,
7
+ "ffn_multiplier": 4,
8
+ "dropout": 0.0,
9
+ "train_mse": 2.4374186965942384,
10
+ "eval_mse": 2.0876001670920714
11
+ }
layer_16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63f9a3185a7d08d3f8adaaec5d57c269131507484916e5576adf6f0a1dc21e2
3
+ size 5333000
layer_16_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_idx": 16,
3
+ "model_dim": 4096,
4
+ "probe_dim": 64,
5
+ "num_layers": 16,
6
+ "num_heads": 4,
7
+ "ffn_multiplier": 4,
8
+ "dropout": 0.0,
9
+ "train_mse": 3.0125887276649475,
10
+ "eval_mse": 3.479990794368405
11
+ }
layer_20.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b43049f2dc20f16b83b44c664b4edc59177dbb4a89cf3d6756cb3f6267832a16
3
+ size 4528256
layer_20_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_idx": 20,
3
+ "model_dim": 4096,
4
+ "probe_dim": 64,
5
+ "num_layers": 12,
6
+ "num_heads": 4,
7
+ "ffn_multiplier": 4,
8
+ "dropout": 0.0,
9
+ "train_mse": 3.406930178833008,
10
+ "eval_mse": 3.4354089314601683
11
+ }
layer_24.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cfe4724fc2c87c898a756d65a9fd02f343edb00899f6a666a69f9d15ad3a047
3
+ size 3723528
layer_24_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_idx": 24,
3
+ "model_dim": 4096,
4
+ "probe_dim": 64,
5
+ "num_layers": 8,
6
+ "num_heads": 4,
7
+ "ffn_multiplier": 4,
8
+ "dropout": 0.0,
9
+ "train_mse": 4.520931562042236,
10
+ "eval_mse": 4.617096263317011
11
+ }
layer_28.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62ab317791dd40865432f4453906b9ee3b1f1897b0a1c6ff32c1f5d6dbb283be
3
+ size 2918832
layer_28_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_idx": 28,
3
+ "model_dim": 4096,
4
+ "probe_dim": 64,
5
+ "num_layers": 4,
6
+ "num_heads": 4,
7
+ "ffn_multiplier": 4,
8
+ "dropout": 0.0,
9
+ "train_mse": 8.369592458343506,
10
+ "eval_mse": 8.405994867279654
11
+ }
layer_32.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa8735474f1673449fbac2a61b4dcf8f8b328eca6cd302f50b533369cec60de1
3
+ size 2315304
layer_32_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_idx": 32,
3
+ "model_dim": 4096,
4
+ "probe_dim": 64,
5
+ "num_layers": 1,
6
+ "num_heads": 4,
7
+ "ffn_multiplier": 4,
8
+ "dropout": 0.0,
9
+ "train_mse": 0.45685000762939454,
10
+ "eval_mse": 0.4507866852244911
11
+ }
layer_8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1fb5d91b8ae2473a9aa374f65336c92aaebfc8a752ecb9b56b6dcf8044c18ad
3
+ size 6942496
layer_8_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_idx": 8,
3
+ "model_dim": 4096,
4
+ "probe_dim": 64,
5
+ "num_layers": 24,
6
+ "num_heads": 4,
7
+ "ffn_multiplier": 4,
8
+ "dropout": 0.0,
9
+ "train_mse": 0.9838169354915619,
10
+ "eval_mse": 0.7481966165184717
11
+ }