Upload depth-scaled transformer probes
Browse files- layer_12.safetensors +3 -0
- layer_12_config.json +11 -0
- layer_16.safetensors +3 -0
- layer_16_config.json +11 -0
- layer_20.safetensors +3 -0
- layer_20_config.json +11 -0
- layer_24.safetensors +3 -0
- layer_24_config.json +11 -0
- layer_28.safetensors +3 -0
- layer_28_config.json +11 -0
- layer_32.safetensors +3 -0
- layer_32_config.json +11 -0
- layer_8.safetensors +3 -0
- layer_8_config.json +11 -0
layer_12.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f3f5efe333ba714404f30034cbedc8f8560e55a63ff19568bdab6a949f533f6
|
| 3 |
+
size 6137752
|
layer_12_config.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"layer_idx": 12,
|
| 3 |
+
"model_dim": 4096,
|
| 4 |
+
"probe_dim": 64,
|
| 5 |
+
"num_layers": 20,
|
| 6 |
+
"num_heads": 4,
|
| 7 |
+
"ffn_multiplier": 4,
|
| 8 |
+
"dropout": 0.0,
|
| 9 |
+
"train_mse": 2.4374186965942384,
|
| 10 |
+
"eval_mse": 2.0876001670920714
|
| 11 |
+
}
|
layer_16.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d63f9a3185a7d08d3f8adaaec5d57c269131507484916e5576adf6f0a1dc21e2
|
| 3 |
+
size 5333000
|
layer_16_config.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"layer_idx": 16,
|
| 3 |
+
"model_dim": 4096,
|
| 4 |
+
"probe_dim": 64,
|
| 5 |
+
"num_layers": 16,
|
| 6 |
+
"num_heads": 4,
|
| 7 |
+
"ffn_multiplier": 4,
|
| 8 |
+
"dropout": 0.0,
|
| 9 |
+
"train_mse": 3.0125887276649475,
|
| 10 |
+
"eval_mse": 3.479990794368405
|
| 11 |
+
}
|
layer_20.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b43049f2dc20f16b83b44c664b4edc59177dbb4a89cf3d6756cb3f6267832a16
|
| 3 |
+
size 4528256
|
layer_20_config.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"layer_idx": 20,
|
| 3 |
+
"model_dim": 4096,
|
| 4 |
+
"probe_dim": 64,
|
| 5 |
+
"num_layers": 12,
|
| 6 |
+
"num_heads": 4,
|
| 7 |
+
"ffn_multiplier": 4,
|
| 8 |
+
"dropout": 0.0,
|
| 9 |
+
"train_mse": 3.406930178833008,
|
| 10 |
+
"eval_mse": 3.4354089314601683
|
| 11 |
+
}
|
layer_24.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3cfe4724fc2c87c898a756d65a9fd02f343edb00899f6a666a69f9d15ad3a047
|
| 3 |
+
size 3723528
|
layer_24_config.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"layer_idx": 24,
|
| 3 |
+
"model_dim": 4096,
|
| 4 |
+
"probe_dim": 64,
|
| 5 |
+
"num_layers": 8,
|
| 6 |
+
"num_heads": 4,
|
| 7 |
+
"ffn_multiplier": 4,
|
| 8 |
+
"dropout": 0.0,
|
| 9 |
+
"train_mse": 4.520931562042236,
|
| 10 |
+
"eval_mse": 4.617096263317011
|
| 11 |
+
}
|
layer_28.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62ab317791dd40865432f4453906b9ee3b1f1897b0a1c6ff32c1f5d6dbb283be
|
| 3 |
+
size 2918832
|
layer_28_config.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"layer_idx": 28,
|
| 3 |
+
"model_dim": 4096,
|
| 4 |
+
"probe_dim": 64,
|
| 5 |
+
"num_layers": 4,
|
| 6 |
+
"num_heads": 4,
|
| 7 |
+
"ffn_multiplier": 4,
|
| 8 |
+
"dropout": 0.0,
|
| 9 |
+
"train_mse": 8.369592458343506,
|
| 10 |
+
"eval_mse": 8.405994867279654
|
| 11 |
+
}
|
layer_32.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa8735474f1673449fbac2a61b4dcf8f8b328eca6cd302f50b533369cec60de1
|
| 3 |
+
size 2315304
|
layer_32_config.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"layer_idx": 32,
|
| 3 |
+
"model_dim": 4096,
|
| 4 |
+
"probe_dim": 64,
|
| 5 |
+
"num_layers": 1,
|
| 6 |
+
"num_heads": 4,
|
| 7 |
+
"ffn_multiplier": 4,
|
| 8 |
+
"dropout": 0.0,
|
| 9 |
+
"train_mse": 0.45685000762939454,
|
| 10 |
+
"eval_mse": 0.4507866852244911
|
| 11 |
+
}
|
layer_8.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1fb5d91b8ae2473a9aa374f65336c92aaebfc8a752ecb9b56b6dcf8044c18ad
|
| 3 |
+
size 6942496
|
layer_8_config.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"layer_idx": 8,
|
| 3 |
+
"model_dim": 4096,
|
| 4 |
+
"probe_dim": 64,
|
| 5 |
+
"num_layers": 24,
|
| 6 |
+
"num_heads": 4,
|
| 7 |
+
"ffn_multiplier": 4,
|
| 8 |
+
"dropout": 0.0,
|
| 9 |
+
"train_mse": 0.9838169354915619,
|
| 10 |
+
"eval_mse": 0.7481966165184717
|
| 11 |
+
}
|