Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- hierarchy_mistral_7b_instruct_v0.2/layer_10.json +17 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_10.pt +3 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_11.json +17 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_11.pt +3 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_12.json +17 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_12.pt +3 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_13.json +17 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_13.pt +3 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_14.json +17 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_14.pt +3 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_15.json +17 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_15.pt +3 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_16.json +17 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_16.pt +3 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_17.json +17 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_17.pt +3 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_18.json +17 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_18.pt +3 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_19.json +17 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_19.pt +3 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_20.json +17 -0
- hierarchy_mistral_7b_instruct_v0.2/layer_20.pt +3 -0
- hierarchy_mistral_7b_instruct_v0.2/metadata.json +89 -0
- hierarchy_qwen3_8b/layer_12.json +17 -0
- hierarchy_qwen3_8b/layer_12.pt +3 -0
- hierarchy_qwen3_8b/layer_13.json +17 -0
- hierarchy_qwen3_8b/layer_13.pt +3 -0
- hierarchy_qwen3_8b/layer_14.json +17 -0
- hierarchy_qwen3_8b/layer_14.pt +3 -0
- hierarchy_qwen3_8b/layer_15.json +17 -0
- hierarchy_qwen3_8b/layer_15.pt +3 -0
- hierarchy_qwen3_8b/layer_16.json +17 -0
- hierarchy_qwen3_8b/layer_16.pt +3 -0
- hierarchy_qwen3_8b/layer_17.json +17 -0
- hierarchy_qwen3_8b/layer_17.pt +3 -0
- hierarchy_qwen3_8b/layer_18.json +17 -0
- hierarchy_qwen3_8b/layer_18.pt +3 -0
- hierarchy_qwen3_8b/layer_19.json +17 -0
- hierarchy_qwen3_8b/layer_19.pt +3 -0
- hierarchy_qwen3_8b/layer_20.json +17 -0
- hierarchy_qwen3_8b/layer_20.pt +3 -0
- hierarchy_qwen3_8b/layer_21.json +17 -0
- hierarchy_qwen3_8b/layer_21.pt +3 -0
- hierarchy_qwen3_8b/layer_22.json +17 -0
- hierarchy_qwen3_8b/layer_22.pt +3 -0
- hierarchy_qwen3_8b/layer_23.json +17 -0
- hierarchy_qwen3_8b/layer_23.pt +3 -0
- hierarchy_qwen3_8b/metadata.json +96 -0
- refusal_gemma_2_9b_it/layer_14.json +17 -0
- refusal_gemma_2_9b_it/layer_14.pt +3 -0
hierarchy_mistral_7b_instruct_v0.2/layer_10.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 10,
|
| 4 |
+
"model_name": "mistralai/Mistral-7B-Instruct-v0.2",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 2.21875,
|
| 10 |
+
"neg_mean_norm": 1.71875,
|
| 11 |
+
"vector_norm": 1.171875,
|
| 12 |
+
"created_at": "2025-12-18T07:56:46.687323"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_mistral_7b_instruct_v0.2/layer_10.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1006519eb5b358f74bf30a99c8c627f724ffce4df650344d5598b584f58f97a2
|
| 3 |
+
size 9776
|
hierarchy_mistral_7b_instruct_v0.2/layer_11.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 11,
|
| 4 |
+
"model_name": "mistralai/Mistral-7B-Instruct-v0.2",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 2.515625,
|
| 10 |
+
"neg_mean_norm": 1.9453125,
|
| 11 |
+
"vector_norm": 1.375,
|
| 12 |
+
"created_at": "2025-12-18T07:56:59.172719"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_mistral_7b_instruct_v0.2/layer_11.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ed8a8c8648eb0b563ee51a3c12fa9c0621e04a5971f25d0a59fd748efda92ea
|
| 3 |
+
size 9776
|
hierarchy_mistral_7b_instruct_v0.2/layer_12.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 12,
|
| 4 |
+
"model_name": "mistralai/Mistral-7B-Instruct-v0.2",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 2.796875,
|
| 10 |
+
"neg_mean_norm": 2.15625,
|
| 11 |
+
"vector_norm": 1.59375,
|
| 12 |
+
"created_at": "2025-12-18T07:57:11.731605"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_mistral_7b_instruct_v0.2/layer_12.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:250f67ef20cea1c35b0bff4fc053af114810620e981c6e58f4ae8ebf7d9ce7dd
|
| 3 |
+
size 9776
|
hierarchy_mistral_7b_instruct_v0.2/layer_13.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 13,
|
| 4 |
+
"model_name": "mistralai/Mistral-7B-Instruct-v0.2",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 3.15625,
|
| 10 |
+
"neg_mean_norm": 2.515625,
|
| 11 |
+
"vector_norm": 1.875,
|
| 12 |
+
"created_at": "2025-12-18T07:57:24.201189"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_mistral_7b_instruct_v0.2/layer_13.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f2c18c7d76db7d6ad80f5b17f562066b71d51e26634501d67c4f585a7d2e1e6
|
| 3 |
+
size 9776
|
hierarchy_mistral_7b_instruct_v0.2/layer_14.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 14,
|
| 4 |
+
"model_name": "mistralai/Mistral-7B-Instruct-v0.2",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 3.46875,
|
| 10 |
+
"neg_mean_norm": 2.75,
|
| 11 |
+
"vector_norm": 2.171875,
|
| 12 |
+
"created_at": "2025-12-18T07:57:36.698532"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_mistral_7b_instruct_v0.2/layer_14.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b1d114124e531ea8bca68532d225a6e4f39cdfe0a634aa83fac6b9f170d9a3a
|
| 3 |
+
size 9776
|
hierarchy_mistral_7b_instruct_v0.2/layer_15.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 15,
|
| 4 |
+
"model_name": "mistralai/Mistral-7B-Instruct-v0.2",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 4.0,
|
| 10 |
+
"neg_mean_norm": 3.203125,
|
| 11 |
+
"vector_norm": 2.515625,
|
| 12 |
+
"created_at": "2025-12-18T07:57:49.241757"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_mistral_7b_instruct_v0.2/layer_15.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c525b49a11073bf3cc35e581882eb059cee3eb90337ab98ec402fbd255b1e6e0
|
| 3 |
+
size 9776
|
hierarchy_mistral_7b_instruct_v0.2/layer_16.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 16,
|
| 4 |
+
"model_name": "mistralai/Mistral-7B-Instruct-v0.2",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 4.5625,
|
| 10 |
+
"neg_mean_norm": 3.8125,
|
| 11 |
+
"vector_norm": 2.796875,
|
| 12 |
+
"created_at": "2025-12-18T07:58:01.868522"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_mistral_7b_instruct_v0.2/layer_16.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e44de7b7d62c10a13d54eaca978cbabf5a0e5b45b88b0341b9a76b9e0c4caa0e
|
| 3 |
+
size 9776
|
hierarchy_mistral_7b_instruct_v0.2/layer_17.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 17,
|
| 4 |
+
"model_name": "mistralai/Mistral-7B-Instruct-v0.2",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 4.96875,
|
| 10 |
+
"neg_mean_norm": 4.21875,
|
| 11 |
+
"vector_norm": 3.25,
|
| 12 |
+
"created_at": "2025-12-18T07:58:14.433875"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_mistral_7b_instruct_v0.2/layer_17.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02714212448839254a5283e4fab3815a3421f790ca793b6c6622b5301a78b604
|
| 3 |
+
size 9776
|
hierarchy_mistral_7b_instruct_v0.2/layer_18.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 18,
|
| 4 |
+
"model_name": "mistralai/Mistral-7B-Instruct-v0.2",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 5.75,
|
| 10 |
+
"neg_mean_norm": 5.03125,
|
| 11 |
+
"vector_norm": 3.671875,
|
| 12 |
+
"created_at": "2025-12-18T07:58:26.994268"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_mistral_7b_instruct_v0.2/layer_18.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2716a6dc3e5be95d59e5f6d3087ef86b4ea6f89432312b3297e2f1202135bd09
|
| 3 |
+
size 9776
|
hierarchy_mistral_7b_instruct_v0.2/layer_19.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 19,
|
| 4 |
+
"model_name": "mistralai/Mistral-7B-Instruct-v0.2",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 6.375,
|
| 10 |
+
"neg_mean_norm": 5.65625,
|
| 11 |
+
"vector_norm": 4.03125,
|
| 12 |
+
"created_at": "2025-12-18T07:58:39.596522"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_mistral_7b_instruct_v0.2/layer_19.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a2891da9226cf5e7848205990cb14a1dbb93a154f0f72bc6a5ad9b136377c03
|
| 3 |
+
size 9776
|
hierarchy_mistral_7b_instruct_v0.2/layer_20.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 20,
|
| 4 |
+
"model_name": "mistralai/Mistral-7B-Instruct-v0.2",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 7.09375,
|
| 10 |
+
"neg_mean_norm": 6.375,
|
| 11 |
+
"vector_norm": 4.40625,
|
| 12 |
+
"created_at": "2025-12-18T07:58:52.211449"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_mistral_7b_instruct_v0.2/layer_20.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f21a277acc94df368c916e79ba65afdf98c26aa4f8e4e249bcf6985bbf701243
|
| 3 |
+
size 9776
|
hierarchy_mistral_7b_instruct_v0.2/metadata.json
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "hierarchy",
|
| 3 |
+
"model_name": "mistralai/Mistral-7B-Instruct-v0.2",
|
| 4 |
+
"layers": [
|
| 5 |
+
10,
|
| 6 |
+
11,
|
| 7 |
+
12,
|
| 8 |
+
13,
|
| 9 |
+
14,
|
| 10 |
+
15,
|
| 11 |
+
16,
|
| 12 |
+
17,
|
| 13 |
+
18,
|
| 14 |
+
19,
|
| 15 |
+
20
|
| 16 |
+
],
|
| 17 |
+
"best_layer": 15,
|
| 18 |
+
"best_improvement": 0.25,
|
| 19 |
+
"num_pairs": 26,
|
| 20 |
+
"target_direction": "increase",
|
| 21 |
+
"layer_results": {
|
| 22 |
+
"10": {
|
| 23 |
+
"0.0": 0.5,
|
| 24 |
+
"0.5": 0.75,
|
| 25 |
+
"1.0": 0.5,
|
| 26 |
+
"1.5": 0.5
|
| 27 |
+
},
|
| 28 |
+
"11": {
|
| 29 |
+
"0.0": 0.5,
|
| 30 |
+
"0.5": 0.5,
|
| 31 |
+
"1.0": 0.5,
|
| 32 |
+
"1.5": 0.5
|
| 33 |
+
},
|
| 34 |
+
"12": {
|
| 35 |
+
"0.0": 0.5,
|
| 36 |
+
"0.5": 0.75,
|
| 37 |
+
"1.0": 0.25,
|
| 38 |
+
"1.5": 0.5
|
| 39 |
+
},
|
| 40 |
+
"13": {
|
| 41 |
+
"0.0": 0.5,
|
| 42 |
+
"0.5": 0.5,
|
| 43 |
+
"1.0": 0.25,
|
| 44 |
+
"1.5": 0.5
|
| 45 |
+
},
|
| 46 |
+
"14": {
|
| 47 |
+
"0.0": 0.5,
|
| 48 |
+
"0.5": 0.5,
|
| 49 |
+
"1.0": 0.5,
|
| 50 |
+
"1.5": 0.25
|
| 51 |
+
},
|
| 52 |
+
"15": {
|
| 53 |
+
"0.0": 0.5,
|
| 54 |
+
"0.5": 0.25,
|
| 55 |
+
"1.0": 0.75,
|
| 56 |
+
"1.5": 0.5
|
| 57 |
+
},
|
| 58 |
+
"16": {
|
| 59 |
+
"0.0": 0.5,
|
| 60 |
+
"0.5": 0.5,
|
| 61 |
+
"1.0": 0.5,
|
| 62 |
+
"1.5": 0.25
|
| 63 |
+
},
|
| 64 |
+
"17": {
|
| 65 |
+
"0.0": 0.5,
|
| 66 |
+
"0.5": 0.5,
|
| 67 |
+
"1.0": 0.75,
|
| 68 |
+
"1.5": 0.0
|
| 69 |
+
},
|
| 70 |
+
"18": {
|
| 71 |
+
"0.0": 0.5,
|
| 72 |
+
"0.5": 0.75,
|
| 73 |
+
"1.0": 0.75,
|
| 74 |
+
"1.5": 0.5
|
| 75 |
+
},
|
| 76 |
+
"19": {
|
| 77 |
+
"0.0": 0.5,
|
| 78 |
+
"0.5": 0.5,
|
| 79 |
+
"1.0": 0.75,
|
| 80 |
+
"1.5": 0.75
|
| 81 |
+
},
|
| 82 |
+
"20": {
|
| 83 |
+
"0.0": 0.5,
|
| 84 |
+
"0.5": 0.75,
|
| 85 |
+
"1.0": 0.75,
|
| 86 |
+
"1.5": 0.25
|
| 87 |
+
}
|
| 88 |
+
}
|
| 89 |
+
}
|
hierarchy_qwen3_8b/layer_12.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 12,
|
| 4 |
+
"model_name": "Qwen/Qwen3-8B",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 57.5,
|
| 10 |
+
"neg_mean_norm": 53.0,
|
| 11 |
+
"vector_norm": 16.5,
|
| 12 |
+
"created_at": "2025-12-18T22:23:00.548285"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_qwen3_8b/layer_12.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce51034b11b188669eec43728629bb4910b7a172f4c1096bda675c8f4e8f29c1
|
| 3 |
+
size 9776
|
hierarchy_qwen3_8b/layer_13.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 13,
|
| 4 |
+
"model_name": "Qwen/Qwen3-8B",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 57.75,
|
| 10 |
+
"neg_mean_norm": 52.5,
|
| 11 |
+
"vector_norm": 17.375,
|
| 12 |
+
"created_at": "2025-12-18T22:23:13.018805"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_qwen3_8b/layer_13.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6e78111385cbc647ba93a926857ec0728defacc450004089399612b4040e6ee
|
| 3 |
+
size 9776
|
hierarchy_qwen3_8b/layer_14.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 14,
|
| 4 |
+
"model_name": "Qwen/Qwen3-8B",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 64.0,
|
| 10 |
+
"neg_mean_norm": 59.5,
|
| 11 |
+
"vector_norm": 18.25,
|
| 12 |
+
"created_at": "2025-12-18T22:23:25.489920"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_qwen3_8b/layer_14.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77e0c227409a6cf799777c3c062be3f8de3bb502486fd4d9a8ac8ad0787b7f06
|
| 3 |
+
size 9776
|
hierarchy_qwen3_8b/layer_15.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 15,
|
| 4 |
+
"model_name": "Qwen/Qwen3-8B",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 64.5,
|
| 10 |
+
"neg_mean_norm": 59.25,
|
| 11 |
+
"vector_norm": 20.125,
|
| 12 |
+
"created_at": "2025-12-18T22:23:37.918916"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_qwen3_8b/layer_15.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c52325a7cb27e8b27546652ff6ec96617898b552f3bdc3f35e2588277bac3e22
|
| 3 |
+
size 9776
|
hierarchy_qwen3_8b/layer_16.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 16,
|
| 4 |
+
"model_name": "Qwen/Qwen3-8B",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 70.5,
|
| 10 |
+
"neg_mean_norm": 66.0,
|
| 11 |
+
"vector_norm": 22.75,
|
| 12 |
+
"created_at": "2025-12-18T22:23:50.342735"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_qwen3_8b/layer_16.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f63575f4b03adaca359a235936cad5def59645b5c633eee43a605a3fdc3bb7b4
|
| 3 |
+
size 9776
|
hierarchy_qwen3_8b/layer_17.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 17,
|
| 4 |
+
"model_name": "Qwen/Qwen3-8B",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 79.0,
|
| 10 |
+
"neg_mean_norm": 72.5,
|
| 11 |
+
"vector_norm": 24.875,
|
| 12 |
+
"created_at": "2025-12-18T22:24:02.740801"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_qwen3_8b/layer_17.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c430e797d25d7da3ae75e29dfb69c9f1086d4b598e1b3c2731fb5b3deb8c0af
|
| 3 |
+
size 9776
|
hierarchy_qwen3_8b/layer_18.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 18,
|
| 4 |
+
"model_name": "Qwen/Qwen3-8B",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 82.0,
|
| 10 |
+
"neg_mean_norm": 76.0,
|
| 11 |
+
"vector_norm": 28.5,
|
| 12 |
+
"created_at": "2025-12-18T22:24:15.178867"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_qwen3_8b/layer_18.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:413c094daf3f75329afca04e1ef3669d5eabaae2cb96f67ba3d95d8c2c771058
|
| 3 |
+
size 9776
|
hierarchy_qwen3_8b/layer_19.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 19,
|
| 4 |
+
"model_name": "Qwen/Qwen3-8B",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 95.5,
|
| 10 |
+
"neg_mean_norm": 88.5,
|
| 11 |
+
"vector_norm": 35.5,
|
| 12 |
+
"created_at": "2025-12-18T22:24:27.615196"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_qwen3_8b/layer_19.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:935c01ff2ecd1b7ca76b7216201d9d0aca1ec92d335655547226132418baa919
|
| 3 |
+
size 9776
|
hierarchy_qwen3_8b/layer_20.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 20,
|
| 4 |
+
"model_name": "Qwen/Qwen3-8B",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 102.0,
|
| 10 |
+
"neg_mean_norm": 97.5,
|
| 11 |
+
"vector_norm": 38.0,
|
| 12 |
+
"created_at": "2025-12-18T22:24:40.132334"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_qwen3_8b/layer_20.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b33e968ee60893fceb743e97b43585079759340346a0446e6d4b98f28b84efb7
|
| 3 |
+
size 9776
|
hierarchy_qwen3_8b/layer_21.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 21,
|
| 4 |
+
"model_name": "Qwen/Qwen3-8B",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 106.5,
|
| 10 |
+
"neg_mean_norm": 103.0,
|
| 11 |
+
"vector_norm": 41.75,
|
| 12 |
+
"created_at": "2025-12-18T22:24:52.616517"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_qwen3_8b/layer_21.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:355eb6c12243de94f3e020b5d7bf8c512b93f5fef321c4f3af2695c68b5133a5
|
| 3 |
+
size 9776
|
hierarchy_qwen3_8b/layer_22.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 22,
|
| 4 |
+
"model_name": "Qwen/Qwen3-8B",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 131.0,
|
| 10 |
+
"neg_mean_norm": 127.0,
|
| 11 |
+
"vector_norm": 50.5,
|
| 12 |
+
"created_at": "2025-12-18T22:25:05.048245"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_qwen3_8b/layer_22.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:580cf4c17cb42b36970276ea01be1d4fe720bf4342e179c5eff2c4d31a09497c
|
| 3 |
+
size 9776
|
hierarchy_qwen3_8b/layer_23.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "instruction_hierarchy",
|
| 3 |
+
"layer_index": 23,
|
| 4 |
+
"model_name": "Qwen/Qwen3-8B",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 26,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 159.0,
|
| 10 |
+
"neg_mean_norm": 156.0,
|
| 11 |
+
"vector_norm": 62.25,
|
| 12 |
+
"created_at": "2025-12-18T22:25:17.482916"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
4096
|
| 16 |
+
]
|
| 17 |
+
}
|
hierarchy_qwen3_8b/layer_23.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4cf2b7d9c494448f6350f792048fcd8cce2e810b9e24c4bde228970c18bf3d3c
|
| 3 |
+
size 9776
|
hierarchy_qwen3_8b/metadata.json
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "hierarchy",
|
| 3 |
+
"model_name": "Qwen/Qwen3-8B",
|
| 4 |
+
"layers": [
|
| 5 |
+
12,
|
| 6 |
+
13,
|
| 7 |
+
14,
|
| 8 |
+
15,
|
| 9 |
+
16,
|
| 10 |
+
17,
|
| 11 |
+
18,
|
| 12 |
+
19,
|
| 13 |
+
20,
|
| 14 |
+
21,
|
| 15 |
+
22,
|
| 16 |
+
23
|
| 17 |
+
],
|
| 18 |
+
"best_layer": 14,
|
| 19 |
+
"best_improvement": 0.25,
|
| 20 |
+
"num_pairs": 26,
|
| 21 |
+
"target_direction": "increase",
|
| 22 |
+
"layer_results": {
|
| 23 |
+
"12": {
|
| 24 |
+
"0.0": 0.0,
|
| 25 |
+
"0.5": 0.25,
|
| 26 |
+
"1.0": 0.0,
|
| 27 |
+
"1.5": 0.25
|
| 28 |
+
},
|
| 29 |
+
"13": {
|
| 30 |
+
"0.0": 0.0,
|
| 31 |
+
"0.5": 0.25,
|
| 32 |
+
"1.0": 0.0,
|
| 33 |
+
"1.5": 0.0
|
| 34 |
+
},
|
| 35 |
+
"14": {
|
| 36 |
+
"0.0": 0.0,
|
| 37 |
+
"0.5": 0.25,
|
| 38 |
+
"1.0": 0.25,
|
| 39 |
+
"1.5": 0.0
|
| 40 |
+
},
|
| 41 |
+
"15": {
|
| 42 |
+
"0.0": 0.0,
|
| 43 |
+
"0.5": 0.25,
|
| 44 |
+
"1.0": 0.25,
|
| 45 |
+
"1.5": 0.5
|
| 46 |
+
},
|
| 47 |
+
"16": {
|
| 48 |
+
"0.0": 0.0,
|
| 49 |
+
"0.5": 0.0,
|
| 50 |
+
"1.0": 0.0,
|
| 51 |
+
"1.5": 0.0
|
| 52 |
+
},
|
| 53 |
+
"17": {
|
| 54 |
+
"0.0": 0.0,
|
| 55 |
+
"0.5": 0.25,
|
| 56 |
+
"1.0": 0.25,
|
| 57 |
+
"1.5": 0.0
|
| 58 |
+
},
|
| 59 |
+
"18": {
|
| 60 |
+
"0.0": 0.0,
|
| 61 |
+
"0.5": 0.25,
|
| 62 |
+
"1.0": 0.0,
|
| 63 |
+
"1.5": 0.25
|
| 64 |
+
},
|
| 65 |
+
"19": {
|
| 66 |
+
"0.0": 0.0,
|
| 67 |
+
"0.5": 0.0,
|
| 68 |
+
"1.0": 0.0,
|
| 69 |
+
"1.5": 0.0
|
| 70 |
+
},
|
| 71 |
+
"20": {
|
| 72 |
+
"0.0": 0.0,
|
| 73 |
+
"0.5": 0.25,
|
| 74 |
+
"1.0": 0.25,
|
| 75 |
+
"1.5": 0.5
|
| 76 |
+
},
|
| 77 |
+
"21": {
|
| 78 |
+
"0.0": 0.0,
|
| 79 |
+
"0.5": 0.25,
|
| 80 |
+
"1.0": 0.0,
|
| 81 |
+
"1.5": 0.0
|
| 82 |
+
},
|
| 83 |
+
"22": {
|
| 84 |
+
"0.0": 0.0,
|
| 85 |
+
"0.5": 0.0,
|
| 86 |
+
"1.0": 0.25,
|
| 87 |
+
"1.5": 0.5
|
| 88 |
+
},
|
| 89 |
+
"23": {
|
| 90 |
+
"0.0": 0.0,
|
| 91 |
+
"0.5": 0.0,
|
| 92 |
+
"1.0": 0.25,
|
| 93 |
+
"1.5": 0.25
|
| 94 |
+
}
|
| 95 |
+
}
|
| 96 |
+
}
|
refusal_gemma_2_9b_it/layer_14.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"behavior": "refusal",
|
| 3 |
+
"layer_index": 14,
|
| 4 |
+
"model_name": "google/gemma-2-9b-it",
|
| 5 |
+
"extraction_method": "caa",
|
| 6 |
+
"metadata": {
|
| 7 |
+
"num_pairs": 50,
|
| 8 |
+
"token_position": "last",
|
| 9 |
+
"pos_mean_norm": 161.0,
|
| 10 |
+
"neg_mean_norm": 147.625,
|
| 11 |
+
"vector_norm": 115.375,
|
| 12 |
+
"created_at": "2025-12-18T00:57:55.073309"
|
| 13 |
+
},
|
| 14 |
+
"vector_shape": [
|
| 15 |
+
3584
|
| 16 |
+
]
|
| 17 |
+
}
|
refusal_gemma_2_9b_it/layer_14.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd28df76df8f95e46b419d51abe7b2372be1b1c8676a9224d5bfeeae42f4ee67
|
| 3 |
+
size 8752
|