File size: 3,734 Bytes
bd2d239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
{
  "top_heads": [
    {
      "rank": 1,
      "layer": 19,
      "head": 5,
      "aie": -0.024941083043813705,
      "abs_aie": 0.024941083043813705
    },
    {
      "rank": 2,
      "layer": 19,
      "head": 3,
      "aie": -0.02465316466987133,
      "abs_aie": 0.02465316466987133
    },
    {
      "rank": 3,
      "layer": 23,
      "head": 1,
      "aie": 0.020055249333381653,
      "abs_aie": 0.020055249333381653
    },
    {
      "rank": 4,
      "layer": 17,
      "head": 7,
      "aie": -0.013633356429636478,
      "abs_aie": 0.013633356429636478
    },
    {
      "rank": 5,
      "layer": 25,
      "head": 4,
      "aie": -0.012786902487277985,
      "abs_aie": 0.012786902487277985
    },
    {
      "rank": 6,
      "layer": 20,
      "head": 0,
      "aie": -0.01222932618111372,
      "abs_aie": 0.01222932618111372
    },
    {
      "rank": 7,
      "layer": 24,
      "head": 8,
      "aie": -0.011511072516441345,
      "abs_aie": 0.011511072516441345
    },
    {
      "rank": 8,
      "layer": 19,
      "head": 6,
      "aie": -0.010977868922054768,
      "abs_aie": 0.010977868922054768
    },
    {
      "rank": 9,
      "layer": 23,
      "head": 0,
      "aie": -0.010334699414670467,
      "abs_aie": 0.010334699414670467
    },
    {
      "rank": 10,
      "layer": 15,
      "head": 7,
      "aie": -0.010305065661668777,
      "abs_aie": 0.010305065661668777
    }
  ],
  "function_vector_path": "/root/SafeGenAI/work/cache/fv/part3_function_vector.pt",
  "default_layer": 9,
  "steering_layers": [
    9
  ],
  "heatmap_path": "/root/SafeGenAI/work/plots/part3_aie_heatmap.png",
  "top_tokens": [
    {
      "token_id": 358,
      "token": " I",
      "probability": 0.7904141545295715
    },
    {
      "token_id": 35946,
      "token": "我",
      "probability": 0.028790833428502083
    },
    {
      "token_id": 40,
      "token": "I",
      "probability": 0.016404522582888603
    },
    {
      "token_id": 10168,
      "token": "“I",
      "probability": 0.00994984619319439
    },
    {
      "token_id": 64395,
      "token": " }},\n",
      "probability": 0.005669251084327698
    },
    {
      "token_id": 28492,
      "token": " winds",
      "probability": 0.003896415466442704
    },
    {
      "token_id": 34957,
      "token": " lenses",
      "probability": 0.003230242058634758
    },
    {
      "token_id": 5318,
      "token": "_i",
      "probability": 0.003034531371667981
    },
    {
      "token_id": 66769,
      "token": "dataTable",
      "probability": 0.003034531371667981
    },
    {
      "token_id": 79592,
      "token": "ASA",
      "probability": 0.0028506785165518522
    },
    {
      "token_id": 7959,
      "token": "_I",
      "probability": 0.0026779647450894117
    },
    {
      "token_id": 9956,
      "token": " wind",
      "probability": 0.0026779647450894117
    },
    {
      "token_id": 111734,
      "token": "中国队",
      "probability": 0.002363295527175069
    },
    {
      "token_id": 125153,
      "token": "мы",
      "probability": 0.002363295527175069
    },
    {
      "token_id": 36828,
      "token": "},{\"",
      "probability": 0.0020856009796261787
    },
    {
      "token_id": 48700,
      "token": " Credits",
      "probability": 0.0020856009796261787
    },
    {
      "token_id": 82979,
      "token": "},\r\n\r\n",
      "probability": 0.0018405363662168384
    },
    {
      "token_id": 112898,
      "token": "我当时",
      "probability": 0.0017290239920839667
    },
    {
      "token_id": 25230,
      "token": " Wolf",
      "probability": 0.0017290239920839667
    },
    {
      "token_id": 7044,
      "token": "\"I",
      "probability": 0.0017290239920839667
    }
  ]
}