Spaces:
Runtime error
Runtime error
frankaging
committed on
Commit
·
e3ab52c
1
Parent(s):
98bf5cc
o1 impl
Browse files
app.py
CHANGED
|
@@ -25,19 +25,15 @@ def load_jsonl(jsonl_path):
|
|
| 25 |
return jsonl_data
|
| 26 |
|
| 27 |
class Steer(pv.SourcelessIntervention):
|
|
|
|
| 28 |
def __init__(self, **kwargs):
|
| 29 |
super().__init__(**kwargs, keep_last_dim=True)
|
| 30 |
-
self.proj = torch.nn.Linear(
|
| 31 |
-
|
| 32 |
def forward(self, base, source=None, subspaces=None):
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
idx = sp["idx"]
|
| 37 |
-
mag = sp["internal_mag"] # scaled by 50
|
| 38 |
-
steering_vec = mag * self.proj.weight[idx].unsqueeze(dim=0)
|
| 39 |
-
steer_vec = steer_vec + steering_vec
|
| 40 |
-
return steer_vec
|
| 41 |
|
| 42 |
# Check GPU
|
| 43 |
if not torch.cuda.is_available():
|
|
@@ -67,13 +63,9 @@ if torch.cuda.is_available():
|
|
| 67 |
steer = Steer(embed_dim=params.shape[0], latent_dim=params.shape[1])
|
| 68 |
steer.proj.weight.data = params.float()
|
| 69 |
|
| 70 |
-
pv_model = pv.IntervenableModel(
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
"intervention": steer,
|
| 74 |
-
},
|
| 75 |
-
model=model,
|
| 76 |
-
)
|
| 77 |
|
| 78 |
terminators = [tokenizer.eos_token_id] if tokenizer else []
|
| 79 |
|
|
@@ -171,7 +163,7 @@ with gr.Blocks(css="style.css") as demo:
|
|
| 171 |
# Pre-populate with a random concept if available
|
| 172 |
default_subspaces = []
|
| 173 |
if pv_model and concept_list:
|
| 174 |
-
default_concept =
|
| 175 |
default_subspaces = [{
|
| 176 |
"text": default_concept,
|
| 177 |
"idx": concept_id_map[default_concept],
|
|
@@ -191,7 +183,7 @@ with gr.Blocks(css="style.css") as demo:
|
|
| 191 |
)
|
| 192 |
# Right side: concept management
|
| 193 |
with gr.Column(scale=3):
|
| 194 |
-
gr.Markdown("
|
| 195 |
search_box = gr.Textbox(
|
| 196 |
label="Search concepts",
|
| 197 |
placeholder="e.g. 'time travel'"
|
|
|
|
| 25 |
return jsonl_data
|
| 26 |
|
| 27 |
class Steer(pv.SourcelessIntervention):
    """Steer model via activation addition.

    A single row of ``self.proj.weight`` is selected, scaled, and added to
    the intercepted activation. The weight matrix is only used as a lookup
    table of steering directions; ``self.proj`` is never applied as a
    linear map in this class.
    """

    def __init__(self, **kwargs):
        """Build the intervention and its table of steering directions.

        Args:
            **kwargs: Forwarded to ``pv.SourcelessIntervention``. Must
                contain ``latent_dim``. ``self.embed_dim`` is read right
                after the parent constructor runs, so the parent is
                presumably setting it from an ``embed_dim`` kwarg —
                TODO confirm against pyvene.
        """
        super().__init__(**kwargs, keep_last_dim=True)
        self.proj = torch.nn.Linear(
            self.embed_dim, kwargs["latent_dim"], bias=False)

    def forward(self, base, source=None, subspaces=None):
        """Return ``base`` shifted along the selected steering direction.

        Args:
            base: Activation tensor to steer.
            source: Unused; kept for the intervention call signature.
            subspaces: Mapping with ``"idx"`` (row of ``self.proj.weight``
                to use) and ``"mag"`` (scale applied to that row). When
                ``None``, no steering is applied.

        Returns:
            ``base`` plus the scaled direction, in ``base``'s dtype and on
            ``base``'s device.
        """
        # The parameter defaults to None; treat that as "no steering"
        # instead of failing on None["mag"].
        if subspaces is None:
            return base
        direction = self.proj.weight[subspaces["idx"]].unsqueeze(dim=0)
        # Create the magnitude on the weights' device so non-scalar
        # magnitudes do not trip a CPU/GPU mismatch.
        magnitude = torch.as_tensor(
            subspaces["mag"], device=direction.device)
        steering_vec = magnitude * direction
        # Align with the activation: adding a float32 vector to a
        # half-precision activation would otherwise promote the result to
        # float32 and hand the wrong dtype to the layers that follow.
        return base + steering_vec.to(device=base.device, dtype=base.dtype)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
# Check GPU
|
| 39 |
if not torch.cuda.is_available():
|
|
|
|
| 63 |
steer = Steer(embed_dim=params.shape[0], latent_dim=params.shape[1])
|
| 64 |
steer.proj.weight.data = params.float()
|
| 65 |
|
| 66 |
+
pv_model = pv.IntervenableModel({
|
| 67 |
+
"component": f"model.layers[20].output",
|
| 68 |
+
"intervention": steer}, model=model)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
terminators = [tokenizer.eos_token_id] if tokenizer else []
|
| 71 |
|
|
|
|
| 163 |
# Pre-populate with a random concept if available
|
| 164 |
default_subspaces = []
|
| 165 |
if pv_model and concept_list:
|
| 166 |
+
default_concept = "words related to time travel and its consequences"
|
| 167 |
default_subspaces = [{
|
| 168 |
"text": default_concept,
|
| 169 |
"idx": concept_id_map[default_concept],
|
|
|
|
| 183 |
)
|
| 184 |
# Right side: concept management
|
| 185 |
with gr.Column(scale=3):
|
| 186 |
+
gr.Markdown("# Steering Concepts")
|
| 187 |
search_box = gr.Textbox(
|
| 188 |
label="Search concepts",
|
| 189 |
placeholder="e.g. 'time travel'"
|