Spaces:
Running
Running
File size: 1,425 Bytes
# e6f24ae (presumably the commit hash shown in the file-viewer header; the line-number gutter was extraction residue)
"""M5 — LinearProbe: Logistic regression weight vector as steering direction."""
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from src.methods.base import SteeringMethod
class LinearProbe(SteeringMethod):
    """Steering method M5: direction taken from a logistic-regression probe."""

    @property
    def name(self) -> str:
        """Human-readable name of this method."""
        return "LinearProbe"

    @property
    def method_id(self) -> str:
        """Short identifier of this method."""
        return "M5"

    def extract_vector(
        self,
        h_pos: np.ndarray,
        h_neg: np.ndarray,
        **kwargs,
    ) -> np.ndarray:
        """Fit a linear probe on the two activation sets and return its direction.

        Positive rows are labelled 1 and negative rows 0. The features are
        standardised before fitting, and the learned coefficients are mapped
        back to the unscaled feature space before being normalised.

        Args:
            h_pos: (N_pos, d) positive activations
            h_neg: (N_neg, d) negative activations
            **kwargs: Optional ``C`` (default 1.0) and ``max_iter``
                (default 5000), passed to ``LogisticRegression``. Any other
                keys are ignored.

        Returns:
            (d,) weight vector divided by its L2 norm (plus 1e-8).
        """
        n_pos, n_neg = len(h_pos), len(h_neg)
        features = np.concatenate([h_pos, h_neg], axis=0)
        labels = np.concatenate([np.ones(n_pos), np.zeros(n_neg)])

        standardizer = StandardScaler()
        scaled_features = standardizer.fit_transform(features)

        probe = LogisticRegression(
            C=kwargs.get("C", 1.0),
            max_iter=kwargs.get("max_iter", 5000),
            solver="lbfgs",
        )
        probe.fit(scaled_features, labels)

        # A coefficient on a standardised feature, divided by that feature's
        # scale, is the equivalent coefficient on the raw feature.
        direction = probe.coef_[0] / standardizer.scale_
        length = np.linalg.norm(direction)
        # The epsilon keeps the division finite if the probe weights are all zero.
        return direction / (length + 1e-8)
|