stylsteer-vlm / src /methods /linear_probe.py
abka03's picture
Deploy StyleSteer-VLM demo
e6f24ae verified
"""M5 — LinearProbe: Logistic regression weight vector as steering direction."""
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from src.methods.base import SteeringMethod
class LinearProbe(SteeringMethod):
    """LinearProbe (M5): logistic-regression weight vector as steering direction.

    A binary logistic-regression classifier is fit to separate positive from
    negative activations. Its weight vector, mapped back to the unscaled
    feature space and normalized, is returned as the direction.
    """

    @property
    def name(self) -> str:
        """Return the display name of this method."""
        return "LinearProbe"

    @property
    def method_id(self) -> str:
        """Return the short identifier of this method."""
        return "M5"

    def extract_vector(
        self,
        h_pos: np.ndarray,
        h_neg: np.ndarray,
        **kwargs,
    ) -> np.ndarray:
        """Compute the logistic-regression weight vector.

        Args:
            h_pos: (N_pos, d) positive activations (labelled 1).
            h_neg: (N_neg, d) negative activations (labelled 0).
            **kwargs: Optional settings. Recognized keys:
                C: inverse regularization strength passed to
                    ``LogisticRegression`` (default 1.0).
                max_iter: iteration cap for the solver (default 5000).
                Other keys are ignored.

        Returns:
            (d,) weight vector direction, expressed in the original
            (unscaled) feature space and divided by its L2 norm.

        Raises:
            ValueError: If either input is not 2-D, if the feature
                dimensions differ, or if either class has no samples.
        """
        h_pos = np.asarray(h_pos)
        h_neg = np.asarray(h_neg)

        # Fail early with messages that name the offending argument instead
        # of letting NumPy / scikit-learn raise a less specific ValueError.
        if h_pos.ndim != 2 or h_neg.ndim != 2:
            raise ValueError(
                "h_pos and h_neg must be 2-D arrays of shape (N, d); "
                f"got shapes {h_pos.shape} and {h_neg.shape}"
            )
        if h_pos.shape[1] != h_neg.shape[1]:
            raise ValueError(
                "h_pos and h_neg must share the feature dimension; "
                f"got {h_pos.shape[1]} and {h_neg.shape[1]}"
            )
        if h_pos.shape[0] == 0 or h_neg.shape[0] == 0:
            raise ValueError(
                "LinearProbe needs at least one positive and one negative "
                f"sample; got {h_pos.shape[0]} positive and "
                f"{h_neg.shape[0]} negative"
            )

        X = np.concatenate([h_pos, h_neg], axis=0)
        y = np.concatenate([
            np.ones(len(h_pos)),
            np.zeros(len(h_neg)),
        ])

        # Standardize features before fitting the classifier.
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        C = kwargs.get("C", 1.0)
        max_iter = kwargs.get("max_iter", 5000)
        lr = LogisticRegression(C=C, max_iter=max_iter, solver="lbfgs")
        lr.fit(X_scaled, y)

        # Get weight vector in original space: the classifier saw
        # (x - mean) / scale, so its coefficients are divided by the
        # per-feature scale to apply to raw activations.
        w = lr.coef_[0] / scaler.scale_

        # Normalize to unit length; the epsilon guards against division by
        # zero when the fitted weights are all zero.
        w = w / (np.linalg.norm(w) + 1e-8)
        return w