Spaces:

abka03
/

stylsteer-vlm

Running

Deploy StyleSteer-VLM demo

e6f24ae verified 4 days ago

1.07 kB

	"""M2 — DiffMean: Contrastive Activation Addition (CAA).

	Steering vector = mean(H_pos) - mean(H_neg)
	The simplest and most widely-used training-free steering method.
	"""

	from typing import Optional

	import numpy as np

	from src.methods.base import SteeringMethod


	class DiffMean(SteeringMethod):
	    """DiffMean steering method (Contrastive Activation Addition).

	    The steering vector is the difference between the mean of the positive
	    activations and the mean of the negative activations.
	    """

	    @property
	    def name(self) -> str:
	        """Human-readable name of this method."""
	        return "DiffMean"

	    @property
	    def method_id(self) -> str:
	        """Short identifier of this method."""
	        return "M2"

	    def extract_vector(
	        self,
	        h_pos: np.ndarray,
	        h_neg: np.ndarray,
	        **kwargs,
	    ) -> np.ndarray:
	        """Compute the steering vector as mean(H_pos) - mean(H_neg).

	        The two inputs must share the same feature dimension (axis 1); the
	        number of rows (axis 0) may differ, since each array is averaged
	        over its own rows independently.

	        Args:
	            h_pos: (N_pos, d) positive activations.
	            h_neg: (N_neg, d) negative activations.
	            **kwargs: Accepted and ignored by this method.
	                NOTE(review): presumably kept for signature parity with
	                the SteeringMethod base class; confirm against the base.

	        Returns:
	            (d,) steering direction.

	        Raises:
	            AssertionError: If the feature dimensions of ``h_pos`` and
	                ``h_neg`` differ.
	            IndexError: If either input has fewer than two dimensions.
	        """
	        # Explicit check instead of an ``assert`` statement: asserts are
	        # stripped under ``python -O``, which would let mismatched inputs
	        # fall through to a far less readable broadcasting error. The
	        # exception type stays AssertionError so existing callers and tests
	        # keep working.
	        if h_pos.shape[1] != h_neg.shape[1]:
	            raise AssertionError(
	                f"Dimension mismatch: h_pos={h_pos.shape}, h_neg={h_neg.shape}"
	            )

	        # Average over the sample axis, then take the contrastive difference.
	        return h_pos.mean(axis=0) - h_neg.mean(axis=0)