Spaces:

abka03
/

stylsteer-vlm

Running

Deploy StyleSteer-VLM demo

e6f24ae verified 4 days ago

1.38 kB

	"""M3 — PCADir: PC1 of the concatenated [H_pos; H_neg] activation matrix."""

	import numpy as np
	from sklearn.decomposition import PCA

	from src.methods.base import SteeringMethod


	class PCADir(SteeringMethod):
	    """PCADir — first principal component of the pooled contrastive activations.

	    The steering vector is PC1 of the row-wise concatenation of the positive
	    and negative activation matrices, with its sign aligned to the difference
	    of the two class means.
	    """

	    @property
	    def name(self) -> str:
	        """Human-readable name of this method."""
	        return "PCADir"

	    @property
	    def method_id(self) -> str:
	        """Short identifier of this method."""
	        return "M3"

	    def extract_vector(
	        self,
	        h_pos: np.ndarray,
	        h_neg: np.ndarray,
	        **kwargs,
	    ) -> np.ndarray:
	        """Compute PC1 of the concatenated [H_pos; H_neg] activation matrix.

	        PC1 is the direction of maximum variance of the pooled activations.
	        It lines up with the axis separating the positive and negative
	        distributions only when the between-class shift is the dominant
	        source of variance; this method does not check that.

	        Args:
	            h_pos: (N_pos, d) positive activations
	            h_neg: (N_neg, d) negative activations. Only the feature
	                dimension d has to match h_pos, because the two matrices
	                are stacked along axis 0.
	            **kwargs: Optional ``random_state`` (int, RandomState or None)
	                forwarded to scikit-learn's PCA. Defaults to 0. Pass None
	                to fall back to NumPy's global random state. All other
	                keys are ignored.

	        Returns:
	            (d,) first principal component direction (unit norm as produced
	            by scikit-learn), oriented so that its dot product with
	            mean(h_pos) - mean(h_neg) is non-negative.
	        """
	        # With svd_solver="auto", scikit-learn may select the randomized
	        # solver for large inputs. Without a fixed seed that solver draws
	        # from the global NumPy RNG, so the extracted vector could differ
	        # between runs. Pin the seed unless the caller overrides it.
	        random_state = kwargs.get("random_state", 0)

	        # Concatenate and find PC1 of the combined activations
	        H = np.concatenate([h_pos, h_neg], axis=0)  # (N_pos + N_neg, d)
	        pca = PCA(n_components=1, random_state=random_state)
	        pca.fit(H)
	        v = pca.components_[0]  # (d,)

	        # Orient: the sign of a principal component is arbitrary, so flip it
	        # to ensure a positive dot product with the mean difference.
	        mean_diff = h_pos.mean(axis=0) - h_neg.mean(axis=0)
	        if np.dot(v, mean_diff) < 0:
	            v = -v

	        return v