Spaces:
Sleeping
Sleeping
| """M9 — RePS: Representation-based Prompt Steering. | |
| Lightly-trained method that learns steering through representation optimisation. | |
| """ | |
| import numpy as np | |
| from src.methods.base import SteeringMethod | |
class RePS(SteeringMethod):
    """Steering method M9 (RePS), a lightly-trained approach.

    Until a direction has been stored on the instance, vector extraction
    defers to the DiffMean baseline. Training is currently a stub that
    always raises.

    NOTE(review): this file expands RePS as "Representation-based Prompt
    Steering"; the AxBench literature appears to use "Reference-free
    Preference Steering" -- confirm which expansion is intended.
    """

    def __init__(self, **kwargs):
        """Create an untrained instance.

        Keyword arguments are accepted but not used by this constructor.
        """
        # No learned direction exists yet; extract_vector() checks for None.
        self._direction: "np.ndarray | None" = None
        self._trained = False

    def name(self) -> str:
        """Return the display name of this method."""
        return "RePS"

    def method_id(self) -> str:
        """Return the catalogue identifier of this method."""
        return "M9"

    def is_training_free(self) -> bool:
        """Return False: this method is declared as requiring training."""
        return False

    def extract_vector(
        self,
        h_pos: np.ndarray,
        h_neg: np.ndarray,
        **kwargs,
    ) -> np.ndarray:
        """Return the steering vector for this method.

        If a direction has been stored on the instance it is returned as-is
        (the same array object, not a copy). Otherwise the result of
        ``DiffMean().extract_vector(h_pos, h_neg)`` is returned.

        Args:
            h_pos: Array passed through to the DiffMean fallback.
            h_neg: Array passed through to the DiffMean fallback.
            **kwargs: Accepted for interface compatibility; not forwarded.

        Returns:
            The stored direction, or the DiffMean fallback vector.
        """
        learned = self._direction
        if learned is None:
            # Fallback path. The import stays function-local as in the
            # original -- presumably to avoid an import cycle; TODO confirm.
            from src.methods.diffmean import DiffMean

            baseline = DiffMean()
            return baseline.extract_vector(h_pos, h_neg)
        return learned

    def train(self, train_data: dict) -> None:
        """Train RePS steering (not implemented yet).

        TODO: Implement representation-based prompt steering following
        AxBench catalogue.

        Args:
            train_data: Training inputs; currently unused.

        Raises:
            NotImplementedError: Always, until training is implemented.
        """
        raise NotImplementedError("RePS training not yet implemented.")