Spaces:
Running
Running
| from typing import Optional | |
| from graphgen.bases import BaseEvaluator, QAPair | |
class RewardEvaluator(BaseEvaluator):
    """
    Score a single QAPair with a HuggingFace sequence-classification reward model.

    The tokenizer and model are loaded once in the constructor; ``evaluate``
    then performs one forward pass per question-answer pair.
    """

    def __init__(
        self,
        reward_name: str = "OpenAssistant/reward-model-deberta-v3-large-v2",
        max_length: int = 2560,
        device: Optional[str] = None,
    ):
        """
        Load the tokenizer and reward model and place the model on a device.

        Args:
            reward_name: Model name or path passed to ``from_pretrained``.
            max_length: Token limit handed to the tokenizer; longer inputs
                are truncated.
            device: Device string for the model and its inputs. When left
                unset (``None`` or empty), "cuda" is chosen if
                ``torch.cuda.is_available()`` reports True, otherwise "cpu".

        Raises:
            RuntimeError: If loading the tokenizer or model, or moving the
                model to the device, fails. The original exception is chained.
        """
        # Imported inside the constructor, so importing this module does not
        # itself import torch or transformers.
        import torch
        from transformers import AutoModelForSequenceClassification, AutoTokenizer

        # Keep a handle on torch so evaluate() can reach no_grad().
        self.torch = torch
        self.reward_name = reward_name
        self.max_length = max_length

        # An explicit device wins; CUDA availability is only probed otherwise.
        if device:
            self.device = device
        elif torch.cuda.is_available():
            self.device = "cuda"
        else:
            self.device = "cpu"

        try:
            tokenizer = AutoTokenizer.from_pretrained(reward_name)
            model = AutoModelForSequenceClassification.from_pretrained(reward_name)
            model.to(self.device)
            model.eval()
        except Exception as e:
            raise RuntimeError(f"Failed to load reward model '{reward_name}': {e}") from e

        self.tokenizer = tokenizer
        self.model = model

    def evaluate(self, pair: QAPair) -> float:
        """
        Run the reward model on one question-answer pair and return its score.

        Args:
            pair: QAPair whose ``question`` and ``answer`` are tokenized
                together as a two-segment input.

        Returns:
            The first row of the model's logits converted to a Python float.
            ``.item()`` only succeeds when that row holds a single value.
        """
        encoded = self.tokenizer(
            pair.question,
            pair.answer,
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )

        # Every tensor the tokenizer produced must live on the model's device.
        model_inputs = {}
        for name, tensor in encoded.items():
            model_inputs[name] = tensor.to(self.device)

        # Inference only: no gradient tracking is needed.
        with self.torch.no_grad():
            output = self.model(**model_inputs)
            first_row = output.logits[0]
            return first_row.item()