# graphgen/models/evaluator/qa/reward_evaluator.py

from typing import Optional

from graphgen.bases import BaseEvaluator, QAPair


class RewardEvaluator(BaseEvaluator):
    """
    Reward Model Evaluator for single QAPair evaluation.
    """
    def __init__(
        self,
        reward_name: str = "OpenAssistant/reward-model-deberta-v3-large-v2",
        max_length: int = 2560,
        device: Optional[str] = None,
    ):
        """
        Initialize the reward evaluator.

        Args:
            reward_name: Model name or path on the Hugging Face Hub.
            max_length: Maximum token length for the model.
            device: Device to run the model on. If None, auto-detect CUDA/CPU.
        """
        self.reward_name = reward_name
        self.max_length = max_length

        # Import heavy dependencies lazily so this module can be imported
        # without torch/transformers installed.
        import torch
        from transformers import AutoModelForSequenceClassification, AutoTokenizer

        self.torch = torch

        # Set device (auto-detect if not specified).
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(reward_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(
                reward_name
            )
            self.model.to(self.device)
            # Inference-only: disable dropout and other training-time behavior.
            self.model.eval()
        except Exception as e:
            raise RuntimeError(
                f"Failed to load reward model '{reward_name}': {e}"
            ) from e
    def evaluate(self, pair: QAPair) -> float:
        """
        Evaluate a single question-answer pair using the reward model.

        Args:
            pair: QAPair containing question and answer strings.

        Returns:
            The reward model's scalar score as a float; higher scores indicate
            an answer the model ranks as a better response to the question.
        """
        # Tokenize question and answer as a sequence pair, truncating to the
        # model's maximum length.
        inputs = self.tokenizer(
            pair.question,
            pair.answer,
            return_tensors="pt",
            max_length=self.max_length,
            truncation=True,
        )
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Forward pass without gradients; the single logit is the reward score.
        with self.torch.no_grad():
            score = self.model(**inputs).logits[0].item()
        return score
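

# A minimal usage sketch, assuming QAPair can be constructed from `question`
# and `answer` keyword arguments (inferred from the evaluate() docstring, not
# verified against graphgen.bases). Running this downloads the reward model
# from the Hugging Face Hub and scores one pair on CPU.
if __name__ == "__main__":
    sample = QAPair(
        question="What is the capital of France?",
        answer="The capital of France is Paris.",
    )
    evaluator = RewardEvaluator(device="cpu")
    print(f"Reward score: {evaluator.evaluate(sample):.4f}")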