from typing import Optional
from graphgen.bases import BaseQAEvaluator, QAPair
class RewardEvaluator(BaseQAEvaluator):
    """Reward-model evaluator that scores a single question-answer pair.

    Wraps a HuggingFace sequence-classification reward model (by default
    OpenAssistant/reward-model-deberta-v3-large-v2) and exposes an async
    ``evaluate`` that returns the model's raw reward logit for one QAPair.
    """

    def __init__(
        self,
        reward_name: str = "OpenAssistant/reward-model-deberta-v3-large-v2",
        max_length: int = 2560,
        device: Optional[str] = None,
    ):
        """
        Initialize the reward evaluator.

        Args:
            reward_name: Model name or path on the HuggingFace Hub.
            max_length: Maximum token length passed to the tokenizer;
                longer inputs are truncated.
            device: Device to run the model on (e.g. "cuda", "cpu").
                If None, CUDA is used when available, otherwise CPU.

        Raises:
            RuntimeError: If the tokenizer or model cannot be loaded.
        """
        self.reward_name = reward_name
        self.max_length = max_length

        # Imported lazily so that merely importing this module does not
        # require torch/transformers to be installed.
        import torch
        from transformers import AutoModelForSequenceClassification, AutoTokenizer

        self.torch = torch
        # Auto-detect the device when the caller did not pin one.
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(reward_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(
                reward_name
            )
            self.model.to(self.device)
            # Inference mode: disable dropout etc. for deterministic scores.
            self.model.eval()
        except Exception as e:
            raise RuntimeError(
                f"Failed to load reward model '{reward_name}': {e}"
            ) from e

    async def evaluate(self, pair: QAPair) -> dict[str, float]:
        """
        Evaluate a single question-answer pair using the reward model.

        NOTE(review): the forward pass is blocking despite the ``async``
        signature; it runs on the event-loop thread.

        Args:
            pair: QAPair containing question and answer strings.

        Returns:
            A dict with a single key, ``"reward_score"``, whose value is
            the model's raw logit for the pair (higher means better).
        """
        # Encode question/answer as one sequence pair, truncated to the
        # configured maximum length.
        inputs = self.tokenizer(
            pair.question,
            pair.answer,
            return_tensors="pt",
            max_length=self.max_length,
            truncation=True,
        )
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Inference only: no gradient bookkeeping needed.
        with self.torch.no_grad():
            score = self.model(**inputs).logits[0].item()
        return {"reward_score": score}