File size: 2,125 Bytes
7566ac3
 
0bd1b0f
7566ac3
0bd1b0f
 
7566ac3
 
 
 
 
 
 
 
 
 
 
 
0bd1b0f
7566ac3
 
 
 
 
 
 
 
 
 
0bd1b0f
7566ac3
 
 
 
 
 
 
 
 
 
 
0bd1b0f
 
 
7566ac3
0bd1b0f
7566ac3
 
0bd1b0f
7566ac3
 
0bd1b0f
7566ac3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0bd1b0f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from typing import Optional

from graphgen.bases import BaseQAEvaluator, QAPair


class RewardEvaluator(BaseQAEvaluator):
    """
    Scores a single QAPair with a HuggingFace sequence-classification
    reward model.

    The tokenizer and model are loaded once in the constructor; each call to
    ``evaluate`` then performs one forward pass for one question-answer pair.
    """

    def __init__(
        self,
        reward_name: str = "OpenAssistant/reward-model-deberta-v3-large-v2",
        max_length: int = 2560,
        device: Optional[str] = None,
    ):
        """
        Load the reward model and tokenizer and place the model on a device.

        Args:
            reward_name: Model name or path passed to ``from_pretrained``.
            max_length: Token limit used when truncating inputs in ``evaluate``.
            device: Device string for the model. A falsy value (``None`` or
                an empty string) selects "cuda" when available, else "cpu".

        Raises:
            RuntimeError: If loading the tokenizer or model, or moving the
                model to the device, fails. The original error is chained.
        """
        # Imported here rather than at module level, so torch/transformers are
        # only required once an evaluator is actually constructed.
        import torch
        from transformers import AutoModelForSequenceClassification, AutoTokenizer

        self.reward_name = reward_name
        self.max_length = max_length
        # Kept on the instance so evaluate() can reach torch.no_grad().
        self.torch = torch

        # Honour an explicit device; otherwise prefer CUDA when it is usable.
        if device:
            self.device = device
        elif torch.cuda.is_available():
            self.device = "cuda"
        else:
            self.device = "cpu"

        try:
            tokenizer = AutoTokenizer.from_pretrained(reward_name)
            model = AutoModelForSequenceClassification.from_pretrained(reward_name)
            model.to(self.device)
            # Switch to evaluation mode before any scoring happens.
            model.eval()
        except Exception as e:
            raise RuntimeError(
                f"Failed to load reward model '{reward_name}': {e}"
            ) from e

        self.tokenizer = tokenizer
        self.model = model

    async def evaluate(self, pair: QAPair) -> dict[str, float]:
        """
        Score one question-answer pair with the reward model.

        The question and answer are tokenized together as a text pair,
        truncated to ``self.max_length`` tokens, and passed through the model
        with gradient tracking disabled. Although declared ``async``, the body
        contains no ``await``; the forward pass runs synchronously.

        Args:
            pair: QAPair whose ``question`` and ``answer`` attributes are scored.

        Returns:
            A dict with the single key "reward_score" mapped to the score.
        """
        encoded = self.tokenizer(
            pair.question,
            pair.answer,
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )

        # Move every encoded tensor to the device the model was placed on.
        model_inputs = {}
        for field_name, tensor in encoded.items():
            model_inputs[field_name] = tensor.to(self.device)

        with self.torch.no_grad():
            logits = self.model(**model_inputs).logits
            # .item() needs exactly one value in logits[0] -- assumes the
            # model emits a single logit per input; confirm for other models.
            reward = logits[0].item()

        return {"reward_score": reward}