File size: 1,457 Bytes
bff2f94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""Abstract base class for biological verifiers."""

from abc import ABC, abstractmethod
from typing import Dict, List
from dataclasses import dataclass, field


@dataclass
class VerifierResult:
    """Outcome reported by one verifier for one completion.

    Attributes:
        score: Verifier score, expected to lie between 0.0 and 1.0.
        verifier_name: Name identifying the verifier for this result.
        details: Free-form extra information; each instance gets its own
            empty dict when none is supplied.
        applicable: False if the verifier doesn't apply; True by default.
    """

    score: float
    verifier_name: str
    details: Dict = field(default_factory=dict)
    applicable: bool = True


class BaseVerifier(ABC):
    """Common interface implemented by every biological verifier.

    A concrete verifier judges a model completion against structured
    ground truth along one specific dimension (pathway direction, factual
    accuracy, etc.). Subclasses must provide ``score``.
    """

    # Identifier that is_applicable() looks up in the list of verifier names.
    name: str = "base"

    @abstractmethod
    def score(
        self,
        prompt: str,
        completion: str,
        ground_truth: Dict,
        question_type: str,
    ) -> VerifierResult:
        """Evaluate one completion against its ground truth.

        Args:
            prompt: The original question.
            completion: The response generated by the model.
            ground_truth: Parsed ground truth dictionary.
            question_type: Type of question, used for routing logic.

        Returns:
            A VerifierResult whose score lies in [0, 1].
        """
        raise NotImplementedError

    def is_applicable(self, applicable_verifiers: List[str]) -> bool:
        """Report whether this verifier's ``name`` is among the given names."""
        own_name = self.name
        return own_name in applicable_verifiers