File size: 3,681 Bytes
b6f9fa8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""
src/modules/base.py — Shared EvalResult dataclass.
Used as the standard output schema by all 4 evaluation modules.
Details shape per module is fully specified here (SRS Section 5).
"""
from __future__ import annotations

import logging
from dataclasses import dataclass, field
from typing import Any, Optional

logger = logging.getLogger(__name__)


@dataclass
class EvalResult:
    """
    Shared output schema for all evaluation modules.

    Attributes:
        module_name : Identifier string, e.g. "faithfulness"
        score       : Module score in [0.0, 1.0] — clipped automatically;
                      a NaN score is replaced by 0.0
        details     : Module-specific dict (see DETAILS SHAPES below)
        error       : None if successful; error message string if module failed
        latency_ms  : Wall-clock milliseconds for this module's execution
    """

    module_name: str
    score: float
    details: dict[str, Any] = field(default_factory=dict)
    error: Optional[str] = None
    latency_ms: int = 0

    def __post_init__(self) -> None:
        """Clip score to [0.0, 1.0] as required by SRS 4.2.

        Values already inside the range are left untouched. Values outside
        the range (including +/-inf) are clipped to the nearest bound and a
        warning is logged. A NaN score is replaced by 0.0 and a warning is
        logged.
        """
        if 0.0 <= self.score <= 1.0:
            return

        # NaN is the only value that compares unequal to itself. It must be
        # handled before clipping: every ordering comparison with NaN is
        # False, so max(0.0, min(1.0, nan)) evaluates to 1.0 and would turn
        # an undefined score into a perfect one.
        if self.score != self.score:
            logger.warning(
                "%s: score is NaN, setting to 0.0.",
                self.module_name,
            )
            self.score = 0.0
            return

        logger.warning(
            "%s: score %.4f out of [0,1], clipping.",
            self.module_name,
            self.score,
        )
        self.score = max(0.0, min(1.0, self.score))

    # -------------------------------------------------------------------------
    # DETAILS SHAPE REFERENCE (SRS Section 5)
    # -------------------------------------------------------------------------
    #
    # faithfulness.details:
    # {
    #   "total_claims": int,
    #   "entailed_count": int,
    #   "neutral_count": int,
    #   "contradicted_count": int,
    #   "claims": [
    #     {
    #       "claim": str,
    #       "status": "ENTAILED" | "NEUTRAL" | "CONTRADICTED",
    #       "best_chunk_id": str,      # chunk with highest NLI score
    #       "nli_score": float
    #     }
    #   ]
    # }
    #
    # entity_verifier.details:
    # {
    #   "total_entities": int,
    #   "verified_count": int,
    #   "flagged_count": int,
    #   "entities": [
    #     {
    #       "entity": str,
    #       "type": "DRUG" | "DOSAGE" | "CONDITION" | "PROCEDURE",
    #       "status": "VERIFIED" | "FLAGGED" | "NOT_FOUND",
    #       "severity": "CRITICAL" | "MODERATE" | "MINOR" | null,
    #       "answer_value": str,
    #       "context_value": str | null,
    #       "rxcui": str | null
    #     }
    #   ]
    # }
    #
    # source_credibility.details:
    # {
    #   "method_used": "keyword" | "metadata",
    #   "chunks": [
    #     {
    #       "chunk_id": str,
    #       "tier": int,             # 1–5
    #       "tier_weight": float,
    #       "pub_type": str,
    #       "title": str,
    #       "matched_keyword": str | null
    #     }
    #   ]
    # }
    #
    # contradiction.details:
    # {
    #   "total_sentences": int,
    #   "checked_pairs": int,
    #   "contradicted_pairs": int,
    #   "pairs": [
    #     {
    #       "sentence_a": str,
    #       "sentence_b": str,
    #       "contradiction_score": float,
    #       "flagged": bool
    #     }
    #   ]
    # }
    #
    # aggregator.details:
    # {
    #   "weights_used": {
    #     "faithfulness": float,
    #     "entity_accuracy": float,
    #     "source_credibility": float,
    #     "contradiction_risk": float
    #   },
    #   "weighted_composite": float,
    #   "component_contributions": {
    #     "faithfulness_contribution": float,
    #     "entity_contribution": float,
    #     "source_contribution": float,
    #     "contradiction_contribution": float
    #   }
    # }