Spaces:

joytheslothh
/

MediRAG-API

Running

App Files Files Community

MediRAG-API / src /modules /__init__.py

joytheslothh

deploy: clean build

b6f9fa8 1 day ago

raw

history blame contribute delete

3.68 kB

	"""
	src/modules/base.py — Shared EvalResult dataclass.
	Used as the standard output schema by all 4 evaluation modules.
	The shape of each module's `details` dict is specified in the reference
	comment at the end of this file (SRS Section 5).
	"""
	from __future__ import annotations

	import logging
	from dataclasses import dataclass, field
	from typing import Any, Optional

	logger = logging.getLogger(__name__)


	@dataclass
	class EvalResult:
	    """Shared output schema for all evaluation modules.

	    Attributes:
	        module_name: Identifier string, e.g. "faithfulness".
	        score: Module score in [0.0, 1.0]. Out-of-range values are clipped
	            and NaN is replaced by 0.0 in ``__post_init__``.
	        details: Module-specific dict (see the DETAILS SHAPE REFERENCE
	            comment below this class).
	        error: None if successful; error message string if the module failed.
	        latency_ms: Wall-clock milliseconds for this module's execution.
	    """

	    module_name: str
	    score: float
	    details: dict[str, Any] = field(default_factory=dict)
	    error: Optional[str] = None
	    latency_ms: int = 0

	    def __post_init__(self) -> None:
	        """Normalise ``score`` into [0.0, 1.0] as required by SRS 4.2.

	        * A NaN score is replaced by 0.0, a warning is logged, and ``error``
	          is populated if the caller did not already supply one.
	        * Any other value outside [0.0, 1.0] (including +/-inf) is clipped
	          to the nearest bound and a warning is logged.

	        Raises:
	            TypeError: If ``score`` cannot be compared with a float
	                (e.g. ``None``); this comes from the range comparison.
	        """
	        # NaN is the only value that is not equal to itself. It needs its own
	        # branch: every ordering comparison with NaN is False, so
	        # max(0.0, min(1.0, nan)) evaluates to 1.0 and a failed computation
	        # would be reported as a perfect score.
	        if self.score != self.score:
	            logger.warning(
	                "%s: score is NaN, replacing with 0.0.",
	                self.module_name,
	            )
	            self.score = 0.0
	            if self.error is None:
	                # Do not overwrite a more specific message from the module.
	                self.error = "score was NaN; replaced with 0.0"
	            return

	        if not (0.0 <= self.score <= 1.0):
	            logger.warning(
	                "%s: score %.4f out of [0,1], clipping.",
	                self.module_name,
	                self.score,
	            )
	            self.score = max(0.0, min(1.0, self.score))

	# -------------------------------------------------------------------------
	# DETAILS SHAPE REFERENCE (SRS Section 5)
	# -------------------------------------------------------------------------
	#
	# faithfulness.details:
	# {
	# "total_claims": int,
	# "entailed_count": int,
	# "neutral_count": int,
	# "contradicted_count": int,
	# "claims": [
	# {
	# "claim": str,
	# "status": "ENTAILED" | "NEUTRAL" | "CONTRADICTED",
	# "best_chunk_id": str, # chunk with highest NLI score
	# "nli_score": float
	# }
	# ]
	# }
	#
	# entity_verifier.details:
	# {
	# "total_entities": int,
	# "verified_count": int,
	# "flagged_count": int,
	# "entities": [
	# {
	# "entity": str,
	# "type": "DRUG" | "DOSAGE" | "CONDITION" | "PROCEDURE",
	# "status": "VERIFIED" | "FLAGGED" | "NOT_FOUND",
	# "severity": "CRITICAL" | "MODERATE" | "MINOR" | null,
	# "answer_value": str,
	# "context_value": str | null,
	# "rxcui": str | null
	# }
	# ]
	# }
	#
	# source_credibility.details:
	# {
	# "method_used": "keyword" | "metadata",
	# "chunks": [
	# {
	# "chunk_id": str,
	# "tier": int, # 1–5
	# "tier_weight": float,
	# "pub_type": str,
	# "title": str,
	# "matched_keyword": str | null
	# }
	# ]
	# }
	#
	# contradiction.details:
	# {
	# "total_sentences": int,
	# "checked_pairs": int,
	# "contradicted_pairs": int,
	# "pairs": [
	# {
	# "sentence_a": str,
	# "sentence_b": str,
	# "contradiction_score": float,
	# "flagged": bool
	# }
	# ]
	# }
	#
	# aggregator.details:
	# {
	# "weights_used": {
	# "faithfulness": float,
	# "entity_accuracy": float,
	# "source_credibility": float,
	# "contradiction_risk": float
	# },
	# "weighted_composite": float,
	# "component_contributions": {
	# "faithfulness_contribution": float,
	# "entity_contribution": float,
	# "source_contribution": float,
	# "contradiction_contribution": float
	# }
	# }