Spaces:

joytheslothh
/

MediRAG-API

Running

App Files Files Community

MediRAG-API / src /modules /source_credibility.py

joytheslothh

deploy: clean build

b6f9fa8 2 days ago

raw

history blame contribute delete

7.8 kB

	"""
	FR-14: src/modules/source_credibility.py — Module 3: Source Credibility Scoring
	=================================================================================
	Scores the credibility of retrieved source documents based on their publication
	type / evidence tier.

	Tier weights (SRS Section 6.3):
	clinical_guideline → 1.00 (Tier 1 — highest authority)
	drug_label → 0.90 (FDA-approved drug labels — authoritative regulatory source)
	systematic_review → 0.85 (Tier 2)
	research_abstract → 0.70 (Tier 3 — PubMedQA default)
	review_article → 0.60 (Tier 4)
	clinical_case → 0.50 (Tier 5)
	unknown / other → 0.30 (fallback)

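	Detection (first rule that applies wins):
	1. Use the 'tier_type' field if it names a known tier (set by embedder.py)
	2. Otherwise map an exact 'pub_type' value (e.g. "meta-analysis") to its tier
	3. Otherwise fall back to keyword matching in pub_type / title text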

	Score = arithmetic mean of the tier weights across all retrieved chunks (every chunk counts equally).

	Each chunk must be a dict with at minimum:
	{"text": str, "metadata": {"tier_type": str, "pub_type": str, "title": str}}
	or the simpler form accepted by the retriever:
	{"text": str, "source": str, "tier_type": str, "title": str}
	"""
	from __future__ import annotations

	import logging
	import re
	import time

	from src.modules.base import EvalResult

	# Module-level logger, named after this module's import path via __name__.
	logger = logging.getLogger(__name__)

	# ---------------------------------------------------------------------------
	# Evidence tier weights
	# ---------------------------------------------------------------------------

	# Credibility weight assigned to each tier_type. "unknown" doubles as the
	# fallback weight for any tier_type that is not listed here.
	TIER_WEIGHTS: dict[str, float] = dict(
	    clinical_guideline=1.00,
	    systematic_review=0.85,
	    drug_label=0.90,  # FDA-approved drug labels — authoritative regulatory source
	    research_abstract=0.70,
	    review_article=0.60,
	    clinical_case=0.50,
	    unknown=0.30,
	)

	# Ordered keyword → tier_type table for fallback text matching.
	# Patterns are tried top to bottom and the first one that matches wins, so
	# more specific phrases ("systematic review") must stay above the generic
	# ones they contain ("review").
	# The alternatives are separated by a bare "|": an escaped "\|" would match
	# a literal pipe character and the pattern would never hit real text.
	_KEYWORD_MAP: list[tuple[re.Pattern[str], str]] = [
	    (re.compile(r"\b(?:guideline|clinical practice|recommendation|consensus)\b", re.I), "clinical_guideline"),
	    (re.compile(r"\b(?:systematic review|meta.?analysis)\b", re.I), "systematic_review"),
	    # RCT / controlled trial → highest single-study evidence tier
	    (re.compile(r"\b(?:randomized|randomised|controlled trial|rct|clinical trial)\b", re.I), "clinical_guideline"),
	    # FDA drug labels
	    (re.compile(r"\b(?:fda|drug label|prescribing information|package insert|dailymed)\b", re.I), "drug_label"),
	    (re.compile(r"\b(?:review|overview)\b", re.I), "review_article"),
	    (re.compile(r"\b(?:case report|case study|clinical case)\b", re.I), "clinical_case"),
	    (re.compile(r"\b(?:abstract|research article|original article|journal)\b", re.I), "research_abstract"),
	]


	# Exact pub_type values (stripped, lower-cased) that map straight to a tier.
	# Needed because underscore-separated values such as "research_abstract"
	# give the word-boundary keyword regexes nothing to anchor on.
	# Built once at import time rather than on every classification call.
	_PUB_TYPE_DIRECT: dict[str, str] = {
	    "research_abstract": "research_abstract",
	    "abstract": "research_abstract",
	    "systematic_review": "systematic_review",
	    "systematic review": "systematic_review",
	    "meta_analysis": "systematic_review",
	    "meta-analysis": "systematic_review",
	    "drug_label": "drug_label",
	    "drug label": "drug_label",
	    "clinical_guideline": "clinical_guideline",
	    "clinical guideline": "clinical_guideline",
	    "guideline": "clinical_guideline",
	    "review_article": "review_article",
	    "review article": "review_article",
	    "review": "review_article",
	    "clinical_case": "clinical_case",
	    "case_report": "clinical_case",
	    "case report": "clinical_case",
	}


	def _classify_tier(chunk: dict) -> tuple[str, str | None]:
	    """
	    Return (tier_type, matched_keyword) for a single retrieved chunk dict.

	    Each field is read from the top level of the chunk first and from its
	    nested "metadata" dict second. A missing, None, or non-dict "metadata"
	    value is treated as empty instead of raising.

	    Priority 1: explicit tier_type field that is a key of TIER_WEIGHTS
	    Priority 2: pub_type value (stripped, lower-cased) found in _PUB_TYPE_DIRECT
	    Priority 3: first _KEYWORD_MAP regex that matches "<pub_type> <title>"

	    Args:
	        chunk : Retrieved chunk dict.

	    Returns:
	        (tier_type, matched_keyword). matched_keyword is the matched text when
	        priority 3 decided the tier and None otherwise. ("unknown", None) is
	        returned when no rule applies.
	    """
	    metadata = chunk.get("metadata")
	    if not isinstance(metadata, dict):
	        # Covers {"metadata": None}, where .get("metadata", {}) would return None.
	        metadata = {}

	    # Priority 1: explicit tier_type already set (e.g., by embedder.py).
	    # NOTE: an explicit "unknown" is a TIER_WEIGHTS key too, so such a chunk
	    # is returned here and never reaches the pub_type / keyword fallbacks.
	    tier = chunk.get("tier_type") or metadata.get("tier_type")
	    # The isinstance guard keeps an unhashable value (list/dict) from raising
	    # TypeError in the membership test; it simply falls through.
	    if isinstance(tier, str) and tier in TIER_WEIGHTS:
	        return tier, None

	    # Priority 2: direct pub_type value lookup.
	    pub_type_raw = str(
	        chunk.get("pub_type") or metadata.get("pub_type") or ""
	    ).strip().lower()
	    direct_tier = _PUB_TYPE_DIRECT.get(pub_type_raw)
	    if direct_tier is not None:
	        return direct_tier, None

	    # Priority 3: keyword regex on pub_type + title text; first match wins.
	    title = str(chunk.get("title") or metadata.get("title") or "")
	    text_to_search = f"{pub_type_raw} {title}"
	    for pattern, matched_tier in _KEYWORD_MAP:
	        m = pattern.search(text_to_search)
	        if m:
	            return matched_tier, m.group(0)

	    return "unknown", None


	# ---------------------------------------------------------------------------
	# Public API
	# ---------------------------------------------------------------------------

	# Display tier number (1 = most authoritative) for each known tier_type.
	# drug_label has no number of its own in the 1-5 scheme; its weight (0.90)
	# lies between tier 1 (1.00) and tier 2 (0.85), so it is shown as tier 2
	# rather than falling through to 6 ("unclassified").
	# NOTE(review): confirm the intended display tier for drug_label against the SRS.
	_TIER_NUMBERS: dict[str, int] = {
	    "clinical_guideline": 1,
	    "systematic_review": 2,
	    "drug_label": 2,
	    "research_abstract": 3,
	    "review_article": 4,
	    "clinical_case": 5,
	}
	_UNCLASSIFIED_TIER = 6  # reported for "unknown" and any unlisted tier_type


	def score_source_credibility(
	    retrieved_chunks: list[dict],
	) -> EvalResult:
	    """
	    Score the credibility of a set of retrieved source documents.

	    Every chunk is classified with _classify_tier() and mapped to its
	    TIER_WEIGHTS entry; the score is the arithmetic mean of those weights
	    (each chunk counts equally).

	    Args:
	        retrieved_chunks : List of chunk dicts. Each is read for the fields
	                           'tier_type', 'pub_type', 'title' and 'chunk_id',
	                           at the top level first, then under 'metadata'.

	    Returns:
	        EvalResult with module_name="source_credibility". For an empty input
	        the score is 0.0, error is "No chunks provided" and latency_ms is 0.
	        Otherwise details contains:
	          method_used     : "keyword" if at least one chunk was classified by
	                            a keyword regex, else "metadata" (this also covers
	                            chunks that ended up as "unknown")
	          chunk_count     : number of chunks scored
	          avg_tier_weight : the score rounded to 4 decimals
	          chunks          : one dict per chunk (chunk_id, tier, tier_type,
	                            tier_weight, pub_type, title, matched_keyword)
	    """
	    t0 = time.perf_counter()

	    if not retrieved_chunks:
	        return EvalResult(
	            module_name="source_credibility",
	            score=0.0,
	            details={"chunks": [], "method_used": "none"},
	            error="No chunks provided",
	            latency_ms=0,
	        )

	    chunk_details: list[dict] = []
	    weights: list[float] = []
	    method_used = "metadata"  # assume metadata-first; may switch to keyword

	    for i, chunk in enumerate(retrieved_chunks):
	        tier_type, matched_kw = _classify_tier(chunk)
	        weight = TIER_WEIGHTS.get(tier_type, TIER_WEIGHTS["unknown"])
	        weights.append(weight)

	        if matched_kw:
	            method_used = "keyword"

	        # A present-but-None (or non-dict) "metadata" is treated as empty so
	        # the nested lookups below cannot raise AttributeError.
	        metadata = chunk.get("metadata")
	        if not isinstance(metadata, dict):
	            metadata = {}

	        # str() keeps the 80-character truncation safe for non-string titles.
	        title = str(chunk.get("title") or metadata.get("title") or "")

	        chunk_details.append(
	            {
	                "chunk_id": chunk.get("chunk_id") or metadata.get("chunk_id") or f"chunk_{i}",
	                "tier": _TIER_NUMBERS.get(tier_type, _UNCLASSIFIED_TIER),
	                "tier_type": tier_type,
	                "tier_weight": round(weight, 2),
	                "pub_type": chunk.get("pub_type") or metadata.get("pub_type") or "",
	                "title": title[:80],
	                "matched_keyword": matched_kw,
	            }
	        )

	    # weights holds one entry per chunk and the empty case returned above,
	    # so the division is always defined.
	    score = sum(weights) / len(weights)

	    details = {
	        "method_used": method_used,
	        "chunk_count": len(retrieved_chunks),
	        "avg_tier_weight": round(score, 4),
	        "chunks": chunk_details,
	    }

	    latency_ms = int((time.perf_counter() - t0) * 1000)
	    logger.info(
	        "Source credibility: %.3f (avg tier weight over %d chunks) in %d ms",
	        score, len(retrieved_chunks), latency_ms,
	    )
	    return EvalResult(
	        module_name="source_credibility",
	        score=score,
	        details=details,
	        latency_ms=latency_ms,
	    )