# Spaces: T0X1N / Agentic-RagBot (Running) - File size: 7,807 Bytes

"""
MediGuard AI RAG-Helper
Biomarker analysis and validation utilities
"""

import json
from pathlib import Path

from src.state import BiomarkerFlag, SafetyAlert


class BiomarkerValidator:
    """Validates biomarker values against reference ranges"""

    def __init__(self, reference_file: str = "config/biomarker_references.json"):
        """Load biomarker reference ranges from JSON file"""
        ref_path = Path(__file__).parent.parent / reference_file
        with open(ref_path, encoding="utf-8") as f:
            self.references = json.load(f)["biomarkers"]

    def validate_biomarker(
        self, name: str, value: float, gender: str | None = None, threshold_pct: float = 0.0
    ) -> BiomarkerFlag:
        """
        Validate a single biomarker value against reference ranges.

        Args:
            name: Biomarker name
            value: Measured value
            gender: "male" or "female" (for gender-specific ranges)
            threshold_pct: Only flag LOW/HIGH if deviation from boundary exceeds this fraction (e.g. 0.15 = 15%)

        Returns:
            BiomarkerFlag object with status and warnings
        """
        if name not in self.references:
            return BiomarkerFlag(
                name=name,
                value=value,
                unit="unknown",
                status="UNKNOWN",
                reference_range="No reference data available",
                warning=f"No reference range found for {name}",
            )

        ref = self.references[name]
        unit = ref["unit"]

        # Handle gender-specific ranges
        if ref.get("gender_specific", False) and gender:
            if gender.lower() in ["male", "m"]:
                normal = ref["normal_range"]["male"]
            elif gender.lower() in ["female", "f"]:
                normal = ref["normal_range"]["female"]
            else:
                normal = ref["normal_range"]
        else:
            normal = ref["normal_range"]

        min_val = normal.get("min", 0)
        max_val = normal.get("max", float("inf"))
        critical_low = ref.get("critical_low")
        critical_high = ref.get("critical_high")

        # Determine status
        status = "NORMAL"
        warning = None

        # Check critical values first (threshold_pct does not suppress critical alerts)
        if critical_low and value < critical_low:
            status = "CRITICAL_LOW"
            warning = f"CRITICAL: {name} is {value} {unit}, below critical threshold of {critical_low} {unit}. {ref['clinical_significance'].get('low', 'Seek immediate medical attention.')}"
        elif critical_high and value > critical_high:
            status = "CRITICAL_HIGH"
            warning = f"CRITICAL: {name} is {value} {unit}, above critical threshold of {critical_high} {unit}. {ref['clinical_significance'].get('high', 'Seek immediate medical attention.')}"
        elif value < min_val:
            # Only flag if deviation exceeds threshold_pct fraction of the boundary
            deviation = (min_val - value) / min_val if min_val != 0 else 1.0
            if deviation > threshold_pct:
                status = "LOW"
                warning = f"{name} is {value} {unit}, below normal range ({min_val}-{max_val} {unit}). {ref['clinical_significance'].get('low', '')}"
        elif value > max_val:
            deviation = (value - max_val) / max_val if max_val != 0 else 1.0
            if deviation > threshold_pct:
                status = "HIGH"
                warning = f"{name} is {value} {unit}, above normal range ({min_val}-{max_val} {unit}). {ref['clinical_significance'].get('high', '')}"

        reference_range = f"{min_val}-{max_val} {unit}"

        return BiomarkerFlag(
            name=name, value=value, unit=unit, status=status, reference_range=reference_range, warning=warning
        )

    def validate_all(
        self, biomarkers: dict[str, float], gender: str | None = None, threshold_pct: float = 0.0
    ) -> tuple[list[BiomarkerFlag], list[SafetyAlert]]:
        """
        Validate all biomarker values.

        Args:
            biomarkers: Dict of biomarker name -> value
            gender: "male" or "female" (for gender-specific ranges)
            threshold_pct: Only flag LOW/HIGH if deviation exceeds this fraction (e.g. 0.15 = 15%)

        Returns:
            Tuple of (biomarker_flags, safety_alerts)
        """
        flags = []
        alerts = []

        for name, value in biomarkers.items():
            flag = self.validate_biomarker(name, value, gender, threshold_pct)
            flags.append(flag)

            # Generate safety alerts for critical values
            if flag.status in ["CRITICAL_LOW", "CRITICAL_HIGH"]:
                alerts.append(
                    SafetyAlert(
                        severity="CRITICAL",
                        biomarker=name,
                        message=flag.warning or f"{name} at critical level",
                        action="SEEK IMMEDIATE MEDICAL ATTENTION",
                    )
                )
            elif flag.status in ["LOW", "HIGH"]:
                severity = "HIGH" if "severe" in (flag.warning or "").lower() else "MEDIUM"
                alerts.append(
                    SafetyAlert(
                        severity=severity,
                        biomarker=name,
                        message=flag.warning or f"{name} out of normal range",
                        action="Consult with healthcare provider",
                    )
                )

        return flags, alerts

    def get_biomarker_info(self, name: str) -> dict | None:
        """Get reference information for a biomarker"""
        return self.references.get(name)

    def expected_biomarker_count(self) -> int:
        """Return expected number of biomarkers from reference ranges."""
        return len(self.references)

    def get_disease_relevant_biomarkers(self, disease: str) -> list[str]:
        """
        Get list of biomarkers most relevant to a specific disease.

        This is a simplified mapping - in production, this would be more sophisticated.
        """
        disease_map = {
            "Diabetes": ["Glucose", "HbA1c", "Insulin", "BMI", "Triglycerides", "HDL Cholesterol", "LDL Cholesterol"],
            "Type 2 Diabetes": [
                "Glucose",
                "HbA1c",
                "Insulin",
                "BMI",
                "Triglycerides",
                "HDL Cholesterol",
                "LDL Cholesterol",
            ],
            "Type 1 Diabetes": [
                "Glucose",
                "HbA1c",
                "Insulin",
                "BMI",
                "Triglycerides",
                "HDL Cholesterol",
                "LDL Cholesterol",
            ],
            "Anemia": [
                "Hemoglobin",
                "Red Blood Cells",
                "Hematocrit",
                "Mean Corpuscular Volume",
                "Mean Corpuscular Hemoglobin",
                "Mean Corpuscular Hemoglobin Concentration",
            ],
            "Thrombocytopenia": ["Platelets", "White Blood Cells", "Hemoglobin"],
            "Thalassemia": [
                "Hemoglobin",
                "Red Blood Cells",
                "Mean Corpuscular Volume",
                "Mean Corpuscular Hemoglobin",
                "Hematocrit",
            ],
            "Heart Disease": [
                "Cholesterol",
                "LDL Cholesterol",
                "HDL Cholesterol",
                "Triglycerides",
                "Troponin",
                "C-reactive Protein",
                "Systolic Blood Pressure",
                "Diastolic Blood Pressure",
                "Heart Rate",
                "BMI",
            ],
        }

        return disease_map.get(disease, [])