# Spaces: kshamaasuresh / CognitivePulse
# Running
# File size: 9,002 Bytes
# 14a5ab4

"""
intervention_engine.py — CognitivePulse

Given a patient's SHAP-based risk contribution profile, ranks their modifiable
risk factors by combined impact and practical actionability, and maps each to
the relevant literature domain for downstream RAG retrieval.

The core logic:
  priority_score = |SHAP contribution| × actionability_weight

where actionability_weight reflects both medical tractability (e.g. hypertension
is very treatable) and evidence quality for brain-health outcomes.
"""

from __future__ import annotations

from data_loader import FEATURE_META

# Maps each modifiable feature to a 3-tuple:
#   (literature_domain, actionability_weight, norm_direction)
#
# - literature_domain: key into DOMAIN_TO_LITERATURE / INTERVENTION_SUMMARY.
# - actionability_weight: multiplier applied to the SHAP contribution when
#   computing priority_score (the values below range from 0.7 to 1.0).
# - norm_direction: "lower_better" or "higher_better" — used to determine if a
#   value is adverse vs protective relative to population norms.
#
# Entry order matters: rank_interventions() walks this dict in insertion order
# and keeps a single feature per domain, so when two features of the same
# domain score equally the earlier entry is the one that is kept.
MODIFIABLE_FEATURE_MAP = {
    "BMI":                   ("diet_exercise",        0.8, "lower_better"),
    "Smoking":               ("smoking_cessation",    1.0, "lower_better"),
    "AlcoholConsumption":    ("alcohol_moderation",   0.7, "lower_better"),
    "PhysicalActivity":      ("exercise",             1.0, "higher_better"),
    "DietQuality":           ("nutrition",            0.9, "higher_better"),
    "SleepQuality":          ("sleep",                0.9, "higher_better"),
    "CardiovascularDisease": ("cardiovascular",       0.8, "lower_better"),
    "Diabetes":              ("metabolic_health",     0.8, "lower_better"),
    "Depression":            ("mental_health",        0.9, "lower_better"),
    "Hypertension":          ("cardiovascular",       1.0, "lower_better"),
    "SystolicBP":            ("cardiovascular",       1.0, "lower_better"),
    "DiastolicBP":           ("cardiovascular",       0.9, "lower_better"),
    "CholesterolTotal":      ("cardiovascular",       0.9, "lower_better"),
    "CholesterolLDL":        ("cardiovascular",       1.0, "lower_better"),
    "CholesterolHDL":        ("cardiovascular",       0.8, "higher_better"),
    "CholesterolTriglycerides": ("cardiovascular",    0.8, "lower_better"),
}

# Domain → literature tags (must match domains used in rag_engine.py corpus).
# Keys are the literature_domain strings used in MODIFIABLE_FEATURE_MAP; each
# value is the list of corpus tags that rank_interventions() attaches to an
# intervention in that domain as `literature_tags`. One domain may carry
# several tags (see "diet_exercise").
DOMAIN_TO_LITERATURE = {
    "exercise":           ["exercise_cognitive_reserve"],
    "nutrition":          ["diet_nutrition"],
    "sleep":              ["sleep_glymphatic"],
    "cardiovascular":     ["cardiovascular_risk"],
    "metabolic_health":   ["metabolic_health"],
    "mental_health":      ["mental_health_social"],
    "diet_exercise":      ["diet_nutrition", "exercise_cognitive_reserve"],
    "smoking_cessation":  ["cardiovascular_risk"],
    "alcohol_moderation": ["lifestyle_factors"],
}

# Human-readable intervention summaries (shown before RAG coaching text).
# Keyed by the same literature_domain strings as DOMAIN_TO_LITERATURE; the text
# is copied verbatim into each ranked entry as `intervention_summary` by
# rank_interventions() and printed by build_coach_brief().
INTERVENTION_SUMMARY = {
    "exercise":           "Increasing structured physical activity",
    "nutrition":          "Improving diet quality (Mediterranean / MIND dietary patterns)",
    "sleep":              "Improving sleep quality and duration",
    "cardiovascular":     "Managing cardiovascular risk factors (BP / cholesterol)",
    "metabolic_health":   "Managing metabolic health (blood glucose / insulin resistance)",
    "mental_health":      "Addressing depression and social engagement",
    "diet_exercise":      "Combined diet and exercise program",
    "smoking_cessation":  "Smoking cessation",
    "alcohol_moderation": "Moderating alcohol consumption",
}


def _is_adverse(feature: str, value, norm_direction: str) -> bool:
    """
    Decide whether ``value`` sits on the risk-elevating side for ``feature``.

    Callers use this to drop features whose current level is already
    protective, so that no intervention is suggested for them.

    Args:
        feature: Feature name, looked up in ``data_loader.REFERENCE_RANGES``.
        value: The patient's value; converted with ``float()`` before comparing.
        norm_direction: ``"lower_better"`` selects the "too high is adverse"
            test; any other string is treated as higher-is-better.

    Returns:
        True when the value is adverse, False otherwise.
    """
    from data_loader import REFERENCE_RANGES

    lower_is_better = norm_direction == "lower_better"

    if feature in REFERENCE_RANGES:
        spec = REFERENCE_RANGES[feature]
        level = float(value)
        # "optimal" is indexed at [0] and [1] — presumably a (low, high) pair;
        # confirm against data_loader.
        if lower_is_better:
            return level > spec["optimal"][1]
        return level < spec["optimal"][0]

    # No reference range: treat the feature as a binary flag split at 0.5.
    # Adverse means "present" when lower is better, "absent" otherwise.
    flag = float(value)
    if lower_is_better:
        return flag > 0.5
    return flag < 0.5


def rank_interventions(shap_contributions: dict, patient: dict, n: int = 4) -> list:
    """
    Return the top ``n`` prioritized, modifiable interventions for a patient.

    A feature from MODIFIABLE_FEATURE_MAP becomes a candidate only when it has
    a strictly positive SHAP contribution and, if the patient record holds a
    value for it, that value is adverse according to ``_is_adverse``. Features
    with no recorded patient value are kept on the strength of SHAP alone.
    At most one candidate is kept per literature domain (the one with the
    highest priority), so e.g. SystolicBP and DiastolicBP are not both listed.

    Args:
        shap_contributions: Mapping of feature name -> SHAP contribution.
        patient: Mapping of feature name -> the patient's value; features may
            be missing.
        n: Maximum number of interventions to return. A negative value yields
            an empty list.

    Returns:
        A list of dicts sorted by ``priority_score`` descending. Each entry
        contains:
          - feature: raw feature name
          - label: human-readable label
          - domain: literature domain for RAG retrieval
          - literature_tags: list of corpus tags
          - intervention_summary: one-line description
          - priority_score: combined impact × actionability (rounded to 4 dp)
          - shap_value: raw SHAP contribution (rounded to 4 dp)
          - patient_value: the patient's actual value for context
    """
    # Best candidate seen so far for each domain: domain -> (raw priority, entry).
    # The raw (unrounded) priority is kept next to the entry so that domain
    # collisions are decided on exact values, not on the rounded display score.
    best_by_domain: dict[str, tuple[float, dict]] = {}

    for feature, (domain, actionability, norm_dir) in MODIFIABLE_FEATURE_MAP.items():
        if feature not in shap_contributions:
            continue

        shap_val = shap_contributions[feature]
        patient_val = patient.get(feature)

        # Only flag features that are both risk-elevating (positive SHAP) AND
        # at an adverse level — no point flagging e.g. "eat better" when diet is
        # already excellent.
        if shap_val <= 0:
            continue
        if patient_val is not None and not _is_adverse(feature, patient_val, norm_dir):
            continue

        # shap_val is positive here, so this equals |SHAP| × actionability.
        priority = shap_val * actionability

        # De-duplicate domains: a later feature replaces the incumbent only if
        # it scores strictly higher; on a tie the earlier feature stays.
        incumbent = best_by_domain.get(domain)
        if incumbent is not None and not priority > incumbent[0]:
            continue
        # Drop and re-insert so a replacement moves to the end of the dict,
        # which keeps the pre-sort order (and thus tie order) stable.
        best_by_domain.pop(domain, None)

        best_by_domain[domain] = (priority, {
            "feature": feature,
            "label": FEATURE_META.get(feature, {}).get("label", feature),
            "domain": domain,
            "literature_tags": DOMAIN_TO_LITERATURE.get(domain, [domain]),
            "intervention_summary": INTERVENTION_SUMMARY.get(domain, domain),
            "priority_score": round(priority, 4),
            "shap_value": round(shap_val, 4),
            "patient_value": patient_val,
        })

    candidates = [entry for _, entry in best_by_domain.values()]

    # Sort by priority descending (stable, so equal scores keep insertion order).
    candidates.sort(key=lambda entry: entry["priority_score"], reverse=True)

    # A negative n would slice from the end ("all but the last |n|"), which is
    # never what "top n" means — return nothing instead.
    if n is not None and n < 0:
        return []
    return candidates[:n]


def build_coach_brief(patient: dict, risk_result: dict, interventions: list) -> str:
    """
    Assemble the plain-text pre-session brief for a BetterBrain-style health
    coach. The brief is passed as context to the RAG coaching generation step.

    Args:
        patient: Patient record. Accepted for interface symmetry; the body does
            not read it.
        risk_result: Must provide ``risk_score``, ``risk_band`` and
            ``risk_probability``. An optional ``top_drivers`` list supplies
            dicts with ``label``, ``shap_value``, ``direction`` and
            ``modifiable``; only the first five are shown.
        interventions: Ranked entries providing ``intervention_summary``,
            ``priority_score``, ``patient_value`` and ``label``.

    Returns:
        The brief as a single newline-joined string.
    """
    brief = []

    # --- Headline risk figures -------------------------------------------
    brief.append(f"PATIENT RISK SCORE: {risk_result['risk_score']}/100 ({risk_result['risk_band'].upper()} risk band)")
    brief.append(f"Risk probability: {risk_result['risk_probability']:.1%}")
    brief.extend(("", "TOP RISK DRIVERS (SHAP-identified):"))

    # --- Up to five SHAP drivers -----------------------------------------
    leading_drivers = risk_result.get("top_drivers", [])[:5]
    for driver in leading_drivers:
        if driver["modifiable"]:
            tag = "modifiable"
        else:
            tag = "non-modifiable"
        brief.append(f"  • {driver['label']}: SHAP={driver['shap_value']:+.3f} — {driver['direction']} ({tag})")

    # --- Numbered intervention list --------------------------------------
    brief.extend(("", "PRIORITIZED INTERVENTION AREAS:"))
    for rank, item in enumerate(interventions, start=1):
        brief.append(f"  {rank}. {item['intervention_summary']} (priority score: {item['priority_score']:.3f})")
        # The context line is only meaningful when a value was recorded.
        if item["patient_value"] is None:
            continue
        brief.append(f"     Patient value: {item['patient_value']} | Feature: {item['label']}")

    # --- Fixed instructions for the generation step ----------------------
    brief.extend((
        "",
        "COACHING SESSION FOCUS: Ground recommendations in the intervention areas above.",
        "All claims must cite retrieved research evidence. Do not make unsupported assertions.",
    ))
    return "\n".join(brief)


if __name__ == "__main__":
    # Smoke test: rank a hand-written SHAP profile and print both the ranked
    # interventions (as JSON) and the resulting coach brief.
    import json

    demo_shap = {
        "SystolicBP": 0.845,
        "DietQuality": 0.626,
        "SleepQuality": 0.446,
        "CholesterolLDL": 0.460,
        "PhysicalActivity": -0.279,
        "MMSE": -0.940,
        "FamilyHistoryAlzheimers": 0.313,
        "Forgetfulness": 0.555,
        "Depression": 0.0,
        "Smoking": -0.025,
    }
    demo_patient = {
        "SystolicBP": 148,
        "DietQuality": 5.0,
        "SleepQuality": 6.0,
        "CholesterolLDL": 158,
        "PhysicalActivity": 2.5,
        "Depression": 0,
        "Smoking": 0,
    }
    demo_risk = {
        "risk_score": 85.1,
        "risk_band": "high",
        "risk_probability": 0.851,
        "top_drivers": [
            {
                "label": "MMSE Score",
                "shap_value": -0.94,
                "direction": "decreases risk",
                "modifiable": False,
            },
        ],
    }

    ranked = rank_interventions(demo_shap, demo_patient)
    print(json.dumps(ranked, indent=2))
    print("\n--- COACH BRIEF ---")
    print(build_coach_brief(demo_patient, demo_risk, ranked))