""" intervention_engine.py — CognitivePulse Given a patient's SHAP-based risk contribution profile, ranks their modifiable risk factors by combined impact and practical actionability, and maps each to the relevant literature domain for downstream RAG retrieval. The core logic: priority_score = |SHAP contribution| × actionability_weight where actionability_weight reflects both medical tractability (e.g. hypertension is very treatable) and evidence quality for brain-health outcomes. """ from __future__ import annotations from data_loader import FEATURE_META # Maps each modifiable feature to: (literature_domain, actionability_weight, norm direction) # norm_direction: "lower_better" or "higher_better" — used to determine if a value # is adverse vs protective relative to population norms. MODIFIABLE_FEATURE_MAP = { "BMI": ("diet_exercise", 0.8, "lower_better"), "Smoking": ("smoking_cessation", 1.0, "lower_better"), "AlcoholConsumption": ("alcohol_moderation", 0.7, "lower_better"), "PhysicalActivity": ("exercise", 1.0, "higher_better"), "DietQuality": ("nutrition", 0.9, "higher_better"), "SleepQuality": ("sleep", 0.9, "higher_better"), "CardiovascularDisease": ("cardiovascular", 0.8, "lower_better"), "Diabetes": ("metabolic_health", 0.8, "lower_better"), "Depression": ("mental_health", 0.9, "lower_better"), "Hypertension": ("cardiovascular", 1.0, "lower_better"), "SystolicBP": ("cardiovascular", 1.0, "lower_better"), "DiastolicBP": ("cardiovascular", 0.9, "lower_better"), "CholesterolTotal": ("cardiovascular", 0.9, "lower_better"), "CholesterolLDL": ("cardiovascular", 1.0, "lower_better"), "CholesterolHDL": ("cardiovascular", 0.8, "higher_better"), "CholesterolTriglycerides": ("cardiovascular", 0.8, "lower_better"), } # Domain → literature tags (must match domains used in rag_engine.py corpus) DOMAIN_TO_LITERATURE = { "exercise": ["exercise_cognitive_reserve"], "nutrition": ["diet_nutrition"], "sleep": ["sleep_glymphatic"], "cardiovascular": ["cardiovascular_risk"], "metabolic_health": ["metabolic_health"], "mental_health": ["mental_health_social"], "diet_exercise": ["diet_nutrition", "exercise_cognitive_reserve"], "smoking_cessation": ["cardiovascular_risk"], "alcohol_moderation": ["lifestyle_factors"], } # Human-readable intervention summaries (shown before RAG coaching text) INTERVENTION_SUMMARY = { "exercise": "Increasing structured physical activity", "nutrition": "Improving diet quality (Mediterranean / MIND dietary patterns)", "sleep": "Improving sleep quality and duration", "cardiovascular": "Managing cardiovascular risk factors (BP / cholesterol)", "metabolic_health": "Managing metabolic health (blood glucose / insulin resistance)", "mental_health": "Addressing depression and social engagement", "diet_exercise": "Combined diet and exercise program", "smoking_cessation": "Smoking cessation", "alcohol_moderation": "Moderating alcohol consumption", } def _is_adverse(feature: str, value, norm_direction: str) -> bool: """ Returns True if the feature value represents an adverse (risk-elevating) level relative to the norm direction. Used to filter out features that are already at protective levels. """ from data_loader import REFERENCE_RANGES if feature not in REFERENCE_RANGES: # Binary features: adverse if positive and lower_better, or zero and higher_better if norm_direction == "lower_better": return float(value) > 0.5 else: return float(value) < 0.5 ranges = REFERENCE_RANGES[feature] v = float(value) if norm_direction == "lower_better": return v > ranges["optimal"][1] else: return v < ranges["optimal"][0] def rank_interventions(shap_contributions: dict, patient: dict, n: int = 4) -> list: """ Returns the top n prioritized, modifiable interventions for a patient. Each entry contains: - feature: raw feature name - label: human-readable label - domain: literature domain for RAG retrieval - literature_tags: list of corpus tags - intervention_summary: one-line description - priority_score: combined impact × actionability - shap_value: raw SHAP contribution - patient_value: the patient's actual value for context """ candidates = [] seen_domains = set() for feature, (domain, actionability, norm_dir) in MODIFIABLE_FEATURE_MAP.items(): if feature not in shap_contributions: continue shap_val = shap_contributions[feature] patient_val = patient.get(feature, None) # Only flag features that are both risk-elevating (positive SHAP) AND # at an adverse level — no point flagging e.g. "eat better" when diet is # already excellent. if shap_val <= 0: continue if patient_val is not None and not _is_adverse(feature, patient_val, norm_dir): continue priority = abs(shap_val) * actionability # De-duplicate domains (no benefit listing SystolicBP + DiastolicBP separately) domain_key = domain if domain_key in seen_domains: # Keep whichever has higher priority score existing = next((c for c in candidates if c["domain"] == domain_key), None) if existing and priority > existing["priority_score"]: candidates.remove(existing) seen_domains.discard(domain_key) else: continue candidates.append({ "feature": feature, "label": FEATURE_META.get(feature, {}).get("label", feature), "domain": domain_key, "literature_tags": DOMAIN_TO_LITERATURE.get(domain_key, [domain_key]), "intervention_summary": INTERVENTION_SUMMARY.get(domain_key, domain_key), "priority_score": round(priority, 4), "shap_value": round(shap_val, 4), "patient_value": patient_val, }) seen_domains.add(domain_key) # Sort by priority descending candidates.sort(key=lambda x: x["priority_score"], reverse=True) return candidates[:n] def build_coach_brief(patient: dict, risk_result: dict, interventions: list) -> str: """ Builds a structured pre-session brief for a BetterBrain-style health coach, summarising the patient's risk profile and the top intervention priorities. This is passed as context to the RAG coaching generation step. """ lines = [ f"PATIENT RISK SCORE: {risk_result['risk_score']}/100 ({risk_result['risk_band'].upper()} risk band)", f"Risk probability: {risk_result['risk_probability']:.1%}", "", "TOP RISK DRIVERS (SHAP-identified):", ] for d in risk_result.get("top_drivers", [])[:5]: mod = "modifiable" if d["modifiable"] else "non-modifiable" lines.append(f" • {d['label']}: SHAP={d['shap_value']:+.3f} — {d['direction']} ({mod})") lines += ["", "PRIORITIZED INTERVENTION AREAS:"] for i, iv in enumerate(interventions, 1): lines.append(f" {i}. {iv['intervention_summary']} (priority score: {iv['priority_score']:.3f})") if iv["patient_value"] is not None: lines.append(f" Patient value: {iv['patient_value']} | Feature: {iv['label']}") lines += [ "", "COACHING SESSION FOCUS: Ground recommendations in the intervention areas above.", "All claims must cite retrieved research evidence. Do not make unsupported assertions.", ] return "\n".join(lines) if __name__ == "__main__": # Smoke test sample_shap = { "SystolicBP": 0.845, "DietQuality": 0.626, "SleepQuality": 0.446, "CholesterolLDL": 0.460, "PhysicalActivity": -0.279, "MMSE": -0.940, "FamilyHistoryAlzheimers": 0.313, "Forgetfulness": 0.555, "Depression": 0.0, "Smoking": -0.025, } sample_patient = { "SystolicBP": 148, "DietQuality": 5.0, "SleepQuality": 6.0, "CholesterolLDL": 158, "PhysicalActivity": 2.5, "Depression": 0, "Smoking": 0, } sample_risk = {"risk_score": 85.1, "risk_band": "high", "risk_probability": 0.851, "top_drivers": [{"label": "MMSE Score", "shap_value": -0.94, "direction": "decreases risk", "modifiable": False}]} ivs = rank_interventions(sample_shap, sample_patient) import json print(json.dumps(ivs, indent=2)) print("\n--- COACH BRIEF ---") print(build_coach_brief(sample_patient, sample_risk, ivs))