"""
LangGraph Node implementations for OncoAgent.

This module retains the data ingestion node (PHI cleaning + entity extraction)
and re-exports all other nodes from their dedicated modules for backward
compatibility.

Module organisation (SOTA redesign):
- agents/router.py         → Router Node (complexity classification)
- agents/corrective_rag.py → Corrective RAG Node (graded retrieval)
- agents/specialist.py     → Specialist Node (tier-adaptive reasoning)
- agents/critic.py         → Critic Node (reflexion validation)
- agents/formatter.py      → Formatter + Fallback Nodes
- agents/tools.py          → Shared vLLM client + tier calling
- agents/memory.py         → Per-patient session memory
"""
|
|
import logging
import re
from typing import Any, Dict, List, Tuple

from .state import AgentState
|
|
# Module-level logger; used to warn when PHI is detected and redacted.
logger = logging.getLogger(__name__)
|
|
|
|
| |
| |
| |
|
|
# Regexes for identifiers that are replaced with "[REDACTED]" before any
# entity extraction runs (Zero-PHI policy).
# NOTE(review): only the three shapes below are covered; names, phone numbers,
# record numbers and other date formats pass through unredacted.
_PHI_PATTERNS: List["re.Pattern[str]"] = [
    # US Social Security number, e.g. 123-45-6789.
    re.compile(r"\b\d{3}-\d{2}-\d{4}\b"),
    # Numeric date written as two digits / two digits / four digits.
    re.compile(r"\b\d{2}/\d{2}/\d{4}\b"),
    # E-mail address. The TLD class is letters only: the previous
    # ``[A-Z|a-z]`` also accepted a literal "|", so in pipe-delimited text the
    # match ran through the delimiter and swallowed the following field.
    re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"),
]
|
|
|
|
| |
| |
| |
|
|
# Ordered (substring, label) pairs for cancer-type detection. The first
# substring found in the lower-cased text wins, so a specific phrase must be
# listed before any generic keyword it contains ("small cell lung" before
# "lung"); otherwise the specific label can never be selected.
_CANCER_KEYWORDS = (
    ("breast", "Breast Cancer"),
    # All "non ... small cell" spellings must precede "small cell lung",
    # which they contain, or NSCLC would be mislabelled as small cell.
    ("non-small cell", "Non-Small Cell Lung Cancer"),
    ("non small cell", "Non-Small Cell Lung Cancer"),
    ("nonsmall cell", "Non-Small Cell Lung Cancer"),
    ("small cell lung", "Small Cell Lung Cancer"),
    ("lung", "Lung Cancer"),
    ("colon", "Colon Cancer"),
    ("colorectal", "Colorectal Cancer"),
    ("prostate", "Prostate Cancer"),
    ("pancreatic", "Pancreatic Cancer"),
    ("hepatocellular", "Hepatocellular Carcinoma"),
    ("hcc", "Hepatocellular Carcinoma"),
    ("melanoma", "Melanoma"),
    ("renal", "Renal Cell Carcinoma"),
    ("bladder", "Bladder Cancer"),
    ("ovarian", "Ovarian Cancer"),
    ("cervical", "Cervical Cancer"),
    ("thyroid", "Thyroid Cancer"),
    ("leukemia", "Leukemia"),
    ("lymphoma", "Lymphoma"),
    ("myeloma", "Multiple Myeloma"),
    ("sarcoma", "Sarcoma"),
    ("glioma", "Glioma"),
    ("glioblastoma", "Glioblastoma"),
    ("esophageal", "Esophageal Cancer"),
    ("gastric", "Gastric Cancer"),
    ("cholangiocarcinoma", "Cholangiocarcinoma"),
    ("mesothelioma", "Mesothelioma"),
    ("uterine", "Uterine Cancer"),
    ("endometrial", "Uterine Cancer"),
    # Symptom cues (English and Spanish) mapped to uterine cancer. They are
    # listed last so an explicitly named cancer type always takes priority.
    # NOTE(review): plain substring matching means any mention of "bleeding"
    # selects this label when no cancer type is named - confirm intent.
    ("menstru", "Uterine Cancer"),
    ("vaginal", "Uterine Cancer"),
    ("bleeding", "Uterine Cancer"),
    ("sangrado", "Uterine Cancer"),
    ("periods", "Uterine Cancer"),
    ("periodo", "Uterine Cancer"),
    ("postmenopau", "Uterine Cancer"),  # prefix also covers "postmenopausal"
    ("hemorragia", "Uterine Cancer"),
)

# Substrings that flag the case as urgent.
_URGENCY_KEYWORDS = (
    "urgent", "emergency", "critical", "immediate",
    "rapidly progressing", "acute", "life-threatening",
)

# "stage" followed by a Roman (I-IV) or Arabic (1-4) numeral and an optional
# sub-stage letter, e.g. "Stage IIIA" or "stage 2b".
_STAGE_RE = re.compile(r"stage\s+((?:IV|I{1,3}|[1-4])[A-C]?)\b", re.IGNORECASE)

# Compact TNM notation such as "T2N1M0" ("x" allowed in every position).
_TNM_RE = re.compile(r"\b(T[0-4x]N[0-3x]M[01x])\b", re.IGNORECASE)

# Biomarker / gene symbols, matched case-insensitively on word boundaries.
# NOTE(review): case-insensitive matching also hits ordinary words and other
# abbreviations (e.g. "met", "eGFR"); tighten if false positives matter.
_MUTATION_RE = re.compile(
    r"\b(EGFR|ALK|KRAS|BRAF|HER2|TP53|BRCA[12]|PD-?L1|ROS1|MET|RET|"
    r"NTRK|PIK3CA|MSI-?H|dMMR|FGFR[1-4]?|IDH[12]?|ERBB2|CDK[46]|"
    r"PTEN|APC|VEGF|mTOR)\b",
    re.IGNORECASE,
)

# ECOG performance status. The value must be a single digit 0-5 followed by a
# word boundary, so percentage-style scores such as "performance status 90"
# are not misread as "ECOG 9". Also accepts "ECOG PS 1" and "ECOG: 1".
_ECOG_RE = re.compile(
    r"(?:ECOG(?:\s+PS)?|performance\s+status)\s*(?:of|[:=])?\s*([0-5])\b",
    re.IGNORECASE,
)


def _redact_phi(text: str) -> Tuple[str, bool]:
    """Replace every ``_PHI_PATTERNS`` match in *text* with ``[REDACTED]``.

    Returns:
        ``(redacted_text, phi_found)`` where ``phi_found`` is True when at
        least one substitution was made.
    """
    phi_found = False
    for pattern in _PHI_PATTERNS:
        text, hits = pattern.subn("[REDACTED]", text)
        phi_found = phi_found or hits > 0
    return text, phi_found


def _extract_entities(text: str) -> Dict[str, Any]:
    """Run the rule-based extractors over already-redacted *text*."""
    extracted: Dict[str, Any] = {
        "cancer_type": "Unknown",
        "stage": "Unknown",
        "mutations": [],
        "ecog_status": "Unknown",
        "urgency": "routine",
    }
    text_lower = text.lower()

    # Cancer type: first keyword (in priority order) present in the text.
    for keyword, label in _CANCER_KEYWORDS:
        if keyword in text_lower:
            extracted["cancer_type"] = label
            break

    stage_match = _STAGE_RE.search(text)
    if stage_match:
        extracted["stage"] = f"Stage {stage_match.group(1).upper()}"

    # "tnm" is only added to the result when a TNM string is present.
    tnm_match = _TNM_RE.search(text)
    if tnm_match:
        extracted["tnm"] = tnm_match.group(1).upper()

    # De-duplicate and sort so the output is deterministic across runs.
    mutations_found = _MUTATION_RE.findall(text)
    if mutations_found:
        extracted["mutations"] = sorted({m.upper() for m in mutations_found})

    ecog_match = _ECOG_RE.search(text)
    if ecog_match:
        extracted["ecog_status"] = f"ECOG {ecog_match.group(1)}"

    if any(kw in text_lower for kw in _URGENCY_KEYWORDS):
        extracted["urgency"] = "urgent"

    return extracted


def data_ingestion_node(state: AgentState) -> Dict[str, Any]:
    """Clean the input clinical text (Zero-PHI policy) and extract
    key medical entities via rule-based heuristics.

    PHI matching ``_PHI_PATTERNS`` is replaced with ``[REDACTED]`` first; all
    extraction then runs on the redacted text only.

    Extraction includes:
    - Cancer type identification (keyword lookup, specific phrases first)
    - Stage parsing (Roman or Arabic numeral, optional sub-stage letter)
    - TNM parsing (compact form such as ``T2N1M0``; key ``tnm`` is only
      present when found)
    - Biomarker/mutation detection (upper-cased, de-duplicated, sorted)
    - Performance status detection (ECOG 0-5)
    - Urgency signals

    Args:
        state: Current LangGraph state with ``clinical_text``. A missing or
            ``None`` value is treated as an empty string.

    Returns:
        State update with ``clinical_text`` (redacted), ``extracted_entities``
        and ``phi_detected``.
    """
    raw_text: str = state.get("clinical_text") or ""

    cleaned_text, phi_found = _redact_phi(raw_text)
    if phi_found:
        logger.warning("PHI detected and redacted from clinical input.")

    return {
        "clinical_text": cleaned_text,
        "extracted_entities": _extract_entities(cleaned_text),
        "phi_detected": phi_found,
    }
|
|
|
|
| |
| |
| |
|
|
# Backward-compatibility re-exports: the nodes now live in dedicated modules
# but stay importable from here under their previous names.
# NOTE(review): these imports sit at the bottom of the module, presumably to
# avoid a circular import - confirm before moving them to the top.
from .corrective_rag import corrective_rag_node as rag_retrieval_node  # noqa: E402,F401
from .specialist import specialist_node as clinical_specialist_node  # noqa: E402,F401
from .critic import critic_node as safety_validator_node  # noqa: E402,F401
|
|