Spaces:

ArchCoder
/

medintake-ai

Sleeping

App Files Files Community

priyansh-saxena1 committed on Apr 25

Commit

284dfa9

1 Parent(s): 808ef75

feat : add dual agent architecture

Browse files

Files changed (5) hide show

app/graph.py +176 -374
app/llm.py +122 -17
app/main.py +2 -2
app/schemas.py +16 -11
tests/test_e2e.py +54 -165

app/graph.py CHANGED Viewed

@@ -1,403 +1,200 @@
 from typing import Optional, TypedDict, Annotated
 from langgraph.graph import StateGraph, START, END
 from langgraph.checkpoint.memory import MemorySaver
-import os
-import re
-_MOCK = lambda: os.environ.get("MOCK_LLM", "true").lower() == "true"
-SYSTEM_PROMPT = """
-You are a clinical intake assistant.
-Rules:
-- Ask exactly ONE question at a time
-- Keep responses under 20 words
-- Be clear and direct
-- No explanations unless asked
-"""
-def _ask(prompt: str) -> str:
-    from app.llm import get_llm
-    llm = get_llm()
-    try:
-        return llm.ask(prompt, system=SYSTEM_PROMPT)
-    except TypeError:
-        return llm.ask(prompt)
 def add_messages(left: list[dict], right: list[dict]) -> list[dict]:
     return left + right
 class IntakeState(TypedDict):
     messages: Annotated[list[dict], add_messages]
-    chief_complaint: str
-    hpi: dict
-    ros: dict[str, list[str]]
     current_node: str
     clinical_brief: Optional[dict]
-    ros_systems: list[str]
-    ros_current_index: int
-    ros_pending_system: Optional[str]
-    last_processed_message_index: int
-    vague_retry_field: Optional[str]
-HPI_FIELDS = ["onset", "location", "duration", "character", "severity", "aggravating", "relieving"]
-# Questions are templated — {cc} will be replaced with chief complaint
-HPI_QUESTIONS = {
-    "onset": "When did {cc} start?",
-    "location": "Where exactly do you feel {cc}?",
-    "duration": "Is {cc} constant or does it come and go? How long does each episode last?",
-    "character": "How would you describe {cc} — sharp, dull, pressure, burning?",
-    "severity": "On a 1–10 scale, how severe is your {cc} right now?",
-    "aggravating": "Does anything make {cc} worse, like activity or certain foods?",
-    "relieving": "What helps relieve your {cc}?"
-}
-HPI_FIELD_CONTEXT = {
-    "onset": "when your symptoms first started",
-    "location": "where exactly you feel it",
-    "duration": "how long each episode lasts",
-    "character": "what the pain feels like",
-    "severity": "pain severity (1-10)",
-    "aggravating": "what makes symptoms worse",
-    "relieving": "what relieves symptoms",
-}
-CC_KEYWORDS_TO_ROS = {
-    "chest": ["cardiac", "respiratory", "gi"],
-    "pain": ["cardiac", "respiratory", "gi"],
-    "headache": ["neuro", "ent", "vision"],
-    "head": ["neuro", "ent", "vision"],
-    "breath": ["respiratory", "cardiac"],
-    "shortness": ["respiratory", "cardiac"],
-    "cough": ["respiratory", "ent"],
-    "dizzy": ["neuro", "cardiac"],
-    "nausea": ["gi", "constitutional"],
-    "vomiting": ["gi", "constitutional"],
-}
-DEFAULT_ROS = ["constitutional", "cardiac", "respiratory"]
-ROS_SYSTEM_QUESTIONS = {
-    "cardiac": "Any palpitations, fluttering, or swelling in your legs or ankles?",
-    "respiratory": "Any shortness of breath, wheezing, or cough?",
-    "gi": "Any nausea, vomiting, heartburn, or abdominal pain?",
-    "neuro": "Any headaches, dizziness, numbness, or vision changes?",
-    "ent": "Any ear pain, sore throat, or sinus pressure?",
-    "vision": "Any blurry vision, double vision, or eye pain?",
-    "constitutional": "Any fever, chills, unexplained weight loss, or fatigue?",
-}
-def get_relevant_ros_systems(cc: str) -> list[str]:
-    cc_lower = cc.lower()
-    seen = []
-    for keyword, systems in CC_KEYWORDS_TO_ROS.items():
-        if keyword in cc_lower:
-            for s in systems:
-                if s not in seen:
-                    seen.append(s)
-    return seen if seen else DEFAULT_ROS
-def _fmt_question(field: str, cc: str) -> str:
-    """Format an HPI question, injecting the chief complaint naturally."""
-    q = HPI_QUESTIONS[field]
-    cc_short = cc.split()[0:4]  # first few words of complaint
-    cc_str = " ".join(cc_short).lower() if cc_short else "this"
-    return q.format(cc=cc_str)
-def extract_hpi_value(answer: str, field: str) -> str:
-    answer = answer.strip()
-    if field == "severity":
-        match = re.search(r'(\d{1,2})\s*(?:out of|/|over)?\s*10', answer, re.IGNORECASE)
-        if match:
-            return f"{match.group(1)}/10"
-        # also handle bare numbers 1-10
-        match2 = re.search(r'\b([1-9]|10)\b', answer)
-        if match2:
-            return f"{match2.group(1)}/10"
-    return answer
-def _is_vague_answer(answer: str) -> bool:
-    vague_phrases = ["i don't know", "not sure", "dont know", "idk", "maybe", "i guess", "not really", "not sure"]
-    return any(phrase in answer.lower() for phrase in vague_phrases)
-def _parse_ros_answer(answer: str) -> list[str]:
     """
-    Parse a free-text ROS answer into a list of individual findings.
-    Handles comma-separated, 'and'-joined, and 'no X' style negative findings.
     """
-    # Split on commas, semicolons, and 'and'
-    parts = re.split(r'[,;]|\band\b', answer, flags=re.IGNORECASE)
-    findings = []
-    for part in parts:
-        part = part.strip()
-        if part:
-            findings.append(part)
-    return findings if findings else [answer.strip()]
 # -------------------- NODES --------------------
-GREETINGS = {"hello", "hi", "hey", "start", "begin", "ok", "okay", "yes", "sure"}
-def intake_node(state: IntakeState) -> dict:
-    messages = state.get("messages", [])
-    last_idx = state.get("last_processed_message_index", 0)
-    cc = state.get("chief_complaint", "")
-    if cc:
-        return {"current_node": "hpi"}
-    has_new_user_msg = len(messages) > last_idx
-    greeting_reply = "Hello, I'm conducting your pre-visit clinical intake. What brings you in today?"
-    if has_new_user_msg:
-        user_msg = next((m for m in messages[last_idx:] if m["role"] == "user"), None)
-        if user_msg:
-            content = user_msg["content"].strip()
-            if content.lower() in GREETINGS or len(content) <= 4:
-                return {
-                    "messages": [{"role": "assistant", "content": greeting_reply}],
-                    "chief_complaint": "",
-                    "current_node": "intake",
-                    "last_processed_message_index": len(messages),
-                    "vague_retry_field": None,
-                }
-            cc = content
-            if _MOCK():
-                reply = f"Got it — {cc}. I'll ask a few quick questions to document your visit."
-            else:
-                reply = _ask(
-                    f"Patient's chief complaint is: '{cc}'. "
-                    "Acknowledge it in one sentence and say you'll ask a few questions."
-                )
             return {
-                "messages": [{"role": "assistant", "content": reply}],
-                "chief_complaint": cc,
-                "current_node": "hpi",
-                "last_processed_message_index": len(messages),
-                "vague_retry_field": None,
             }
-    return {
-        "messages": [{"role": "assistant", "content": greeting_reply}],
-        "chief_complaint": "",
-        "current_node": "intake",
-        "last_processed_message_index": last_idx,
-        "vague_retry_field": None,
-    }
-def hpi_node(state: IntakeState) -> dict:
-    messages = state.get("messages", [])
-    last_idx = state.get("last_processed_message_index", 0)
-    hpi = dict(state.get("hpi", {}))
-    vague_retry_field = state.get("vague_retry_field")
-    cc = state.get("chief_complaint", "")
-    next_field = vague_retry_field
-    if not next_field:
-        for field in HPI_FIELDS:
-            if field not in hpi or not hpi.get(field):
-                next_field = field
-                break
-    if next_field is None:
         return {
-            "messages": [{"role": "assistant", "content": "Thank you. Now I'll ask about a few other symptoms."}],
-            "current_node": "ros",
-            "last_processed_message_index": len(messages),
-            "vague_retry_field": None,
         }
-    has_new_user_msg = len(messages) > last_idx
-    if has_new_user_msg:
-        user_msg = next((m for m in messages[last_idx:] if m["role"] == "user"), None)
-        if user_msg:
-            answer = user_msg["content"]
-            if _is_vague_answer(answer):
-                field_context = HPI_FIELD_CONTEXT[next_field]
-                if _MOCK():
-                    reply = f"Could you be more specific? I need to know {field_context}."
-                else:
-                    reply = _ask(
-                        f"Patient response about {field_context} was vague. "
-                        "Ask for clarification in one short sentence."
-                    )
-                return {
-                    "messages": [{"role": "assistant", "content": reply}],
-                    "current_node": "hpi",
-                    "last_processed_message_index": last_idx,
-                    "vague_retry_field": next_field,
-                }
-            hpi[next_field] = extract_hpi_value(answer, next_field)
-            next_idx = HPI_FIELDS.index(next_field)
-            if next_idx < len(HPI_FIELDS) - 1:
-                next_field = HPI_FIELDS[next_idx + 1]
-                if _MOCK():
-                    reply = _fmt_question(next_field, cc)
-                else:
-                    reply = _ask(
-                        f"Complaint: {cc}. Known info: {hpi}. "
-                        f"Ask ONE question about {HPI_FIELD_CONTEXT[next_field]}."
-                    )
-                return {
-                    "messages": [{"role": "assistant", "content": reply}],
-                    "hpi": hpi,
-                    "current_node": "hpi",
-                    "last_processed_message_index": len(messages),
-                    "vague_retry_field": None,
-                }
-            return {
-                "messages": [{"role": "assistant", "content": "Thank you. Now I'll ask about a few other symptoms."}],
-                "hpi": hpi,
-                "current_node": "ros",
-                "last_processed_message_index": len(messages),
-                "vague_retry_field": None,
-            }
-    if _MOCK():
-        reply = _fmt_question(next_field, cc)
     else:
-        reply = _ask(
-            f"Complaint: {cc}. Known info: {hpi}. "
-            f"Ask ONE question about {HPI_FIELD_CONTEXT[next_field]}."
-        )
     return {
-        "messages": [{"role": "assistant", "content": reply}],
-        "current_node": "hpi",
-        "last_processed_message_index": last_idx,
-        "vague_retry_field": None,
     }
-def ros_node(state: IntakeState) -> dict:
-    messages = state.get("messages", [])
-    last_idx = state.get("last_processed_message_index", 0)
-    ros = dict(state.get("ros", {}))
-    cc = state.get("chief_complaint", "")
-    ros_systems = state.get("ros_systems") or get_relevant_ros_systems(cc)
-    current_idx = state.get("ros_current_index", 0)
-    pending = state.get("ros_pending_system")
-    if current_idx >= len(ros_systems):
         return {
-            "messages": [{"role": "assistant", "content": "Thank you — I have everything I need."}],
-            "current_node": "brief_generator",
-            "last_processed_message_index": len(messages),
         }
-    has_new_user_msg = len(messages) > last_idx
-    if has_new_user_msg and pending:
-        answer = messages[-1]["content"]
-        ros[pending] = _parse_ros_answer(answer)
-    next_system = ros_systems[current_idx]
-    if _MOCK():
-        reply = ROS_SYSTEM_QUESTIONS.get(next_system, f"Any {next_system} symptoms? Mention present and absent.")
-    else:
-        reply = _ask(
-            f"Ask about {next_system} symptoms. One short question. "
-            "Ask for both present and absent symptoms."
-        )
     return {
         "messages": [{"role": "assistant", "content": reply}],
-        "ros": ros,
-        "current_node": "ros",
-        "ros_systems": ros_systems,
-        "ros_current_index": current_idx + 1,
-        "ros_pending_system": next_system,
-        "last_processed_message_index": len(messages),
     }
-# -------------------- FINAL --------------------
-from datetime import datetime, timezone
-from app.schemas import HPI as HPIModel, ClinicalBrief as ClinicalBriefModel
-def _clean_hpi_value(field: str, raw: str) -> str:
-    """
-    Convert a raw patient answer into a clean clinical phrase.
-    Removes filler words and informal language.
-    """
-    raw = raw.strip()
-    # Remove filler starters
-    fillers = [
-        r'^(yeah|yes|no|well|so|like|um|uh|i mean|i guess),?\s*',
-        r'^(it\'?s?\s+)',
-        r'^(the\s+)',
-    ]
-    for pattern in fillers:
-        raw = re.sub(pattern, '', raw, flags=re.IGNORECASE).strip()
-    if not raw:
-        return "not specified"
-    # Capitalize first letter
-    return raw[0].upper() + raw[1:]
-def brief_generator_node(state: IntakeState) -> dict:
-    raw_hpi = state.get("hpi", {})
-    # Clean each HPI field
-    cleaned_hpi = {f: _clean_hpi_value(f, raw_hpi.get(f) or "not specified") for f in HPI_FIELDS}
-    hpi_obj = HPIModel(**cleaned_hpi)
-    # Clean ROS — ensure each system has a proper list of findings
-    raw_ros = state.get("ros", {})
-    cleaned_ros: dict[str, list[str]] = {}
-    for system, findings in raw_ros.items():
-        clean_findings = []
-        for f in findings:
-            f = f.strip()
-            if f:
-                # Capitalize
-                f = f[0].upper() + f[1:]
-                clean_findings.append(f)
-        if clean_findings:
-            cleaned_ros[system] = clean_findings
-    brief = ClinicalBriefModel(
-        chief_complaint=state.get("chief_complaint", ""),
-        hpi=hpi_obj,
-        ros=cleaned_ros,
         generated_at=datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
     )
     return {
-        "messages": [{"role": "assistant", "content": "Intake complete. Your clinical summary is ready."}],
         "current_node": "done",
         "clinical_brief": brief.model_dump(),
     }
@@ -406,31 +203,36 @@ def brief_generator_node(state: IntakeState) -> dict:
 def build_graph():
     workflow = StateGraph(IntakeState)
-    workflow.add_node("intake", intake_node)
-    workflow.add_node("hpi", hpi_node)
-    workflow.add_node("ros", ros_node)
-    workflow.add_node("brief_generator", brief_generator_node)
-    def route(state: IntakeState) -> str:
-        return state.get("current_node", "intake")
-    workflow.add_edge(START, "intake")
-    workflow.add_conditional_edges(
-        "intake", route, {"intake": "intake", "hpi": "hpi"}
-    )
-    workflow.add_conditional_edges(
-        "hpi", route, {"hpi": "hpi", "ros": "ros"}
-    )
-    workflow.add_conditional_edges(
-        "ros", route, {"ros": "ros", "brief_generator": "brief_generator"}
-    )
-    workflow.add_edge("brief_generator", END)
     checkpointer = MemorySaver()
     graph = workflow.compile(
         checkpointer=checkpointer,
-        interrupt_after=["intake", "hpi", "ros"]
     )
     return graph, checkpointer

+import os
+import json
 from typing import Optional, TypedDict, Annotated
 from langgraph.graph import StateGraph, START, END
 from langgraph.checkpoint.memory import MemorySaver
+from app.llm import get_llm
+from app.schemas import ClinicalStateExtraction, ClinicalBrief, HPI
def _MOCK() -> bool:
    """Report whether mock-LLM mode is on.

    Reads the ``MOCK_LLM`` environment variable on every call (defaulting to
    ``"true"`` when unset) and compares it case-insensitively with ``"true"``.
    """
    flag = os.environ.get("MOCK_LLM", "true")
    return flag.lower() == "true"
 def add_messages(left: list[dict], right: list[dict]) -> list[dict]:
     return left + right
 class IntakeState(TypedDict):
     messages: Annotated[list[dict], add_messages]
+    clinical_state: str  # JSON representation of ClinicalStateExtraction
+    missing_fields: list[str]
     current_node: str
     clinical_brief: Optional[dict]
+    frontend_stage: str # 'intake', 'hpi', 'ros', or 'done'
+# -------------------- HELPER FUNCTIONS --------------------
+HPI_REQUIRED = ["onset", "location", "duration", "character", "severity", "aggravating", "relieving"]
+ROS_REQUIRED_COUNT = 3
def format_transcript(messages: list[dict]) -> str:
    """Render chat messages as plain text, one ``Speaker: text`` line each.

    A message whose role is ``"assistant"`` is labelled ``AI``; any other role
    is labelled ``Patient``. Every message passed in is rendered, so a caller
    that wants a shorter context must slice the list before calling.
    """
    return "\n".join(
        f"{'AI' if msg['role'] == 'assistant' else 'Patient'}: {msg['content']}"
        for msg in messages
    )
def evaluate_missing(state: ClinicalStateExtraction) -> tuple[list[str], str]:
    """Work out what the intake still lacks and which stage the frontend shows.

    The checks run in order and stop at the first stage that has gaps:
    chief complaint, then every field in ``HPI_REQUIRED``, then review-of-systems
    coverage (at least ``ROS_REQUIRED_COUNT`` systems).

    Args:
        state: The extracted clinical state to inspect.

    Returns:
        ``(missing, stage)`` where ``missing`` lists human-readable labels of
        the outstanding items and ``stage`` is ``"intake"``, ``"hpi"``,
        ``"ros"`` or, when nothing is missing, ``"done"``.
    """
    if not state.chief_complaint:
        return ["chief complaint (reason for visit)"], "intake"

    missing = []
    for field in HPI_REQUIRED:
        val = getattr(state.hpi, field)
        # Blank, whitespace-only and placeholder answers all count as unanswered.
        if not val or val.strip().lower() in ("", "not specified"):
            missing.append(f"HPI: {field}")
    if missing:
        return missing, "hpi"

    # Need at least a few systems covered if possible
    remaining = ROS_REQUIRED_COUNT - len(state.ros or {})
    if remaining > 0:
        return [f"Review of Systems (ask about {remaining} more bodily systems)"], "ros"

    return [], "done"
 # -------------------- NODES --------------------
def triage_node(state: "IntakeState") -> dict:
    """Screen the newest patient message for hard-coded emergency phrases.

    Returns a partial state update. When the last message is from the user and
    contains one of the emergency phrases, the update carries an emergency
    reply and sets both ``current_node`` and ``frontend_stage`` to ``"done"``.
    In every other case, including an empty history, ``current_node`` is set
    to ``"extractor"``.
    """
    msgs = state.get("messages", [])
    if not msgs:
        # The triage router only maps "done" and "extractor", so an empty
        # history must continue to the extractor (which seeds the initial
        # clinical state) rather than report a node name that is not routable.
        return {"current_node": "extractor"}

    last_msg = msgs[-1]
    if last_msg["role"] == "user":
        # Normalise typographic apostrophes so "can’t breathe" still matches,
        # and tolerate a missing/None content value.
        content = (
            (last_msg.get("content") or "")
            .lower()
            .replace("\u2019", "'")
            .replace("\u2018", "'")
        )
        emergencies = (
            "suicide",
            "kill myself",
            "crushing chest pain",
            "can't breathe",
            "heart attack",
        )
        if any(phrase in content for phrase in emergencies):
            return {
                "messages": [{"role": "assistant", "content": "🚨 EMERGENCY OVERRIDE: Your symptoms sound like a medical emergency. Please call 911 or visit the nearest emergency room immediately."}],
                "current_node": "done",
                "frontend_stage": "done",
            }
    return {"current_node": "extractor"}
def extractor_node(state: IntakeState) -> dict:
    """Update the structured clinical state from the conversation so far.

    With no messages, the node seeds an empty ``ClinicalStateExtraction``.
    When the last message is not from the user, it passes straight through to
    the evaluator. Otherwise it asks the LLM to merge the transcript into the
    current state; if the result flags an emergency, the update ends the
    intake with an emergency reply.
    """
    history = state.get("messages", [])
    if not history:
        # Initial state setup
        return {
            "clinical_state": ClinicalStateExtraction().model_dump_json(),
            "current_node": "evaluator",
        }

    # Only a fresh user message can contain new facts to extract.
    if history[-1]["role"] != "user":
        return {"current_node": "evaluator"}

    llm = get_llm()
    transcript = format_transcript(history)
    prior_json = state.get("clinical_state") or ClinicalStateExtraction().model_dump_json()

    # Extractor agent updates the state passively.
    updated = llm.ask_json(transcript, prior_json, ClinicalStateExtraction)

    if not updated.emergency_detected:
        return {
            "clinical_state": updated.model_dump_json(),
            "current_node": "evaluator",
        }

    # The extractor detected a latent emergency: stop the intake here.
    return {
        "messages": [{"role": "assistant", "content": "🚨 EMERGENCY OVERRIDE: Based on your details, you require immediate medical attention. Call 911."}],
        "current_node": "done",
        "frontend_stage": "done",
        "clinical_state": updated.model_dump_json(),
    }
def evaluator_node(state: IntakeState) -> dict:
    """Decide whether the intake is complete and pick the next node.

    Parses ``clinical_state`` (an empty ``ClinicalStateExtraction`` is used
    when it is absent), runs ``evaluate_missing`` on it, and routes to
    ``"scribe"`` when nothing is missing or to ``"conversationalist"``
    otherwise.
    """
    raw = state.get("clinical_state")
    snapshot = (
        ClinicalStateExtraction.model_validate_json(raw)
        if raw
        else ClinicalStateExtraction()
    )

    missing, stage = evaluate_missing(snapshot)
    complete = not missing
    return {
        "missing_fields": missing,
        "frontend_stage": "done" if complete else stage,
        "current_node": "scribe" if complete else "conversationalist",
    }
def conversationalist_node(state: IntakeState) -> dict:
    """Produce the assistant's next turn.

    With no history, it returns the fixed greeting. When the assistant was the
    last speaker, it returns without adding a message. Otherwise it asks the
    LLM for one question aimed at the first entry of ``missing_fields`` (or
    ``"general details"`` when that list is empty), using the last six
    messages as context.
    """
    history = state.get("messages", [])
    known_facts = state.get("clinical_state", "{}")
    outstanding = state.get("missing_fields", [])

    if not history:
        greeting = "Hello, I'm conducting your pre-visit clinical intake. What brings you in today?"
        return {
            "messages": [{"role": "assistant", "content": greeting}],
            "current_node": "conversationalist",
        }

    # Prevent double-speaking when there is no new user input.
    if history[-1]["role"] == "assistant":
        return {"current_node": "conversationalist"}

    # Aim the question at the top missing field.
    target = outstanding[0] if outstanding else "general details"

    system_prompt = (
        "You are an empathetic clinical intake assistant. "
        "Your sole job is to ask the next logical medical question in a conversational way. "
        f"We currently know this info about the patient:\n{known_facts}\n\n"
        f"YOUR GOAL: You MUST naturally uncover the following missing information: {target}. "
        "Keep your response to exactly ONE question. Be concise and friendly."
    )

    recent = format_transcript(history[-6:])  # context window
    llm = get_llm()
    user_prompt = f"Transcript:\n{recent}\n\nAsk the next question about: {target}."
    reply = llm.ask(user_prompt, system=system_prompt)

    return {
        "messages": [{"role": "assistant", "content": reply}],
        "current_node": "conversationalist",
    }
def scribe_node(state: IntakeState) -> dict:
    """Turn the extracted clinical state into the final clinical brief.

    Parses ``clinical_state``, builds a ``ClinicalBrief`` stamped with the
    current UTC time (ISO 8601 with a ``Z`` suffix), and returns it as a plain
    dict together with a closing message and ``current_node`` set to
    ``"done"``.
    """
    from datetime import datetime, timezone

    extraction = ClinicalStateExtraction.model_validate_json(state.get("clinical_state"))
    timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    brief = ClinicalBrief(
        chief_complaint=extraction.chief_complaint or "Not specified",
        hpi=extraction.hpi,
        ros=extraction.ros,
        generated_at=timestamp,
    )

    closing = "Thank you — I have everything I need. Your clinical summary is ready."
    return {
        "messages": [{"role": "assistant", "content": closing}],
        "current_node": "done",
        "clinical_brief": brief.model_dump(),
    }
 def build_graph():
     workflow = StateGraph(IntakeState)
+    workflow.add_node("triage", triage_node)
+    workflow.add_node("extractor", extractor_node)
+    workflow.add_node("evaluator", evaluator_node)
+    workflow.add_node("conversationalist", conversationalist_node)
+    workflow.add_node("scribe", scribe_node)
+    def route_triage(state: IntakeState) -> str:
+        # If triage marked it 'done' (emergency), skip everything
+        return state.get("current_node", "extractor")
+    def route_extractor(state: IntakeState) -> str:
+        # Extractor marks it 'done' if latent emergency, else 'evaluator'
+        return state.get("current_node", "evaluator")
+    def route_evaluator(state: IntakeState) -> str:
+        return state.get("current_node", "conversationalist")
+    workflow.add_edge(START, "triage")
+    workflow.add_conditional_edges("triage", route_triage, {"done": END, "extractor": "extractor"})
+    workflow.add_conditional_edges("extractor", route_extractor, {"done": END, "evaluator": "evaluator"})
+    workflow.add_conditional_edges("evaluator", route_evaluator, {"conversationalist": "conversationalist", "scribe": "scribe"})
+    workflow.add_edge("conversationalist", END)
+    workflow.add_edge("scribe", END)
     checkpointer = MemorySaver()
+    # Interrupt after conversationalist so it waits for user input
     graph = workflow.compile(
         checkpointer=checkpointer,
+        interrupt_after=["conversationalist"]
     )
     return graph, checkpointer

app/llm.py CHANGED Viewed

@@ -1,26 +1,79 @@
 import os
 CLINICAL_SYSTEM_PROMPT = (
     "You are a clinical intake assistant conducting a pre-visit patient interview. "
-    "Ask one clear, concise, professional medical question at a time. "
     "Do not diagnose or give medical advice. Keep responses under 2 sentences. "
-    "Be empathetic but professional."
 )
 class MockLLM:
     def __init__(self):
-        self.hpi_fields = ["onset", "location", "duration", "character", "severity", "aggravating", "relieving"]
-        self.current_hpi_index = 0
-        self.ros_systems_done = False
-    def ask(self, instruction: str) -> str:
-        return ""  # unused in mock mode — graph uses hardcoded questions
-    def reset(self):
-        self.current_hpi_index = 0
-        self.ros_systems_done = False
 class TransformersLLM:
     def __init__(self):
@@ -39,11 +92,11 @@ class TransformersLLM:
                 device_map="cpu",
             )
-    def ask(self, instruction: str) -> str:
         self._load()
         import torch
         messages = [
-            {"role": "system", "content": CLINICAL_SYSTEM_PROMPT},
             {"role": "user", "content": instruction},
         ]
         text = self.tokenizer.apply_chat_template(
@@ -53,8 +106,8 @@ class TransformersLLM:
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
-                max_new_tokens=80,
-                temperature=0.3,
                 do_sample=True,
                 pad_token_id=self.tokenizer.eos_token_id,
             )
@@ -64,9 +117,61 @@ class TransformersLLM:
         )
         return response.strip()
-_llm_instance = None
 def get_llm():
     global _llm_instance

 import os
+import json
+from pydantic import BaseModel
 CLINICAL_SYSTEM_PROMPT = (
     "You are a clinical intake assistant conducting a pre-visit patient interview. "
+    "Be empathetic, warm, and highly professional. "
     "Do not diagnose or give medical advice. Keep responses under 2 sentences. "
 )
 class MockLLM:
     def __init__(self):
+        pass
+    def ask(self, instruction: str, system: str = CLINICAL_SYSTEM_PROMPT) -> str:
+        # We will heavily mock the responses in graph.py for tests
+        if "empathetic reply" in instruction.lower():
+            if "chest" in instruction.lower():
+                return "I'm sorry to hear about your chest pain. When did it start?"
+            return "I understand. Can you tell me more?"
+        # General fallback that allows tests to check for context
+        if "onset" in instruction.lower():
+            return "When did this start?"
+        elif "severity" in instruction.lower() or "scale" in instruction.lower():
+            return "On a scale of 1 to 10, how severe is this?"
+        elif "location" in instruction.lower():
+            return "Where exactly do you feel this?"
+        return "Can you elaborate on that?"
+    def ask_json(self, transcript: str, current_state: str, schema_cls: type[BaseModel]) -> BaseModel:
+        # Mocking extraction logic for deterministic testing
+        t_low = transcript.lower()
+        state_dict = json.loads(current_state)
+        # very basic test logic
+        if "chest pain" in t_low:
+            state_dict["chief_complaint"] = "chest pain"
+        if "yesterday" in t_low or "morning" in t_low:
+            if not state_dict.get("hpi"): state_dict["hpi"] = {}
+            state_dict["hpi"]["onset"] = "this morning" if "morning" in t_low else "yesterday"
+        if "center" in t_low:
+            if not state_dict.get("hpi"): state_dict["hpi"] = {}
+            state_dict["hpi"]["location"] = "center of chest"
+        if "constant" in t_low:
+            if not state_dict.get("hpi"): state_dict["hpi"] = {}
+            state_dict["hpi"]["duration"] = "constant"
+        if "pressure" in t_low or "tight" in t_low:
+            if not state_dict.get("hpi"): state_dict["hpi"] = {}
+            state_dict["hpi"]["character"] = "tight pressure"
+        if "7" in t_low or "seven" in t_low:
+            if not state_dict.get("hpi"): state_dict["hpi"] = {}
+            state_dict["hpi"]["severity"] = "7/10"
+        if "walk" in t_low or "running" in t_low:
+            if not state_dict.get("hpi"): state_dict["hpi"] = {}
+            state_dict["hpi"]["aggravating"] = "walking"
+        if "rest" in t_low:
+            if not state_dict.get("hpi"): state_dict["hpi"] = {}
+            state_dict["hpi"]["relieving"] = "resting"
+        if "palpitations" in t_low:
+            if not state_dict.get("ros"): state_dict["ros"] = {}
+            state_dict["ros"]["cardiac"] = ["palpitations", "no syncope"]
+        if "breath" in t_low:
+            if not state_dict.get("ros"): state_dict["ros"] = {}
+            state_dict["ros"]["respiratory"] = ["shortness of breath", "no cough"]
+        if "nausea" in t_low:
+            if not state_dict.get("ros"): state_dict["ros"] = {}
+            state_dict["ros"]["gi"] = ["no nausea"]
+        if "crushing chest pain" in t_low or "heart attack" in t_low or "emergency" in t_low:
+            state_dict["emergency_detected"] = True
+        # Guarantee schema matches via Pydantic model_validate
+        return schema_cls.model_validate(state_dict)
 class TransformersLLM:
     def __init__(self):
                 device_map="cpu",
             )
+    def ask(self, instruction: str, system: str = CLINICAL_SYSTEM_PROMPT) -> str:
         self._load()
         import torch
         messages = [
+            {"role": "system", "content": system},
             {"role": "user", "content": instruction},
         ]
         text = self.tokenizer.apply_chat_template(
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
+                max_new_tokens=100,
+                temperature=0.4,
                 do_sample=True,
                 pad_token_id=self.tokenizer.eos_token_id,
             )
         )
         return response.strip()
+    def ask_json(self, transcript: str, current_state: str, schema_cls: type[BaseModel]) -> BaseModel:
+        self._load()
+        import torch
+        system = (
+            "You are a clinical data extraction engine. "
+            "Your objective is to read the patient transcript and output exactly a valid JSON document "
+            "that matches the requested schema. Extract all relevant medical facts you can find. "
+            "Merge new facts into the existing state."
+        )
+        instruction = (
+            f"CURRENT STATE JSON (Update this based on the transcript):\n{current_state}\n\n"
+            f"TRANSCRIPT:\n{transcript}\n\n"
+            f"Output ONLY valid JSON matching this schema structure:\n"
+            f"{schema_cls.model_json_schema()}"
+        )
+        messages = [
+            {"role": "system", "content": system},
+            {"role": "user", "content": instruction},
+        ]
+        text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+        inputs = self.tokenizer(text, return_tensors="pt")
+        with torch.no_grad():
+            outputs = self.model.generate(
+                **inputs,
+                max_new_tokens=400,
+                temperature=0.1, # Keep low for JSON determinism
+                do_sample=False,
+                pad_token_id=self.tokenizer.eos_token_id,
+            )
+        response = self.tokenizer.decode(
+            outputs[0][inputs.input_ids.shape[1]:],
+            skip_special_tokens=True,
+        )
+        # Attempt to parse json from output
+        json_str = response.strip()
+        if "```json" in json_str:
+            json_str = json_str.split("```json")[-1].split("```")[0]
+        elif "```" in json_str:
+            json_str = json_str.split("```")[-1].split("```")[0]
+        try:
+            parsed = json.loads(json_str)
+            return schema_cls.model_validate(parsed)
+        except Exception:
+            # Fallback to current state if extraction fails (avoids crashing)
+            try:
+                return schema_cls.model_validate_json(current_state)
+            except Exception:
+                return schema_cls()
+_llm_instance = None
 def get_llm():
     global _llm_instance

app/main.py CHANGED Viewed

@@ -36,12 +36,12 @@ graph, checkpointer = build_graph()
 def get_current_node(session_id: str) -> str:
-    """Get current node from checkpoint."""
     config = {"configurable": {"thread_id": session_id}}
     try:
         snapshot = graph.get_state(config)
         if snapshot and snapshot.values:
-            return snapshot.values.get("current_node", "intake")
     except Exception:
         pass
     return "intake"

 def get_current_node(session_id: str) -> str:
+    """Get frontend stage from checkpoint."""
     config = {"configurable": {"thread_id": session_id}}
     try:
         snapshot = graph.get_state(config)
         if snapshot and snapshot.values:
+            return snapshot.values.get("frontend_stage", "intake")
     except Exception:
         pass
     return "intake"

app/schemas.py CHANGED Viewed

@@ -1,18 +1,23 @@
-from pydantic import BaseModel
 class HPI(BaseModel):
-    onset: str
-    location: str
-    duration: str
-    character: str
-    severity: str
-    aggravating: str
-    relieving: str
 class ClinicalBrief(BaseModel):
     chief_complaint: str
     hpi: HPI
-    ros: dict[str, list[str]]
     generated_at: str

+from typing import Optional, Dict, List
+from pydantic import BaseModel, Field
 class HPI(BaseModel):
+    onset: Optional[str] = Field(None, description="When the symptom started")
+    location: Optional[str] = Field(None, description="Where exactly the symptom is located")
+    duration: Optional[str] = Field(None, description="How long episodes last or if it is constant")
+    character: Optional[str] = Field(None, description="What the pain feels like (sharp, dull, pressure, etc.)")
+    severity: Optional[str] = Field(None, description="Pain scale severity (e.g., 7/10 or 'severe')")
+    aggravating: Optional[str] = Field(None, description="What makes the symptoms worse")
+    relieving: Optional[str] = Field(None, description="What helps relieve the symptoms")
 class ClinicalBrief(BaseModel):
+    # Final brief returned once intake is complete. Every field is required.
+    # Free-text chief complaint.
     chief_complaint: str
+    # Nested HPI model holding the history-of-present-illness attributes.
     hpi: HPI
+    # Review of systems: maps a system name to a list of finding strings.
+    ros: Dict[str, List[str]]
+    # Presumably a timestamp rendered as text; the exact format is not
+    # visible in this file -- TODO confirm against the code that builds it.
     generated_at: str
+class ClinicalStateExtraction(BaseModel):
+    chief_complaint: Optional[str] = Field(None, description="The main reason for the visit")
+    hpi: HPI = Field(default_factory=HPI)
+    ros: Dict[str, List[str]] = Field(default_factory=dict, description="Review of systems, keys are system names, values are list of findings (positive or negative)")
+    emergency_detected: bool = Field(False, description="True ONLY if the patient mentions life-threatening symptoms requiring immediate 911/ER like severe crushing chest pain radiating to jaw, active severe bleeding, or suicidal ideation")

tests/test_e2e.py CHANGED Viewed

@@ -1,12 +1,11 @@
 import os
 os.environ["MOCK_LLM"] = "true"
 import pytest
 from httpx import AsyncClient, ASGITransport
 from app.main import app
 @pytest.fixture
 async def client():
@@ -14,7 +13,6 @@ async def client():
     async with AsyncClient(transport=transport, base_url="http://test") as c:
         yield c
 @pytest.mark.asyncio(loop_scope="function")
 async def test_health_endpoint(client):
     response = await client.get("/health")
@@ -23,171 +21,62 @@ async def test_health_endpoint(client):
     assert data["status"] == "ok"
     assert data["mock_mode"] is True
 @pytest.mark.asyncio(loop_scope="function")
-async def test_full_intake_flow(client):
-    session_id = "test1"
-    response = await client.post("/chat", json={"session_id": session_id, "message": "hello"})
     assert response.status_code == 200
     data = response.json()
-    assert data["reply"]
-    assert data["state"] in ["intake", "hpi"]
-    responses = [
-        "I have chest pain since this morning",   # CC (intake)
-        "It started about 3 hours ago",            # onset
-        "In the center of my chest",               # location
-        "It has been constant for an hour",        # duration
-        "It feels like pressure",                  # character
-        "About a 7 out of 10",                     # severity
-        "It gets worse when I walk",               # aggravating
-        "Resting helps a little",                  # relieving
-        "palpitations present, no syncope",        # cardiac ROS
-        "mild shortness of breath, no cough",      # respiratory ROS
-        "no nausea or vomiting",                   # gi ROS
-    ]
-    final_data = None
-    for resp_text in responses:
-        response = await client.post("/chat", json={"session_id": session_id, "message": resp_text})
-        assert response.status_code == 200
-        final_data = response.json()
-    assert final_data is not None
-    assert final_data["state"] == "done"
-    assert "brief" in final_data
-    assert final_data["brief"] is not None
-    brief = final_data["brief"]
-    assert "chief_complaint" in brief
-    assert "hpi" in brief
-    assert "ros" in brief
 @pytest.mark.asyncio(loop_scope="function")
-async def test_hpi_reprompt(client):
-    """Vague answers (I don't know) should trigger a re-prompt."""
-    session_id = "test_vague"
-    await client.post("/chat", json={"session_id": session_id, "message": "hello"})
-    await client.post("/chat", json={"session_id": session_id, "message": "I have chest pain"})
-    # First HPI question is about onset
-    vague_response = await client.post("/chat", json={"session_id": session_id, "message": "I don't know"})
-    assert vague_response.status_code == 200
-    data = vague_response.json()
-    reply_lower = data["reply"].lower()
-    # Should ask again — should mention specificity or the field context
-    assert "specific" in reply_lower or "when" in reply_lower or "start" in reply_lower
-@pytest.mark.asyncio(loop_scope="function")
-async def test_ros_scoping(client):
-    """For chest pain, ROS should include cardiac and respiratory systems."""
-    session_id = "test_chest_pain"
     await client.post("/chat", json={"session_id": session_id, "message": "hello"})
-    await client.post("/chat", json={"session_id": session_id, "message": "I have chest pain"})
-    hpi_responses = [
-        "It started 3 hours ago",
-        "In the center of my chest",
-        "It has been constant",
-        "It feels like pressure",
-        "7 out of 10",
-        "Walking makes it worse",
-        "Resting helps",
-    ]
-    for resp in hpi_responses:
-        await client.post("/chat", json={"session_id": session_id, "message": resp})
-    # Now in ROS — answer cardiac system
-    await client.post("/chat", json={"session_id": session_id, "message": "palpitations, no syncope"})
-    # respiratory
-    await client.post("/chat", json={"session_id": session_id, "message": "mild shortness of breath, no cough"})
-    # gi
-    final_response = await client.post("/chat", json={"session_id": session_id, "message": "no nausea"})
-    final_data = final_response.json()
-    if final_data.get("brief"):
-        ros_keys = list(final_data["brief"]["ros"].keys())
-        assert "cardiac" in ros_keys or "respiratory" in ros_keys
-@pytest.mark.asyncio(loop_scope="function")
-async def test_brief_structure(client):
-    """Brief should have all 7 HPI fields, chief_complaint, ros, and generated_at."""
-    session_id = "test_brief"
-    messages = [
-        "hello",
-        "I have chest pain",
-        "It started 3 hours ago",
-        "In the center of my chest",
-        "Constant",
-        "Pressure-like",
-        "7 out of 10",
-        "Walking worsens it",
-        "Resting helps",
-        "palpitations present, no syncope",
-        "shortness of breath, no cough",
-        "no nausea or vomiting",
-    ]
-    response = None
-    for msg in messages:
-        response = await client.post("/chat", json={"session_id": session_id, "message": msg})
-        assert response.status_code == 200
-    final_data = response.json()
-    if final_data.get("brief"):
-        brief = final_data["brief"]
-        from app.schemas import ClinicalBrief
-        validated = ClinicalBrief.model_validate(brief)
-        assert validated.chief_complaint
-        assert validated.hpi.onset
-        assert validated.hpi.location
-        assert validated.hpi.duration
-        assert validated.hpi.character
-        assert validated.hpi.severity
-        assert validated.hpi.aggravating
-        assert validated.hpi.relieving
-        assert validated.generated_at
-@pytest.mark.asyncio(loop_scope="function")
-async def test_brief_cleaning(client):
-    """Brief generator should strip informal filler words from patient answers."""
-    session_id = "test_cleaning"
-    messages = [
-        "hello",
-        "I have chest pain",
-        "yeah like since yesterday evening",   # filler "yeah like"
-        "like in my chest area",               # filler "like"
-        "Constant",
-        "um tight and squeezing",              # filler "um"
-        "7 out of 10",
-        "yeah walking makes it worse",         # filler "yeah"
-        "Resting helps",
-        "palpitations, no syncope",
-        "mild shortness of breath",
-        "no nausea",
-    ]
-    response = None
-    for msg in messages:
-        response = await client.post("/chat", json={"session_id": session_id, "message": msg})
-        assert response.status_code == 200
-    final_data = response.json()
-    if final_data.get("brief"):
-        hpi = final_data["brief"]["hpi"]
-        # "yeah like since yesterday evening" → should not start with "yeah"
-        if hpi.get("onset"):
-            assert not hpi["onset"].lower().startswith("yeah"), \
-                f"Filler not cleaned from onset: {hpi['onset']}"

 import os
 os.environ["MOCK_LLM"] = "true"
 import pytest
 from httpx import AsyncClient, ASGITransport
 from app.main import app
+from app.schemas import ClinicalBrief
 @pytest.fixture
 async def client():
     async with AsyncClient(transport=transport, base_url="http://test") as c:
         yield c
 @pytest.mark.asyncio(loop_scope="function")
 async def test_health_endpoint(client):
     response = await client.get("/health")
     assert data["status"] == "ok"
     assert data["mock_mode"] is True
 @pytest.mark.asyncio(loop_scope="function")
+async def test_emergency_triage_guardrail(client):
+    """If user types 'crushing chest pain', the triage node should immediately abort to 'done'."""
+    session_id = "test_emergency"
+    await client.post("/chat", json={"session_id": session_id, "message": "hello"})
+    response = await client.post("/chat", json={"session_id": session_id, "message": "I am having crushing chest pain"})
     assert response.status_code == 200
     data = response.json()
+    assert data["state"] == "done"
+    assert "911" in data["reply"] or "emergency" in data["reply"].lower()
 @pytest.mark.asyncio(loop_scope="function")
+async def test_shadow_extractor_logic(client):
+    """
+    Test that the shadow extractor gracefully fills in missing information behind the scenes,
+    transitioning the frontend stage from hpi to ros and finally done.
+
+    Drives one session through three patient messages and checks the reported
+    ``state`` after each, then validates the returned brief against
+    ``ClinicalBrief``.
+    """
+    session_id = "test_extraction"
+    # Opening greeting; its response is not inspected.
     await client.post("/chat", json={"session_id": session_id, "message": "hello"})
+    # 1. Chief Complaint & some HPI
+    # The mock LLM maps "chest pain" -> CC, "yesterday" -> onset
+    res = await client.post("/chat", json={"session_id": session_id, "message": "I have chest pain since yesterday"})
+    assert res.status_code == 200
+    data = res.json()
+    assert data["state"] == "hpi" # Needs more HPI info
+    # 2. More HPI info
+    res = await client.post("/chat", json={"session_id": session_id, "message": "It is constant pressure in the center. Severity is 7. Walking makes it worse, rest helps."})
+    assert res.status_code == 200
+    data = res.json()
+    assert data["state"] == "ros" # Completes HPI, moves to ROS
+    # 3. ROS info
+    res = await client.post("/chat", json={"session_id": session_id, "message": "I have palpitations and shortness of breath. No nausea."})
+    assert res.status_code == 200
+    data = res.json()
+    # Should be done
+    assert data["state"] == "done"
+    assert data["brief"] is not None
+    # Validate the payload shape before checking individual values.
+    brief = ClinicalBrief.model_validate(data["brief"])
+    # NOTE(review): several expected values below ("center of chest",
+    # "tight pressure", "7/10", "resting") do not appear verbatim in the
+    # messages sent above; presumably they are canned outputs of the mock
+    # LLM enabled via MOCK_LLM=true -- confirm against app/llm.py.
+    assert brief.chief_complaint == "chest pain"
+    assert brief.hpi.onset == "yesterday"
+    assert brief.hpi.location == "center of chest"
+    assert brief.hpi.duration == "constant"
+    assert brief.hpi.character == "tight pressure"
+    assert brief.hpi.severity == "7/10"
+    assert brief.hpi.aggravating == "walking"
+    assert brief.hpi.relieving == "resting"
+    # Only the presence of each ROS system key is checked, not its findings.
+    assert "cardiac" in brief.ros
+    assert "respiratory" in brief.ros
+    assert "gi" in brief.ros