Spaces:

ArchCoder
/

medintake-ai

Sleeping

App Files Files Community

priyansh-saxena1 committed 23 days ago

Commit

8d6f802

1 Parent(s): 44d41e8

feat: unified prompt with state visibility

Browse files

Files changed (2) hide show

app/graph.py +77 -63
app/llm.py +89 -40

app/graph.py CHANGED Viewed

@@ -5,7 +5,7 @@ from langgraph.graph import StateGraph, START, END
 from langgraph.checkpoint.memory import MemorySaver
 from app.llm import get_llm, CombinedOutput, HPI_FIELDS, ROS_REQUIRED
-from app.schemas import ClinicalBrief, HPI, ClinicalStateExtraction
 _MOCK = lambda: os.environ.get("MOCK_LLM", "true").lower() == "true"
@@ -21,10 +21,9 @@ class IntakeState(TypedDict):
     current_node: str
     clinical_brief: Optional[dict]
     frontend_stage: str          # 'intake', 'hpi', 'ros', 'done'
-# HPI_FIELDS and ROS_REQUIRED imported from app.llm
 EMERGENCY_PHRASES = [
     "crushing chest pain", "can't breathe", "cannot breathe",
     "heart attack", "suicide", "kill myself", "can't move", "dying"
@@ -65,11 +64,19 @@ def missing_from(state: CombinedOutput) -> list[str]:
     return missing
-def _detect_repeat(state) -> bool:
-    """Return True if the last two assistant replies are identical."""
-    msgs = state.get("messages", [])
     assistant_replies = [m.get("content", "") for m in msgs if m.get("role") == "assistant"]
-    return len(assistant_replies) >= 2 and assistant_replies[-1] == assistant_replies[-2]
 # ------------------------------------------------------------------- nodes ---
@@ -96,20 +103,19 @@ def triage_node(state: IntakeState) -> dict:
 def agent_node(state: IntakeState) -> dict:
     """
-    Core agent node — ONE combined LLM call per turn:
-    1. Extracts any new clinical data from the transcript.
-    2. Generates the next conversational question.
-    3. If all data is collected, builds the ClinicalBrief inline (no separate scribe node).
     """
     msgs = state.get("messages", [])
-    # On first call with no messages, return opening greeting
     if not msgs or (len(msgs) == 1 and msgs[0]["role"] == "assistant"):
         return {
             "messages": [{"role": "assistant", "content": "Hello, I'm conducting your pre-visit clinical intake. What brings you in today?"}],
             "clinical_state": CombinedOutput().model_dump_json(),
             "frontend_stage": "intake",
             "current_node": "agent",
         }
     if msgs[-1]["role"] == "assistant":
@@ -117,8 +123,8 @@ def agent_node(state: IntakeState) -> dict:
     current_json = state.get("clinical_state") or CombinedOutput().model_dump_json()
     transcript = format_transcript(msgs)
-    # Compute the current stage BEFORE the LLM call so we can pick the right prompt
     try:
         pre_state = CombinedOutput.model_validate_json(current_json)
         current_stage = compute_stage(pre_state)
@@ -126,62 +132,66 @@ def agent_node(state: IntakeState) -> dict:
         current_stage = "intake"
     import time
-    t_agent = time.time()
     print(f"[{time.time():.3f}] [Graph Node] Requesting LLM inference (stage={current_stage})...")
     llm = get_llm()
     result: CombinedOutput = llm.combined_call(transcript, current_json, stage=current_stage)
-    # ── Loop Guard: if LLM returned same reply as last turn, force-fill stuck field ──
-    if _detect_repeat({"messages": msgs + [{"role": "assistant", "content": result.reply}]}):
-        # Check if the LLM made progress extracting data despite repeating the reply
-        try:
-            prev_state = CombinedOutput.model_validate_json(current_json)
-            prev_filled = sum(1 for f in HPI_FIELDS if getattr(prev_state, f, None)) + len(prev_state.ros)
-            new_filled = sum(1 for f in HPI_FIELDS if getattr(result, f, None)) + len(result.ros)
-            made_progress = new_filled > prev_filled
-        except Exception:
-            made_progress = False
-        hpi_filled = all(getattr(result, f, None) for f in HPI_FIELDS)
-        if not hpi_filled:
-            if not made_progress:
-                # Still in HPI and stuck — force-fill the first empty HPI field
-                for stuck_field in HPI_FIELDS:
-                    if getattr(result, stuck_field, None) is None:
-                        object.__setattr__(result, stuck_field, "not specified")
-                        print(f"[LoopGuard] Force-filled HPI '{stuck_field}' = 'not specified' to break repeat loop")
-                        break
-            # Ensure we ask a new question to break the loop
-            new_missing = missing_from(result)
-            if new_missing:
-                object.__setattr__(result, "reply", f"Thank you. Now, could you tell me about {new_missing[0].replace('HPI:', '')}?")
-            else:
-                object.__setattr__(result, "reply", "Thank you — I have everything I need.")
-        else:
-            # In ROS stage
-            if not made_progress:
-                print("[LoopGuard] LLM stuck in ROS without extracting data. Skipping system.")
-            if len(result.ros) < ROS_REQUIRED:
-                object.__setattr__(result, "reply", f"Thank you. Are there any other symptoms you've been experiencing?")
-            else:
-                object.__setattr__(result, "reply", "Thank you — I have everything I need.")
-    # ── ROS Hallucination Guard: LLM can only ADD one new ROS system per turn ──
     try:
-        prev_state = json.loads(current_json)
-        prev_ros = prev_state.get("ros") or {}
     except Exception:
         prev_ros = {}
     new_ros_keys = [k for k in result.ros if k not in prev_ros]
     if len(new_ros_keys) > 1:
-        print(f"[ROSGuard] LLM added {len(new_ros_keys)} new ROS systems in one turn: {new_ros_keys}. Keeping only first.")
         allowed_ros = dict(prev_ros)
         allowed_ros[new_ros_keys[0]] = result.ros[new_ros_keys[0]]
-        object.__setattr__(result, "ros", allowed_ros)
     print(f"[{time.time():.3f}] [Graph Node] LLM returned. Preparing node dictionaries...")
@@ -189,7 +199,11 @@ def agent_node(state: IntakeState) -> dict:
     missing = missing_from(result)
     reply = result.reply or "Could you tell me more?"
-    # All fields complete — build the brief inline so it's available this turn
     if stage == "done":
         from datetime import datetime, timezone
         brief = ClinicalBrief(
@@ -213,6 +227,7 @@ def agent_node(state: IntakeState) -> dict:
             "frontend_stage": "done",
             "current_node": "done",
             "clinical_brief": brief.model_dump(),
         }
     return {
@@ -221,6 +236,7 @@ def agent_node(state: IntakeState) -> dict:
         "missing_fields": missing,
         "frontend_stage": stage,
         "current_node": "agent",
     }
@@ -240,10 +256,8 @@ def build_graph():
     workflow.add_edge("agent", END)
     checkpointer = MemorySaver()
-    # Interrupt after agent so it pauses for user input each turn
-    graph = workflow.compile(
-        checkpointer=checkpointer,
-        interrupt_after=["agent"]
-    )
     return graph, checkpointer

 from langgraph.checkpoint.memory import MemorySaver
 from app.llm import get_llm, CombinedOutput, HPI_FIELDS, ROS_REQUIRED
+from app.schemas import ClinicalBrief, HPI
 _MOCK = lambda: os.environ.get("MOCK_LLM", "true").lower() == "true"
     current_node: str
     clinical_brief: Optional[dict]
     frontend_stage: str          # 'intake', 'hpi', 'ros', 'done'
+    ros_stuck_count: int         # consecutive turns stuck in ROS with no progress
 EMERGENCY_PHRASES = [
     "crushing chest pain", "can't breathe", "cannot breathe",
     "heart attack", "suicide", "kill myself", "can't move", "dying"
     return missing
+def _get_last_user_message(msgs: list[dict]) -> str:
+    for m in reversed(msgs):
+        if m.get("role") == "user":
+            return m.get("content", "")
+    return ""
+def _detect_repeat(msgs: list[dict], new_reply: str) -> bool:
+    """Return True if new_reply is identical to the last two stored assistant replies."""
     assistant_replies = [m.get("content", "") for m in msgs if m.get("role") == "assistant"]
+    if len(assistant_replies) >= 2:
+        return new_reply == assistant_replies[-1] == assistant_replies[-2]
+    return False
 # ------------------------------------------------------------------- nodes ---
 def agent_node(state: IntakeState) -> dict:
     """
+    Core agent — one LLM call per turn.
+    Extracts clinical data, generates next question, builds brief when complete.
     """
     msgs = state.get("messages", [])
+    # First call: no messages yet → return opening greeting
     if not msgs or (len(msgs) == 1 and msgs[0]["role"] == "assistant"):
         return {
             "messages": [{"role": "assistant", "content": "Hello, I'm conducting your pre-visit clinical intake. What brings you in today?"}],
             "clinical_state": CombinedOutput().model_dump_json(),
             "frontend_stage": "intake",
             "current_node": "agent",
+            "ros_stuck_count": 0,
         }
     if msgs[-1]["role"] == "assistant":
     current_json = state.get("clinical_state") or CombinedOutput().model_dump_json()
     transcript = format_transcript(msgs)
+    ros_stuck_count = state.get("ros_stuck_count", 0)
     try:
         pre_state = CombinedOutput.model_validate_json(current_json)
         current_stage = compute_stage(pre_state)
         current_stage = "intake"
     import time
     print(f"[{time.time():.3f}] [Graph Node] Requesting LLM inference (stage={current_stage})...")
     llm = get_llm()
     result: CombinedOutput = llm.combined_call(transcript, current_json, stage=current_stage)
+    # ── ROS Hallucination Guard: max 1 new ROS system per turn ──────────
     try:
+        prev_ros = json.loads(current_json).get("ros") or {}
     except Exception:
         prev_ros = {}
     new_ros_keys = [k for k in result.ros if k not in prev_ros]
     if len(new_ros_keys) > 1:
+        print(f"[ROSGuard] LLM added {len(new_ros_keys)} systems in one turn: {new_ros_keys}. Keeping first only.")
         allowed_ros = dict(prev_ros)
         allowed_ros[new_ros_keys[0]] = result.ros[new_ros_keys[0]]
+        result = result.model_copy(update={"ros": allowed_ros})
+    # ── Loop Guard ───────────────────────────────────────────────────────
+    try:
+        prev_state_obj = CombinedOutput.model_validate_json(current_json)
+        prev_filled = sum(1 for f in HPI_FIELDS if getattr(prev_state_obj, f, None)) + len(prev_state_obj.ros)
+        new_filled = sum(1 for f in HPI_FIELDS if getattr(result, f, None)) + len(result.ros)
+        made_progress = new_filled > prev_filled
+    except Exception:
+        made_progress = True  # assume progress on parse error
+    hpi_complete = all(getattr(result, f, None) for f in HPI_FIELDS)
+    if not made_progress:
+        last_user_msg = _get_last_user_message(msgs)
+        if not hpi_complete:
+            # HPI stuck — force-fill the first empty field
+            for stuck_field in HPI_FIELDS:
+                if not getattr(result, stuck_field, None):
+                    result = result.model_copy(update={stuck_field: last_user_msg or "not specified"})
+                    print(f"[LoopGuard] Force-filled HPI '{stuck_field}' = '{last_user_msg or 'not specified'}'")
+                    break
+        else:
+            # ROS stuck — force-store the user's answer into a pending ros_asked system
+            ros_stuck_count += 1
+            pending = [s for s in result.ros_asked if s not in result.ros]
+            if pending:
+                # Store whatever the user just said as the finding for this system
+                new_ros = dict(result.ros)
+                new_ros[pending[0]] = [last_user_msg] if last_user_msg else ["no symptoms reported"]
+                result = result.model_copy(update={"ros": new_ros})
+                print(f"[LoopGuard] Force-stored ROS['{pending[0]}'] = [{last_user_msg[:40]}]")
+            elif ros_stuck_count >= 2:
+                # LLM isn't even updating ros_asked — force a dummy system to unblock
+                stub_key = f"general_{len(result.ros)}"
+                new_ros = dict(result.ros)
+                new_ros[stub_key] = [last_user_msg] if last_user_msg else ["no additional symptoms"]
+                result = result.model_copy(update={"ros": new_ros})
+                print(f"[LoopGuard] Force-added stub ROS['{stub_key}'] after {ros_stuck_count} stuck turns.")
+                ros_stuck_count = 0
+    else:
+        ros_stuck_count = 0  # reset counter when progress is made
     print(f"[{time.time():.3f}] [Graph Node] LLM returned. Preparing node dictionaries...")
     missing = missing_from(result)
     reply = result.reply or "Could you tell me more?"
+    # Sanitize reply — avoid storing empty or whitespace-only replies
+    if not reply.strip():
+        reply = "Could you tell me more?"
+    # All fields complete — build the brief inline
     if stage == "done":
         from datetime import datetime, timezone
         brief = ClinicalBrief(
             "frontend_stage": "done",
             "current_node": "done",
             "clinical_brief": brief.model_dump(),
+            "ros_stuck_count": 0,
         }
     return {
         "missing_fields": missing,
         "frontend_stage": stage,
         "current_node": "agent",
+        "ros_stuck_count": ros_stuck_count,
     }
     workflow.add_edge("agent", END)
     checkpointer = MemorySaver()
+    graph = workflow.compile(checkpointer=checkpointer)
+    # NOTE: interrupt_after removed — state accumulates via MemorySaver reducer
+    # on every fresh invoke, which is correct behavior (has_next is always False)
     return graph, checkpointer

app/llm.py CHANGED Viewed

@@ -2,7 +2,6 @@ import os
 import json
 from pydantic import BaseModel
-# ── Single unified system prompt — LLM sees the full workflow ──
 SYSTEM_PROMPT = """You are a clinical intake assistant conducting a pre-visit patient interview.
 YOUR WORKFLOW (follow this order):
@@ -27,9 +26,14 @@ YOUR WORKFLOW (follow this order):
 CRITICAL RULES:
 - NEVER re-ask a field that is already filled (marked ✅ in the status).
 - Ask exactly ONE question per turn about the FIRST missing item.
-- If a patient says "none"/"zero"/"no"/"denied", store that exact answer — do NOT leave it null.
-- For ROS: store findings as a list, e.g. "musculoskeletal": ["joint stiffness", "no swelling"].
 - Do NOT ask emotional/psychological questions — stick to physical symptoms.
 - Output ONLY valid JSON, no extra text.
 OUTPUT FORMAT:
@@ -43,7 +47,7 @@ OUTPUT FORMAT:
   "aggravating": "..." or null,
   "relieving": "..." or null,
   "ros": {"system_name": ["finding1", "finding2"], ...},
-  "ros_asked": ["system_name1"] (append any new system you ask about here to prevent repeating),
   "emergency": false,
   "reply": "Your single question"
 }"""
@@ -53,7 +57,6 @@ ROS_REQUIRED = 3
 def build_state_context(current_json: str) -> str:
-    """Build a human-readable status summary so the LLM knows exactly what's filled and missing."""
     try:
         state = json.loads(current_json)
     except Exception:
@@ -61,14 +64,12 @@ def build_state_context(current_json: str) -> str:
     lines = ["FIELD STATUS:"]
-    # Chief complaint
     cc = state.get("chief_complaint")
     if cc:
         lines.append(f'  ✅ chief_complaint: "{cc}"')
     else:
         lines.append("  ❌ chief_complaint: MISSING — ask what brings them in")
-    # HPI fields
     for field in HPI_FIELDS:
         val = state.get(field)
         if val:
@@ -76,7 +77,6 @@ def build_state_context(current_json: str) -> str:
         else:
             lines.append(f"  ❌ {field}: MISSING")
-    # ROS
     ros = state.get("ros", {})
     ros_asked = state.get("ros_asked", [])
     if ros:
@@ -90,9 +90,9 @@ def build_state_context(current_json: str) -> str:
     else:
         lines.append(f"  ✅ ros: all {ROS_REQUIRED} systems collected")
-    # Determine current phase
     if not cc:
         phase = "INTAKE"
     elif any(not state.get(f) for f in HPI_FIELDS):
         phase = "HPI"
         first_missing = next(f for f in HPI_FIELDS if not state.get(f))
@@ -100,12 +100,11 @@ def build_state_context(current_json: str) -> str:
     elif ros_remaining > 0:
         phase = "ROS"
         lines.append(f"\nCURRENT PHASE: {phase} — ask about the next body system relevant to '{cc}'")
     else:
         phase = "DONE"
-        lines.append(f"\nCURRENT PHASE: {phase} — all data collected, set reply to completion message")
-    if not cc:
-        lines.append(f"\nCURRENT PHASE: {phase}")
     return "\n".join(lines)
@@ -126,7 +125,8 @@ class CombinedOutput(BaseModel):
 class MockLLM:
-    """Minimal mock for testing — no regex, no extraction logic. Just walks through fields."""
     def combined_call(self, transcript: str, current_json: str, stage: str = "intake") -> CombinedOutput:
         try:
             state = json.loads(current_json)
@@ -140,39 +140,62 @@ class MockLLM:
                 last_patient_msg = line.replace("Patient:", "").strip()
                 break
-        hpi_fields = ["chief_complaint", "onset", "location", "duration", "character", "severity", "aggravating", "relieving"]
         ros_systems = ["cardiac", "respiratory", "gi"]
         if stage == "intake":
             if last_patient_msg and not state.get("chief_complaint"):
-                state["chief_complaint"] = last_patient_msg
-            state["reply"] = "What brings you in today?" if not state.get("chief_complaint") else f"When did the {state['chief_complaint']} start?"
         elif stage == "hpi":
-            for field in hpi_fields[1:]:
                 if not state.get(field):
                     if last_patient_msg:
                         state[field] = last_patient_msg
                     break
-            for field in hpi_fields[1:]:
                 if not state.get(field):
-                    labels = {"onset": "when it started", "location": "where you feel it",
-                              "duration": "how long it's lasted", "character": "what it feels like",
-                              "severity": "how severe it is (1-10)", "aggravating": "what makes it worse",
-                              "relieving": "what makes it better"}
                     state["reply"] = f"Can you tell me {labels.get(field, field)}?"
                     break
             else:
-                state["reply"] = "Thank you, moving on to review of systems."
         elif stage == "ros":
             ros = state.get("ros", {})
             for sys_name in ros_systems:
                 if sys_name not in ros:
                     if last_patient_msg:
                         ros[sys_name] = [last_patient_msg]
                         state["ros"] = ros
                     break
             for sys_name in ros_systems:
                 if sys_name not in ros:
                     state["reply"] = f"Any {sys_name} symptoms?"
@@ -189,10 +212,6 @@ class OllamaLLM:
         self.api_url = "http://localhost:11434/api/chat"
     def combined_call(self, transcript: str, current_json: str, stage: str = "intake") -> CombinedOutput:
-        """
-        Single LLM call: extracts clinical data + generates next question.
-        The unified prompt + state context gives the LLM full visibility.
-        """
         state_context = build_state_context(current_json)
         prompt = (
@@ -201,6 +220,7 @@ class OllamaLLM:
             f"CONVERSATION TRANSCRIPT:\n{transcript}\n\n"
             "TASK: Read the patient's latest message. Extract any new clinical facts into the JSON. "
             "Then ask exactly ONE question about the FIRST missing item shown above. "
             "Return ONLY the updated JSON object."
         )
@@ -224,25 +244,24 @@ class OllamaLLM:
                 "num_predict": 400
             }
         }
         try:
             response = requests.post(self.api_url, json=payload, timeout=60)
             response.raise_for_status()
             data = response.json()
             raw = data.get("message", {}).get("content", "").strip()
         except Exception as e:
-            print(f"[Ollama] ERROR calling local Ollama API: {e}")
-            print("[Ollama] Make sure Ollama is installed and running, and the model is downloaded!")
             return CombinedOutput.model_validate_json(current_json)
         print(f"[Ollama] Inference completed in {time.time() - t_start:.2f}s total.")
-        # Parse JSON robustly
         json_str = raw
         if "```json" in json_str:
-            json_str = json_str.split("```json", 1)[1].split("```")[0]
         elif "```" in json_str:
-            json_str = json_str.split("```", 1)[1].split("```")[0]
         start = json_str.find("{")
         end = json_str.rfind("}") + 1
@@ -251,25 +270,55 @@ class OllamaLLM:
         try:
             parsed = json.loads(json_str)
-            # Coerce empty strings and literal "null" back to None
             for field in ["chief_complaint", "onset", "location", "duration",
                           "character", "severity", "aggravating", "relieving"]:
                 v = parsed.get(field)
-                if v is not None and str(v).strip() in ("", "null"):
                     parsed[field] = None
-            return CombinedOutput.model_validate(parsed)
         except Exception as e:
             print(f"[Ollama] JSON parse error: {e}\nRaw output: {raw[:300]}")
             try:
-                base = CombinedOutput.model_validate_json(current_json)
-                base.reply = "Could you please repeat that? I want to make sure I understood correctly."
-                return base
             except Exception:
                 return CombinedOutput(reply="Could you please repeat that?")
 _llm_instance = None
 def get_llm():
     global _llm_instance
     if _llm_instance is None:

 import json
 from pydantic import BaseModel
 SYSTEM_PROMPT = """You are a clinical intake assistant conducting a pre-visit patient interview.
 YOUR WORKFLOW (follow this order):
 CRITICAL RULES:
 - NEVER re-ask a field that is already filled (marked ✅ in the status).
 - Ask exactly ONE question per turn about the FIRST missing item.
+- For HPI: accept any answer the patient gives, even vague ones like "moderate" or "not sure".
+- For ROS: ALWAYS add the system to BOTH "ros" and "ros_asked" — even for negative answers.
+  - Positive finding: "cardiac": ["palpitations present"]
+  - Negative finding: "respiratory": ["no shortness of breath"]
+  - Denied:          "gi": ["denied nausea and vomiting"]
+  A "no" is still a valid clinical finding. Never leave a ros system in ros_asked but absent from ros.
 - Do NOT ask emotional/psychological questions — stick to physical symptoms.
+- All string fields must be strings, not arrays.
 - Output ONLY valid JSON, no extra text.
 OUTPUT FORMAT:
   "aggravating": "..." or null,
   "relieving": "..." or null,
   "ros": {"system_name": ["finding1", "finding2"], ...},
+  "ros_asked": ["system_name1", "system_name2"],
   "emergency": false,
   "reply": "Your single question"
 }"""
 def build_state_context(current_json: str) -> str:
     try:
         state = json.loads(current_json)
     except Exception:
     lines = ["FIELD STATUS:"]
     cc = state.get("chief_complaint")
     if cc:
         lines.append(f'  ✅ chief_complaint: "{cc}"')
     else:
         lines.append("  ❌ chief_complaint: MISSING — ask what brings them in")
     for field in HPI_FIELDS:
         val = state.get(field)
         if val:
         else:
             lines.append(f"  ❌ {field}: MISSING")
     ros = state.get("ros", {})
     ros_asked = state.get("ros_asked", [])
     if ros:
     else:
         lines.append(f"  ✅ ros: all {ROS_REQUIRED} systems collected")
     if not cc:
         phase = "INTAKE"
+        lines.append(f"\nCURRENT PHASE: {phase}")
     elif any(not state.get(f) for f in HPI_FIELDS):
         phase = "HPI"
         first_missing = next(f for f in HPI_FIELDS if not state.get(f))
     elif ros_remaining > 0:
         phase = "ROS"
         lines.append(f"\nCURRENT PHASE: {phase} — ask about the next body system relevant to '{cc}'")
+        lines.append(f"  ⚠️  IMPORTANT: Store BOTH positive AND negative ROS findings in 'ros' dict.")
+        lines.append(f"  ⚠️  A patient saying 'no' means: ros[\"system\"] = [\"no [symptom]\"]")
     else:
         phase = "DONE"
+        lines.append(f"\nCURRENT PHASE: {phase} — all data collected")
     return "\n".join(lines)
 class MockLLM:
+    """Minimal mock for testing — deterministic field walker."""
     def combined_call(self, transcript: str, current_json: str, stage: str = "intake") -> CombinedOutput:
         try:
             state = json.loads(current_json)
                 last_patient_msg = line.replace("Patient:", "").strip()
                 break
         ros_systems = ["cardiac", "respiratory", "gi"]
         if stage == "intake":
             if last_patient_msg and not state.get("chief_complaint"):
+                # Skip greeting-only or very short messages instead of storing them as the chief complaint
+                greetings = {"hello", "hi", "hey", "ok", "okay", "start", "yes", "sure"}
+                if last_patient_msg.lower() not in greetings and len(last_patient_msg) > 4:
+                    state["chief_complaint"] = last_patient_msg
+            state["reply"] = (
+                "What brings you in today?"
+                if not state.get("chief_complaint")
+                else f"When did the {state['chief_complaint']} start?"
+            )
         elif stage == "hpi":
+            for field in HPI_FIELDS:
                 if not state.get(field):
                     if last_patient_msg:
                         state[field] = last_patient_msg
                     break
+            for field in HPI_FIELDS:
                 if not state.get(field):
+                    labels = {
+                        "onset": "when it started",
+                        "location": "where you feel it",
+                        "duration": "how long it's lasted",
+                        "character": "what it feels like",
+                        "severity": "how severe it is (1-10)",
+                        "aggravating": "what makes it worse",
+                        "relieving": "what makes it better",
+                    }
                     state["reply"] = f"Can you tell me {labels.get(field, field)}?"
                     break
             else:
+                state["reply"] = "Thank you, let me ask about other symptoms."
         elif stage == "ros":
             ros = state.get("ros", {})
+            ros_asked = state.get("ros_asked", [])
+            # Detect emergency keywords
+            if any(k in last_patient_msg.lower() for k in ["crushing", "can't breathe", "dying"]):
+                state["emergency"] = True
+            # Store last patient message into the first un-asked system
             for sys_name in ros_systems:
                 if sys_name not in ros:
                     if last_patient_msg:
                         ros[sys_name] = [last_patient_msg]
                         state["ros"] = ros
+                        if sys_name not in ros_asked:
+                            ros_asked.append(sys_name)
+                            state["ros_asked"] = ros_asked
                     break
+            # Ask about the next un-asked system
             for sys_name in ros_systems:
                 if sys_name not in ros:
                     state["reply"] = f"Any {sys_name} symptoms?"
         self.api_url = "http://localhost:11434/api/chat"
     def combined_call(self, transcript: str, current_json: str, stage: str = "intake") -> CombinedOutput:
         state_context = build_state_context(current_json)
         prompt = (
             f"CONVERSATION TRANSCRIPT:\n{transcript}\n\n"
             "TASK: Read the patient's latest message. Extract any new clinical facts into the JSON. "
             "Then ask exactly ONE question about the FIRST missing item shown above. "
+            "For ROS: if the patient answers about a system (even 'no'), add it to BOTH ros AND ros_asked. "
             "Return ONLY the updated JSON object."
         )
                 "num_predict": 400
             }
         }
         try:
             response = requests.post(self.api_url, json=payload, timeout=60)
             response.raise_for_status()
             data = response.json()
             raw = data.get("message", {}).get("content", "").strip()
         except Exception as e:
+            print(f"[Ollama] ERROR calling Ollama API: {e}")
             return CombinedOutput.model_validate_json(current_json)
         print(f"[Ollama] Inference completed in {time.time() - t_start:.2f}s total.")
+        # Strip markdown fences
         json_str = raw
         if "```json" in json_str:
+            json_str = json_str.split("```json", 1)[1].split("```")[0]
         elif "```" in json_str:
+            json_str = json_str.split("```", 1)[1].split("```")[0]
         start = json_str.find("{")
         end = json_str.rfind("}") + 1
         try:
             parsed = json.loads(json_str)
+            # ── Coerce all HPI string fields: list→str, empty/null→None ──
             for field in ["chief_complaint", "onset", "location", "duration",
                           "character", "severity", "aggravating", "relieving"]:
                 v = parsed.get(field)
+                if isinstance(v, list):
+                    # e.g. ["Walking"] → "Walking"
+                    parsed[field] = " ".join(str(x) for x in v) if v else None
+                elif v is not None and str(v).strip() in ("", "null"):
                     parsed[field] = None
+            result = CombinedOutput.model_validate(parsed)
         except Exception as e:
             print(f"[Ollama] JSON parse error: {e}\nRaw output: {raw[:300]}")
             try:
+                result = CombinedOutput.model_validate_json(current_json)
+                result = result.model_copy(update={"reply": "Could you please repeat that? I want to make sure I understood correctly."})
+                return result
             except Exception:
                 return CombinedOutput(reply="Could you please repeat that?")
+        # ── Post-process: normalize ros_asked → ros ──────────────────────
+        # If LLM added a system to ros_asked but not ros (e.g. for "no" answers),
+        # capture the last patient message as the finding for that system.
+        if result.ros_asked:
+            last_user = ""
+            for line in reversed(transcript.strip().split("\n")):
+                if line.startswith("Patient:"):
+                    last_user = line.replace("Patient:", "").strip()
+                    break
+            updated_ros = dict(result.ros)
+            changed = False
+            for asked_sys in result.ros_asked:
+                if asked_sys not in updated_ros:
+                    updated_ros[asked_sys] = [last_user] if last_user else ["no symptoms reported"]
+                    print(f"[ROSNorm] Filled ros['{asked_sys}'] from patient message: '{last_user[:40]}'")
+                    changed = True
+            if changed:
+                result = result.model_copy(update={"ros": updated_ros})
+        print(f"[Ollama] Parsed result — stage will be recomputed in graph.")
+        return result
 _llm_instance = None
 def get_llm():
     global _llm_instance
     if _llm_instance is None: