Rajan Sharma committed on
Commit b391d29 · verified · 1 Parent(s): 0cffbe0

Update app.py

Files changed (1)
  1. app.py +140 -131
app.py CHANGED
@@ -1,4 +1,3 @@
- # app.py
  import os, re, json, traceback, pathlib
  from functools import lru_cache
 
@@ -11,7 +10,7 @@ from audit_log import log_event, hash_summary
  from privacy import redact_text
 
  # ---------- Environment / cache (Spaces-safe, writable) ----------
- HOME = pathlib.Path.home()  # e.g., /home/user
  HF_HOME = str(HOME / ".cache" / "huggingface")
  HF_HUB_CACHE = str(HOME / ".cache" / "huggingface" / "hub")
  HF_TRANSFORMERS = str(HOME / ".cache" / "huggingface" / "transformers")
@@ -21,7 +20,7 @@ GRADIO_CACHE = GRADIO_TMP
 
  os.environ.setdefault("HF_HOME", HF_HOME)
  os.environ.setdefault("HF_HUB_CACHE", HF_HUB_CACHE)
- os.environ.setdefault("TRANSFORMERS_CACHE", HF_TRANSFORMERS)  # deprecation warning is harmless
  os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", ST_HOME)
  os.environ.setdefault("GRADIO_TEMP_DIR", GRADIO_TMP)
  os.environ.setdefault("GRADIO_CACHE_DIR", GRADIO_CACHE)
@@ -34,7 +33,7 @@ for p in [HF_HOME, HF_HUB_CACHE, HF_TRANSFORMERS, ST_HOME, GRADIO_TMP, GRADIO_CA
      except Exception:
          pass
 
- # Optional Cohere (preferred path)
  try:
      import cohere
      _HAS_COHERE = True
@@ -53,36 +52,50 @@ from session_rag import SessionRAG
  from mdsi_analysis import capacity_projection, cost_estimate, outcomes_summary
 
  # ---------- Config ----------
- MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct")  # HF fallback
  HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
 
  COHERE_API_KEY = os.getenv("COHERE_API_KEY")
  USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
 
- # Generous output (Cohere + HF)
  MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "2048"))
 
- # ---------- System Master (baseline guardrails; scenario-specific rules are injected only when needed) ----------
  SYSTEM_MASTER = """
- SYSTEM ROLE
  You are ClarityOps, a medical analytics system that interacts only via this chat.
 
- Core guardrails (always active):
  - Use ONLY information provided in this conversation (scenario text + uploaded files).
  - Never invent data. If something required is missing after clarifications, output the literal token: INSUFFICIENT_DATA.
- - Use correct medical units and plausible ranges.
- - Avoid PHI; aggregate only; apply small-cell suppression when cohort < 10.
-
- Scenario mode (when a scenario is detected):
- - Run in TWO PHASES:
  Phase 1: Ask up to 5 concise clarification questions, grouped by category (Prioritization, Capacity, Cost, Clinical, Recommendations). Then STOP and WAIT.
- Phase 2: After answers are provided, produce the final structured analysis in this exact order:
-   1. Prioritization
-   2. Capacity
-   3. Cost
-   4. Clinical Benefits
-   5. ClarityOps Top 3 Recommendations
- Include a short “Provenance” mapping each key output to scenario text, files, and/or clarified answers.
  """.strip()
 
  # ---------- Helpers ----------
@@ -100,16 +113,51 @@ def is_identity_query(message, history):
          r"\bdescribe\s+yourself\b", r"\band\s+you\s*\?\b", r"\byour\s+name\b",
          r"\bwho\s+am\s+i\s+chatting\s+with\b",
      ]
-
      def match(t):
          return any(re.search(p, (t or "").strip().lower()) for p in patterns)
-
-     if match(message):
-         return True
      if history:
          last_user = history[-1][0] if isinstance(history[-1], (list, tuple)) else None
-         if match(last_user):
-             return True
      return False
 
  def _iter_user_assistant(history):
@@ -120,8 +168,7 @@ def _iter_user_assistant(history):
          yield u, a
 
  def _sanitize_text(s: str) -> str:
-     if not isinstance(s, str):
-         return s
      return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
 
  def _history_to_prompt(message, history):
@@ -133,46 +180,6 @@ def _history_to_prompt(message, history):
      parts.append("Assistant:")
      return "\n".join(parts)
 
- def _summarize_artifacts(arts):
-     """Turn parsed artifacts (CSV, etc.) into a compact, model-friendly text block."""
-     if not arts:
-         return ""
-     blocks = []
-     for a in arts:
-         kind = a.get("kind")
-         name = a.get("name") or a.get("path") or "<file>"
-         if kind == "csv":
-             cols = ", ".join(map(str, a.get("columns", [])[:40])) or "<no columns>"
-             n_rows = a.get("n_rows_sampled", 0)
-             sample_rows = a.get("preview_rows") or []
-             first = sample_rows[0] if sample_rows else {}
-             first_str = ", ".join(f"{k}={str(v)[:60]}" for k, v in first.items()) if first else "<no sample>"
-             blocks.append(
-                 f"FILE: {name}\nTYPE: CSV\nCOLUMNS: {cols}\nSAMPLED_ROWS: {n_rows}\nFIRST_ROW: {first_str}"
-             )
-         else:
-             text = a.get("text", "")
-             if text:
-                 blocks.append(f"FILE: {name}\nTYPE: {kind}\nEXTRACT (truncated): {text[:800]}")
-     return "## Data File Summaries\n" + "\n\n".join(blocks)
-
- def _user_requested_files(text: str) -> bool:
-     low = (text or "").lower()
-     return "use the data files" in low or "use files" in low or "use uploaded" in low
-
- def _looks_like_scenario(text: str) -> bool:
-     """Heuristics to detect scenario/case-study inputs and avoid triggering on greetings."""
-     low = (text or "").lower().strip()
-     if not low:
-         return False
-     if len(low) > 600:
-         return True
-     hit_words = sum(w in low for w in [
-         "case study", "scenario", "background", "objective", "evaluation questions",
-         "expected output", "structured analysis", "prioritization", "capacity", "clinical benefits"
-     ])
-     return hit_words >= 2
-
  # ---------- Cohere first ----------
  def cohere_chat(message, history):
      if not USE_HOSTED_COHERE:
@@ -275,12 +282,12 @@ def _mdsi_block():
          "outcomes_summary": outcomes
      }, indent=2)
 
- # ---------- Core chat logic (auto scenario detection + two-phase when applicable) ----------
  def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answers=False):
      """
      awaiting_answers:
-     - False: normal chat or Phase 1 (if scenario)
-     - True: Phase 2 (if scenario)
      """
      try:
          log_event("user_message", None, {"sizes": {"chars": len(user_msg or "")}})
@@ -296,7 +303,8 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
              ans = "I am ClarityOps, your strategic decision making AI partner."
              return history + [(user_msg, ans)], awaiting_answers
 
-         # Ingest uploads
          if uploaded_files_paths:
              ing = extract_text_from_files(uploaded_files_paths)
              chunks = ing.get("chunks", []) if isinstance(ing, dict) else (ing or [])
@@ -307,65 +315,51 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
              _session_rag.register_artifacts(artifacts)
              log_event("uploads_added", None, {"chunks": len(chunks), "artifacts": len(artifacts)})
 
-         # Prepare artifact summary for prompt use
-         data_summary_text = _summarize_artifacts(_session_rag.artifacts)
-
-         # If user asks to "use files" but none parsed, ask for them explicitly
-         if _user_requested_files(safe_in) and not _session_rag.artifacts:
-             msg = ("I don’t see any parsed data files yet. Please attach your CSV/XLSX/PDF first, "
-                    "then resend your scenario. I’ll auto-summarize the files and use them in the analysis.")
-             return history + [(user_msg, msg)], False
-
-         # Quick helper: show latest CSV columns if asked
          if re.search(r"\b(columns?|headers?)\b", (safe_in or "").lower()):
              cols = _session_rag.get_latest_csv_columns()
              if cols:
                  return history + [(user_msg, "Here are the column names from your most recent CSV upload:\n\n- " + "\n- ".join(cols))], awaiting_answers
 
-         # Retrieval snippets
-         session_snips = "\n---\n".join(_session_rag.retrieve(
-             "diabetes screening Indigenous Métis mobile program cost throughput outcomes logistics",
-             k=6
-         ))
 
-         # Background computation/policy context
-         snapshot = _load_snapshot()
-         policy_context = retrieve_context(
-             "mobile diabetes screening Indigenous community outreach cultural safety data governance outcomes"
-         )
-         computed = compute_operational_numbers(snapshot)
-
-         # MDSi extras if relevant words present
-         user_lower = (safe_in or "").lower()
-         mdsi_extra = _mdsi_block() if ("diabetes" in user_lower or "mdsi" in user_lower or "mobile screening" in user_lower) else ""
-
-         # Build scenario-text (always include file summaries if present)
-         scenario_block = safe_in if len((safe_in or "")) > 0 else ""
-         scenario_text_full = (
-             scenario_block
-             + (f"\n\nExecutive Pre-Computed Blocks:\n{mdsi_extra}" if mdsi_extra else "")
-             + ("\n\n" + data_summary_text if data_summary_text else "")
-         )
 
-         # Build the common system preamble
-         system_preamble = build_system_preamble(
-             snapshot=snapshot,
-             policy_context=policy_context,
-             computed_numbers=computed,
-             scenario_text=scenario_text_full,
-             session_snips=session_snips
-         )
-
-         # Decide if this turn should be in scenario mode
-         scenario_mode = awaiting_answers or _looks_like_scenario(safe_in)
 
-         # Phase directive (only if scenario mode)
          if scenario_mode:
              if not awaiting_answers:
                  phase_directive = (
                      "\n\n[INSTRUCTION TO MODEL]\n"
-                     "Produce **Phase 1** only: First show a brief 'Data Snapshot' summarizing any parsed files (max 6 bullets), "
-                     "then output a header 'Clarification Questions' and ask up to 5 concise, grouped questions "
                      "(Prioritization, Capacity, Cost, Clinical, Recommendations). Then STOP and WAIT.\n"
                  )
              else:
@@ -375,16 +369,33 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
                      "(Prioritization, Capacity, Cost, Clinical Benefits, ClarityOps Top 3 Recommendations). "
                      "Use uploaded files + the user's latest answers as authoritative. Show calculations, units, and a brief Provenance.\n"
                  )
-         else:
-             # Normal chat: no phase instruction
-             phase_directive = ""
 
-         augmented_user = SYSTEM_MASTER + "\n\n" + system_preamble + "\n\nUser message:\n" + safe_in + phase_directive
 
-         # Call LLM (Cohere preferred, HF fallback)
          out = cohere_chat(augmented_user, history)
          if not out:
              model, tokenizer = load_local_model()
              inputs = build_inputs(tokenizer, augmented_user, history)
              out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
 
@@ -400,7 +411,7 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
          if blocked_out:
              safe_out = refusal_reply(reason_out)
 
-         # Flip phase state only if we were in scenario mode
          new_awaiting = awaiting_answers
          if scenario_mode:
              low = safe_out.lower()
@@ -408,10 +419,7 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
                  new_awaiting = True
              elif awaiting_answers and "structured analysis" in low:
                  new_awaiting = False
-         else:
-             new_awaiting = False  # normal chat never toggles scenario phase
 
-         # Audit (content-free fingerprints)
          log_event("assistant_reply", None, {
              **hash_summary("prompt", augmented_user if not PERSIST_CONTENT else ""),
              **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
@@ -466,7 +474,7 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
              elem_classes="hero-box"
          )
          hero_send = gr.Button("➤", scale=0)
-         gr.Markdown('<div class="hint">ClarityOps will first ask up to 5 clarifications (only if it detects a scenario), then produce a structured analysis.</div>')
 
      # --- MAIN APP (hidden until first message) ---
      with gr.Column(elem_id="chat-container", visible=False) as app_wrap:
@@ -489,7 +497,7 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
      # ---- State
      state_history = gr.State(value=[])
      state_uploaded = gr.State(value=[])
-     state_awaiting = gr.State(value=False)  # False -> no pending Phase-2; True -> expecting Phase-2 answers
 
      # ---- Uploads
      def _store_uploads(files, current):
@@ -504,6 +512,7 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
      def _on_send(user_msg, history, up_paths, awaiting):
          try:
              if not user_msg or not user_msg.strip():
                  return history, "", history, awaiting
              new_history, new_awaiting = clarityops_reply(
                  user_msg.strip(), history or [], None, up_paths or [], awaiting_answers=awaiting

  import os, re, json, traceback, pathlib
  from functools import lru_cache
 
 
  from privacy import redact_text
 
  # ---------- Environment / cache (Spaces-safe, writable) ----------
+ HOME = pathlib.Path.home()
  HF_HOME = str(HOME / ".cache" / "huggingface")
  HF_HUB_CACHE = str(HOME / ".cache" / "huggingface" / "hub")
  HF_TRANSFORMERS = str(HOME / ".cache" / "huggingface" / "transformers")
 
 
  os.environ.setdefault("HF_HOME", HF_HOME)
  os.environ.setdefault("HF_HUB_CACHE", HF_HUB_CACHE)
+ os.environ.setdefault("TRANSFORMERS_CACHE", HF_TRANSFORMERS)
  os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", ST_HOME)
  os.environ.setdefault("GRADIO_TEMP_DIR", GRADIO_TMP)
  os.environ.setdefault("GRADIO_CACHE_DIR", GRADIO_CACHE)
 
      except Exception:
          pass
 
+ # Optional Cohere
  try:
      import cohere
      _HAS_COHERE = True
 
  from mdsi_analysis import capacity_projection, cost_estimate, outcomes_summary
 
  # ---------- Config ----------
+ MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct")  # fallback
  HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
 
  COHERE_API_KEY = os.getenv("COHERE_API_KEY")
  USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
 
+ # Larger output (Cohere + HF fallback)
  MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "2048"))
 
+ # ---------- System Master (two-phase, LLM-only behavior) ----------
  SYSTEM_MASTER = """
+ SYSTEM ROLE (fixed, always on)
  You are ClarityOps, a medical analytics system that interacts only via this chat.
 
+ Absolute rules:
  - Use ONLY information provided in this conversation (scenario text + uploaded files).
  - Never invent data. If something required is missing after clarifications, output the literal token: INSUFFICIENT_DATA.
+ - Always run in TWO PHASES when the user provides a medical scenario (case study / program design / evaluation):
  Phase 1: Ask up to 5 concise clarification questions, grouped by category (Prioritization, Capacity, Cost, Clinical, Recommendations). Then STOP and WAIT.
+ Phase 2: After answers are provided, produce the final structured analysis exactly in the required format.
+
+ Core behavior:
+ - Read and synthesize any user-uploaded files (e.g., CSV/XLSX/PDF) relevant to the scenario.
+ - Prefer analytics/longitudinal recommendations (risk targeting, follow-up, clustering) over generic ops advice.
+ - Show all calculations explicitly for capacity and costs (e.g., “6 teams × 8 clients/day × 60 days = 2,880”).
+ - Use correct clinical units and plausible ranges.
+ - Include a brief “Provenance” section mapping each key output to scenario text, files, and/or clarified answers.
+
+ Medical guardrails (always apply):
+ - Units: BP in mmHg, A1c in %, BMI in kg/m², Total Cholesterol in mmol/L (or as provided), Percentages in %.
+ - Plausible ranges: A1c 3–20 %, SBP 60–250 mmHg, DBP 30–150 mmHg, BMI 10–70 kg/m², Total Chol 2–12 mmol/L.
+ - Privacy: avoid PHI; aggregate only; apply small-cell suppression where cohort < 10 (describe at a higher level).
+ - When data includes mixed or ambiguous indicators, ask to confirm preferred indicators (e.g., obesity/metabolic syndrome vs self-reported diabetes).
+
+ Formatting hard rules (only for scenarios):
+ - Phase 1 output MUST include the header line: “Clarification Questions”
+ - Phase 2 output MUST include the header line: “Structured Analysis”
+ - Phase 2 MUST follow this exact section order:
+   1. Prioritization
+   2. Capacity
+   3. Cost
+   4. Clinical Benefits
+   5. ClarityOps Top 3 Recommendations
+ (Include a short Provenance block at the end.)
  """.strip()
 
  # ---------- Helpers ----------
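The guardrails above live only in the prompt text; nothing elsewhere in app.py validates them. A minimal sketch of equivalent checks, with hypothetical helper names that are not part of this commit:

```python
# Hypothetical helpers mirroring the SYSTEM_MASTER guardrails; not part of app.py.
PLAUSIBLE_RANGES = {
    "a1c_pct": (3, 20),           # A1c in %
    "sbp_mmhg": (60, 250),        # systolic BP in mmHg
    "dbp_mmhg": (30, 150),        # diastolic BP in mmHg
    "bmi_kg_m2": (10, 70),        # BMI in kg/m²
    "total_chol_mmol_l": (2, 12), # total cholesterol in mmol/L
}

def is_plausible(metric: str, value: float) -> bool:
    """True if a value falls inside the prompt's plausible range for that metric."""
    lo, hi = PLAUSIBLE_RANGES[metric]
    return lo <= value <= hi

def suppress_small_cell(count: int, threshold: int = 10):
    """Small-cell suppression: never report exact counts below the threshold."""
    return count if count >= threshold else f"<{threshold}"

# e.g. is_plausible("a1c_pct", 7.2) -> True; suppress_small_cell(4) -> "<10"
```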
 
          r"\bdescribe\s+yourself\b", r"\band\s+you\s*\?\b", r"\byour\s+name\b",
          r"\bwho\s+am\s+i\s+chatting\s+with\b",
      ]
      def match(t):
          return any(re.search(p, (t or "").strip().lower()) for p in patterns)
+     if match(message): return True
      if history:
          last_user = history[-1][0] if isinstance(history[-1], (list, tuple)) else None
+         if match(last_user): return True
+     return False
+
+ GREETING_RE = re.compile(
+     r'^\s*(hi|hello|hey|yo|good\s*(morning|afternoon|evening)|howdy|sup)[\s!.\)]*$', re.I
+ )
+
+ def is_smalltalk(msg: str) -> bool:
+     if not msg: return True
+     if len(msg.strip()) < 6: return True
+     if GREETING_RE.match(msg.strip()): return True
+     # single short sentence, no punctuation complexity, no digits
+     if len(msg.split()) < 10 and not re.search(r'[\d,:;]|(case|scenario|study|objective|dataset|csv|program)', msg, re.I):
+         return True
+     return False
+
+ SCENARIO_MARKERS = [
+     "background", "case study", "objective", "objectives", "available data", "data inputs",
+     "evaluation questions", "expected output", "structured analysis", "methods", "assumptions"
+ ]
+ MEDICAL_TERMS = [
+     "diabetes", "a1c", "metabolic syndrome", "obesity", "blood pressure", "cholesterol",
+     "screening", "clinic", "patients", "prevalence", "capacity", "cost per client",
+     "program cost", "longitudinal", "outcomes", "cohort", "settlements", "indigenous", "métis"
+ ]
+
+ def is_scenario_like(msg: str, artifacts, uploads_present: bool) -> bool:
+     if not msg: return False
+     low = msg.lower()
+     # length + markers
+     has_len = len(low) > 400 or len(low.split()) > 120
+     has_marker = any(m in low for m in SCENARIO_MARKERS)
+     med_hits = sum(1 for t in MEDICAL_TERMS if t in low)
+     has_medical = med_hits >= 2
+     csv_present = any((a.get("kind") == "csv") for a in (artifacts or []))
+     # Declare scenario if: (length & marker & medical) OR (uploads with csv and medical) OR explicit "scenario"/"case study"
+     explicit = ("scenario" in low) or ("case study" in low)
+     if explicit: return True
+     if (has_len and has_marker and has_medical): return True
+     if (uploads_present and csv_present and has_medical): return True
      return False
 
  def _iter_user_assistant(history):
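For reference, this is roughly how the new detectors classify typical inputs (illustrative calls only; the sample strings are invented):

```python
# Illustrative calls against the detectors defined above; sample inputs are made up.
print(is_smalltalk("hi there"))   # True  -> handled as general chat
print(is_smalltalk("Please size a 6-team screening program over 60 days."))  # False

scenario = ("Case study: mobile diabetes screening across Métis settlements. "
            "Objective: estimate capacity, cost per client, and expected outcomes.")
print(is_scenario_like(scenario, artifacts=[], uploads_present=False))        # True ('case study' is explicit)
print(is_scenario_like("what can you do?", artifacts=[], uploads_present=False))  # False
```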
 
          yield u, a
 
  def _sanitize_text(s: str) -> str:
+     if not isinstance(s, str): return s
      return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
 
  def _history_to_prompt(message, history):
 
      parts.append("Assistant:")
      return "\n".join(parts)
 
  # ---------- Cohere first ----------
  def cohere_chat(message, history):
      if not USE_HOSTED_COHERE:
 
          "outcomes_summary": outcomes
      }, indent=2)
 
+ # ---------- Core chat logic (auto scenario detection) ----------
  def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answers=False):
      """
      awaiting_answers:
+     - False: If message looks like a medical scenario -> Phase 1; else general chat
+     - True: We expect the user's answers to Phase 1 -> produce Phase 2
      """
      try:
          log_event("user_message", None, {"sizes": {"chars": len(user_msg or "")}})
 
              ans = "I am ClarityOps, your strategic decision making AI partner."
              return history + [(user_msg, ans)], awaiting_answers
 
+         # Ingest uploads first (so detector can use artifacts)
+         artifacts = []
          if uploaded_files_paths:
              ing = extract_text_from_files(uploaded_files_paths)
              chunks = ing.get("chunks", []) if isinstance(ing, dict) else (ing or [])
 
              _session_rag.register_artifacts(artifacts)
              log_event("uploads_added", None, {"chunks": len(chunks), "artifacts": len(artifacts)})
 
+         # Column helper (explicit)
          if re.search(r"\b(columns?|headers?)\b", (safe_in or "").lower()):
              cols = _session_rag.get_latest_csv_columns()
              if cols:
                  return history + [(user_msg, "Here are the column names from your most recent CSV upload:\n\n- " + "\n- ".join(cols))], awaiting_answers
 
+         # Decide mode
+         uploads_present = bool(uploaded_files_paths)
+         scenario_mode = (not awaiting_answers) and is_scenario_like(safe_in or "", artifacts, uploads_present)
+         smalltalk = is_smalltalk(safe_in or "")
328
 
329
+ # Prepare retrieval/preamble only if needed
330
+ session_snips = ""
331
+ system_preamble = ""
332
+ phase_directive = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
 
334
+ if awaiting_answers:
335
+ # We are in Phase 2 (user answered Phase 1); force scenario flow
336
+ scenario_mode = True
 
 
 
 
 
 
 
 
337
 
 
          if scenario_mode:
+             # Session retrieval to enrich the system preamble
+             session_snips = "\n---\n".join(_session_rag.retrieve(
+                 "diabetes screening Indigenous Métis mobile program cost throughput outcomes logistics",
+                 k=6
+             ))
+             snapshot = _load_snapshot()
+             policy_context = retrieve_context(
+                 "mobile diabetes screening Indigenous community outreach cultural safety data governance outcomes"
+             )
+             computed = compute_operational_numbers(snapshot)
+             user_lower = (safe_in or "").lower()
+             mdsi_extra = _mdsi_block() if ("diabetes" in user_lower or "mdsi" in user_lower or "mobile screening" in user_lower) else ""
+             scenario_block = safe_in if len((safe_in or "")) > 0 else ""
+             system_preamble = build_system_preamble(
+                 snapshot=snapshot,
+                 policy_context=policy_context,
+                 computed_numbers=computed,
+                 scenario_text=scenario_block + (f"\n\nExecutive Pre-Computed Blocks:\n{mdsi_extra}" if mdsi_extra else ""),
+                 session_snips=session_snips
+             )
              if not awaiting_answers:
                  phase_directive = (
                      "\n\n[INSTRUCTION TO MODEL]\n"
+                     "Produce **Phase 1** only: output a header 'Clarification Questions' and ask up to 5 concise, grouped questions "
                      "(Prioritization, Capacity, Cost, Clinical, Recommendations). Then STOP and WAIT.\n"
                  )
              else:
 
                      "(Prioritization, Capacity, Cost, Clinical Benefits, ClarityOps Top 3 Recommendations). "
                      "Use uploaded files + the user's latest answers as authoritative. Show calculations, units, and a brief Provenance.\n"
                  )
 
+         # Build final message to model
+         if scenario_mode:
+             augmented_user = SYSTEM_MASTER + "\n\n" + system_preamble + "\n\nUser message:\n" + (safe_in or "") + phase_directive
+         else:
+             # General chat path: NO phase directive, NO heavy preamble; still keep SYSTEM_MASTER safety/medical guardrails
+             augmented_user = (
+                 "System: You are ClarityOps, a helpful medical & operations assistant. "
+                 "Answer normally and concisely. If the user pastes a long, structured medical scenario, you will switch to the two-phase flow; "
+                 "but this message does not qualify.\n\n"
+                 f"User: {safe_in}\nAssistant:"
+             )
 
+         # Call LLM
          out = cohere_chat(augmented_user, history)
          if not out:
              model, tokenizer = load_local_model()
+             # For local fallback we still use chat template with SYSTEM_MASTER included
+             def build_inputs(tokenizer, message, history):
+                 msgs = [{"role": "system", "content": SYSTEM_MASTER}]
+                 for u, a in _iter_user_assistant(history):
+                     if u: msgs.append({"role": "user", "content": u})
+                     if a: msgs.append({"role": "assistant", "content": a})
+                 msgs.append({"role": "user", "content": message})
+                 return tokenizer.apply_chat_template(
+                     msgs, tokenize=True, add_generation_prompt=True, return_tensors="pt"
+                 )
              inputs = build_inputs(tokenizer, augmented_user, history)
              out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
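`local_generate` is imported from the local-model module and is not shown in this diff; a minimal implementation consistent with how it is called here might look like the following sketch (not the repo's actual code):

```python
# Sketch only: one plausible local_generate matching the call site above.
import torch

def local_generate(model, tokenizer, inputs, max_new_tokens=2048):
    # `inputs` is the token tensor produced by apply_chat_template(..., return_tensors="pt").
    with torch.no_grad():
        output_ids = model.generate(inputs.to(model.device), max_new_tokens=max_new_tokens)
    # Decode only the newly generated tokens, not the echoed prompt.
    return tokenizer.decode(output_ids[0, inputs.shape[-1]:], skip_special_tokens=True)
```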
 
          if blocked_out:
              safe_out = refusal_reply(reason_out)
 
+         # Flip phase state based on headers (only if we were in scenario mode)
          new_awaiting = awaiting_answers
          if scenario_mode:
              low = safe_out.lower()
 
                  new_awaiting = True
              elif awaiting_answers and "structured analysis" in low:
                  new_awaiting = False
 
          log_event("assistant_reply", None, {
              **hash_summary("prompt", augmented_user if not PERSIST_CONTENT else ""),
              **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
 
              elem_classes="hero-box"
          )
          hero_send = gr.Button("➤", scale=0)
+         gr.Markdown('<div class="hint">ClarityOps will first ask up to 5 clarifications for long medical scenarios, then produce a structured analysis.</div>')
 
      # --- MAIN APP (hidden until first message) ---
      with gr.Column(elem_id="chat-container", visible=False) as app_wrap:
 
      # ---- State
      state_history = gr.State(value=[])
      state_uploaded = gr.State(value=[])
+     state_awaiting = gr.State(value=False)  # False -> Phase 1 next if scenario; True -> awaiting answers for Phase 2
 
      # ---- Uploads
      def _store_uploads(files, current):
 
      def _on_send(user_msg, history, up_paths, awaiting):
          try:
              if not user_msg or not user_msg.strip():
+                 # no toggle on empty
                  return history, "", history, awaiting
              new_history, new_awaiting = clarityops_reply(
                  user_msg.strip(), history or [], None, up_paths or [], awaiting_answers=awaiting
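The click/submit wiring for `_on_send` is outside this hunk. Assuming the usual Gradio pattern and hypothetical component names (`msg_box`, `chatbot`), it would look roughly like:

```python
# Hypothetical wiring sketch; component names are assumptions, not taken from this diff.
# _on_send returns (chat history, cleared textbox value, history state, awaiting state).
hero_send.click(
    _on_send,
    inputs=[msg_box, state_history, state_uploaded, state_awaiting],
    outputs=[chatbot, msg_box, state_history, state_awaiting],
)
```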