Rajan Sharma committed on
Commit
f3f85b6
·
verified ·
1 Parent(s): 909d570

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +156 -182
app.py CHANGED
@@ -11,7 +11,7 @@ from audit_log import log_event, hash_summary
11
  from privacy import redact_text
12
 
13
  # ---------- Environment / cache (Spaces-safe, writable) ----------
14
- HOME = pathlib.Path.home()
15
  HF_HOME = str(HOME / ".cache" / "huggingface")
16
  HF_HUB_CACHE = str(HOME / ".cache" / "huggingface" / "hub")
17
  HF_TRANSFORMERS = str(HOME / ".cache" / "huggingface" / "transformers")
@@ -21,7 +21,7 @@ GRADIO_CACHE = GRADIO_TMP
21
 
22
  os.environ.setdefault("HF_HOME", HF_HOME)
23
  os.environ.setdefault("HF_HUB_CACHE", HF_HUB_CACHE)
24
- os.environ.setdefault("TRANSFORMERS_CACHE", HF_TRANSFORMERS)
25
  os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", ST_HOME)
26
  os.environ.setdefault("GRADIO_TEMP_DIR", GRADIO_TMP)
27
  os.environ.setdefault("GRADIO_CACHE_DIR", GRADIO_CACHE)
@@ -34,7 +34,7 @@ for p in [HF_HOME, HF_HUB_CACHE, HF_TRANSFORMERS, ST_HOME, GRADIO_TMP, GRADIO_CA
34
  except Exception:
35
  pass
36
 
37
- # Optional Cohere
38
  try:
39
  import cohere
40
  _HAS_COHERE = True
@@ -53,50 +53,36 @@ from session_rag import SessionRAG
53
  from mdsi_analysis import capacity_projection, cost_estimate, outcomes_summary
54
 
55
  # ---------- Config ----------
56
- MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct") # fallback
57
  HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
58
 
59
  COHERE_API_KEY = os.getenv("COHERE_API_KEY")
60
  USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
61
 
62
- # Larger output (Cohere + HF fallback)
63
  MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "2048"))
64
 
65
- # ---------- System Master (two-phase, LLM-only behavior) ----------
66
  SYSTEM_MASTER = """
67
- SYSTEM ROLE (fixed, always on)
68
  You are ClarityOps, a medical analytics system that interacts only via this chat.
69
 
70
- Absolute rules:
71
  - Use ONLY information provided in this conversation (scenario text + uploaded files).
72
  - Never invent data. If something required is missing after clarifications, output the literal token: INSUFFICIENT_DATA.
73
- - When a SCENARIO is detected, always run in TWO PHASES:
 
 
 
 
74
  Phase 1: Ask up to 5 concise clarification questions, grouped by category (Prioritization, Capacity, Cost, Clinical, Recommendations). Then STOP and WAIT.
75
- Phase 2: After answers are provided, produce the final structured analysis exactly in the required format.
76
-
77
- Core behavior:
78
- - Read and synthesize any user-uploaded files (e.g., CSV/XLSX/PDF) relevant to the scenario.
79
- - Prefer analytics/longitudinal recommendations (risk targeting, follow-up, clustering) over generic ops advice.
80
- - Show all calculations explicitly for capacity and costs (e.g., “6 teams × 8 clients/day × 60 days = 2,880”).
81
- - Use correct clinical units and plausible ranges.
82
- - Include a brief “Provenance” section mapping each key output to scenario text, files, and/or clarified answers.
83
-
84
- Medical guardrails (always apply):
85
- - Units: BP in mmHg, A1c in %, BMI in kg/m², Total Cholesterol in mmol/L (or as provided), Percentages in %.
86
- - Plausible ranges: A1c 3–20 %, SBP 60–250 mmHg, DBP 30–150 mmHg, BMI 10–70 kg/m², Total Chol 2–12 mmol/L.
87
- - Privacy: avoid PHI; aggregate only; apply small-cell suppression where cohort < 10 (describe at a higher level).
88
- - When data includes mixed or ambiguous indicators, ask to confirm preferred indicators (e.g., obesity/metabolic syndrome vs self-reported diabetes).
89
-
90
- Formatting hard rules (SCENARIO mode only):
91
- - Phase 1 output MUST include the header line: “Clarification Questions”
92
- - Phase 2 output MUST include the header line: “Structured Analysis”
93
- - Phase 2 MUST follow this exact section order:
94
- 1. Prioritization
95
- 2. Capacity
96
- 3. Cost
97
- 4. Clinical Benefits
98
- 5. ClarityOps Top 3 Recommendations
99
- (Include a short Provenance block at the end.)
100
  """.strip()
101
 
102
  # ---------- Helpers ----------
@@ -114,7 +100,7 @@ def is_identity_query(message, history):
114
  r"\bdescribe\s+yourself\b", r"\band\s+you\s*\?\b", r"\byour\s+name\b",
115
  r"\bwho\s+am\s+i\s+chatting\s+with\b",
116
  ]
117
- def match(t): return any(re.search(p, (t or "").strip().lower()) for p in patterns)
118
  if match(message): return True
119
  if history:
120
  last_user = history[-1][0] if isinstance(history[-1], (list, tuple)) else None
@@ -142,47 +128,45 @@ def _history_to_prompt(message, history):
142
  parts.append("Assistant:")
143
  return "\n".join(parts)
144
 
145
- # ---------- Scenario auto-detection (stricter) ----------
146
- _SCENARIO_HEADINGS = [
147
- "context", "background", "scenario", "case study",
148
- "data inputs", "inputs", "evaluation questions", "questions",
149
- "recommendations", "deployment strategy", "next steps", "assumptions"
150
- ]
151
- _SCENARIO_KEYWORDS = [
152
- "diabetes", "screening", "metabolic", "prevalence", "settlements",
153
- "capacity", "throughput", "cost", "startup", "ongoing",
154
- "clinical", "a1c", "mmhg", "bmi", "cholesterol",
155
- "mobile", "program", "mdsi", "ops"
156
- ]
157
-
158
- def _looks_like_scenario(text: str, uploaded_paths) -> bool:
159
- """
160
- Conservative trigger: only enter scenario mode on clearly structured,
161
- domain-heavy content or when substantial files are attached.
162
- """
163
- if not text:
 
 
 
 
 
 
 
 
 
 
 
 
164
  return False
165
- t = text.strip()
166
- low = t.lower()
167
- n = len(t)
168
-
169
- headings = sum(1 for h in _SCENARIO_HEADINGS if h in low)
170
- kw_hits = sum(1 for k in _SCENARIO_KEYWORDS if k in low)
171
- has_metrics = (
172
- bool(re.search(r"\b\d{2,4}\b", low)) and
173
- bool(re.search(r"%|\bmmhg\b|\bbmi\b|\ba1c\b", low))
174
- )
175
-
176
- # File trigger if some substance exists
177
- if uploaded_paths and (n >= 200 or kw_hits >= 3):
178
  return True
179
- if n >= 700 and headings >= 1:
180
- return True
181
- if n >= 450 and headings >= 2:
182
- return True
183
- if n >= 500 and kw_hits >= 6 and has_metrics:
184
- return True
185
- return False
186
 
187
  # ---------- Cohere first ----------
188
  def cohere_chat(message, history):
@@ -286,12 +270,12 @@ def _mdsi_block():
286
  "outcomes_summary": outcomes
287
  }, indent=2)
288
 
289
- # ---------- Core chat logic (auto scenario routing) ----------
290
  def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answers=False):
291
  """
292
- awaiting_answers (bool state):
293
- - False: Normal chat route OR Scenario Phase 1 (if detected)
294
- - True : Scenario Phase 2 (consume answers -> produce Structured Analysis)
295
  """
296
  try:
297
  log_event("user_message", None, {"sizes": {"chars": len(user_msg or "")}})
@@ -300,14 +284,14 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
300
  safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
301
  if blocked_in:
302
  ans = refusal_reply(reason_in)
303
- return history + [(user_msg, ans)], False # never hold for next phase on refusal
304
 
305
  # Identity short-circuit
306
  if is_identity_query(safe_in, history):
307
  ans = "I am ClarityOps, your strategic decision making AI partner."
308
- return history + [(user_msg, ans)], False
309
 
310
- # Ingest uploads (text + artifacts like CSV headers/summary)
311
  if uploaded_files_paths:
312
  ing = extract_text_from_files(uploaded_files_paths)
313
  chunks = ing.get("chunks", []) if isinstance(ing, dict) else (ing or [])
@@ -318,128 +302,119 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
318
  _session_rag.register_artifacts(artifacts)
319
  log_event("uploads_added", None, {"chunks": len(chunks), "artifacts": len(artifacts)})
320
 
321
- # Columns helper (quick utility)
 
 
 
 
 
 
 
 
 
322
  if re.search(r"\b(columns?|headers?)\b", (safe_in or "").lower()):
323
  cols = _session_rag.get_latest_csv_columns()
324
  if cols:
325
  return history + [(user_msg, "Here are the column names from your most recent CSV upload:\n\n- " + "\n- ".join(cols))], awaiting_answers
326
 
327
- # -------- Decide routing early --------
328
- is_scenario = awaiting_answers or _looks_like_scenario(safe_in or "", uploaded_files_paths)
329
-
330
- # ===== NORMAL CHAT =====
331
- if not is_scenario:
332
- normal_user = SYSTEM_MASTER + "\n\nUser message:\n" + (safe_in or "")
333
- out = cohere_chat(normal_user, history)
334
- if not out:
335
- model, tokenizer = load_local_model()
336
- inputs = build_inputs(tokenizer, normal_user, history)
337
- out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
338
-
339
- if isinstance(out, str):
340
- for tag in ("Assistant:", "System:", "User:"):
341
- if out.startswith(tag):
342
- out = out[len(tag):].strip()
343
- out = _sanitize_text(out)
344
-
345
- safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
346
- if blocked_out:
347
- safe_out = refusal_reply(reason_out)
348
-
349
- log_event("assistant_reply", None, {
350
- **hash_summary("prompt", normal_user if not PERSIST_CONTENT else ""),
351
- **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
352
- "awaiting_next_phase": False
353
- })
354
- return history + [(user_msg, safe_out)], False
355
-
356
- # ===== SCENARIO MODE (Phase 1 or Phase 2) =====
357
- # Build context ONLY for scenario path
358
  session_snips = "\n---\n".join(_session_rag.retrieve(
359
  "diabetes screening Indigenous Métis mobile program cost throughput outcomes logistics",
360
  k=6
361
  ))
 
 
362
  snapshot = _load_snapshot()
363
  policy_context = retrieve_context(
364
  "mobile diabetes screening Indigenous community outreach cultural safety data governance outcomes"
365
  )
366
  computed = compute_operational_numbers(snapshot)
 
 
367
  user_lower = (safe_in or "").lower()
368
  mdsi_extra = _mdsi_block() if ("diabetes" in user_lower or "mdsi" in user_lower or "mobile screening" in user_lower) else ""
 
 
369
  scenario_block = safe_in if len((safe_in or "")) > 0 else ""
 
 
 
 
 
 
 
370
  system_preamble = build_system_preamble(
371
  snapshot=snapshot,
372
  policy_context=policy_context,
373
  computed_numbers=computed,
374
- scenario_text=scenario_block + (f"\n\nExecutive Pre-Computed Blocks:\n{mdsi_extra}" if mdsi_extra else ""),
375
  session_snips=session_snips
376
  )
377
 
378
- # Phase selection
379
- if awaiting_answers:
380
- # -------- Phase 2 --------
381
- phase_directive = (
382
- "\n\n[INSTRUCTION TO MODEL]\n"
383
- "Produce **Phase 2** only: output a header 'Structured Analysis' and follow the exact section order "
384
- "(Prioritization, Capacity, Cost, Clinical Benefits, ClarityOps Top 3 Recommendations). "
385
- "Use uploaded files + the user's latest answers as authoritative. Show calculations, units, and a brief Provenance.\n"
386
- )
387
- augmented_user = SYSTEM_MASTER + "\n\n" + system_preamble + "\n\nClarification answers from user:\n" + (safe_in or "<none>") + phase_directive
388
-
389
- out = cohere_chat(augmented_user, history)
390
- if not out:
391
- model, tokenizer = load_local_model()
392
- inputs = build_inputs(tokenizer, augmented_user, history)
393
- out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
394
-
395
- if isinstance(out, str):
396
- for tag in ("Assistant:", "System:", "User:"):
397
- if out.startswith(tag):
398
- out = out[len(tag):].strip()
399
- out = _sanitize_text(out)
400
-
401
- safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
402
- if blocked_out:
403
- safe_out = refusal_reply(reason_out)
404
-
405
- log_event("assistant_reply", None, {
406
- **hash_summary("prompt", augmented_user if not PERSIST_CONTENT else ""),
407
- **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
408
- "awaiting_next_phase": False
409
- })
410
- return history + [(user_msg, safe_out)], False
411
-
412
  else:
413
- # -------- Phase 1 --------
414
- phase_directive = (
415
- "\n\n[INSTRUCTION TO MODEL]\n"
416
- "Produce **Phase 1** only: output a header 'Clarification Questions' and ask up to 5 concise, grouped questions "
417
- "(Prioritization, Capacity, Cost, Clinical, Recommendations). Then STOP and WAIT.\n"
418
- )
419
- augmented_user = SYSTEM_MASTER + "\n\n" + system_preamble + "\n\nUser scenario:\n" + (safe_in or "") + phase_directive
420
-
421
- out = cohere_chat(augmented_user, history)
422
- if not out:
423
- model, tokenizer = load_local_model()
424
- inputs = build_inputs(tokenizer, augmented_user, history)
425
- out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
426
-
427
- if isinstance(out, str):
428
- for tag in ("Assistant:", "System:", "User:"):
429
- if out.startswith(tag):
430
- out = out[len(tag):].strip()
431
- out = _sanitize_text(out)
432
-
433
- safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
434
- if blocked_out:
435
- safe_out = refusal_reply(reason_out)
436
-
437
- log_event("assistant_reply", None, {
438
- **hash_summary("prompt", augmented_user if not PERSIST_CONTENT else ""),
439
- **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
440
- "awaiting_next_phase": True
441
- })
442
- return history + [(user_msg, safe_out)], True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
 
444
  except Exception as e:
445
  err = f"Error: {e}"
@@ -447,7 +422,7 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
447
  traceback.print_exc()
448
  except Exception:
449
  pass
450
- return history + [(user_msg, err)], False
451
 
452
  # ---------- Theme & CSS ----------
453
  theme = gr.themes.Soft(primary_hue="teal", neutral_hue="slate", radius_size=gr.themes.sizes.radius_lg)
@@ -486,7 +461,7 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
486
  elem_classes="hero-box"
487
  )
488
  hero_send = gr.Button("➤", scale=0)
489
- gr.Markdown('<div class="hint">ClarityOps will first ask up to 5 clarifications (only for scenarios), then produce a structured analysis.</div>')
490
 
491
  # --- MAIN APP (hidden until first message) ---
492
  with gr.Column(elem_id="chat-container", visible=False) as app_wrap:
@@ -509,7 +484,7 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
509
  # ---- State
510
  state_history = gr.State(value=[])
511
  state_uploaded = gr.State(value=[])
512
- state_awaiting = gr.State(value=False) # False -> normal or Phase 1 next; True -> expecting Phase 2 answers
513
 
514
  # ---- Uploads
515
  def _store_uploads(files, current):
@@ -524,7 +499,6 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
524
  def _on_send(user_msg, history, up_paths, awaiting):
525
  try:
526
  if not user_msg or not user_msg.strip():
527
- # no toggle on empty
528
  return history, "", history, awaiting
529
  new_history, new_awaiting = clarityops_reply(
530
  user_msg.strip(), history or [], None, up_paths or [], awaiting_answers=awaiting
 
11
  from privacy import redact_text
12
 
13
  # ---------- Environment / cache (Spaces-safe, writable) ----------
14
+ HOME = pathlib.Path.home() # e.g., /home/user
15
  HF_HOME = str(HOME / ".cache" / "huggingface")
16
  HF_HUB_CACHE = str(HOME / ".cache" / "huggingface" / "hub")
17
  HF_TRANSFORMERS = str(HOME / ".cache" / "huggingface" / "transformers")
 
21
 
22
  os.environ.setdefault("HF_HOME", HF_HOME)
23
  os.environ.setdefault("HF_HUB_CACHE", HF_HUB_CACHE)
24
+ os.environ.setdefault("TRANSFORMERS_CACHE", HF_TRANSFORMERS) # deprecation warning is harmless
25
  os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", ST_HOME)
26
  os.environ.setdefault("GRADIO_TEMP_DIR", GRADIO_TMP)
27
  os.environ.setdefault("GRADIO_CACHE_DIR", GRADIO_CACHE)
 
34
  except Exception:
35
  pass
36
 
37
+ # Optional Cohere (preferred path)
38
  try:
39
  import cohere
40
  _HAS_COHERE = True
 
53
  from mdsi_analysis import capacity_projection, cost_estimate, outcomes_summary
54
 
55
  # ---------- Config ----------
56
+ MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct") # HF fallback
57
  HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
58
 
59
  COHERE_API_KEY = os.getenv("COHERE_API_KEY")
60
  USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
61
 
62
+ # Generous output (Cohere + HF)
63
  MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "2048"))
64
 
65
+ # ---------- System Master (baseline guardrails; scenario-specific rules are injected only when needed) ----------
66
  SYSTEM_MASTER = """
67
+ SYSTEM ROLE
68
  You are ClarityOps, a medical analytics system that interacts only via this chat.
69
 
70
+ Core guardrails (always active):
71
  - Use ONLY information provided in this conversation (scenario text + uploaded files).
72
  - Never invent data. If something required is missing after clarifications, output the literal token: INSUFFICIENT_DATA.
73
+ - Use correct medical units and plausible ranges.
74
+ - Avoid PHI; aggregate only; apply small-cell suppression when cohort < 10.
75
+
76
+ Scenario mode (when a scenario is detected):
77
+ - Run in TWO PHASES:
78
  Phase 1: Ask up to 5 concise clarification questions, grouped by category (Prioritization, Capacity, Cost, Clinical, Recommendations). Then STOP and WAIT.
79
+ Phase 2: After answers are provided, produce the final structured analysis in this exact order:
80
+ 1. Prioritization
81
+ 2. Capacity
82
+ 3. Cost
83
+ 4. Clinical Benefits
84
+ 5. ClarityOps Top 3 Recommendations
85
+ Include a short “Provenance” mapping each key output to scenario text, files, and/or clarified answers.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  """.strip()
87
 
88
  # ---------- Helpers ----------
 
100
  r"\bdescribe\s+yourself\b", r"\band\s+you\s*\?\b", r"\byour\s+name\b",
101
  r"\bwho\s+am\s+i\s+chatting\s+with\b",
102
  ]
103
+ def match(t): return any(re.search(p, (t or "").strip().lower()) for p in patterns)
104
  if match(message): return True
105
  if history:
106
  last_user = history[-1][0] if isinstance(history[-1], (list, tuple)) else None
 
128
  parts.append("Assistant:")
129
  return "\n".join(parts)
130
 
131
+ def _summarize_artifacts(arts):
132
+ """Turn parsed artifacts (CSV, etc.) into a compact, model-friendly text block."""
133
+ if not arts:
134
+ return ""
135
+ blocks = []
136
+ for a in arts:
137
+ kind = a.get("kind")
138
+ name = a.get("name") or a.get("path") or "<file>"
139
+ if kind == "csv":
140
+ cols = ", ".join(map(str, a.get("columns", [])[:40])) or "<no columns>"
141
+ n_rows = a.get("n_rows_sampled", 0)
142
+ sample_rows = a.get("preview_rows") or []
143
+ first = sample_rows[0] if sample_rows else {}
144
+ first_str = ", ".join(f"{k}={str(v)[:60]}" for k, v in first.items()) if first else "<no sample>"
145
+ blocks.append(
146
+ f"FILE: {name}\nTYPE: CSV\nCOLUMNS: {cols}\nSAMPLED_ROWS: {n_rows}\nFIRST_ROW: {first_str}"
147
+ )
148
+ else:
149
+ text = a.get("text", "")
150
+ if text:
151
+ blocks.append(f"FILE: {name}\nTYPE: {kind}\nEXTRACT (truncated): {text[:800]}")
152
+ return "## Data File Summaries\n" + "\n\n".join(blocks)
153
+
154
+ def _user_requested_files(text: str) -> bool:
155
+ low = (text or "").lower()
156
+ return "use the data files" in low or "use files" in low or "use uploaded" in low
157
+
158
+ def _looks_like_scenario(text: str) -> bool:
159
+ """Heuristics to detect scenario/case-study inputs and avoid triggering on greetings."""
160
+ low = (text or "").lower().strip()
161
+ if not low:
162
  return False
163
+ if len(low) > 600:
 
 
 
 
 
 
 
 
 
 
 
 
164
  return True
165
+ hit_words = sum(w in low for w in [
166
+ "case study", "scenario", "background", "objective", "evaluation questions",
167
+ "expected output", "structured analysis", "prioritization", "capacity", "clinical benefits"
168
+ ])
169
+ return hit_words >= 2
 
 
170
 
171
  # ---------- Cohere first ----------
172
  def cohere_chat(message, history):
 
270
  "outcomes_summary": outcomes
271
  }, indent=2)
272
 
273
+ # ---------- Core chat logic (auto scenario detection + two-phase when applicable) ----------
274
  def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answers=False):
275
  """
276
+ awaiting_answers:
277
+ - False: normal chat or Phase 1 (if scenario)
278
+ - True: Phase 2 (if scenario)
279
  """
280
  try:
281
  log_event("user_message", None, {"sizes": {"chars": len(user_msg or "")}})
 
284
  safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
285
  if blocked_in:
286
  ans = refusal_reply(reason_in)
287
+ return history + [(user_msg, ans)], awaiting_answers
288
 
289
  # Identity short-circuit
290
  if is_identity_query(safe_in, history):
291
  ans = "I am ClarityOps, your strategic decision making AI partner."
292
+ return history + [(user_msg, ans)], awaiting_answers
293
 
294
+ # Ingest uploads
295
  if uploaded_files_paths:
296
  ing = extract_text_from_files(uploaded_files_paths)
297
  chunks = ing.get("chunks", []) if isinstance(ing, dict) else (ing or [])
 
302
  _session_rag.register_artifacts(artifacts)
303
  log_event("uploads_added", None, {"chunks": len(chunks), "artifacts": len(artifacts)})
304
 
305
+ # Prepare artifact summary for prompt use
306
+ data_summary_text = _summarize_artifacts(_session_rag.artifacts)
307
+
308
+ # If user asks to "use files" but none parsed, ask for them explicitly
309
+ if _user_requested_files(safe_in) and not _session_rag.artifacts:
310
+ msg = ("I don’t see any parsed data files yet. Please attach your CSV/XLSX/PDF first, "
311
+ "then resend your scenario. I’ll auto-summarize the files and use them in the analysis.")
312
+ return history + [(user_msg, msg)], False
313
+
314
+ # Quick helper: show latest CSV columns if asked
315
  if re.search(r"\b(columns?|headers?)\b", (safe_in or "").lower()):
316
  cols = _session_rag.get_latest_csv_columns()
317
  if cols:
318
  return history + [(user_msg, "Here are the column names from your most recent CSV upload:\n\n- " + "\n- ".join(cols))], awaiting_answers
319
 
320
+ # Retrieval snippets
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
  session_snips = "\n---\n".join(_session_rag.retrieve(
322
  "diabetes screening Indigenous Métis mobile program cost throughput outcomes logistics",
323
  k=6
324
  ))
325
+
326
+ # Background computation/policy context
327
  snapshot = _load_snapshot()
328
  policy_context = retrieve_context(
329
  "mobile diabetes screening Indigenous community outreach cultural safety data governance outcomes"
330
  )
331
  computed = compute_operational_numbers(snapshot)
332
+
333
+ # MDSi extras if relevant words present
334
  user_lower = (safe_in or "").lower()
335
  mdsi_extra = _mdsi_block() if ("diabetes" in user_lower or "mdsi" in user_lower or "mobile screening" in user_lower) else ""
336
+
337
+ # Build scenario-text (always include file summaries if present)
338
  scenario_block = safe_in if len((safe_in or "")) > 0 else ""
339
+ scenario_text_full = (
340
+ scenario_block
341
+ + (f"\n\nExecutive Pre-Computed Blocks:\n{mdsi_extra}" if mdsi_extra else "")
342
+ + ("\n\n" + data_summary_text if data_summary_text else "")
343
+ )
344
+
345
+ # Build the common system preamble
346
  system_preamble = build_system_preamble(
347
  snapshot=snapshot,
348
  policy_context=policy_context,
349
  computed_numbers=computed,
350
+ scenario_text=scenario_text_full,
351
  session_snips=session_snips
352
  )
353
 
354
+ # Decide if this turn should be in scenario mode
355
+ scenario_mode = awaiting_answers or _looks_like_scenario(safe_in)
356
+
357
+ # Phase directive (only if scenario mode)
358
+ if scenario_mode:
359
+ if not awaiting_answers:
360
+ phase_directive = (
361
+ "\n\n[INSTRUCTION TO MODEL]\n"
362
+ "Produce **Phase 1** only: First show a brief 'Data Snapshot' summarizing any parsed files (max 6 bullets), "
363
+ "then output a header 'Clarification Questions' and ask up to 5 concise, grouped questions "
364
+ "(Prioritization, Capacity, Cost, Clinical, Recommendations). Then STOP and WAIT.\n"
365
+ )
366
+ else:
367
+ phase_directive = (
368
+ "\n\n[INSTRUCTION TO MODEL]\n"
369
+ "Produce **Phase 2** only: output a header 'Structured Analysis' and follow the exact section order "
370
+ "(Prioritization, Capacity, Cost, Clinical Benefits, ClarityOps Top 3 Recommendations). "
371
+ "Use uploaded files + the user's latest answers as authoritative. Show calculations, units, and a brief Provenance.\n"
372
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
  else:
374
+ # Normal chat: no phase instruction
375
+ phase_directive = ""
376
+
377
+ augmented_user = SYSTEM_MASTER + "\n\n" + system_preamble + "\n\nUser message:\n" + safe_in + phase_directive
378
+
379
+ # Call LLM (Cohere preferred, HF fallback)
380
+ out = cohere_chat(augmented_user, history)
381
+ if not out:
382
+ model, tokenizer = load_local_model()
383
+ inputs = build_inputs(tokenizer, augmented_user, history)
384
+ out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
385
+
386
+ # Clean + sanitize
387
+ if isinstance(out, str):
388
+ for tag in ("Assistant:", "System:", "User:"):
389
+ if out.startswith(tag):
390
+ out = out[len(tag):].strip()
391
+ out = _sanitize_text(out)
392
+
393
+ # Safety (output)
394
+ safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
395
+ if blocked_out:
396
+ safe_out = refusal_reply(reason_out)
397
+
398
+ # Flip phase state only if we were in scenario mode
399
+ new_awaiting = awaiting_answers
400
+ if scenario_mode:
401
+ low = safe_out.lower()
402
+ if not awaiting_answers and "clarification questions" in low:
403
+ new_awaiting = True
404
+ elif awaiting_answers and "structured analysis" in low:
405
+ new_awaiting = False
406
+ else:
407
+ new_awaiting = False # normal chat never toggles scenario phase
408
+
409
+ # Audit (content-free fingerprints)
410
+ log_event("assistant_reply", None, {
411
+ **hash_summary("prompt", augmented_user if not PERSIST_CONTENT else ""),
412
+ **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
413
+ "awaiting_next_phase": new_awaiting,
414
+ "scenario_mode": scenario_mode
415
+ })
416
+
417
+ return history + [(user_msg, safe_out)], new_awaiting
418
 
419
  except Exception as e:
420
  err = f"Error: {e}"
 
422
  traceback.print_exc()
423
  except Exception:
424
  pass
425
+ return history + [(user_msg, err)], awaiting_answers
426
 
427
  # ---------- Theme & CSS ----------
428
  theme = gr.themes.Soft(primary_hue="teal", neutral_hue="slate", radius_size=gr.themes.sizes.radius_lg)
 
461
  elem_classes="hero-box"
462
  )
463
  hero_send = gr.Button("➤", scale=0)
464
+ gr.Markdown('<div class="hint">ClarityOps will first ask up to 5 clarifications (only if it detects a scenario), then produce a structured analysis.</div>')
465
 
466
  # --- MAIN APP (hidden until first message) ---
467
  with gr.Column(elem_id="chat-container", visible=False) as app_wrap:
 
484
  # ---- State
485
  state_history = gr.State(value=[])
486
  state_uploaded = gr.State(value=[])
487
+ state_awaiting = gr.State(value=False) # False -> no pending Phase-2; True -> expecting Phase-2 answers
488
 
489
  # ---- Uploads
490
  def _store_uploads(files, current):
 
499
  def _on_send(user_msg, history, up_paths, awaiting):
500
  try:
501
  if not user_msg or not user_msg.strip():
 
502
  return history, "", history, awaiting
503
  new_history, new_awaiting = clarityops_reply(
504
  user_msg.strip(), history or [], None, up_paths or [], awaiting_answers=awaiting