Spaces:

VEDAGI1
/

Medica_DecisionSupportAI

Sleeping

App Files Community

Rajan Sharma committed on Sep 12

Commit

909d570

verified ·

1 Parent(s): eb04439

Update app.py

Browse files

Files changed (1) hide show

app.py +157 -134

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os, re, json, traceback, pathlib
 from functools import lru_cache
@@ -61,23 +62,41 @@ USE_HOSTED_COHERE = bool(COHERE_API_KEY and _HAS_COHERE)
 # Larger output (Cohere + HF fallback)
 MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "2048"))
-# ---------- Fixed System Preamble for Medical Guardrails ----------
 SYSTEM_MASTER = """
 SYSTEM ROLE (fixed, always on)
-You are ClarityOps, a medical analytics assistant.
 Absolute rules:
-- Use ONLY information provided in this conversation (user messages + uploaded files).
 - Never invent data. If something required is missing after clarifications, output the literal token: INSUFFICIENT_DATA.
 - Prefer analytics/longitudinal recommendations (risk targeting, follow-up, clustering) over generic ops advice.
-- Show all calculations explicitly when computing capacity and cost.
 - Use correct clinical units and plausible ranges.
 Medical guardrails (always apply):
 - Units: BP in mmHg, A1c in %, BMI in kg/m², Total Cholesterol in mmol/L (or as provided), Percentages in %.
 - Plausible ranges: A1c 3–20 %, SBP 60–250 mmHg, DBP 30–150 mmHg, BMI 10–70 kg/m², Total Chol 2–12 mmol/L.
 - Privacy: avoid PHI; aggregate only; apply small-cell suppression where cohort < 10 (describe at a higher level).
 - When data includes mixed or ambiguous indicators, ask to confirm preferred indicators (e.g., obesity/metabolic syndrome vs self-reported diabetes).
 """.strip()
 # ---------- Helpers ----------
@@ -114,8 +133,8 @@ def _sanitize_text(s: str) -> str:
         return s
     return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
-def _history_to_prompt(message, history, system_text):
-    parts = [f"System: {system_text}"]
     for u, a in _iter_user_assistant(history):
         if u: parts.append(f"User: {u}")
         if a: parts.append(f"Assistant: {a}")
@@ -123,13 +142,55 @@ def _history_to_prompt(message, history, system_text):
     parts.append("Assistant:")
     return "\n".join(parts)
 # ---------- Cohere first ----------
-def cohere_chat(message, history, system_text=SYSTEM_MASTER):
     if not USE_HOSTED_COHERE:
         return None
     try:
         client = cohere.Client(api_key=COHERE_API_KEY)
-        prompt = _history_to_prompt(message, history, system_text)
         resp = client.chat(
             model="command-r7b-12-2024",
             message=prompt,
@@ -172,8 +233,8 @@ def load_local_model():
         mdl.config.eos_token_id = tok.eos_token_id
     return mdl, tok
-def build_inputs(tokenizer, message, history, system_text=SYSTEM_MASTER):
-    msgs = [{"role": "system", "content": system_text}]
     for u, a in _iter_user_assistant(history):
         if u: msgs.append({"role": "user", "content": u})
         if a: msgs.append({"role": "assistant", "content": a})
@@ -225,65 +286,28 @@ def _mdsi_block():
         "outcomes_summary": outcomes
     }, indent=2)
-# ---------- Scenario auto-detection ----------
-_SCENARIO_HEADINGS = [
-    "context", "background", "scenario", "case study",
-    "data inputs", "inputs", "evaluation questions", "questions",
-    "recommendations", "deployment strategy", "next steps", "assumptions"
-]
-_SCENARIO_KEYWORDS = [
-    "diabetes", "screening", "metabolic", "prevalence", "capacity",
-    "cost", "startup", "ongoing", "clinical", "a1c", "mmhg", "bmi",
-    "cholesterol", "settlements", "program", "mobile", "ops", "throughput"
-]
-def _looks_like_scenario(text: str, uploaded_paths) -> bool:
-    if not text:
-        return False
-    t = text.strip()
-    low = t.lower()
-    # 1) Length + structure signals
-    if len(t) >= 450 and any(h in low for h in _SCENARIO_HEADINGS):
-        return True
-    # 2) Strong clinical/ops vocabulary density
-    kw_hits = sum(1 for k in _SCENARIO_KEYWORDS if k in low)
-    if len(t) >= 350 and kw_hits >= 4:
-        return True
-    # 3) Table/percent/metrics hints
-    if re.search(r"\b\d{2,4}\b", low) and re.search(r"%|\bmmhg\b|\bbmi\b|\ba1c\b", low):
-        if len(t) >= 300:
-            return True
-    # 4) Files attached (CSV/PDF/DOCX) + domain keywords
-    if uploaded_paths and kw_hits >= 2:
-        return True
-    return False
-# ---------- Core chat logic (auto scenario) ----------
-def clarityops_reply(user_msg, history, tz, uploaded_files_paths, mode="chat"):
     """
-    mode: "chat" (default) or "awaiting_answers"
-    Returns: (updated_history, updated_mode)
     """
     try:
-        log_event("user_message", None, {"sizes": {"chars": len(user_msg or "")}, "mode": mode})
         # Safety (input)
         safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
         if blocked_in:
             ans = refusal_reply(reason_in)
-            return history + [(user_msg, ans)], mode
         # Identity short-circuit
         if is_identity_query(safe_in, history):
             ans = "I am ClarityOps, your strategic decision making AI partner."
-            return history + [(user_msg, ans)], mode
-        # Ingest uploads
         if uploaded_files_paths:
             ing = extract_text_from_files(uploaded_files_paths)
             chunks = ing.get("chunks", []) if isinstance(ing, dict) else (ing or [])
@@ -294,27 +318,54 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, mode="chat"):
                 _session_rag.register_artifacts(artifacts)
             log_event("uploads_added", None, {"chunks": len(chunks), "artifacts": len(artifacts)})
-        # Columns helper
         if re.search(r"\b(columns?|headers?)\b", (safe_in or "").lower()):
             cols = _session_rag.get_latest_csv_columns()
             if cols:
-                return history + [(user_msg, "Here are the column names from your most recent CSV upload:\n\n- " + "\n- ".join(cols))], mode
-        # Session retrieval & context
         session_snips = "\n---\n".join(_session_rag.retrieve(
             "diabetes screening Indigenous Métis mobile program cost throughput outcomes logistics",
             k=6
         ))
         snapshot = _load_snapshot()
         policy_context = retrieve_context(
             "mobile diabetes screening Indigenous community outreach cultural safety data governance outcomes"
         )
         computed = compute_operational_numbers(snapshot)
         user_lower = (safe_in or "").lower()
         mdsi_extra = _mdsi_block() if ("diabetes" in user_lower or "mdsi" in user_lower or "mobile screening" in user_lower) else ""
         scenario_block = safe_in if len((safe_in or "")) > 0 else ""
         system_preamble = build_system_preamble(
             snapshot=snapshot,
@@ -324,9 +375,9 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, mode="chat"):
             session_snips=session_snips
         )
-        # -------- Auto-routing --------
-        if mode == "awaiting_answers":
-            # Any reply now triggers Phase 2
             phase_directive = (
                 "\n\n[INSTRUCTION TO MODEL]\n"
                 "Produce **Phase 2** only: output a header 'Structured Analysis' and follow the exact section order "
@@ -356,15 +407,21 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, mode="chat"):
                 **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
                 "awaiting_next_phase": False
             })
-            return history + [(user_msg, safe_out)], "chat"
-        # Normal chat — unless it looks like a scenario
-        if not _looks_like_scenario(safe_in, uploaded_files_paths):
-            normal_user = SYSTEM_MASTER + "\n\n" + system_preamble + "\n\nUser message:\n" + safe_in
-            out = cohere_chat(normal_user, history)
             if not out:
                 model, tokenizer = load_local_model()
-                inputs = build_inputs(tokenizer, normal_user, history)
                 out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
             if isinstance(out, str):
@@ -378,42 +435,11 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, mode="chat"):
                 safe_out = refusal_reply(reason_out)
             log_event("assistant_reply", None, {
-                **hash_summary("prompt", normal_user if not PERSIST_CONTENT else ""),
                 **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
-                "awaiting_next_phase": False
             })
-            return history + [(user_msg, safe_out)], "chat"
-        # Scenario detected -> Phase 1
-        phase_directive = (
-            "\n\n[INSTRUCTION TO MODEL]\n"
-            "Produce **Phase 1** only: output a header 'Clarification Questions' and ask up to 5 concise, grouped questions "
-            "(Prioritization, Capacity, Cost, Clinical, Recommendations). Then STOP and WAIT.\n"
-        )
-        augmented_user = SYSTEM_MASTER + "\n\n" + system_preamble + "\n\nUser scenario:\n" + safe_in + phase_directive
-        out = cohere_chat(augmented_user, history)
-        if not out:
-            model, tokenizer = load_local_model()
-            inputs = build_inputs(tokenizer, augmented_user, history)
-            out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
-        if isinstance(out, str):
-            for tag in ("Assistant:", "System:", "User:"):
-                if out.startswith(tag):
-                    out = out[len(tag):].strip()
-        out = _sanitize_text(out)
-        safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
-        if blocked_out:
-            safe_out = refusal_reply(reason_out)
-        log_event("assistant_reply", None, {
-            **hash_summary("prompt", augmented_user if not PERSIST_CONTENT else ""),
-            **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
-            "awaiting_next_phase": True
-        })
-        return history + [(user_msg, safe_out)], "awaiting_answers"
     except Exception as e:
         err = f"Error: {e}"
@@ -421,7 +447,7 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, mode="chat"):
             traceback.print_exc()
         except Exception:
             pass
-        return history + [(user_msg, err)], mode
 # ---------- Theme & CSS ----------
 theme = gr.themes.Soft(primary_hue="teal", neutral_hue="slate", radius_size=gr.themes.sizes.radius_lg)
@@ -454,17 +480,13 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
             gr.HTML("<h2>What can I help with?</h2>")
             with gr.Row(elem_classes="search-row"):
                 hero_msg = gr.Textbox(
-                    placeholder="Ask anything — paste a scenario (and attach files) to trigger structured analysis.",
                     show_label=False,
                     lines=1,
                     elem_classes="hero-box"
                 )
                 hero_send = gr.Button("➤", scale=0)
-            gr.Markdown(
-                '<div class="hint">Tip: Pasting a structured medical scenario (with sections like '
-                '<i>Context, Data Inputs, Evaluation Questions</i>) will auto-trigger clarifications first, '
-                'then the final analysis. CSVs are auto-summarized.</div>'
-            )
     # --- MAIN APP (hidden until first message) ---
     with gr.Column(elem_id="chat-container", visible=False) as app_wrap:
@@ -478,7 +500,7 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
             msg = gr.Textbox(
                 label="",
                 show_label=False,
-                placeholder="Chat freely… Paste a scenario to auto-start clarifications.",
                 scale=10
             )
             send = gr.Button("Send", scale=1)
@@ -487,7 +509,7 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
     # ---- State
     state_history = gr.State(value=[])
     state_uploaded = gr.State(value=[])
-    state_mode = gr.State(value="chat")  # "chat" or "awaiting_answers"
     # ---- Uploads
     def _store_uploads(files, current):
@@ -499,26 +521,27 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
     uploads.change(fn=_store_uploads, inputs=[uploads, state_uploaded], outputs=state_uploaded)
     # ---- Core send (used by both hero input and chat input)
-    def _on_send(user_msg, history, up_paths, mode):
         try:
             if not user_msg or not user_msg.strip():
-                return history, "", history, mode
-            new_history, new_mode = clarityops_reply(
-                user_msg.strip(), history or [], None, up_paths or [], mode=mode
             )
-            return new_history, "", new_history, new_mode
         except Exception as e:
             err = f"Error: {e}"
             try: traceback.print_exc()
             except Exception: pass
             new_hist = (history or []) + [(user_msg or "", err)]
-            return new_hist, "", new_hist, mode
     # ---- Hero -> App transition + first send
-    def _hero_start(user_msg, history, up_paths, mode):
-        chat_o, msg_o, hist_o, mode_o = _on_send(user_msg, history, up_paths, mode)
         return (
-            chat_o, msg_o, hist_o, mode_o,
             gr.update(visible=False),   # hide hero
             gr.update(visible=True),    # show app
             ""                          # clear hero box
@@ -526,35 +549,35 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
     hero_send.click(
         _hero_start,
-        inputs=[hero_msg, state_history, state_uploaded, state_mode],
-        outputs=[chat, msg, state_history, state_mode, hero_wrap, app_wrap, hero_msg],
         concurrency_limit=2, queue=True
     )
     hero_msg.submit(
         _hero_start,
-        inputs=[hero_msg, state_history, state_uploaded, state_mode],
-        outputs=[chat, msg, state_history, state_mode, hero_wrap, app_wrap, hero_msg],
         concurrency_limit=2, queue=True
     )
     # ---- Normal chat interactions after hero is gone
-    send.click(_on_send, inputs=[msg, state_history, state_uploaded, state_mode],
-               outputs=[chat, msg, state_history, state_mode],
                concurrency_limit=2, queue=True)
-    msg.submit(_on_send, inputs=[msg, state_history, state_uploaded, state_mode],
-               outputs=[chat, msg, state_history, state_mode],
                concurrency_limit=2, queue=True)
     def _on_clear():
-        # reset to fresh hero screen and chat mode
         return (
-            [], "", [], "chat",
             gr.update(visible=True),   # show hero
             gr.update(visible=False),  # hide app
             ""                         # clear hero input
         )
-    clear.click(_on_clear, None, [chat, msg, state_history, state_mode, hero_wrap, app_wrap, hero_msg])
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", "7860"))

+# app.py
 import os, re, json, traceback, pathlib
 from functools import lru_cache
 # Larger output (Cohere + HF fallback)
 MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "2048"))
+# ---------- System Master (two-phase, LLM-only behavior) ----------
 SYSTEM_MASTER = """
 SYSTEM ROLE (fixed, always on)
+You are ClarityOps, a medical analytics system that interacts only via this chat.
 Absolute rules:
+- Use ONLY information provided in this conversation (scenario text + uploaded files).
 - Never invent data. If something required is missing after clarifications, output the literal token: INSUFFICIENT_DATA.
+- When a SCENARIO is detected, always run in TWO PHASES:
+  Phase 1: Ask up to 5 concise clarification questions, grouped by category (Prioritization, Capacity, Cost, Clinical, Recommendations). Then STOP and WAIT.
+  Phase 2: After answers are provided, produce the final structured analysis exactly in the required format.
+Core behavior:
+- Read and synthesize any user-uploaded files (e.g., CSV/XLSX/PDF) relevant to the scenario.
 - Prefer analytics/longitudinal recommendations (risk targeting, follow-up, clustering) over generic ops advice.
+- Show all calculations explicitly for capacity and costs (e.g., “6 teams × 8 clients/day × 60 days = 2,880”).
 - Use correct clinical units and plausible ranges.
+- Include a brief “Provenance” section mapping each key output to scenario text, files, and/or clarified answers.
 Medical guardrails (always apply):
 - Units: BP in mmHg, A1c in %, BMI in kg/m², Total Cholesterol in mmol/L (or as provided), Percentages in %.
 - Plausible ranges: A1c 3–20 %, SBP 60–250 mmHg, DBP 30–150 mmHg, BMI 10–70 kg/m², Total Chol 2–12 mmol/L.
 - Privacy: avoid PHI; aggregate only; apply small-cell suppression where cohort < 10 (describe at a higher level).
 - When data includes mixed or ambiguous indicators, ask to confirm preferred indicators (e.g., obesity/metabolic syndrome vs self-reported diabetes).
+Formatting hard rules (SCENARIO mode only):
+- Phase 1 output MUST include the header line: “Clarification Questions”
+- Phase 2 output MUST include the header line: “Structured Analysis”
+- Phase 2 MUST follow this exact section order:
+  1. Prioritization
+  2. Capacity
+  3. Cost
+  4. Clinical Benefits
+  5. ClarityOps Top 3 Recommendations
+  (Include a short Provenance block at the end.)
 """.strip()
 # ---------- Helpers ----------
         return s
     return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
+def _history_to_prompt(message, history):
+    parts = [f"System: {SYSTEM_MASTER}"]
     for u, a in _iter_user_assistant(history):
         if u: parts.append(f"User: {u}")
         if a: parts.append(f"Assistant: {a}")
     parts.append("Assistant:")
     return "\n".join(parts)
+# ---------- Scenario auto-detection (stricter) ----------
+_SCENARIO_HEADINGS = [
+    "context", "background", "scenario", "case study",
+    "data inputs", "inputs", "evaluation questions", "questions",
+    "recommendations", "deployment strategy", "next steps", "assumptions"
+]
+_SCENARIO_KEYWORDS = [
+    "diabetes", "screening", "metabolic", "prevalence", "settlements",
+    "capacity", "throughput", "cost", "startup", "ongoing",
+    "clinical", "a1c", "mmhg", "bmi", "cholesterol",
+    "mobile", "program", "mdsi", "ops"
+]
+def _looks_like_scenario(text: str, uploaded_paths) -> bool:
+    """
+    Conservative trigger: only enter scenario mode on clearly structured,
+    domain-heavy content or when substantial files are attached.
+    """
+    if not text:
+        return False
+    t = text.strip()
+    low = t.lower()
+    n = len(t)
+    headings = sum(1 for h in _SCENARIO_HEADINGS if h in low)
+    kw_hits = sum(1 for k in _SCENARIO_KEYWORDS if k in low)
+    has_metrics = (
+        bool(re.search(r"\b\d{2,4}\b", low)) and
+        bool(re.search(r"%|\bmmhg\b|\bbmi\b|\ba1c\b", low))
+    )
+    # File trigger if some substance exists
+    if uploaded_paths and (n >= 200 or kw_hits >= 3):
+        return True
+    if n >= 700 and headings >= 1:
+        return True
+    if n >= 450 and headings >= 2:
+        return True
+    if n >= 500 and kw_hits >= 6 and has_metrics:
+        return True
+    return False
 # ---------- Cohere first ----------
+def cohere_chat(message, history):
     if not USE_HOSTED_COHERE:
         return None
     try:
         client = cohere.Client(api_key=COHERE_API_KEY)
+        prompt = _history_to_prompt(message, history)
         resp = client.chat(
             model="command-r7b-12-2024",
             message=prompt,
         mdl.config.eos_token_id = tok.eos_token_id
     return mdl, tok
+def build_inputs(tokenizer, message, history):
+    msgs = [{"role": "system", "content": SYSTEM_MASTER}]
     for u, a in _iter_user_assistant(history):
         if u: msgs.append({"role": "user", "content": u})
         if a: msgs.append({"role": "assistant", "content": a})
         "outcomes_summary": outcomes
     }, indent=2)
+# ---------- Core chat logic (auto scenario routing) ----------
+def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answers=False):
     """
+    awaiting_answers (bool state):
+      - False: Normal chat route OR Scenario Phase 1 (if detected)
+      - True : Scenario Phase 2 (consume answers -> produce Structured Analysis)
     """
     try:
+        log_event("user_message", None, {"sizes": {"chars": len(user_msg or "")}})
         # Safety (input)
         safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
         if blocked_in:
             ans = refusal_reply(reason_in)
+            return history + [(user_msg, ans)], False  # never hold for next phase on refusal
         # Identity short-circuit
         if is_identity_query(safe_in, history):
             ans = "I am ClarityOps, your strategic decision making AI partner."
+            return history + [(user_msg, ans)], False
+        # Ingest uploads (text + artifacts like CSV headers/summary)
         if uploaded_files_paths:
             ing = extract_text_from_files(uploaded_files_paths)
             chunks = ing.get("chunks", []) if isinstance(ing, dict) else (ing or [])
                 _session_rag.register_artifacts(artifacts)
             log_event("uploads_added", None, {"chunks": len(chunks), "artifacts": len(artifacts)})
+        # Columns helper (quick utility)
         if re.search(r"\b(columns?|headers?)\b", (safe_in or "").lower()):
             cols = _session_rag.get_latest_csv_columns()
             if cols:
+                return history + [(user_msg, "Here are the column names from your most recent CSV upload:\n\n- " + "\n- ".join(cols))], awaiting_answers
+        # -------- Decide routing early --------
+        is_scenario = awaiting_answers or _looks_like_scenario(safe_in or "", uploaded_files_paths)
+        # ===== NORMAL CHAT =====
+        if not is_scenario:
+            normal_user = SYSTEM_MASTER + "\n\nUser message:\n" + (safe_in or "")
+            out = cohere_chat(normal_user, history)
+            if not out:
+                model, tokenizer = load_local_model()
+                inputs = build_inputs(tokenizer, normal_user, history)
+                out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
+            if isinstance(out, str):
+                for tag in ("Assistant:", "System:", "User:"):
+                    if out.startswith(tag):
+                        out = out[len(tag):].strip()
+            out = _sanitize_text(out)
+            safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
+            if blocked_out:
+                safe_out = refusal_reply(reason_out)
+            log_event("assistant_reply", None, {
+                **hash_summary("prompt", normal_user if not PERSIST_CONTENT else ""),
+                **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
+                "awaiting_next_phase": False
+            })
+            return history + [(user_msg, safe_out)], False
+        # ===== SCENARIO MODE (Phase 1 or Phase 2) =====
+        # Build context ONLY for scenario path
         session_snips = "\n---\n".join(_session_rag.retrieve(
             "diabetes screening Indigenous Métis mobile program cost throughput outcomes logistics",
             k=6
         ))
         snapshot = _load_snapshot()
         policy_context = retrieve_context(
             "mobile diabetes screening Indigenous community outreach cultural safety data governance outcomes"
         )
         computed = compute_operational_numbers(snapshot)
         user_lower = (safe_in or "").lower()
         mdsi_extra = _mdsi_block() if ("diabetes" in user_lower or "mdsi" in user_lower or "mobile screening" in user_lower) else ""
         scenario_block = safe_in if len((safe_in or "")) > 0 else ""
         system_preamble = build_system_preamble(
             snapshot=snapshot,
             session_snips=session_snips
         )
+        # Phase selection
+        if awaiting_answers:
+            # -------- Phase 2 --------
             phase_directive = (
                 "\n\n[INSTRUCTION TO MODEL]\n"
                 "Produce **Phase 2** only: output a header 'Structured Analysis' and follow the exact section order "
                 **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
                 "awaiting_next_phase": False
             })
+            return history + [(user_msg, safe_out)], False
+        else:
+            # -------- Phase 1 --------
+            phase_directive = (
+                "\n\n[INSTRUCTION TO MODEL]\n"
+                "Produce **Phase 1** only: output a header 'Clarification Questions' and ask up to 5 concise, grouped questions "
+                "(Prioritization, Capacity, Cost, Clinical, Recommendations). Then STOP and WAIT.\n"
+            )
+            augmented_user = SYSTEM_MASTER + "\n\n" + system_preamble + "\n\nUser scenario:\n" + (safe_in or "") + phase_directive
+            out = cohere_chat(augmented_user, history)
             if not out:
                 model, tokenizer = load_local_model()
+                inputs = build_inputs(tokenizer, augmented_user, history)
                 out = local_generate(model, tokenizer, inputs, max_new_tokens=MAX_NEW_TOKENS)
             if isinstance(out, str):
                 safe_out = refusal_reply(reason_out)
             log_event("assistant_reply", None, {
+                **hash_summary("prompt", augmented_user if not PERSIST_CONTENT else ""),
                 **hash_summary("reply", safe_out if not PERSIST_CONTENT else ""),
+                "awaiting_next_phase": True
             })
+            return history + [(user_msg, safe_out)], True
     except Exception as e:
         err = f"Error: {e}"
             traceback.print_exc()
         except Exception:
             pass
+        return history + [(user_msg, err)], False
 # ---------- Theme & CSS ----------
 theme = gr.themes.Soft(primary_hue="teal", neutral_hue="slate", radius_size=gr.themes.sizes.radius_lg)
             gr.HTML("<h2>What can I help with?</h2>")
             with gr.Row(elem_classes="search-row"):
                 hero_msg = gr.Textbox(
+                    placeholder="Ask anything (paste scenarios here; you can attach files after)...",
                     show_label=False,
                     lines=1,
                     elem_classes="hero-box"
                 )
                 hero_send = gr.Button("➤", scale=0)
+            gr.Markdown('<div class="hint">ClarityOps will first ask up to 5 clarifications (only for scenarios), then produce a structured analysis.</div>')
     # --- MAIN APP (hidden until first message) ---
     with gr.Column(elem_id="chat-container", visible=False) as app_wrap:
             msg = gr.Textbox(
                 label="",
                 show_label=False,
+                placeholder="Continue here. Paste scenario details, add files below.",
                 scale=10
             )
             send = gr.Button("Send", scale=1)
     # ---- State
     state_history = gr.State(value=[])
     state_uploaded = gr.State(value=[])
+    state_awaiting = gr.State(value=False)  # False -> normal or Phase 1 next; True -> expecting Phase 2 answers
     # ---- Uploads
     def _store_uploads(files, current):
     uploads.change(fn=_store_uploads, inputs=[uploads, state_uploaded], outputs=state_uploaded)
     # ---- Core send (used by both hero input and chat input)
+    def _on_send(user_msg, history, up_paths, awaiting):
         try:
             if not user_msg or not user_msg.strip():
+                # no toggle on empty
+                return history, "", history, awaiting
+            new_history, new_awaiting = clarityops_reply(
+                user_msg.strip(), history or [], None, up_paths or [], awaiting_answers=awaiting
             )
+            return new_history, "", new_history, new_awaiting
         except Exception as e:
             err = f"Error: {e}"
             try: traceback.print_exc()
             except Exception: pass
             new_hist = (history or []) + [(user_msg or "", err)]
+            return new_hist, "", new_hist, awaiting
     # ---- Hero -> App transition + first send
+    def _hero_start(user_msg, history, up_paths, awaiting):
+        chat_o, msg_o, hist_o, await_o = _on_send(user_msg, history, up_paths, awaiting)
         return (
+            chat_o, msg_o, hist_o, await_o,
             gr.update(visible=False),   # hide hero
             gr.update(visible=True),    # show app
             ""                          # clear hero box
     hero_send.click(
         _hero_start,
+        inputs=[hero_msg, state_history, state_uploaded, state_awaiting],
+        outputs=[chat, msg, state_history, state_awaiting, hero_wrap, app_wrap, hero_msg],
         concurrency_limit=2, queue=True
     )
     hero_msg.submit(
         _hero_start,
+        inputs=[hero_msg, state_history, state_uploaded, state_awaiting],
+        outputs=[chat, msg, state_history, state_awaiting, hero_wrap, app_wrap, hero_msg],
         concurrency_limit=2, queue=True
     )
     # ---- Normal chat interactions after hero is gone
+    send.click(_on_send, inputs=[msg, state_history, state_uploaded, state_awaiting],
+               outputs=[chat, msg, state_history, state_awaiting],
                concurrency_limit=2, queue=True)
+    msg.submit(_on_send, inputs=[msg, state_history, state_uploaded, state_awaiting],
+               outputs=[chat, msg, state_history, state_awaiting],
                concurrency_limit=2, queue=True)
     def _on_clear():
+        # reset to fresh hero screen
         return (
+            [], "", [], False,
             gr.update(visible=True),   # show hero
             gr.update(visible=False),  # hide app
             ""                         # clear hero input
         )
+    clear.click(_on_clear, None, [chat, msg, state_history, state_awaiting, hero_wrap, app_wrap, hero_msg])
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", "7860"))