Spaces:

VEDAGI1
/

Medica_DecisionSupportAI

Sleeping

App Files Community

Rajan Sharma committed on Sep 19

Commit

1ca7039

verified ·

1 Parent(s): 4f1d205

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -125

app.py CHANGED Viewed

@@ -47,20 +47,17 @@ from huggingface_hub import login
 from safety import safety_filter, refusal_reply
 from retriever import init_retriever, retrieve_context
-# ---------- Snapshot & retrieval helpers import ----------
-# Use the real function if present; otherwise fall back to a harmless no-op.
-try:
-    from decision_math import compute_operational_numbers
-except Exception:
-    def compute_operational_numbers(snapshot: dict) -> dict:
-        return {}
 from prompt_templates import build_system_preamble
 from upload_ingest import extract_text_from_files
 from session_rag import SessionRAG
 from mdsi_analysis import capacity_projection, cost_estimate, outcomes_summary
 # ---------- Config ----------
 MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct")  # fallback
 HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
@@ -238,102 +235,8 @@ def _mdsi_block():
         "outcomes_summary": outcomes
     }, indent=2)
-# ---------- Dynamic Phase 1 question generator ----------
-def _extract_present_domains(artifacts: List[Dict[str, Any]]) -> Dict[str, bool]:
-    flags = dict(population=False, cost=False, clinical=False, capacity=False)
-    for a in artifacts or []:
-        name = (a.get("name") or "").lower()
-        cols = [c.lower() for c in (a.get("columns") or [])]
-        if any(k in name for k in ["population", "census", "membership"]) or any(
-            k in ",".join(cols) for k in ["population", "census", "residence", "settlement", "age"]
-        ):
-            flags["population"] = True
-        if any(k in name for k in ["cost", "finance", "budget"]) or any(
-            k in ",".join(cols) for k in ["cost", "startup", "ongoing", "per_client", "per-visit"]
-        ):
-            flags["cost"] = True
-        if any(k in name for k in ["a1c", "outcome", "bp", "chol"]) or any(
-            k in ",".join(cols) for k in ["a1c", "bmi", "bp", "chol", "outcome"]
-        ):
-            flags["clinical"] = True
-        if any(k in name for k in ["ops", "capacity", "throughput", "volume"]) or any(
-            k in ",".join(cols) for k in ["clients_per_day", "teams", "visits", "throughput"]
-        ):
-            flags["capacity"] = True
-    return flags
-def _domain_from_text(text: str) -> Dict[str, bool]:
-    t = (text or "").lower()
-    return {
-        "population": any(k in t for k in ["population", "census", "settlement", "membership"]),
-        "cost": any(k in t for k in ["cost", "budget", "startup", "per client", "per-client", "ongoing"]),
-        "clinical": any(k in t for k in ["a1c", "bmi", "blood pressure", "bp", "cholesterol", "outcome"]),
-        "capacity": any(k in t for k in ["capacity", "throughput", "clients per day", "teams", "screen", "volume"]),
-    }
-def _is_mdsi_diabetes(text: str) -> bool:
-    t = (text or "").lower()
-    return any(k in t for k in ["mdsi", "mobile diabetes", "diabetes", "metabolic", "a1c", "metis"])
-def build_dynamic_clarifications(scenario_text: str, artifacts: List[Dict[str, Any]]) -> str:
-    flags_from_files = _extract_present_domains(artifacts)
-    flags_from_text = _domain_from_text(scenario_text)
-    missing = {
-        k: not (flags_from_files.get(k) or flags_from_text.get(k))
-        for k in ["population", "capacity", "cost", "clinical"]
-    }
-    qs: List[Tuple[str, str]] = []
-    is_mdsi = _is_mdsi_diabetes(scenario_text)
-    if missing["population"]:
-        qs.append((
-            "Prioritization",
-            "Which population/risk indicators should drive prioritization (size, prevalence, access, equity factors)?"
-            if not is_mdsi else
-            "Confirm prioritization inputs: settlement membership living on-settlement (latest), obesity/metabolic syndrome prevalence, and any access-to-care constraints to weigh."
-        ))
-    if missing["capacity"]:
-        qs.append((
-            "Capacity",
-            "What per-team throughput and operating schedule should be used for capacity calculations?"
-            if not is_mdsi else
-            "What is the realistic per-team screening rate (clients/day) and operating schedule (days/week, weeks/3-month window)?"
-        ))
-    if missing["cost"]:
-        qs.append((
-            "Cost",
-            "Provide fixed setup costs and variable cost per client to model total program spend."
-            if not is_mdsi else
-            "Provide startup cost per client and ongoing cost per client/visit (or total program costs) to price scenarios like 1,200 screens."
-        ))
-    if missing["clinical"]:
-        qs.append((
-            "Clinical",
-            "Which clinical indicators and expected effect sizes should be tracked for outcomes?"
-            if not is_mdsi else
-            "What longitudinal deltas should we expect (e.g., ΔA1c, ΔBP, ΔBMI, lipids) from repeat screenings, and over what interval?"
-        ))
-    qs.append((
-        "Recommendations",
-        "Any operational constraints (scheduling, staffing, partnerships) we should incorporate into deployment modeling?"
-        if not is_mdsi else
-        "Are there community constraints (events/seasonality/cultural protocols) that should shape routing and visit cadence?"
-    ))
-    qs = qs[:5]
-    out = ["**Clarification Questions**"]
-    current_group = None
-    for grp, q in qs:
-        if grp != current_group:
-            out.append(f"\n**{grp}:**")
-            current_group = grp
-        out.append(f"- {q}")
-    return "\n".join(out)
 # ---------- Core chat logic (auto scenario, dynamic Phase 1) ----------
 def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answers=False):
@@ -349,6 +252,7 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
             ans = "I am ClarityOps, your strategic decision making AI partner."
             return history + [(user_msg, ans)], awaiting_answers
         artifacts = []
         if uploaded_files_paths:
             ing = extract_text_from_files(uploaded_files_paths)
@@ -358,16 +262,24 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
                 _session_rag.add_docs(chunks)
             if artifacts:
                 _session_rag.register_artifacts(artifacts)
-            log_event("uploads_added", None, {"chunks": len(chunks), "artifacts": len(artifacts)})
         if re.search(r"\b(columns?|headers?)\b", (safe_in or "").lower()):
             cols = _session_rag.get_latest_csv_columns()
             if cols:
                 return history + [(user_msg, "Here are the column names from your most recent CSV upload:\n\n- " + "\n- ".join(cols))], awaiting_answers
         scenario_mode = is_scenario_triggered(safe_in, uploaded_files_paths)
         if not scenario_mode:
             out = cohere_chat(safe_in, history) if USE_HOSTED_COHERE else None
             if not out:
                 model, tokenizer = load_local_model()
@@ -390,8 +302,13 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
             })
             return history + [(user_msg, safe_out)], awaiting_answers
         if not awaiting_answers:
-            phase1 = build_dynamic_clarifications(scenario_text=safe_in, artifacts=artifacts or _session_rag.artifacts)
             phase1 = _sanitize_text(phase1)
             log_event("assistant_reply", None, {
                 **hash_summary("prompt", safe_in if not PERSIST_CONTENT else ""),
@@ -401,11 +318,23 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
             })
             return history + [(user_msg, phase1)], True
         session_snips = "\n---\n".join(_session_rag.retrieve(
             "diabetes screening Indigenous Métis mobile program cost throughput outcomes logistics",
             k=6
         ))
         snapshot = _load_snapshot()
         policy_context = retrieve_context(
             "mobile diabetes screening Indigenous community outreach cultural safety data governance outcomes"
@@ -413,34 +342,29 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
         computed = compute_operational_numbers(snapshot)
         user_lower = (safe_in or "").lower()
-        mdsi_extra = _mdsi_block() if ("diabetes" in user_lower or "mdsi" in user_lower or "mobile screening" in user_lower) else ""
-        arts = _session_rag.artifacts or []
-        if arts:
-            arts_summ = []
-            for a in arts:
-                nm = a.get("name") or "<unnamed>"
-                cols = ", ".join(a.get("columns") or [])[:600]
-                rows = a.get("n_rows_sampled") or 0
-                arts_summ.append(f"- {nm}: columns[{cols}] sample_rows={rows}")
-            artifact_block = "Uploaded Data Files (summarized):\n" + "\n".join(arts_summ)
-        else:
-            artifact_block = "Uploaded Data Files (summarized):\n- <none>"
         scenario_block = safe_in if len((safe_in or "")) > 0 else ""
         system_preamble = build_system_preamble(
             snapshot=snapshot,
             policy_context=policy_context,
             computed_numbers=computed,
-            scenario_text=scenario_block + f"\n\n{artifact_block}" + (f"\n\nExecutive Pre-Computed Blocks:\n{mdsi_extra}" if mdsi_extra else ""),
             session_snips=session_snips
         )
         directive = (
             "\n\n[INSTRUCTION TO MODEL]\n"
-            "Produce **Phase 2** only now: start with 'Structured Analysis' and follow the exact section order "
             "(Prioritization, Capacity, Cost, Clinical Benefits, ClarityOps Top 3 Recommendations). "
-            "Use uploaded files + the user's latest answers as authoritative. Show calculations, units, and a brief Provenance.\n"
         )
         augmented_user = SYSTEM_MASTER + "\n\n" + system_preamble + "\n\nUser scenario & answers:\n" + safe_in + directive
@@ -481,7 +405,7 @@ def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answe
 # ---------- Theme & CSS ----------
 theme = gr.themes.Soft(primary_hue="teal", neutral_hue="slate", radius_size=gr.themes.sizes.radius_lg)
 custom_css = """
-:root { --brand-bg: #0f172a; --brand-accent: #0d9488; --brand-text: #0f172a; --brand-text-light: #ffffff; }  /* CHANGED bg only */
 html, body, .gradio-container { height: 100vh; }
 .gradio-container { background: var(--brand-bg); display: flex; flex-direction: column; }
@@ -605,6 +529,8 @@ with gr.Blocks(theme=theme, css=custom_css, analytics_enabled=False) as demo:
                concurrency_limit=2, queue=True)
     def _on_clear():
         return (
             [], "", [], False,
             gr.update(visible=True),

 from safety import safety_filter, refusal_reply
 from retriever import init_retriever, retrieve_context
+from decision_math import compute_operational_numbers   # fixed import name
 from prompt_templates import build_system_preamble
 from upload_ingest import extract_text_from_files
 from session_rag import SessionRAG
 from mdsi_analysis import capacity_projection, cost_estimate, outcomes_summary
+# NEW: dynamic data plumbing
+from data_registry import DataRegistry
+from schema_mapper import map_concepts, build_phase1_questions
+from auto_metrics import build_data_findings_markdown
 # ---------- Config ----------
 MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct")  # fallback
 HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")
         "outcomes_summary": outcomes
     }, indent=2)
+# NEW: session-scoped data registry
+_data_registry = DataRegistry()
 # ---------- Core chat logic (auto scenario, dynamic Phase 1) ----------
 def clarityops_reply(user_msg, history, tz, uploaded_files_paths, awaiting_answers=False):
             ans = "I am ClarityOps, your strategic decision making AI partner."
             return history + [(user_msg, ans)], awaiting_answers
+        # 1) Ingest uploads into RAG AND DataRegistry (files alone can trigger Scenario Mode)
         artifacts = []
         if uploaded_files_paths:
             ing = extract_text_from_files(uploaded_files_paths)
                 _session_rag.add_docs(chunks)
             if artifacts:
                 _session_rag.register_artifacts(artifacts)
+            # register parsable tables into DataRegistry
+            for p in uploaded_files_paths:
+                _data_registry.add_path(p)
+            log_event("uploads_added", None, {
+                "chunks": len(chunks), "artifacts": len(artifacts), "tables": len(_data_registry.names())
+            })
+        # quick helper
         if re.search(r"\b(columns?|headers?)\b", (safe_in or "").lower()):
             cols = _session_rag.get_latest_csv_columns()
             if cols:
                 return history + [(user_msg, "Here are the column names from your most recent CSV upload:\n\n- " + "\n- ".join(cols))], awaiting_answers
+        # 2) Decide mode
         scenario_mode = is_scenario_triggered(safe_in, uploaded_files_paths)
         if not scenario_mode:
+            # ---------- Normal conversational chat ----------
             out = cohere_chat(safe_in, history) if USE_HOSTED_COHERE else None
             if not out:
                 model, tokenizer = load_local_model()
             })
             return history + [(user_msg, safe_out)], awaiting_answers
+        # ---------- Scenario Mode ----------
+        # 3) Build dynamic concept mapping from scenario + data
+        mapping = map_concepts(safe_in, _data_registry)
         if not awaiting_answers:
+            # PHASE 1: ask only for missing/ambiguous
+            phase1 = build_phase1_questions(scenario_text=safe_in, registry=_data_registry, mapping=mapping)
             phase1 = _sanitize_text(phase1)
             log_event("assistant_reply", None, {
                 **hash_summary("prompt", safe_in if not PERSIST_CONTENT else ""),
             })
             return history + [(user_msg, phase1)], True
+        # PHASE 2: compute data findings in Python, then let LLM write the narrative
+        data_findings_md, missing_keys = build_data_findings_markdown(_data_registry, mapping)
+        # If critical missing items remain, surface INSUFFICIENT_DATA context to the model + ask for the rest
+        insuff_note = ""
+        if missing_keys:
+            insuff_note = (
+                "\n\nUncomputable (still missing columns/defs): "
+                + ", ".join(sorted(set(missing_keys)))
+                + ". If any of these are essential to the requested outputs, write INSUFFICIENT_DATA where appropriate."
+            )
+        # Preamble context (snapshot + policy)
         session_snips = "\n---\n".join(_session_rag.retrieve(
             "diabetes screening Indigenous Métis mobile program cost throughput outcomes logistics",
             k=6
         ))
         snapshot = _load_snapshot()
         policy_context = retrieve_context(
             "mobile diabetes screening Indigenous community outreach cultural safety data governance outcomes"
         computed = compute_operational_numbers(snapshot)
         user_lower = (safe_in or "").lower()
+        mdsi_extra = ""
+        if any(k in user_lower for k in ["diabetes", "mdsi", "mobile screening"]):
+            mdsi_extra = _mdsi_block()
+        # Build artifact + table summary for the prompt
+        registry_summary = _data_registry.summarize_for_prompt()
+        artifact_block = "Uploaded Data Files (tables):\n" + registry_summary
         scenario_block = safe_in if len((safe_in or "")) > 0 else ""
         system_preamble = build_system_preamble(
             snapshot=snapshot,
             policy_context=policy_context,
             computed_numbers=computed,
+            scenario_text=scenario_block + f"\n\n{artifact_block}\n\n{data_findings_md}" + (f"\n\nExecutive Pre-Computed Blocks:\n{mdsi_extra}" if mdsi_extra else "") + insuff_note,
             session_snips=session_snips
         )
         directive = (
             "\n\n[INSTRUCTION TO MODEL]\n"
+            "Produce **Phase 2** now: begin with 'Structured Analysis' and follow the exact section order "
             "(Prioritization, Capacity, Cost, Clinical Benefits, ClarityOps Top 3 Recommendations). "
+            "Use the **Python-computed tables** in the context as ground truth; when something is truly missing, write INSUFFICIENT_DATA. "
+            "Show calculations, units, and add a brief Provenance.\n"
         )
         augmented_user = SYSTEM_MASTER + "\n\n" + system_preamble + "\n\nUser scenario & answers:\n" + safe_in + directive
 # ---------- Theme & CSS ----------
 theme = gr.themes.Soft(primary_hue="teal", neutral_hue="slate", radius_size=gr.themes.sizes.radius_lg)
 custom_css = """
+:root { --brand-bg: #0f172a; --brand-accent: #0d9488; --brand-text: #0f172a; --brand-text-light: #ffffff; }  /* bg same as chat for integrated look */
 html, body, .gradio-container { height: 100vh; }
 .gradio-container { background: var(--brand-bg); display: flex; flex-direction: column; }
                concurrency_limit=2, queue=True)
     def _on_clear():
+        # Also clear the in-memory data registry for a fresh scenario
+        _data_registry.clear()
         return (
             [], "", [], False,
             gr.update(visible=True),