Spaces:

VEDAGI1
/

Medica_DecisionSupportAI

Sleeping

App Files Files Community

VEDAGI1 committed 20 days ago

Commit

da409af

verified ·

1 Parent(s): d900f6e

Update app.py

Browse files

Files changed (1) hide show

app.py +417 -172

app.py CHANGED Viewed

@@ -1,28 +1,47 @@
 # app.py
-# Universal AI Data Analyst – FINAL FIXED VERSION (Nov 2025)
 from __future__ import annotations
 import io
 import json
 import os
 import traceback
-import re
 from contextlib import redirect_stdout
 from datetime import datetime
 from typing import Any, Dict, List
 import gradio as gr
 import pandas as pd
 import regex as re2
 from langchain_cohere import ChatCohere  # noqa: F401
 from settings import (
     GENERAL_CONVERSATION_PROMPT,
     COHERE_MODEL_PRIMARY,
-    COHERE_TIMEOUT_S,  # noqa: F401
-    USE_OPEN_FALLBACKS  # noqa: F401
 )
-# Optional HIPAA settings with safe defaults
 try:
-    from settings import PHI_MODE, PERSIST_HISTORY, HISTORY_TTL_DAYS, REDACT_BEFORE_LLM, ALLOW_EXTERNAL_PHI
 except Exception:
     PHI_MODE = False
     PERSIST_HISTORY = True
@@ -30,21 +49,8 @@ except Exception:
     REDACT_BEFORE_LLM = False
     ALLOW_EXTERNAL_PHI = True
-from audit_log import log_event
-from privacy import safety_filter, refusal_reply
-from llm_router import cohere_chat, _co_client, cohere_embed
-# ———————— PERMANENT FIX: Safe .item() for floats & pandas scalars ————————
-def safe_item(x):
-    """Safely extract scalar from pandas/numpy objects OR plain Python types"""
-    try:
-        return x.item() if hasattr(x, "item") else x
-    except:
-        return x
-# —————————————————————————————————————————————————————————————————————
 def load_markdown_text(filepath: str) -> str:
     try:
         with open(filepath, "r", encoding="utf-8") as f:
@@ -52,11 +58,15 @@ def load_markdown_text(filepath: str) -> str:
     except FileNotFoundError:
         return f"**Error:** Document `{os.path.basename(filepath)}` not found."
 def _sanitize_text(s: str) -> str:
     if not isinstance(s, str):
         return s
     return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
 PHI_PATTERNS = [
     (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),
     (re.compile(r"\b\d{9}\b"), "[REDACTED_MRN]"),
@@ -67,6 +77,7 @@ PHI_PATTERNS = [
     (re.compile(r"\b\d{5}(-\d{4})?\b"), "[REDACTED_ZIP]"),
 ]
 def redact_phi(text: str) -> str:
     if not isinstance(text, str):
         return text
@@ -75,101 +86,197 @@ def redact_phi(text: str) -> str:
         t = pat.sub(repl, t)
     return t
 def safe_log(event_name: str, meta: dict | None = None):
     """Best-effort wrapper around ``log_event`` that never raises.
     A copy of ``meta`` (or an empty dict when ``meta`` is falsy) is made, its
     ``"raw"`` key is dropped if present, and the result is passed to
     ``log_event(event_name, None, <copy>)``. The caller's dict is not modified.
     Any ``Exception`` raised along the way is swallowed on purpose so that a
     logging failure cannot break the calling code path.
     """
     try:
         scrubbed = (meta if meta else {}).copy()
         # Drop the "raw" entry before logging (presumably to keep raw payloads
         # out of the audit log -- confirm against audit_log's expectations).
         scrubbed.pop("raw", None)
         log_event(event_name, None, scrubbed)
     except Exception:
         # Deliberate: logging is best-effort only.
         return None
-# ———————— Rest of your unchanged logic (kept 100% identical) ————————
 def _create_python_script(user_scenario: str, schema_context: str) -> str:
-    EXPERT_ANALYTICAL_GUIDELINES = """
---- EXPERT ANALYTICAL GUIDELINES ---
-When writing your script, you MUST follow these expert business rules:
-1. **Linking Datasets Rule:** If you need to connect facilities to health zones when the 'zone' column is not in the facility list,
-    you must first identify the high-priority zone from the beds data, then find the major city (by facility count) in the facility list,
-    and *then* assess that city's capacity. Do not try to filter the facility list by a 'zone' column if it does not exist in the schema.
-2. **Prioritization Rule:** To prioritize locations, you MUST combine the most recent population data with specific high-risk health indicators
-    to create a multi-factor risk score.
-3. **Capacity Calculation Rule:** For capacity over a 3-month window, assume **60 working days**.
-4. **Cost Calculation Rule:** Sum 'Startup cost' and 'Ongoing cost' per person before multiplying.
-"""
     prompt_for_coder = f"""\
-You are an expert Python data scientist. Your job is to write a script to extract the data needed to answer the user's request.
-You have dataframes in a list `dfs`.
-{EXPERT_ANALYTICAL_GUIDELINES}
 --- DATA SCHEMA ---
 {schema_context}
 --- END DATA SCHEMA ---
-CRITICAL RULES:
-1. **DO NOT READ FILES:** You MUST NOT include `pd.read_csv`. The data is ALREADY loaded in the `dfs` variable. You MUST use this variable. Failure to do so will cause a fatal error.
-2. **JSON OUTPUT ONLY:** Your script's ONLY output must be a single JSON object printed to stdout containing the raw data findings.
-3. **BE PRECISE:** Use the exact, case-sensitive column names from the schema and robustly clean strings (`re.sub()`) before converting to numbers.
-4. **JSON SERIALIZATION:** Before adding data to your final dictionary for JSON conversion, you MUST convert any pandas-specific types (like `int64`) to standard Python types using `safe_item()` for single values or `.tolist()` for lists.
---- USER'S SCENARIO ---
-{user_scenario}
---- PYTHON SCRIPT ---
-Now, write the complete Python script that performs the analysis and prints a single, serializable JSON object.
-```python
 """
     generated_text = cohere_chat(prompt_for_coder)
-    match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
     if match:
         return match.group(1).strip()
-    return "print(json.dumps({'error': 'Failed to generate a valid Python script.'}))"
 def _generate_long_report(prompt: str) -> str:
     try:
         client = _co_client()
         if not client:
             return "Error: Cohere client not initialized."
-        response = client.chat(model=COHERE_MODEL_PRIMARY, message=prompt, max_tokens=4096)
         return response.text
     except Exception as e:
         safe_log("cohere_chat_error", {"err": str(e)})
         return f"Error during final report generation: {e}"
 def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
     prompt_for_writer = f"""\
-You are an expert management consultant and data analyst.
-A data science script has run to extract key findings. You have the user's original request and the raw JSON data.
-Your task is to synthesize these raw findings into a single, comprehensive, and professional report that directly answers all of the user's questions with detailed justifications.
---- USER'S ORIGINAL SCENARIO & DELIVERABLES ---
 {user_scenario}
 --- END SCENARIO ---
 --- RAW DATA FINDINGS (JSON) ---
 {raw_data_json}
 --- END RAW DATA ---
-Now, write the final, polished report. The report MUST:
-1. Follow the "Expected Output Format" requested by the user.
-2. Use tables, bullet points, and DETAILED narrative justifications for each recommendation.
-3. Synthesize the raw data into actionable insights. Do not just copy the raw numbers; interpret them.
-4. Ensure you fully address ALL evaluation questions, especially the final recommendations.
 """
     return _generate_long_report(prompt_for_writer)
 def _append_msg(h: List[Dict[str, str]], r: str, c: str) -> List[Dict[str, str]]:
     return (h or []) + [{"role": r, "content": c}]
 def ping_cohere() -> str:
     try:
         cli = _co_client()
         if not cli:
             return "Cohere client not initialized."
         vecs = cohere_embed(["hello", "world"])
-        return f"Cohere OK (model={COHERE_MODEL_PRIMARY})" if vecs else "Cohere reachable."
     except Exception as e:
         return f"Cohere ping failed: {e}"
 def handle(user_msg: str, files: list, yield_update) -> str:
     try:
         safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
         if blocked_in:
             return refusal_reply(reason_in)
         redacted_in = safe_in
         if PHI_MODE and REDACT_BEFORE_LLM:
             redacted_in = redact_phi(safe_in)
@@ -177,6 +284,7 @@ def handle(user_msg: str, files: list, yield_update) -> str:
         file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
         if file_paths:
             dataframes, schema_parts = [], []
             for i, p in enumerate(file_paths):
                 if p.endswith(".csv"):
@@ -185,93 +293,92 @@ def handle(user_msg: str, files: list, yield_update) -> str:
                     except UnicodeDecodeError:
                         df = pd.read_csv(p, encoding="latin1")
                     dataframes.append(df)
-                    schema_parts.append(f"DataFrame `dfs[{i}]` (`{os.path.basename(p)}`):\n{df.head().to_markdown()}\n")
             if not dataframes:
                 return "Please upload at least one CSV file."
             schema_context = "\n".join(schema_parts)
-            prompt_for_code = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
-            yield_update("```\nGenerating aligned analysis script...\n```")
             analysis_script = _create_python_script(prompt_for_code, schema_context)
-            yield_update("```\nExecuting script to extract raw data...\n```")
-            # ←←← INJECT safe_item INTO SCRIPT NAMESPACE ←←←
-            execution_namespace = {
-                "dfs": dataframes,
-                "pd": pd,
-                "re": re,
-                "json": json,
-                "safe_item": safe_item
-            }
             output_buffer = io.StringIO()
             try:
                 with redirect_stdout(output_buffer):
                     exec(analysis_script, execution_namespace)
                 raw_data_output = output_buffer.getvalue()
-                # Robust JSON extraction
-                try:
-                    raw_data = json.loads(raw_data_output)
-                except json.JSONDecodeError:
-                    json_match = re.search(r'\{.*\}', raw_data_output, re.DOTALL)
-                    raw_data = json.loads(json_match.group(0)) if json_match else {}
-                # Final safety net – convert any lingering pandas types
-                def convert(obj):
-                    return safe_item(obj) if not isinstance(obj, (dict, list)) else obj
-                def deep_convert(o):
-                    if isinstance(o, dict):
-                        return {k: deep_convert(v) for k, v in o.items()}
-                    elif isinstance(o, list):
-                        return [deep_convert(i) for i in o]
-                    else:
-                        return convert(o)
-                raw_data = deep_convert(raw_data)
-                raw_data_json = json.dumps(raw_data)
             except Exception as e:
-                error_detail = f"Script execution failed: {e}\n\nGenerated script:\n```python\n{analysis_script}\n```"
-                return error_detail if not PHI_MODE else "A critical error occurred."
-            yield_update("```\nSynthesizing final comprehensive report...\n```")
-            writer_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
-            final_report = _generate_final_report(writer_input, raw_data_json)
             return _sanitize_text(final_report)
         else:
-            # Pure chat mode
-            chat_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
             prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
             return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
     except Exception as e:
         tb = traceback.format_exc()
         safe_log("app_error", {"err": str(e)})
-        return "A critical error occurred. Please contact your administrator." if PHI_MODE else f"Error: {e}"
 PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
 TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
-# ———————— FINAL WORKING CSS (Nov 2025 – Gradio 4+) ————————
 SLEEK_CSS = """
-/* Full-bleed layout */
-:root, body, #root, .gradio-container { height: 100%; margin:0; padding:0; }
 .gradio-container { padding: 0 !important; }
 /* Header */
 .header {
   padding: 20px 28px;
   background: linear-gradient(135deg, #0e1726, #1d2a44 60%, #243a5e);
   color: #fff;
-  display: flex; align-items: center; justify-content: space-between; gap: 16px;
 }
-.header h1 { margin:0; font-size:22px; font-weight:600; letter-spacing:0.3px; }
-.header .badge { font-size:12px; background:#ffffff22; padding:6px 10px; border-radius:999px; }
-/* Main grid */
 .main {
   display: grid;
   grid-template-columns: 420px 1fr;
@@ -289,106 +396,244 @@ SLEEK_CSS = """
 .left { padding: 16px; display: flex; flex-direction: column; gap: 12px; }
 .right { padding: 0; display: flex; flex-direction: column; }
-/* Make chatbot fill entire right panel – WORKS IN 2025 */
-#chatbot_container {
-    flex: 1 !important;
-    min-height: 0;
-    display: flex !important;
-    flex-direction: column !important;
-}
-#chatbot_container .svelte-1cea1s5 {
-    flex: 1 !important;
-    min-height: 0 !important;
-    display: flex !important;
-    flex-direction: column !important;
-}
-#chatbot_container .messages {
-    flex: 1 !important;
-    overflow-y: auto !important;
-    overflow-x: hidden !important;
-    padding: 28px !important;
-    min-height: 0 !important;
-}
-#chatbot_container .gr-chatbot,
-#chatbot_container .svelte-1cea1s5,
-#chatbot_container .messages { max-height: none !important; }
-/* Scrollbars */
-#chatbot_container .messages::-webkit-scrollbar {
-    width: 8px;
-}
-#chatbot_container .messages::-webkit-scrollbar-track { background: transparent; }
-#chatbot_container .messages::-webkit-scrollbar-thumb {
-    background: rgba(100,120,160,0.4);
-    border-radius: 4px;
 }
-#chatbot_container .messages::-webkit-scrollbar-thumb:hover { background: rgba(100,120,160,0.7); }
-/* Code blocks */
-#chatbot_container pre {
-    background: #0f1629 !important;
-    border: 1px solid #2a3755 !important;
-    border-radius: 8px !important;
 }
 """
-VOICE_STT_HTML = """..."""  # (your existing voice script – unchanged)
 with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
     assessment_history = gr.State([])
     with gr.Row(elem_classes=["header"]):
         gr.Markdown("<h1>Clarity Ops Augmented Decision Support</h1>")
-        pill = "PHI Mode ON · history off" if (PHI_MODE and not PERSIST_HISTORY) else "PHI Mode ON" if PHI_MODE else "PHI Mode OFF"
         gr.Markdown(f"<span class='badge'>{pill}</span>")
     with gr.Row(elem_classes=["main"]):
         with gr.Column(elem_classes=["left"]):
             gr.Markdown("<div class='panel-title'>New Assessment</div>")
             gr.Markdown("<div class='helper'>Upload CSVs for analysis, or enter a prompt. Voice works in modern browsers.</div>")
-            files_input = gr.Files(label="Upload Data Files (.csv)", file_count="multiple", type="filepath", file_types=[".csv"])
-            prompt_input = gr.Textbox(label="Prompt", placeholder="Paste your scenario or question here...", lines=12, elem_id="prompt_box", autofocus=True)
             with gr.Row(elem_classes=["actions"]):
-                gr.Button("Run Analysis", variant="primary")
-                gr.Button("Clear")
-                gr.Button("Voice")
             gr.Markdown("<div class='voice-hint'>Click Voice to start/stop dictation into the prompt box.</div>")
-            gr.Button("Ping Cohere") .click(ping_cohere, outputs=gr.Markdown())
             gr.Markdown("<div class='hr'></div>")
             if PHI_MODE:
-                gr.Markdown("PHI Mode: History persistence is disabled by default. Avoid unnecessary identifiers.")
             with gr.Accordion("Privacy & Terms", open=False):
                 gr.Markdown(PRIVACY_POLICY_TEXT)
                 gr.Markdown("<div class='hr'></div>")
                 gr.Markdown(TERMS_OF_SERVICE_TEXT)
         with gr.Column(elem_classes=["right"]):
             with gr.Tabs(elem_classes=["tabs"]):
-                with gr.TabItem("Current Assessment", id=0):
                     with gr.Column(elem_id="chatbot_container"):
                         chat_history_output = gr.Chatbot(
-                            label="Analysis Output",
-                            type="messages",
-                            container=False,
-                            autoscroll=True,
-                            elem_id="chatbot_root",
-                            height=None  # Let CSS control height
                         )
-                with gr.TabItem("Assessment History", id=1):
                     gr.Markdown("### Review Past Assessments")
-                    history_dropdown = gr.Dropdown(label="Select an assessment", choices=[])
-                    history_display = gr.Markdown()
     gr.HTML(VOICE_STT_HTML)
-    # (Your event wiring stays exactly the same – unchanged)
-    # ... (rest of your code unchanged)
 if __name__ == "__main__":
     if not os.getenv("COHERE_API_KEY"):
-        print("COHERE_API_KEY not set")
     demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))

 # app.py
+#
+# Universal AI Data Analyst with:
+# - IMPROVED: "Plan-and-Execute" logic for high-accuracy analysis.
+# - IMPROVED: Professional, structured report generation.
+# - IMPROVED: Enriched schema context for the AI analyst.
+# - Unchanged UI, event wiring, and core infrastructure.
 from __future__ import annotations
 import io
 import json
 import os
 import traceback
 from contextlib import redirect_stdout
 from datetime import datetime
 from typing import Any, Dict, List
 import gradio as gr
 import pandas as pd
 import regex as re2
+import re
 from langchain_cohere import ChatCohere  # noqa: F401
 from settings import (
     GENERAL_CONVERSATION_PROMPT,
     COHERE_MODEL_PRIMARY,
+    COHERE_TIMEOUT_S,
+    USE_OPEN_FALLBACKS,
 )
+from audit_log import log_event
+from privacy import safety_filter, refusal_reply
+from llm_router import cohere_chat, _co_client, cohere_embed
+# Try to import optional HIPAA flags; fall back to safe defaults if not defined.
 try:
+    from settings import (
+        PHI_MODE,
+        PERSIST_HISTORY,
+        HISTORY_TTL_DAYS,
+        REDACT_BEFORE_LLM,
+        ALLOW_EXTERNAL_PHI,
+    )
 except Exception:
     PHI_MODE = False
     PERSIST_HISTORY = True
     REDACT_BEFORE_LLM = False
     ALLOW_EXTERNAL_PHI = True
+# ---------------------- Helpers (analysis logic selectively improved) ----------------------
 def load_markdown_text(filepath: str) -> str:
     try:
         with open(filepath, "r", encoding="utf-8") as f:
     except FileNotFoundError:
         return f"**Error:** Document `{os.path.basename(filepath)}` not found."
 def _sanitize_text(s: str) -> str:
     if not isinstance(s, str):
         return s
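+    # Remove control characters (except newline and tab). NOTE: the "--" set difference below needs regex VERSION1 (e.g. a "(?V1)" prefix); under the default VERSION0 it is parsed as a simple set followed by a literal "]".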
     return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
+# Conservative PHI redaction patterns (only applied if PHI_MODE & REDACT_BEFORE_LLM are enabled)
 PHI_PATTERNS = [
     (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),
     (re.compile(r"\b\d{9}\b"), "[REDACTED_MRN]"),
     (re.compile(r"\b\d{5}(-\d{4})?\b"), "[REDACTED_ZIP]"),
 ]
 def redact_phi(text: str) -> str:
     if not isinstance(text, str):
         return text
         t = pat.sub(repl, t)
     return t
 def safe_log(event_name: str, meta: dict | None = None):
+    # Avoid logging raw PHI or payloads
     try:
         meta = (meta or {}).copy()
         meta.pop("raw", None)
         log_event(event_name, None, meta)
     except Exception:
+        # Never raise from logging
         pass
 def _create_python_script(user_scenario: str, schema_context: str) -> str:
+    """
+    IMPROVED: Generates a Python script using a universal "Map, Plan, Execute" approach.
+    The AI first maps user concepts to data columns, then plans and executes the analysis.
+    This ensures the logic is robust, dynamic, and not hardcoded to a specific dataset.
+    """
     prompt_for_coder = f"""\
+You are an expert-level, universal Python data scientist. Your task is to dynamically analyze any provided dataset(s) to answer a user's business request.
+--- USER'S SCENARIO ---
+{user_scenario}
+--- END SCENARIO ---
 --- DATA SCHEMA ---
 {schema_context}
 --- END DATA SCHEMA ---
+You must follow a rigorous three-step "Map, Plan, Execute" process:
+**Step 1: Map Concepts to Data.**
+First, analyze the user's scenario and the provided data schemas. Identify the key business concepts (e.g., "hospitals", "sales", "regions") and metrics (e.g., "wait times", "revenue", "population"). Then, create a logical mapping from these concepts to the actual column names in the provided DataFrames. State these mappings clearly. This is the most critical step to ensure your analysis is relevant.
+**Step 2: Create a Detailed Analysis Plan.**
+Based on your mapping, formulate a step-by-step plan. Describe the data cleaning, merging, grouping, and aggregation steps needed to answer the user's request using the columns you identified.
+**Step 3: Write the Python Script.**
+Based on your plan, write a complete Python script.
+CRITICAL SCRIPTING RULES:
+1.  **DYNAMIC DATAFRAME IDENTIFICATION:** Your script MUST identify the correct DataFrame by checking for the presence of the columns you mapped in Step 1. Do NOT use hardcoded indices like `dfs[0]`.
+2.  **ROBUST SUCCESS CHECK (MOST IMPORTANT TO PREVENT AMBIGUITY ERROR):** After attempting to find a DataFrame, you MUST check for success by comparing the result to `None`. Do NOT use `if not my_dataframe:` as this is ambiguous.
+    ```python
+    # Good, robust code
+    def find_df_by_cols(dfs, required_cols):
+        for df in dfs:
+            if all(col in df.columns for col in required_cols):
+                return df
+        return None
+    primary_df = find_df_by_cols(dfs, ['user_id', 'transaction_amount'])
+    # This is the correct way to check for failure
+    if primary_df is None:
+        raise ValueError("Could not find the primary dataframe based on its columns.")
+    ```
+3.  **VERIFY COLUMN EXISTENCE:** Only use columns that you have explicitly identified and mapped.
+4.  **NO FILE READING:** The data is already in the `dfs` list.
+5.  **STRICTLY JSON OUTPUT:** The script's ONLY output must be a single JSON object.
+6.  **ROBUST & GENERIC:** Write robust code that can handle potential missing data (`errors='coerce'`, checking for `None`).
+Now, provide your response in the following format:
+**ANALYSIS PLAN:**
+```text
+**1. Concept-to-Column Mapping:**
+- Concept: [e.g., 'Hospitals'] -> Mapped Column: [e.g., `Facility`]
+- Concept: [e.g., 'Surgical Wait Time'] -> Mapped Column: [e.g., `Surgery_Median`]
+**2. Step-by-Step Analysis:**
+1.  **Data Identification:** [e.g., "Define a helper function to find dataframes by checking for key columns..."]
+2.  **Data Cleaning:** [e.g., "Convert metric columns to numeric..."]
+3.  **Analysis Step A:** [e.g., "Group the primary dataframe by the 'Facility' column and calculate the mean of the 'Surgery_Median' column..."]
+4.  ...
+the final JSON object]
+# Your complete Python script starts here
+import pandas as pd
+import json
+import re
+# Main analysis logic...
+# ...
+# Final print statement
+print(json.dumps(final_data_structure, indent=4))```
 """
     generated_text = cohere_chat(prompt_for_coder)
+    # This regex is more robust for extracting the final code block
+    match = re2.search(r"PYTHON SCRIPT:\s*```python\n(.*?)```", generated_text, re2.DOTALL)
     if match:
         return match.group(1).strip()
+    # Fallback if the structured format fails
+    fallback_match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
+    if fallback_match:
+        return fallback_match.group(1).strip()
+    return "print(json.dumps({'error': 'Failed to generate a valid Python script from the plan.'}))"
 def _generate_long_report(prompt: str) -> str:
     try:
         client = _co_client()
         if not client:
             return "Error: Cohere client not initialized."
+        response = client.chat(
+            model=COHERE_MODEL_PRIMARY,
+            message=prompt,
+            max_tokens=4096,
+        )
         return response.text
     except Exception as e:
         safe_log("cohere_chat_error", {"err": str(e)})
         return f"Error during final report generation: {e}"
 def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
+    """
+    IMPROVED: Generates a professional, structured report from the JSON data.
+    The prompt guides the AI to synthesize insights in a standard consulting format,
+    ensuring a high level of detail and actionable recommendations.
+    """
     prompt_for_writer = f"""\
+You are an expert management consultant specializing in data-driven strategy. A Python script has been executed to extract key data points based on a user's request. Your task is to synthesize this raw data into a polished, comprehensive, and actionable report.
+--- USER'S ORIGINAL SCENARIO ---
 {user_scenario}
 --- END SCENARIO ---
 --- RAW DATA FINDINGS (JSON) ---
 {raw_data_json}
 --- END RAW DATA ---
+CRITICAL INSTRUCTIONS:
+You must write a final report that follows this exact structure:
+**### Executive Summary**
+- Start with a brief paragraph summarizing the core problem, key findings, and top recommendations. This should be a high-level overview for a leadership audience.
+**### 1. [First Key Finding, e.g., Hospitals with the Longest Wait Times]**
+- Present the relevant data in a Markdown table.
+- Write a short narrative interpreting the data. What does it mean? Are there any outliers? Why might these facilities have long waits (e.g., specialized care, rural location, capacity issues)?
+**### 2. [Second Key Finding, e.g., Specialties with the Longest Wait Times]**
+- Present the relevant data in a Markdown table.
+- Interpret the findings. Why are these specialties facing delays (e.g., specialist shortages, equipment needs)?
+**### 3. [Third Key Finding, e.g., Zone-Level Performance]**
+- Present the data in a table, including a comparison to a relevant average or baseline.
+- Analyze the geographic or systemic issues this data reveals.
+**### 4. [Fourth Key Finding, if applicable, e.g., Geographic Distribution]**
+- Synthesize location data with the wait-time findings.
+- Discuss the implications for patient equity, travel burdens, and access to care.
+**### 5. Recommendations for Resource Allocation**
+- Provide specific, actionable, and justified recommendations.
+- Structure them by category (e.g., by facility, by specialty, by zone).
+- For each recommendation, provide a clear rationale directly linked to the data findings above (e.g., "Allocate additional resources to Glace Bay Hospital because it is a rural facility in a high-wait zone, suggesting a capacity bottleneck.").
+**### Data Limitations**
+- Briefly mention any potential limitations of the analysis (e.g., missing data, use of proxies, case severity not included). This adds credibility to the report.
+Do not just repeat the JSON data. Your value is in interpreting the numbers, connecting the dots between different findings, and providing clear, data-backed strategic advice.
 """
     return _generate_long_report(prompt_for_writer)
 def _append_msg(h: List[Dict[str, str]], r: str, c: str) -> List[Dict[str, str]]:
     return (h or []) + [{"role": r, "content": c}]
 def ping_cohere() -> str:
     try:
         cli = _co_client()
         if not cli:
             return "Cohere client not initialized."
         vecs = cohere_embed(["hello", "world"])
+        return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY})" if vecs else "Cohere reachable."
     except Exception as e:
         return f"Cohere ping failed: {e}"
 def handle(user_msg: str, files: list, yield_update) -> str:
     try:
+        # Safety filter on incoming message
         safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
         if blocked_in:
             return refusal_reply(reason_in)
+        # Optional PHI redaction for prompts sent to an external LLM
         redacted_in = safe_in
         if PHI_MODE and REDACT_BEFORE_LLM:
             redacted_in = redact_phi(safe_in)
         file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
         if file_paths:
+            # CSV analysis path
             dataframes, schema_parts = [], []
             for i, p in enumerate(file_paths):
                 if p.endswith(".csv"):
                     except UnicodeDecodeError:
                         df = pd.read_csv(p, encoding="latin1")
                     dataframes.append(df)
+                    # --- IMPROVEMENT: ENRICHED SCHEMA CONTEXT ---
+                    schema_buffer = io.StringIO()
+                    df.info(buf=schema_buffer)
+                    schema_info = schema_buffer.getvalue()
+                    schema_parts.append(
+                        f"""DataFrame `dfs[{i}]` (`{os.path.basename(p)}`):
+### Head
+{df.head().to_markdown()}
+### Schema and Data Types
+{schema_info}
+### Summary Statistics
+{df.describe(include='all').to_markdown()}
+"""
+                    )
             if not dataframes:
                 return "Please upload at least one CSV file."
             schema_context = "\n".join(schema_parts)
+            # If external PHI is not allowed, use redacted prompt; otherwise use original
+            prompt_for_code = (
+                redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
+            )
+            yield_update("```\n🧠 Generating aligned analysis script...\n```")
             analysis_script = _create_python_script(prompt_for_code, schema_context)
+            yield_update("```\n⚙️ Executing script to extract raw data...\n```")
+            execution_namespace = {"dfs": dataframes, "pd": pd, "re": re, "json": json}
             output_buffer = io.StringIO()
             try:
                 with redirect_stdout(output_buffer):
                     exec(analysis_script, execution_namespace)
                 raw_data_output = output_buffer.getvalue()
             except Exception as e:
+                return (
+                    f"An error occurred executing the script: {e}\n\nGenerated Script:\n"
+                    f"```python\n{analysis_script}\n```"
+                )
+            yield_update("```\n✍️ Synthesizing final comprehensive report...\n```")
+            writer_input = (
+                redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
+            )
+            final_report = _generate_final_report(writer_input, raw_data_output)
             return _sanitize_text(final_report)
         else:
+            # Pure chat path
+            chat_input = (
+                redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
+            )
             prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
             return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
     except Exception as e:
         tb = traceback.format_exc()
         safe_log("app_error", {"err": str(e)})
+        return ("A critical error occurred. Please contact your administrator." if PHI_MODE else f"A critical error occurred: {e}")
 PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
 TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
+# ---------------------- Sleek UI assets (CSS/JS only) ----------------------
+# Stylesheet passed to gr.Blocks(css=...). The class/id selectors correspond to
+# the elem_classes / elem_id values assigned in the layout (header, main, left,
+# right, actions, tabs, tabitem, chatbot_container) and to the raw HTML
+# snippets (panel-title, helper, badge, hr, voice-hint).
+# NOTE(review): as rendered here the `.main` rule has no closing brace before
+# `.left` -- presumably lines are missing from this view; confirm against the
+# full file before editing the CSS.
 SLEEK_CSS = """
+/* Full-bleed, modern look */
+:root, body, #root, .gradio-container { height: 100%; }
 .gradio-container { padding: 0 !important; }
+.block { padding: 0 !important; }
 /* Header */
 .header {
   padding: 20px 28px;
   background: linear-gradient(135deg, #0e1726, #1d2a44 60%, #243a5e);
   color: #fff;
+  display: flex; align-items: center; justify-content: space-between;
+  gap: 16px;
 }
+.header h1 { margin: 0; font-size: 22px; letter-spacing: 0.3px; font-weight: 600; }
+.header .badge { font-size: 12px; opacity: 0.9; background:#ffffff22; padding:6px 10px; border-radius: 999px; }
+/* Main layout */
 .main {
   display: grid;
   grid-template-columns: 420px 1fr;
 .left { padding: 16px; display: flex; flex-direction: column; gap: 12px; }
 .right { padding: 0; display: flex; flex-direction: column; }
+/* Panels */
+.panel-title { font-size: 14px; font-weight: 600; color: #aeb8cc; margin-bottom: 6px; }
+.helper { font-size: 12px; color: #97a3bb; margin-bottom: 8px; }
+/* Sticky actions */
+.actions {
+  display: flex; gap: 8px; align-items: center; justify-content: stretch;
 }
+.actions .gr-button { flex: 1; }
+/* Tabs full height */
+.right .tabs { height: 100%; display: flex; flex-direction: column; }
+.right .tabitem { flex: 1; display: flex; flex-direction: column; }
+#chatbot_container { flex: 1; }
+#chatbot_container .gr-chatbot { height: 100%; }
+/* Tiny separators */
+.hr { height: 1px; background: #16203b; margin: 10px 0; }
+/* Voice hint */
+.voice-hint { font-size: 12px; color:#9fb0cc; margin-top: 4px; }
+"""
+# Browser-side dictation helper built on the Web Speech API. It defines a
+# global `rs_toggle_stt(elemId)` that starts/stops speech recognition and
+# writes the transcript into the <textarea> inside the element with that id,
+# appended to whatever text was in the box when dictation started.
+#
+# Fixes relative to the previous version:
+#   * the transcript is rebuilt from ALL results of the session (index 0), not
+#     from ev.resultIndex, so earlier finalised phrases are no longer
+#     overwritten when `continuous` is on;
+#   * the textarea is looked up BEFORE a recognizer is created/stored, so a
+#     missing box no longer leaves a never-started recognizer in `__rs_rec`;
+#   * `onend` only clears `__rs_rec` if it still refers to the recognizer that
+#     ended, so a quick stop/start cannot null out the new session.
+VOICE_STT_HTML = """
+<script>
+let __rs_rec = null;
+function rs_toggle_stt(elemId){
+  const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
+  if (!SpeechRecognition){
+    alert("This browser does not support Speech Recognition. Try Chrome or Edge.");
+    return;
+  }
+  if (__rs_rec){ __rs_rec.stop(); __rs_rec = null; return; }
+  const box = document.querySelector(`#${elemId} textarea`);
+  if (!box){ alert("Prompt box not found."); return; }
+  const base = box.value || "";
+  const rec = new SpeechRecognition();
+  rec.lang = "en-US";
+  rec.interimResults = true;
+  rec.continuous = true;
+  rec.onresult = (ev) => {
+    let t = "";
+    for (let i = 0; i < ev.results.length; i++){
+      t += ev.results[i][0].transcript;
+    }
+    box.value = (base + " " + t).trim();
+    box.dispatchEvent(new Event("input", { bubbles: true }));
+  };
+  rec.onend = () => { if (__rs_rec === rec){ __rs_rec = null; } };
+  __rs_rec = rec;
+  rec.start();
 }
+</script>
 """
+# ---------------------- Sleek UI (with fixed State wiring) ----------------------
+# Build the Gradio UI: a header row, a left column with inputs/actions and a
+# right column with two tabs (current chat and assessment history).
 with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
+    # Persistent in-memory history component (fixes list/_id error)
+    # Starts as an empty list; it is passed in and out of the handlers wired
+    # at the bottom of this block.
     assessment_history = gr.State([])
+    # Header
     with gr.Row(elem_classes=["header"]):
         gr.Markdown("<h1>Clarity Ops Augmented Decision Support</h1>")
+        # Badge text reflects the PHI_MODE / PERSIST_HISTORY settings.
+        pill = ("PHI Mode ON · history off" if (PHI_MODE and not PERSIST_HISTORY) else "PHI Mode ON" if PHI_MODE else "PHI Mode OFF")
         gr.Markdown(f"<span class='badge'>{pill}</span>")
+    # Main layout
     with gr.Row(elem_classes=["main"]):
+        # Left panel
         with gr.Column(elem_classes=["left"]):
             gr.Markdown("<div class='panel-title'>New Assessment</div>")
             gr.Markdown("<div class='helper'>Upload CSVs for analysis, or enter a prompt. Voice works in modern browsers.</div>")
+            # Multiple .csv uploads; handlers receive file paths (type="filepath").
+            files_input = gr.Files(
+                label="Upload Data Files (.csv)",
+                file_count="multiple",
+                type="filepath",
+                file_types=[".csv"],
+            )
+            # elem_id "prompt_box" is the id the Voice button passes to
+            # rs_toggle_stt so dictation lands in this textarea.
+            prompt_input = gr.Textbox(
+                label="Prompt",
+                placeholder="Paste your scenario or question here...",
+                lines=12,
+                elem_id="prompt_box",
+                autofocus=True,
+            )
             with gr.Row(elem_classes=["actions"]):
+                send_btn = gr.Button("▶️ Run Analysis", variant="primary")
+                clear_btn = gr.Button("🧹 Clear")
+                voice_btn = gr.Button("🎙️ Voice")
             gr.Markdown("<div class='voice-hint'>Click Voice to start/stop dictation into the prompt box.</div>")
+            # Connectivity check button; its result is rendered into ping_out.
+            ping_btn = gr.Button("🔌 Ping Cohere")
+            ping_out = gr.Markdown()
             gr.Markdown("<div class='hr'></div>")
             if PHI_MODE:
+                gr.Markdown(
+                    "⚠️ **PHI Mode:** History persistence is disabled by default. Avoid unnecessary identifiers."
+                )
             with gr.Accordion("Privacy & Terms", open=False):
                 gr.Markdown(PRIVACY_POLICY_TEXT)
                 gr.Markdown("<div class='hr'></div>")
                 gr.Markdown(TERMS_OF_SERVICE_TEXT)
+        # Right panel
         with gr.Column(elem_classes=["right"]):
             with gr.Tabs(elem_classes=["tabs"]):
+                with gr.TabItem("Current Assessment", id=0, elem_classes=["tabitem"]):
                     with gr.Column(elem_id="chatbot_container"):
+                        # type="messages": history is a list of
+                        # {"role": ..., "content": ...} entries.
                         chat_history_output = gr.Chatbot(
+                            label="Analysis Output", type="messages"
                         )
+                with gr.TabItem("Assessment History", id=1, elem_classes=["tabitem"]):
                     gr.Markdown("### Review Past Assessments")
+                    # Choices start empty and are replaced after each run.
+                    history_dropdown = gr.Dropdown(
+                        label="Select an assessment to review", choices=[]
+                    )
+                    history_display = gr.Markdown(label="Selected Assessment Details")
+    # Inject voice-to-text helper
+    # NOTE(review): browsers generally do not execute <script> tags inserted
+    # via innerHTML -- confirm that rs_toggle_stt is actually defined at
+    # runtime, or load the script through gr.Blocks(head=...) instead.
     gr.HTML(VOICE_STT_HTML)
+    # --------- Event logic (unchanged analysis flow) ----------
+    def run_analysis_wrapper(
+        prompt, files, chat_history_list, history_state_list
+    ):
+        """Run one analysis request and stream UI updates (generator handler).
+
+        Each yielded tuple is (chat messages, history state, dropdown update),
+        matching the three outputs wired to the Run button.
+
+        Args:
+            prompt: Text from the prompt box; a falsy value aborts with a warning.
+            files: Uploaded files (paths, or objects exposing ``.name``), or None.
+            chat_history_list: Current Chatbot messages.
+            history_state_list: Stored assessments from the gr.State component.
+
+        Yields:
+            First an interim "please wait" chat, then the final chat together
+            with the (possibly extended) history and refreshed dropdown choices.
+        """
+        if not prompt:
+            gr.Warning("Please enter a prompt.")
+            # Leave every output as it was (gr.update() with no args is a no-op).
+            yield chat_history_list, history_state_list, gr.update()
+            return
+        # NOTE(review): presumably _append_msg returns a new list rather than
+        # mutating its argument -- the code below relies on chat_with_user_msg
+        # not containing the interim "thinking" message; confirm.
+        chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
+        # Optional progress callback (not streaming in this UI)
+        def dummy_update(message: str):
+            pass
+        thinking_message = _append_msg(
+            chat_with_user_msg,
+            "assistant",
+            "```\n🧠 Generating and executing analysis... Please wait.\n```",
+        )
+        # First yield: show the placeholder while handle() runs.
+        yield thinking_message, history_state_list, gr.update()
+        ai_response_text = handle(prompt, files, dummy_update)
+        # The final chat is built from the pre-placeholder list, so the
+        # "thinking" message is replaced rather than kept.
+        final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
+        # Second-resolution timestamp doubles as the entry id and label prefix.
+        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        file_names: List[str] = []
+        if files:
+            file_names = [
+                os.path.basename(f.name if hasattr(f, "name") else f) for f in files
+            ]
+        new_entry = {
+            "id": timestamp,
+            "prompt": prompt,
+            "files": file_names,
+            "response": ai_response_text,
+            "chat_history": final_chat,
+        }
+        # Store the entry only when persistence is enabled and, in PHI mode,
+        # only when a positive history TTL is configured.
+        if PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0)):
+            updated_history: List[Dict[str, Any]] = (history_state_list or []) + [
+                new_entry
+            ]
+        else:
+            updated_history = history_state_list or []
+        # Label format "<id> - <first 40 chars of prompt>..."; view_history
+        # recovers the id by splitting on the first " - ".
+        history_labels = [
+            f"{item['id']} - {item['prompt'][:40]}..."
+            for item in updated_history
+        ]
+        yield final_chat, updated_history, gr.update(choices=history_labels)
+    def view_history(selection: str, history_state_list: List[Dict[str, Any]]) -> str:
+        """Render one stored assessment as Markdown for the history tab.
+
+        Args:
+            selection: Dropdown label of the form "<id> - <prompt preview>...".
+            history_state_list: Stored assessment dicts with the keys "id",
+                "prompt", "files", "response" and "chat_history".
+
+        Returns:
+            Markdown describing the assessment, "" when nothing is selected or
+            no history exists, or a short notice when the id is not found.
+        """
+        if not selection or not history_state_list:
+            return ""
+        # The id is everything before the first " - " of the label. A
+        # non-string selection is compared as-is (same fallback as before,
+        # without using an exception for control flow).
+        if isinstance(selection, str):
+            selected_id = selection.split(" - ", 1)[0]
+        else:
+            selected_id = selection
+        selected_assessment = next(
+            (item for item in history_state_list if item.get("id") == selected_id), None
+        )
+        if not selected_assessment:
+            return "Could not find the selected assessment."
+        file_list = selected_assessment.get("files", [])
+        # The template supplies the first "- "; the join adds the rest.
+        file_list_md = "\n- ".join(file_list) if file_list else "*(no files uploaded)*"
+        chat_md = "\n\n".join(
+            f"**{msg.get('role', '').capitalize()}:** {msg.get('content', '')}"
+            for msg in selected_assessment.get("chat_history", [])
+        )
+        # Quote EVERY line of the prompt: with a single leading "> " only the
+        # first paragraph of a multi-line prompt stays inside the block quote
+        # (blank lines, headings or list markers in the prompt end it).
+        prompt_lines = str(selected_assessment['prompt']).splitlines()
+        quoted_prompt = "\n".join(f"> {line}" for line in prompt_lines) or "> "
+        return f"""### Assessment from: {selected_assessment['id']}
+**Files Used:**
+- {file_list_md}
+---
+**Original Prompt:**
+{quoted_prompt}
+---
+**AI Generated Response:**
+{selected_assessment['response']}
+---
+**Chat Transcript:**
+{chat_md}
+"""
+    # Wire events (using proper gr.State component for history)
+    # Run button: run_analysis_wrapper is a generator, so the chat shows an
+    # interim message first and the final result (plus refreshed history
+    # state and dropdown choices) afterwards.
+    send_btn.click(
+        run_analysis_wrapper,
+        inputs=[prompt_input, files_input, chat_history_output, assessment_history],
+        outputs=[chat_history_output, assessment_history, history_dropdown],
+    )
+    # Selecting a past assessment renders it as Markdown in the history tab.
+    history_dropdown.change(
+        view_history,
+        inputs=[history_dropdown, assessment_history],
+        outputs=[history_display],
+    )
+    # Clear resets the prompt, the uploaded files and the visible chat; the
+    # stored assessment history is left untouched.
+    clear_btn.click(
+        lambda: (None, None, []),
+        outputs=[prompt_input, files_input, chat_history_output],
+    )
+    ping_btn.click(ping_cohere, outputs=[ping_out])
+    # Client-side only (no Python fn). `js` must be a JavaScript *function*:
+    # a bare call expression is evaluated and its (undefined) result is then
+    # invoked by Gradio, which raises in the browser console.
+    voice_btn.click(None, [], [], js="() => rs_toggle_stt('prompt_box')")
+# Script entry point: warn (but do not abort) when the Cohere API key is
+# missing, then serve on all interfaces at $PORT (default 7860).
 if __name__ == "__main__":
     if not os.getenv("COHERE_API_KEY"):
+        print(
+            "🔴 COHERE_API_KEY environment variable not set. Application may not function correctly."
+        )
     demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))