VEDAGI1 commited on
Commit
5dbf496
·
verified ·
1 Parent(s): dc12e99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +436 -480
app.py CHANGED
@@ -1,13 +1,13 @@
1
- # app.py
2
- #
3
- # Universal AI Data Analyst with:
4
- # - IMPROVED: "Plan-and-Execute" logic for high-accuracy analysis.
5
- # - IMPROVED: Professional, structured report generation.
6
- # - IMPROVED: Enriched schema context for the AI analyst.
7
- # - Unchanged UI, event wiring, and core infrastructure.
8
-
9
- from __future__ import annotations
10
-
11
  import io
12
  import json
13
  import os
@@ -15,561 +15,517 @@ import traceback
15
  from contextlib import redirect_stdout
16
  from datetime import datetime
17
  from typing import Any, Dict, List
18
-
19
  import gradio as gr
20
  import pandas as pd
21
  import regex as re2
22
  import re
23
-
24
  from langchain_cohere import ChatCohere # noqa: F401
25
  from settings import (
26
- GENERAL_CONVERSATION_PROMPT,
27
- COHERE_MODEL_PRIMARY,
28
- COHERE_TIMEOUT_S,
29
- USE_OPEN_FALLBACKS,
30
  )
 
 
 
 
 
 
 
 
 
31
  from audit_log import log_event
32
  from privacy import safety_filter, refusal_reply
33
  from llm_router import cohere_chat, _co_client, cohere_embed
34
-
35
- # Try to import optional HIPAA flags; fall back to safe defaults if not defined.
36
- try:
37
- from settings import (
38
- PHI_MODE,
39
- PERSIST_HISTORY,
40
- HISTORY_TTL_DAYS,
41
- REDACT_BEFORE_LLM,
42
- ALLOW_EXTERNAL_PHI,
43
- )
44
- except Exception:
45
- PHI_MODE = False
46
- PERSIST_HISTORY = True
47
- HISTORY_TTL_DAYS = 365
48
- REDACT_BEFORE_LLM = False
49
- ALLOW_EXTERNAL_PHI = True
50
-
51
-
52
- # ---------------------- Helpers (analysis logic selectively improved) ----------------------
53
  def load_markdown_text(filepath: str) -> str:
54
- try:
55
- with open(filepath, "r", encoding="utf-8") as f:
56
- return f.read()
57
- except FileNotFoundError:
58
- return f"**Error:** Document `{os.path.basename(filepath)}` not found."
59
-
60
-
61
  def _sanitize_text(s: str) -> str:
62
- if not isinstance(s, str):
63
- return s
64
- # Remove control characters (except newline and tab)
65
- return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
66
-
67
-
68
- # Conservative PHI redaction patterns (only applied if PHI_MODE & REDACT_BEFORE_LLM are enabled)
69
  PHI_PATTERNS = [
70
- (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),
71
- (re.compile(r"\b\d{9}\b"), "[REDACTED_MRN]"),
72
- (re.compile(r"\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b"), "[REDACTED_PHONE]"),
73
- (re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"), "[REDACTED_EMAIL]"),
74
- (re.compile(r"\b(19|20)\d{2}-\d{2}-\d{2}\b"), "[REDACTED_DOB]"),
75
- (re.compile(r"\b\d{2}/\d{2}/(19|20)\d{2}\b"), "[REDACTED_DOB]"),
76
- (re.compile(r"\b\d{5}(-\d{4})?\b"), "[REDACTED_ZIP]"),
77
  ]
78
-
79
-
80
  def redact_phi(text: str) -> str:
81
- if not isinstance(text, str):
82
- return text
83
- t = text
84
- for pat, repl in PHI_PATTERNS:
85
- t = pat.sub(repl, t)
86
- return t
87
-
88
-
89
  def safe_log(event_name: str, meta: dict | None = None):
90
- # Avoid logging raw PHI or payloads
91
- try:
92
- meta = (meta or {}).copy()
93
- meta.pop("raw", None)
94
- log_event(event_name, None, meta)
95
- except Exception:
96
- # Never raise from logging
97
- pass
98
-
99
-
100
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
101
- """
102
- IMPROVED: Generates a Python script using a "Plan-and-Execute" approach.
103
- The AI first creates a step-by-step plan, then writes code to execute it.
104
- This ensures the analysis is logical, correctly aggregated, and aligned with the user's goal.
105
- """
106
- prompt_for_coder = f"""\
107
- You are an expert-level Python data scientist acting as a consultant. Your task is to analyze data to answer a user's business request.
108
-
109
- --- USER'S SCENARIO ---
110
- {user_scenario}
111
- --- END SCENARIO ---
112
-
 
 
 
113
  --- DATA SCHEMA ---
114
  {schema_context}
115
  --- END DATA SCHEMA ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
- You must follow a rigorous two-step process:
118
-
119
- **Step 1: Create a Detailed Analysis Plan.**
120
- First, think step-by-step. Deconstruct the user's request into a clear, logical plan.
121
- The plan must identify the key metrics, necessary data manipulations (cleaning, grouping, aggregation), and the final outputs required.
122
-
123
- - **CRITICAL for aggregation:** If the user asks for analysis by category (e.g., "specialty," "department"), you MUST identify the correct high-level categorical column for grouping. DO NOT aggregate by granular, free-text procedure descriptions unless explicitly asked. Your goal is to find meaningful, strategic trends.
124
-
125
- **Step 2: Write the Python Script.**
126
- Based on your plan, write a complete Python script.
127
-
128
- CRITICAL SCRIPTING RULES:
129
- 1. **NO FILE READING:** The data is already loaded into a list of pandas DataFrames called `dfs`. You MUST use this variable. Do not include `pd.read_csv`.
130
- 2. **STRICTLY JSON OUTPUT:** The script's ONLY output to stdout MUST be a single, well-structured JSON object containing all the raw data findings from your plan.
131
- 3. **ROBUST DATA CLEANING:** Before performing calculations, clean data robustly. Convert numeric columns to numbers using `pd.to_numeric(..., errors='coerce')`. Handle missing values (`NaN`) appropriately (e.g., by excluding them from averages).
132
- 4. **JSON SERIALIZATION:** Ensure all data in the final dictionary is JSON-serializable. Use `.item()` for single numpy values and `.tolist()` for arrays/series.
133
 
134
- Now, provide your response in the following format:
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
- **ANALYSIS PLAN:**
137
 
138
- Objective: [Briefly state the main goal]
139
- Data Cleaning: [Describe steps to clean and prepare the data]
140
- Analysis Step A: [e.g., "Calculate average wait times per hospital by grouping dfs[0] by 'Facility' and averaging 'Surgery_Median'."]
141
- Analysis Step B: [e.g., "Identify top 5 specialties by grouping dfs[0] by the 'Specialty' column and calculating the mean of 'Surgery_Median'."]
142
- Analysis Step C: [e.g., "Determine zone-level performance by grouping by 'Zone' and comparing to the overall provincial average."]
143
- JSON Output Structure: [Describe the keys and values of the final JSON object]
144
 
145
- text**PYTHON SCRIPT:**
146
- ```python
147
- # Your complete Python script starts here
148
- import pandas as pd
149
- import json
150
- import re
151
 
152
- # Main analysis logic...
153
- # ...
154
- # Final print statement
155
- print(json.dumps(final_data_structure, indent=4))
156
- """
157
- generated_text = cohere_chat(prompt_for_coder)
158
- This regex is more robust for extracting the final code block
159
- match = re2.search(r"PYTHON SCRIPT:\s*python\n(.*?)", generated_text, re2.DOTALL)
160
- if match:
161
- return match.group(1).strip()
162
- Fallback if the structured format fails
163
- fallback_match = re2.search(r"python\n(.*?)", generated_text, re2.DOTALL)
164
- if fallback_match:
165
- return fallback_match.group(1).strip()
166
- return "print(json.dumps({'error': 'Failed to generate a valid Python script from the plan.'}))"
167
- def _generate_long_report(prompt: str) -> str:
168
- try:
169
- client = _co_client()
170
- if not client:
171
- return "Error: Cohere client not initialized."
172
- response = client.chat(
173
- model=COHERE_MODEL_PRIMARY,
174
- message=prompt,
175
- max_tokens=4096,
176
- )
177
- return response.text
178
- except Exception as e:
179
- safe_log("cohere_chat_error", {"err": str(e)})
180
- return f"Error during final report generation: {e}"
181
- def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
182
- """
183
- IMPROVED: Generates a professional, structured report from the JSON data.
184
- The prompt guides the AI to synthesize insights in a standard consulting format,
185
- ensuring a high level of detail and actionable recommendations.
186
- """
187
- prompt_for_writer = f"""\
188
- You are an expert management consultant specializing in data-driven strategy. A Python script has been executed to extract key data points based on a user's request. Your task is to synthesize this raw data into a polished, comprehensive, and actionable report.
189
- --- USER'S ORIGINAL SCENARIO ---
190
  {user_scenario}
191
  --- END SCENARIO ---
 
192
  --- RAW DATA FINDINGS (JSON) ---
193
  {raw_data_json}
194
  --- END RAW DATA ---
195
- CRITICAL INSTRUCTIONS:
196
- You must write a final report that follows this exact structure:
197
- Executive Summary
198
-
199
- Start with a brief paragraph summarizing the core problem, key findings, and top recommendations. This should be a high-level overview for a leadership audience.
200
 
201
- 1. [First Key Finding, e.g., Hospitals with the Longest Wait Times]
202
-
203
- Present the relevant data in a Markdown table.
204
- Write a short narrative interpreting the data. What does it mean? Are there any outliers? Why might these facilities have long waits (e.g., specialized care, rural location, capacity issues)?
 
 
 
205
 
206
- 2. [Second Key Finding, e.g., Specialties with the Longest Wait Times]
207
 
208
- Present the relevant data in a Markdown table.
209
- Interpret the findings. Why are these specialties facing delays (e.g., specialist shortages, equipment needs)?
210
 
211
- 3. [Third Key Finding, e.g., Zone-Level Performance]
212
 
213
- Present the data in a table, including a comparison to a relevant average or baseline.
214
- Analyze the geographic or systemic issues this data reveals.
 
 
 
 
 
 
 
215
 
216
- 4. [Fourth Key Finding, if applicable, e.g., Geographic Distribution]
217
 
218
- Synthesize location data with the wait-time findings.
219
- Discuss the implications for patient equity, travel burdens, and access to care.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
- 5. Recommendations for Resource Allocation
222
 
223
- Provide specific, actionable, and justified recommendations.
224
- Structure them by category (e.g., by facility, by specialty, by zone).
225
- For each recommendation, provide a clear rationale directly linked to the data findings above (e.g., "Allocate additional resources to Glace Bay Hospital because it is a rural facility in a high-wait zone, suggesting a capacity bottleneck.").
226
 
227
- Data Limitations
228
 
229
- Briefly mention any potential limitations of the analysis (e.g., missing data, use of proxies, case severity not included). This adds credibility to the report.
230
 
231
- Do not just repeat the JSON data. Your value is in interpreting the numbers, connecting the dots between different findings, and providing clear, data-backed strategic advice.
232
- """
233
- return _generate_long_report(prompt_for_writer)
234
- def _append_msg(h: List[Dict[str, str]], r: str, c: str) -> List[Dict[str, str]]:
235
- return (h or []) + [{"role": r, "content": c}]
236
- def ping_cohere() -> str:
237
- try:
238
- cli = _co_client()
239
- if not cli:
240
- return "Cohere client not initialized."
241
- vecs = cohere_embed(["hello", "world"])
242
- return f"Cohere OK (model={COHERE_MODEL_PRIMARY})" if vecs else "Cohere reachable."
243
- except Exception as e:
244
- return f"Cohere ping failed: {e}"
245
- def handle(user_msg: str, files: list, yield_update) -> str:
246
- try:
247
- Safety filter on incoming message
248
- safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
249
- if blocked_in:
250
- return refusal_reply(reason_in)
251
- Optional PHI redaction for prompts sent to an external LLM
252
- redacted_in = safe_in
253
- if PHI_MODE and REDACT_BEFORE_LLM:
254
- redacted_in = redact_phi(safe_in)
255
- file_paths: List[str] = [
256
- getattr(f, "name", None) or f for f in (files or [])
257
- ]
258
- if file_paths:
259
- CSV analysis path
260
- dataframes, schema_parts = [], []
261
- for i, p in enumerate(file_paths):
262
- if p.endswith(".csv"):
263
- try:
264
- df = pd.read_csv(p)
265
- except UnicodeDecodeError:
266
- df = pd.read_csv(p, encoding="latin1")
267
- dataframes.append(df)
268
- --- IMPROVEMENT: ENRICHED SCHEMA CONTEXT ---
269
- schema_buffer = io.StringIO()
270
- df.info(buf=schema_buffer)
271
- schema_info = schema_buffer.getvalue()
272
- schema_parts.append(
273
- f"""DataFrame dfs[{i}] ({os.path.basename(p)}):\n\nHead\n{df.head().to_markdown()}\n\nSchema and Data Types\n\n{schema_info}\n\n\nSummary Statistics\n{df.describe(include='all').to_markdown()}\n"""
274
- )
275
- if not dataframes:
276
- return "Please upload at least one CSV file."
277
- schema_context = "\n".join(schema_parts)
278
- If external PHI is not allowed, use redacted prompt; otherwise use original
279
- prompt_for_code = (
280
- redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
281
- )
282
- yield_update("Generating aligned analysis script...")
283
- analysis_script = _create_python_script(prompt_for_code, schema_context)
284
- yield_update("Executing script to extract raw data...")
285
- execution_namespace = {"dfs": dataframes, "pd": pd, "re": re, "json": json}
286
- output_buffer = io.StringIO()
287
- try:
288
- with redirect_stdout(output_buffer):
289
- exec(analysis_script, execution_namespace)
290
- raw_data_output = output_buffer.getvalue()
291
- except Exception as e:
292
- return (
293
- f"An error occurred executing the script: {e}\n\nGenerated Script:\n"
294
- f"python\n{analysis_script}\n"
295
- )
296
- yield_update("Synthesizing final comprehensive report...")
297
- writer_input = (
298
- redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
299
- )
300
- final_report = _generate_final_report(writer_input, raw_data_output)
301
- return _sanitize_text(final_report)
302
- else:
303
- Pure chat path
304
- chat_input = (
305
- redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
306
- )
307
- prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
308
- return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
309
- except Exception as e:
310
- tb = traceback.format_exc()
311
- safe_log("app_error", {"err": str(e)})
312
- return "A critical error occurred. Please contact your administrator." if PHI_MODE else f"A critical error occurred: {e}"
313
- PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
314
- TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
315
- ---------------------- Sleek UI assets (CSS/JS only) ----------------------
316
  SLEEK_CSS = """
317
  /* Full-bleed, modern look */
318
  :root, body, #root, .gradio-container { height: 100%; }
319
  .gradio-container { padding: 0 !important; }
320
  .block { padding: 0 !important; }
 
321
  /* Header */
322
  .header {
323
- padding: 20px 28px;
324
- background: linear-gradient(135deg, #0e1726, #1d2a44 60%, #243a5e);
325
- color: #fff;
326
- display: flex; align-items: center; justify-content: space-between;
327
- gap: 16px;
328
  }
329
  .header h1 { margin: 0; font-size: 22px; letter-spacing: 0.3px; font-weight: 600; }
330
  .header .badge { font-size: 12px; opacity: 0.9; background:#ffffff22; padding:6px 10px; border-radius: 999px; }
 
331
  /* Main layout */
332
  .main {
333
- display: grid;
334
- grid-template-columns: 420px 1fr;
335
- gap: 16px;
336
- padding: 16px;
337
- height: calc(100vh - 72px);
338
- box-sizing: border-box;
339
  }
340
  .left, .right {
341
- background: #0b1020;
342
- color: #e9edf3;
343
- border-radius: 16px;
344
- border: 1px solid #1c2642;
345
  }
346
  .left { padding: 16px; display: flex; flex-direction: column; gap: 12px; }
347
  .right { padding: 0; display: flex; flex-direction: column; }
 
348
  /* Panels */
349
  .panel-title { font-size: 14px; font-weight: 600; color: #aeb8cc; margin-bottom: 6px; }
350
  .helper { font-size: 12px; color: #97a3bb; margin-bottom: 8px; }
 
351
  /* Sticky actions */
352
  .actions {
353
- display: flex; gap: 8px; align-items: center; justify-content: stretch;
354
  }
355
  .actions .gr-button { flex: 1; }
 
356
  /* Tabs full height */
357
  .right .tabs { height: 100%; display: flex; flex-direction: column; }
358
  .right .tabitem { flex: 1; display: flex; flex-direction: column; }
359
  #chatbot_container { flex: 1; }
360
  #chatbot_container .gr-chatbot { height: 100%; }
 
361
  /* Tiny separators */
362
  .hr { height: 1px; background: #16203b; margin: 10px 0; }
 
363
  /* Voice hint */
364
  .voice-hint { font-size: 12px; color:#9fb0cc; margin-top: 4px; }
365
  """
 
366
  VOICE_STT_HTML = """
367
  <script>
368
  let __rs_rec = null;
369
  function rs_toggle_stt(elemId){
370
- const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
371
- if (!SpeechRecognition){
372
- alert("This browser does not support Speech Recognition. Try Chrome or Edge.");
373
- return;
374
- }
375
- if (__rs_rec){ __rs_rec.stop(); __rs_rec = null; return; }
376
- __rs_rec = new SpeechRecognition();
377
- __rs_rec.lang = "en-US";
378
- __rs_rec.interimResults = true;
379
- __rs_rec.continuous = true;
380
-
381
- const box = document.querySelector(`#${elemId} textarea`);
382
- if (!box){ alert("Prompt box not found."); return; }
383
- let base = box.value || "";
384
-
385
- __rs_rec.onresult = (ev) => {
386
- let t = "";
387
- for (let i = ev.resultIndex; i < ev.results.length; i++){
388
- t += ev.results[i][0].transcript;
389
- }
390
- box.value = (base + " " + t).trim();
391
- box.dispatchEvent(new Event("input", { bubbles: true }));
392
- };
393
- __rs_rec.onend = () => { __rs_rec = null; };
394
- __rs_rec.start();
395
  }
396
  </script>
397
  """
398
- ---------------------- Sleek UI (with fixed State wiring) ----------------------
 
 
 
399
  with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
400
- Persistent in-memory history component (fixes list/_id error)
401
- assessment_history = gr.State([])
402
- Header
403
- with gr.Row(elem_classes=["header"]):
404
- gr.Markdown("Clarity Ops Augmented Decision Support")
405
- pill = (
406
- "PHI Mode ON · history off"
407
- if (PHI_MODE and not PERSIST_HISTORY)
408
- else "PHI Mode ON"
409
- if PHI_MODE
410
- else "PHI Mode OFF"
411
- )
412
- gr.Markdown(f"{pill}")
413
- Main layout
414
- with gr.Row(elem_classes=["main"]):
415
- Left panel
416
- with gr.Column(elem_classes=["left"]):
417
- gr.Markdown("New Assessment")
418
- gr.Markdown(
419
- "Upload CSVs for analysis, or enter a prompt. Voice works in modern browsers."
420
- )
421
- files_input = gr.Files(
422
- label="Upload Data Files (.csv)",
423
- file_count="multiple",
424
- type="filepath",
425
- file_types=[".csv"],
426
- )
427
- prompt_input = gr.Textbox(
428
- label="Prompt",
429
- placeholder="Paste your scenario or question here...",
430
- lines=12,
431
- elem_id="prompt_box",
432
- autofocus=True,
433
- )
434
- with gr.Row(elem_classes=["actions"]):
435
- send_btn = gr.Button("Run Analysis", variant="primary")
436
- clear_btn = gr.Button("Clear")
437
- voice_btn = gr.Button("Voice")
438
- gr.Markdown(
439
- "Click Voice to start/stop dictation into the prompt box."
440
- )
441
- ping_btn = gr.Button("Ping Cohere")
442
- ping_out = gr.Markdown()
443
- gr.Markdown("")
444
- if PHI_MODE:
445
- gr.Markdown(
446
- "Warning: PHI Mode: History persistence is disabled by default. Avoid unnecessary identifiers."
447
- )
448
- with gr.Accordion("Privacy & Terms", open=False):
449
- gr.Markdown(PRIVACY_POLICY_TEXT)
450
- gr.Markdown("")
451
- gr.Markdown(TERMS_OF_SERVICE_TEXT)
452
- Right panel
453
- with gr.Column(elem_classes=["right"]):
454
- with gr.Tabs(elem_classes=["tabs"]):
455
- with gr.TabItem("Current Assessment", id=0, elem_classes=["tabitem"]):
456
- with gr.Column(elem_id="chatbot_container"):
457
- chat_history_output = gr.Chatbot(
458
- label="Analysis Output", type="messages"
459
- )
460
- with gr.TabItem("Assessment History", id=1, elem_classes=["tabitem"]):
461
- gr.Markdown("### Review Past Assessments")
462
- history_dropdown = gr.Dropdown(
463
- label="Select an assessment to review", choices=[]
464
- )
465
- history_display = gr.Markdown(label="Selected Assessment Details")
466
- Inject voice-to-text helper
467
- gr.HTML(VOICE_STT_HTML)
468
- --------- Event logic (unchanged analysis flow) ----------
469
- def run_analysis_wrapper(
470
- prompt, files, chat_history_list, history_state_list
471
- ):
472
- if not prompt:
473
- gr.Warning("Please enter a prompt.")
474
- yield chat_history_list, history_state_list, gr.update()
475
- return
476
- Append user's message
477
- chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
478
- Thinking bubble
479
- thinking_message = _append_msg(
480
- chat_with_user_msg,
481
- "assistant",
482
- "Generating and executing analysis... Please wait.",
483
- )
484
- yield thinking_message, history_state_list, gr.update()
485
- Run analysis/chat
486
- def dummy_update(message: str):
487
- pass
488
- ai_response_text = handle(prompt, files, dummy_update)
489
- Append final assistant response
490
- final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
491
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
492
- Capture filenames (if any)
493
- file_names: List[str] = []
494
- if files:
495
- file_names = [
496
- os.path.basename(f.name if hasattr(f, "name") else f) for f in files
497
- ]
498
- Build history record
499
- new_entry = {
500
- "id": timestamp,
501
- "prompt": prompt,
502
- "files": file_names,
503
- "response": ai_response_text,
504
- "chat_history": final_chat,
505
- }
506
- Respect PHI/history flags
507
- if PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0)):
508
- updated_history: List[Dict[str, Any]] = (history_state_list or []) + [
509
- new_entry
510
- ]
511
- else:
512
- updated_history = history_state_list or []
513
- history_labels = [
514
- f"{item['id']} - {item['prompt'][:40]}..."
515
- for item in updated_history
516
- ]
517
- yield final_chat, updated_history, gr.update(choices=history_labels)
518
- def view_history(selection: str, history_state_list: List[Dict[str, Any]]) -> str:
519
- if not selection or not history_state_list:
520
- return ""
521
- try:
522
- selected_id = selection.split(" - ", 1)[0]
523
- except Exception:
524
- selected_id = selection
525
- selected_assessment = next(
526
- (item for item in history_state_list if item.get("id") == selected_id), None
527
- )
528
- if not selected_assessment:
529
- return "Could not find the selected assessment."
530
- file_list = selected_assessment.get("files", [])
531
- file_list_md = "\n- ".join(file_list) if file_list else "(no files uploaded)"
532
- chat_entries = selected_assessment.get("chat_history", [])
533
- chat_md_lines = []
534
- for msg in chat_entries:
535
- role = msg.get("role", "").capitalize()
536
- content = msg.get("content", "")
537
- chat_md_lines.append(f"{role}: {content}")
538
- chat_md = "\n\n".join(chat_md_lines)
539
- return f"""### Assessment from: {selected_assessment['id']}
540
- Files Used:
541
-
542
- {file_list_md}
543
-
544
-
545
- Original Prompt:
546
- {selected_assessment['prompt']}
547
-
548
- AI Generated Response:
 
 
 
 
 
 
549
  {selected_assessment['response']}
550
- Chat Transcript:
 
551
  {chat_md}
552
  """
553
- Wire events (using proper gr.State component for history)
554
- send_btn.click(
555
- run_analysis_wrapper,
556
- inputs=[prompt_input, files_input, chat_history_output, assessment_history],
557
- outputs=[chat_history_output, assessment_history, history_dropdown],
558
- )
559
- history_dropdown.change(
560
- view_history,
561
- inputs=[history_dropdown, assessment_history],
562
- outputs=[history_display],
563
- )
564
- clear_btn.click(
565
- lambda: (None, None, []),
566
- outputs=[prompt_input, files_input, chat_history_output],
567
- )
568
- ping_btn.click(ping_cohere, outputs=[ping_out])
569
- voice_btn.click(None, [], [], js="rs_toggle_stt('prompt_box')")
570
- if name == "main":
571
- if not os.getenv("COHERE_API_KEY"):
572
- print(
573
- "COHERE_API_KEY environment variable not set. Application may not function correctly."
574
- )
575
- demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))
 
 
1
+ this works nov 9th 2025
2
+
3
+ app.py
4
+ Universal AI Data Analyst with:
5
+ - Unchanged analysis & assessment logic
6
+ - Fixed Gradio event wiring (uses gr.State for history)
7
+ - Triple-quoted progress strings (no unterminated literals)
8
+ - Sleek full-width UI and Voice-to-Text (browser Web Speech API)
9
+ - Optional HIPAA flags (fallback defaults if not present in settings.py)
10
+ from future import annotations
11
  import io
12
  import json
13
  import os
 
15
  from contextlib import redirect_stdout
16
  from datetime import datetime
17
  from typing import Any, Dict, List
 
18
  import gradio as gr
19
  import pandas as pd
20
  import regex as re2
21
  import re
 
22
  from langchain_cohere import ChatCohere # noqa: F401
23
  from settings import (
24
+ GENERAL_CONVERSATION_PROMPT,
25
+ COHERE_MODEL_PRIMARY,
26
+ COHERE_TIMEOUT_S, # noqa: F401
27
+ USE_OPEN_FALLBACKS # noqa: F401
28
  )
29
+ Try to import optional HIPAA flags; fall back to safe defaults if not defined.
30
+ try:
31
+ from settings import PHI_MODE, PERSIST_HISTORY, HISTORY_TTL_DAYS, REDACT_BEFORE_LLM, ALLOW_EXTERNAL_PHI
32
+ except Exception:
33
+ PHI_MODE = False
34
+ PERSIST_HISTORY = True
35
+ HISTORY_TTL_DAYS = 365
36
+ REDACT_BEFORE_LLM = False
37
+ ALLOW_EXTERNAL_PHI = True
38
  from audit_log import log_event
39
  from privacy import safety_filter, refusal_reply
40
  from llm_router import cohere_chat, _co_client, cohere_embed
41
+ ---------------------- Helpers (analysis logic unchanged) ----------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def load_markdown_text(filepath: str) -> str:
43
+ try:
44
+ with open(filepath, "r", encoding="utf-8") as f:
45
+ return f.read()
46
+ except FileNotFoundError:
47
+ return f"Error: Document {os.path.basename(filepath)} not found."
 
 
48
  def _sanitize_text(s: str) -> str:
49
+ if not isinstance(s, str):
50
+ return s
51
+ # Remove control characters (except newline and tab)
52
+ return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
53
+ Conservative PHI redaction patterns (only applied if PHI_MODE & REDACT_BEFORE_LLM are enabled)
 
 
54
  PHI_PATTERNS = [
55
+ (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),
56
+ (re.compile(r"\b\d{9}\b"), "[REDACTED_MRN]"),
57
+ (re.compile(r"\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b"), "[REDACTED_PHONE]"),
58
+ (re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+.[A-Za-z]{2,}"), "[REDACTED_EMAIL]"),
59
+ (re.compile(r"\b(19|20)\d{2}-\d{2}-\d{2}\b"), "[REDACTED_DOB]"),
60
+ (re.compile(r"\b\d{2}/\d{2}/(19|20)\d{2}\b"), "[REDACTED_DOB]"),
61
+ (re.compile(r"\b\d{5}(-\d{4})?\b"), "[REDACTED_ZIP]"),
62
  ]
 
 
63
  def redact_phi(text: str) -> str:
64
+ if not isinstance(text, str):
65
+ return text
66
+ t = text
67
+ for pat, repl in PHI_PATTERNS:
68
+ t = pat.sub(repl, t)
69
+ return t
 
 
70
  def safe_log(event_name: str, meta: dict | None = None):
71
+ # Avoid logging raw PHI or payloads
72
+ try:
73
+ meta = (meta or {}).copy()
74
+ meta.pop("raw", None)
75
+ log_event(event_name, None, meta)
76
+ except Exception:
77
+ # Never raise from logging
78
+ pass
 
 
79
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
80
+ EXPERT_ANALYTICAL_GUIDELINES = """
81
+ --- EXPERT ANALYTICAL GUIDELINES ---
82
+ When writing your script, you MUST follow these expert business rules:
83
+ Linking Datasets Rule: If you need to connect facilities to health zones when the 'zone' column is not in the facility list,
84
+ you must first identify the high-priority zone from the beds data, then find the major city (by facility count) in the facility list,
85
+ and then assess that city's capacity. Do not try to filter the facility list by a 'zone' column if it does not exist in the schema.
86
+ Prioritization Rule: To prioritize locations, you MUST combine the most recent population data with specific high-risk health indicators
87
+ to create a multi-factor risk score.
88
+ Capacity Calculation Rule: For capacity over a 3-month window, assume 60 working days.
89
+ Cost Calculation Rule: Sum 'Startup cost' and 'Ongoing cost' per person before multiplying.
90
+ """
91
+ prompt_for_coder = f"""
92
+ You are an expert Python data scientist. Your job is to write a script to extract the data needed to answer the user's request.
93
+ You have dataframes in a list dfs.
94
+ {EXPERT_ANALYTICAL_GUIDELINES}
95
  --- DATA SCHEMA ---
96
  {schema_context}
97
  --- END DATA SCHEMA ---
98
+ CRITICAL RULES:
99
+ DO NOT READ FILES: You MUST NOT include pd.read_csv. The data is ALREADY loaded in the dfs variable. You MUST use this variable. Failure to do so will cause a fatal error.
100
+ JSON OUTPUT ONLY: Your script's ONLY output must be a single JSON object printed to stdout containing the raw data findings.
101
+ BE PRECISE: Use the exact, case-sensitive column names from the schema and robustly clean strings (re.sub()) before converting to numbers.
102
+ JSON SERIALIZATION: Before adding data to your final dictionary for JSON conversion, you MUST convert any pandas-specific types (like int64) to standard Python types using .item() for single values or .tolist() for lists.
103
+ --- USER'S SCENARIO ---
104
+ {user_scenario}
105
+ --- PYTHON SCRIPT ---
106
+ Now, write the complete Python script that performs the analysis and prints a single, serializable JSON object.
107
+ code
108
+ Python
109
+ """
110
+ generated_text = cohere_chat(prompt_for_coder)
111
+ match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
112
+ if match:
113
+ return match.group(1).strip()
114
+ return "print(json.dumps({'error': 'Failed to generate a valid Python script.'}))"
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
+ def _generate_long_report(prompt: str) -> str:
118
+ try:
119
+ client = _co_client()
120
+ if not client:
121
+ return "Error: Cohere client not initialized."
122
+ response = client.chat(
123
+ model=COHERE_MODEL_PRIMARY,
124
+ message=prompt,
125
+ max_tokens=4096,
126
+ )
127
+ return response.text
128
+ except Exception as e:
129
+ safe_log("cohere_chat_error", {"err": str(e)})
130
+ return f"Error during final report generation: {e}"
131
 
 
132
 
133
+ def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
134
+ prompt_for_writer = f"""\
135
+ You are an expert management consultant and data analyst.
136
+ A data science script has run to extract key findings. You have the user's original request and the raw JSON data.
 
 
137
 
138
+ Your task is to synthesize these raw findings into a single, comprehensive, and professional report that directly answers all of the user's questions with detailed justifications.
 
 
 
 
 
139
 
140
+ --- USER'S ORIGINAL SCENARIO & DELIVERABLES ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  {user_scenario}
142
  --- END SCENARIO ---
143
+
144
  --- RAW DATA FINDINGS (JSON) ---
145
  {raw_data_json}
146
  --- END RAW DATA ---
 
 
 
 
 
147
 
148
+ Now, write the final, polished report. The report MUST:
149
+ 1. Follow the "Expected Output Format" requested by the user.
150
+ 2. Use tables, bullet points, and DETAILED narrative justifications for each recommendation.
151
+ 3. Synthesize the raw data into actionable insights. Do not just copy the raw numbers; interpret them.
152
+ 4. Ensure you fully address ALL evaluation questions, especially the final recommendations.
153
+ """
154
+ return _generate_long_report(prompt_for_writer)
155
 
 
156
 
157
+ def _append_msg(h: List[Dict[str, str]], r: str, c: str) -> List[Dict[str, str]]:
158
+ return (h or []) + [{"role": r, "content": c}]
159
 
 
160
 
161
def ping_cohere() -> str:
    """Lightweight connectivity check: embed two tokens and report the outcome."""
    try:
        client = _co_client()
        if not client:
            return "Cohere client not initialized."
        # Embedding two short strings exercises the API without a chat call.
        if cohere_embed(["hello", "world"]):
            return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY})"
        return "Cohere reachable."
    except Exception as exc:
        return f"Cohere ping failed: {exc}"
170
 
 
171
 
172
def handle(user_msg: str, files: list, yield_update) -> str:
    """Main request router: CSV analysis pipeline or plain chat.

    user_msg:     the raw user prompt (safety-filtered below).
    files:        uploaded file objects or path strings; only .csv is used.
    yield_update: progress callback taking a status string (may be a no-op).
    Returns the final assistant text, or a refusal/error message.
    """
    try:
        # Safety filter on incoming message; refuse outright if blocked.
        safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
        if blocked_in:
            return refusal_reply(reason_in)

        # Optional PHI redaction for prompts sent to an external LLM.
        # NOTE(review): redact_phi is defined elsewhere in this file/module.
        redacted_in = safe_in
        if PHI_MODE and REDACT_BEFORE_LLM:
            redacted_in = redact_phi(safe_in)

        # Gradio file objects expose .name; plain strings pass through as-is.
        file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]

        if file_paths:
            # ---- CSV analysis path ----
            dataframes, schema_parts = [], []
            for i, p in enumerate(file_paths):
                # Only .csv is handled; other uploads are silently skipped.
                # NOTE(review): case-sensitive — ".CSV" would be ignored.
                if p.endswith(".csv"):
                    try:
                        df = pd.read_csv(p)
                    except UnicodeDecodeError:
                        # Fall back for non-UTF-8 exports (e.g. Excel latin1).
                        df = pd.read_csv(p, encoding="latin1")
                    dataframes.append(df)
                    # Head preview gives the code generator the column schema.
                    schema_parts.append(
                        f"DataFrame `dfs[{i}]` (`{os.path.basename(p)}`):\n{df.head().to_markdown()}\n"
                    )

            if not dataframes:
                return "Please upload at least one CSV file."

            schema_context = "\n".join(schema_parts)

            # If external PHI is not allowed, send the redacted prompt instead.
            prompt_for_code = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in

            yield_update("""```
🧠 Generating aligned analysis script...
```""")
            # NOTE(review): _create_python_script is defined elsewhere in this file.
            analysis_script = _create_python_script(prompt_for_code, schema_context)

            yield_update("""```
⚙️ Executing script to extract raw data...
```""")
            # The generated script sees the uploaded frames as `dfs` plus a
            # few safe modules; its printed output is captured as findings.
            execution_namespace = {"dfs": dataframes, "pd": pd, "re": re, "json": json}
            output_buffer = io.StringIO()

            try:
                # SECURITY: exec() runs LLM-generated code with full process
                # privileges — consider sandboxing/allow-listing this.
                with redirect_stdout(output_buffer):
                    exec(analysis_script, execution_namespace)
                raw_data_output = output_buffer.getvalue()
            except Exception as e:
                # Surface the generated script so the user can debug it.
                return (
                    f"An error occurred executing the script: {e}\n\nGenerated Script:\n"
                    f"```python\n{analysis_script}\n```"
                )

            yield_update("""```
✍️ Synthesizing final comprehensive report...
```""")
            writer_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
            final_report = _generate_final_report(writer_input, raw_data_output)
            return _sanitize_text(final_report)
        else:
            # ---- Pure chat path (no files uploaded) ----
            chat_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
            prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
            return _sanitize_text(cohere_chat(prompt) or "How can I help further?")

    except Exception as e:
        # NOTE(review): tb is computed but never used (only str(e) is logged).
        tb = traceback.format_exc()
        safe_log("app_error", {"err": str(e)})
        # PHI mode hides exception details from end users.
        return "A critical error occurred. Please contact your administrator." if PHI_MODE else f"A critical error occurred: {e}"
245
 
 
246
 
247
# Legal copy rendered in the "Privacy & Terms" accordion; load_markdown_text
# returns an inline error string if either file is missing.
PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
 
249
 
 
250
 
251
+ # ---------------------- Sleek UI assets (CSS/JS only) ----------------------
252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
# Custom stylesheet injected into gr.Blocks(css=...): dark theme, full-bleed
# two-column grid (420px left control rail, flexible right output pane).
SLEEK_CSS = """
/* Full-bleed, modern look */
:root, body, #root, .gradio-container { height: 100%; }
.gradio-container { padding: 0 !important; }
.block { padding: 0 !important; }

/* Header */
.header {
  padding: 20px 28px;
  background: linear-gradient(135deg, #0e1726, #1d2a44 60%, #243a5e);
  color: #fff;
  display: flex; align-items: center; justify-content: space-between;
  gap: 16px;
}
.header h1 { margin: 0; font-size: 22px; letter-spacing: 0.3px; font-weight: 600; }
.header .badge { font-size: 12px; opacity: 0.9; background:#ffffff22; padding:6px 10px; border-radius: 999px; }

/* Main layout */
.main {
  display: grid;
  grid-template-columns: 420px 1fr;
  gap: 16px;
  padding: 16px;
  height: calc(100vh - 72px);
  box-sizing: border-box;
}
.left, .right {
  background: #0b1020;
  color: #e9edf3;
  border-radius: 16px;
  border: 1px solid #1c2642;
}
.left { padding: 16px; display: flex; flex-direction: column; gap: 12px; }
.right { padding: 0; display: flex; flex-direction: column; }

/* Panels */
.panel-title { font-size: 14px; font-weight: 600; color: #aeb8cc; margin-bottom: 6px; }
.helper { font-size: 12px; color: #97a3bb; margin-bottom: 8px; }

/* Sticky actions */
.actions {
  display: flex; gap: 8px; align-items: center; justify-content: stretch;
}
.actions .gr-button { flex: 1; }

/* Tabs full height */
.right .tabs { height: 100%; display: flex; flex-direction: column; }
.right .tabitem { flex: 1; display: flex; flex-direction: column; }
#chatbot_container { flex: 1; }
#chatbot_container .gr-chatbot { height: 100%; }

/* Tiny separators */
.hr { height: 1px; background: #16203b; margin: 10px 0; }

/* Voice hint */
.voice-hint { font-size: 12px; color:#9fb0cc; margin-top: 4px; }
"""
310
+
311
# Client-side speech-to-text helper injected via gr.HTML. rs_toggle_stt(id)
# toggles a Web Speech API recognizer (Chrome/Edge only) and streams interim
# transcripts into the <textarea> inside the element with the given id,
# dispatching an "input" event so Gradio picks up the new value.
VOICE_STT_HTML = """
<script>
let __rs_rec = null;
function rs_toggle_stt(elemId){
  const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
  if (!SpeechRecognition){
    alert("This browser does not support Speech Recognition. Try Chrome or Edge.");
    return;
  }
  if (__rs_rec){ __rs_rec.stop(); __rs_rec = null; return; }
  __rs_rec = new SpeechRecognition();
  __rs_rec.lang = "en-US";
  __rs_rec.interimResults = true;
  __rs_rec.continuous = true;

  const box = document.querySelector(`#${elemId} textarea`);
  if (!box){ alert("Prompt box not found."); return; }
  let base = box.value || "";

  __rs_rec.onresult = (ev) => {
    let t = "";
    for (let i = ev.resultIndex; i < ev.results.length; i++){
      t += ev.results[i][0].transcript;
    }
    box.value = (base + " " + t).trim();
    box.dispatchEvent(new Event("input", { bubbles: true }));
  };
  __rs_rec.onend = () => { __rs_rec = null; };
  __rs_rec.start();
}
</script>
"""
343
+
344
+
345
# ---------------------- Sleek UI (with fixed State wiring) ----------------------

with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
    # Persistent in-memory history component (a proper gr.State, not a bare
    # Python list, which previously caused the list/_id error).
    assessment_history = gr.State([])

    # Header: app title plus a pill summarizing PHI/history configuration.
    with gr.Row(elem_classes=["header"]):
        # NOTE(review): "Augemented" is a typo in the displayed title.
        gr.Markdown("<h1>Clarity Ops Augemented Decision Support</h1>")
        pill = "PHI Mode ON · history off" if (PHI_MODE and not PERSIST_HISTORY) else \
            "PHI Mode ON" if PHI_MODE else "PHI Mode OFF"
        gr.Markdown(f"<span class='badge'>{pill}</span>")

    # Main layout: left column = inputs/actions, right column = output tabs.
    with gr.Row(elem_classes=["main"]):
        # Left panel: file upload, prompt, action buttons, legal accordion.
        with gr.Column(elem_classes=["left"]):
            gr.Markdown("<div class='panel-title'>New Assessment</div>")
            gr.Markdown("<div class='helper'>Upload CSVs for analysis, or enter a prompt. Voice works in modern browsers.</div>")
            files_input = gr.Files(
                label="Upload Data Files (.csv)",
                file_count="multiple",
                type="filepath",
                file_types=[".csv"],
            )
            # elem_id="prompt_box" is referenced by the voice JS helper.
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Paste your scenario or question here...",
                lines=12,
                elem_id="prompt_box",
                autofocus=True,
            )

            with gr.Row(elem_classes=["actions"]):
                send_btn = gr.Button("▶️ Run Analysis", variant="primary")
                clear_btn = gr.Button("🧹 Clear")
                voice_btn = gr.Button("🎙️ Voice")

            gr.Markdown("<div class='voice-hint'>Click Voice to start/stop dictation into the prompt box.</div>")
            ping_btn = gr.Button("🔌 Ping Cohere")
            ping_out = gr.Markdown()

            gr.Markdown("<div class='hr'></div>")
            # PHI warning is only built into the UI when PHI_MODE was set at
            # import time (not toggleable at runtime).
            if PHI_MODE:
                gr.Markdown(
                    "⚠️ **PHI Mode:** History persistence is disabled by default. Avoid unnecessary identifiers."
                )

            with gr.Accordion("Privacy & Terms", open=False):
                gr.Markdown(PRIVACY_POLICY_TEXT)
                gr.Markdown("<div class='hr'></div>")
                gr.Markdown(TERMS_OF_SERVICE_TEXT)

        # Right panel: live chat output plus a history review tab.
        with gr.Column(elem_classes=["right"]):
            with gr.Tabs(elem_classes=["tabs"]):
                with gr.TabItem("Current Assessment", id=0, elem_classes=["tabitem"]):
                    with gr.Column(elem_id="chatbot_container"):
                        # type="messages" -> list of {"role", "content"} dicts.
                        chat_history_output = gr.Chatbot(label="Analysis Output", type="messages")
                with gr.TabItem("Assessment History", id=1, elem_classes=["tabitem"]):
                    gr.Markdown("### Review Past Assessments")
                    history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
                    history_display = gr.Markdown(label="Selected Assessment Details")

    # Inject the browser-side voice-to-text helper script.
    gr.HTML(VOICE_STT_HTML)

    # --------- Event logic (unchanged analysis flow) ----------
+
414
    def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
        """Generator handler for the Run button.

        Yields (chat messages, history state, dropdown update) tuples: first a
        "thinking" bubble, then the final answer plus an updated history list.
        """
        if not prompt:
            gr.Warning("Please enter a prompt.")
            # Echo inputs back unchanged so the UI is not cleared.
            yield chat_history_list, history_state_list, gr.update()
            return

        # Append the user's message to the visible chat.
        chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)

        # Progress callback required by handle(); this UI does not stream.
        def dummy_update(message: str):
            pass

        # Temporary "thinking" bubble shown while handle() runs. It is later
        # replaced because final_chat extends chat_with_user_msg, not this list.
        thinking_message = _append_msg(
            chat_with_user_msg,
            "assistant",
            """```
🧠 Generating and executing analysis... Please wait.
```""",
        )
        yield thinking_message, history_state_list, gr.update()

        # Run the (blocking) analysis or chat pipeline.
        ai_response_text = handle(prompt, files, dummy_update)

        # Append final assistant response in place of the thinking bubble.
        final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
        # Timestamp doubles as the unique id for the history record.
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Capture uploaded filenames (if any) for the history record.
        file_names: List[str] = []
        if files:
            file_names = [
                os.path.basename(f.name if hasattr(f, "name") else f) for f in files
            ]

        # Build the history record.
        new_entry = {
            "id": timestamp,
            "prompt": prompt,
            "files": file_names,
            "response": ai_response_text,
            "chat_history": final_chat,
        }

        # Respect PHI/history flags: only persist when allowed.
        if PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0)):
            updated_history: List[Dict[str, Any]] = (history_state_list or []) + [new_entry]
        else:
            updated_history = history_state_list or []

        # Dropdown labels: "<timestamp> - <first 40 chars of prompt>..."
        # (view_history splits on " - " to recover the id).
        history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]

        yield final_chat, updated_history, gr.update(choices=history_labels)
469
+
470
    def view_history(selection: str, history_state_list: List[Dict[str, Any]]) -> str:
        """Render the dropdown-selected past assessment as a Markdown page."""
        if not selection or not history_state_list:
            return ""
        # Labels are "<timestamp> - <prompt prefix>..."; the timestamp format
        # contains no " - ", so the first split recovers the id.
        try:
            selected_id = selection.split(" - ", 1)[0]
        except Exception:
            selected_id = selection

        selected_assessment = next(
            (item for item in history_state_list if item.get("id") == selected_id), None
        )
        if not selected_assessment:
            return "Could not find the selected assessment."

        file_list = selected_assessment.get("files", [])
        file_list_md = "\n- ".join(file_list) if file_list else "*(no files uploaded)*"

        # Flatten the stored chat transcript into Markdown lines.
        chat_entries = selected_assessment.get("chat_history", [])
        chat_md_lines = []
        for msg in chat_entries:
            role = msg.get("role", "").capitalize()
            content = msg.get("content", "")
            chat_md_lines.append(f"**{role}:** {content}")
        chat_md = "\n\n".join(chat_md_lines)

        return f"""### Assessment from: {selected_assessment['id']}
**Files Used:**
- {file_list_md}
---
**Original Prompt:**
> {selected_assessment['prompt']}
---
**AI Generated Response:**
{selected_assessment['response']}
---
**Chat Transcript:**
{chat_md}
"""
508
+
509
    # Wire events (using the proper gr.State component for history).
    send_btn.click(
        run_analysis_wrapper,
        inputs=[prompt_input, files_input, chat_history_output, assessment_history],
        outputs=[chat_history_output, assessment_history, history_dropdown],
    )
    history_dropdown.change(
        view_history,
        inputs=[history_dropdown, assessment_history],
        outputs=[history_display],
    )
    # Clear resets prompt, file picker, and visible chat; stored assessment
    # history is deliberately preserved.
    clear_btn.click(
        lambda: (None, None, []),
        outputs=[prompt_input, files_input, chat_history_output],
    )
    ping_btn.click(ping_cohere, outputs=[ping_out])
    # Voice button runs client-side JS only; no Python callback involved.
    voice_btn.click(None, [], [], js="rs_toggle_stt('prompt_box')")
526
+
527
+
528
if __name__ == "__main__":
    # Warn early when the API key is missing; the server still starts so the
    # UI can render, but all Cohere calls will fail.
    if not os.getenv("COHERE_API_KEY"):
        print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
    # Bind to all interfaces; PORT env var overrides the default 7860.
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))