Spaces:
Sleeping
Rajan Sharma
committed on
Update app.py
app.py CHANGED
@@ -1,339 +1,221 @@
 from __future__ import annotations
-
 import os
-import io
 import traceback
-import contextlib
-from datetime import datetime
 from typing import List, Dict, Any

-import regex as re  # pip install regex
 import gradio as gr
 import pandas as pd

 # --- BACKEND IMPORTS ---
-from langchain_cohere import ChatCohere
-
-# from langchain_community.utilities.python import PythonREPL

 # --- LOCAL MODULE IMPORTS ---
 from settings import (
-    HEALTHCARE_SETTINGS,
-    GENERAL_CONVERSATION_PROMPT,
-    COHERE_MODEL_PRIMARY,
-    COHERE_TIMEOUT_S,
-    USE_OPEN_FALLBACKS,
 )
 from audit_log import log_event
 from privacy import safety_filter, refusal_reply
 from llm_router import cohere_chat, _co_client, cohere_embed

-
-# =========================
-# Utility Helpers
-# =========================

 def load_markdown_text(filepath: str) -> str:
-    """Safely loads text content from a markdown file."""
     try:
-        with open(filepath, "r", encoding="utf-8") as f:
             return f.read()
     except FileNotFoundError:
         return f"**Error:** The document `{os.path.basename(filepath)}` was not found."

-
-def _sanitize_text(s: str) -> str:
-    if not isinstance(s, str):
-        s = str(s)
-    return re.sub(r"[\p{C}--[\n\t]]+", "", s)
-
-
-def _indent(s: str, spaces: int) -> str:
-    pad = " " * spaces
-    return "\n".join((pad + line) if line.strip() else line for line in s.splitlines())
-
-
-# =========================
-# AI Coding Path
-# =========================

 def _create_python_script(user_scenario: str, schema_context: str) -> str:
-    """
-    We extract the content between ```python ... ``` fences if present.
-    """
     prompt_for_coder = f"""
-You are an expert Python data scientist. Your sole job is to write a single, complete

-
-    try:
-        # YOUR CODE HERE
-    except Exception as e:
-        print(f"An error occurred during analysis: {{e}}")
-
-RULES:
-1) Use the provided list `dfs` of pandas DataFrames (dfs[0], dfs[1], ...).
-2) Print results at each major step with print().
-3) No placeholders; operate on real data in dfs.
-4) The code you return must be valid Python and indentation-safe.
-5) Do NOT redefine analyze_data; only provide the body INSIDE the try: block.
-
---- DATA SCHEMA (heads) ---
 {schema_context}
 --- END SCHEMA ---

-
 {user_scenario}

-
 """
-    generated_text = cohere_chat(prompt_for_coder)
-    fence = re.search(r"```python\n(.*?)```", generated_text, re.DOTALL)
-    if fence:
-        body = fence.group(1).strip()
     else:
-        body = generated_text.strip()
-
-    # Strip any accidental wrapper definitions the model might add
-    # e.g., remove "def analyze_data(dfs):" and a nested try:/except: if present
-    body = re.sub(r"^def\s+analyze_data\s*\(.*?\):\s*", "", body)
-    # We keep user's try/except if they provided, but usually we want raw steps.
-    return body.strip() or "print('Error: No analysis steps were generated.')"


 def ping_cohere() -> str:
     """Lightweight health check against Cohere."""
     try:
         cli = _co_client()
-        if not cli:
-            return "Cohere client not initialized. Is COHERE_API_KEY set?"
         vecs = cohere_embed(["hello", "world"])
-        if vecs:
-            return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY}, timeout={COHERE_TIMEOUT_S}s)"
-        return "Cohere reachable."
     except Exception as e:
         return f"Cohere ping failed: {e}"

-
-# =========================
-# Core Analysis Engine
-# =========================

 def handle(user_msg: str, files: list) -> str:
-    """
-    Main backend engine using the 'Coder pattern':
-    - Safety check
-    - Load CSVs -> dfs
-    - Build schema heads
-    - Ask the model for analysis code (body only)
-    - Execute analyze_data(dfs) in a safe, isolated namespace
-    - Return captured stdout
-    """
     try:
         safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
-        if blocked_in:
-            return refusal_reply(reason_in)

-        # Resolve file paths (Gradio may give temp File objects or strings)
         file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]

         if file_paths:
-            dataframes = []
-            schema_parts = []
             for i, p in enumerate(file_paths):
-                if p.endswith(".csv"):
                     try:
                         df = pd.read_csv(p)
                     except UnicodeDecodeError:
-                        df = pd.read_csv(p, encoding="latin1")
                     dataframes.append(df)
-                    schema_parts.append(
-                        f"DataFrame `dfs[{i}]` (from `{os.path.basename(p)}`):\n{df.head().to_markdown()}\n"
-                    )
-
-            if not dataframes:
-                return "Please upload at least one CSV file."

             schema_context = "\n".join(schema_parts)
-            analysis_body = _create_python_script(safe_in, schema_context)
-
-            # Assemble the full script to exec
-            script = f"""
-import pandas as pd

-def analyze_data(dfs):
-    try:
-{_indent(analysis_body, 8)}
-    except Exception as e:
-        print(f"An error occurred during analysis: {{e}}")
-"""
-
-            # Execute in isolated namespace and capture stdout
-            ns: Dict[str, Any] = {}
-            ns["dfs"] = dataframes  # make dfs available inside exec scope
-
-            buf = io.StringIO()
             try:
-                with contextlib.redirect_stdout(buf):
-                    exec(script, ns)
-                    # call analyze_data(dfs)
-                    ns["analyze_data"](ns["dfs"])
             except Exception as e:
-                return f"An error occurred executing the script: {e}"
-
-            return _sanitize_text(buf.getvalue())
-
-        # No files: fall back to general conversation
-        prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
-        resp = cohere_chat(prompt) or "How can I help further?"
-        return _sanitize_text(resp)

     except Exception as e:
         tb = traceback.format_exc()
         log_event("app_error", None, {"err": str(e), "tb": tb})
         return f"A critical error occurred: {e}"

-
-# =========================
-# UI (Gradio)
-# =========================
-
 PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
 TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")

 with gr.Blocks(theme="soft", css="style.css") as demo:
-    assessment_history = gr.State([])
-
-    # Modals
     with gr.Group(visible=False) as privacy_modal:
         with gr.Blocks():
             gr.Markdown(PRIVACY_POLICY_TEXT)
             close_privacy_btn = gr.Button("Close")
-
     with gr.Group(visible=False) as terms_modal:
         with gr.Blocks():
             gr.Markdown(TERMS_OF_SERVICE_TEXT)
             close_terms_btn = gr.Button("Close")

     gr.Markdown("# Universal AI Data Analyst")
-
     with gr.Row(variant="panel"):
         with gr.Column(scale=1):
             gr.Markdown("## New Assessment")
-            files_input = gr.Files(
-                label="Upload Data Files (.csv)",
-                file_count="multiple",
-                type="filepath",
-                file_types=[".csv"],
-            )
-            prompt_input = gr.Textbox(
-                label="Prompt",
-                placeholder="Paste your scenario here.",
-                lines=15,
-            )
             with gr.Row():
                 send_btn = gr.Button("▶️ Run Analysis", variant="primary", scale=2)
                 clear_btn = gr.Button("🗑️ Clear")
                 ping_btn = gr.Button("Ping Cohere")
             ping_out = gr.Markdown()
-
         with gr.Column(scale=2):
             with gr.Tabs():
                 with gr.TabItem("Current Assessment", id=0):
-                    chat_history_output = gr.Chatbot(
-                        label="Analysis Output",
-                        type="messages",
-                        height=600,
-                    )
                 with gr.TabItem("Assessment History", id=1):
                     gr.Markdown("## Review Past Assessments")
-                    history_dropdown = gr.Dropdown(
-                        label="Select an assessment to review",
-                        choices=[],
-                    )
                     history_display = gr.Markdown(label="Selected Assessment Details")
-
-    with gr.Row():
-        gr.Markdown("---")
-
     with gr.Row():
         privacy_link = gr.Button("Privacy Policy", variant="link")
         terms_link = gr.Button("Terms of Service", variant="link")

-    # ---------- Callbacks ----------
-
     def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
         if not prompt or not files:
             gr.Warning("Please provide both a prompt and at least one data file.")
             yield chat_history_list, history_state_list, gr.update()
             return

-        chat_with_user_msg = (chat_history_list or []) + [{"role": "user", "content": prompt}]
-        thinking_message = chat_with_user_msg + [
-            {"role": "assistant", "content": "```\n🧠 Generating analysis script... This may take a moment.\n```"}
-        ]
         yield thinking_message, history_state_list, gr.update()
-
         ai_response_text = handle(prompt, files)
-
-        final_chat = chat_with_user_msg + [{"role": "assistant", "content": ai_response_text}]
         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        file_names = [os.path.basename(f.name if hasattr(f, "name") else f) for f in files]
-        new_assessment = {
-            "id": timestamp,
-            "prompt": prompt,
-            "files": file_names,
-            "response": ai_response_text,
-        }
-        updated_history = (history_state_list or []) + [new_assessment]
         history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
         yield final_chat, updated_history, gr.update(choices=history_labels)

     def view_history(selection, history_state_list):
-        if not selection or not history_state_list:
-            return ""
-        selected_id = selection.split(" - ")[0]
         selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
         if selected_assessment:
-            file_list_md = "\n- ".join(selected_assessment["files"])
-            return (
-                f"### Assessment from: {selected_assessment['id']}\n"
-                f"**Files Used:**\n- {file_list_md}\n---\n"
-                f"**Original Prompt:**\n> {selected_assessment['prompt']}\n---\n"
-                f"**AI Generated Response:**\n{selected_assessment['response']}"
-            )
         return "Could not find the selected assessment."

-    # Wire events
     send_btn.click(
         run_analysis_wrapper,
         inputs=[prompt_input, files_input, chat_history_output, assessment_history],
-        outputs=[chat_history_output, assessment_history, history_dropdown]
     )
     history_dropdown.change(
         view_history,
         inputs=[history_dropdown, assessment_history],
-        outputs=[history_display]
     )
-    clear_btn.click(lambda: (None, None, [], []),
-                    outputs=[prompt_input, files_input, chat_history_output, assessment_history])
     ping_btn.click(ping_cohere, outputs=[ping_out])
     privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])
     close_privacy_btn.click(lambda: gr.update(visible=False), outputs=[privacy_modal])
     terms_link.click(lambda: gr.update(visible=True), outputs=[terms_modal])
     close_terms_btn.click(lambda: gr.update(visible=False), outputs=[terms_modal])

-
-# =========================
-# Entrypoint
-# =========================
-
 if __name__ == "__main__":
     if not os.getenv("COHERE_API_KEY"):
-        print("🔴 COHERE_API_KEY environment variable not set.")
     demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))
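For reference, the removed path asked the model for only the body of `analyze_data`, re-indented it with `_indent`, and `exec`'d the assembled wrapper while capturing stdout. A minimal sketch of that pattern (the `body` string stands in for model output; capturing stdout via `contextlib` is an assumption about the lost lines):

```python
import io
import contextlib

body = "print(sum(x * x for x in range(4)))"  # stand-in for the model-generated steps

indented = "\n".join("        " + line for line in body.splitlines())
script = f"""
def analyze_data(dfs):
    try:
{indented}
    except Exception as e:
        print(f"An error occurred during analysis: {{e}}")
"""

ns = {"dfs": []}  # isolated namespace; dfs would hold the uploaded DataFrames
buf = io.StringIO()
with contextlib.redirect_stdout(buf):
    exec(script, ns)               # defines analyze_data inside ns
    ns["analyze_data"](ns["dfs"])  # run it; prints are captured in buf
print(buf.getvalue(), end="")  # -> 14
```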
+# app.py
 from __future__ import annotations
 import os
 import traceback
+import regex as re2
 from typing import List, Dict, Any

 import gradio as gr
 import pandas as pd
+from datetime import datetime

 # --- BACKEND IMPORTS ---
+from langchain_cohere import ChatCohere
+from langchain_community.utilities.python import PythonREPL  # Re-introducing the standard, robust executor

 # --- LOCAL MODULE IMPORTS ---
 from settings import (
+    HEALTHCARE_SETTINGS, GENERAL_CONVERSATION_PROMPT,
+    COHERE_MODEL_PRIMARY, COHERE_TIMEOUT_S, USE_OPEN_FALLBACKS
 )
 from audit_log import log_event
 from privacy import safety_filter, refusal_reply
 from llm_router import cohere_chat, _co_client, cohere_embed

+# --- UTILITY FUNCTIONS ---

 def load_markdown_text(filepath: str) -> str:
+    """Safely loads text content from a markdown file."""
     try:
+        with open(filepath, 'r', encoding='utf-8') as f:
             return f.read()
     except FileNotFoundError:
         return f"**Error:** The document `{os.path.basename(filepath)}` was not found."

+def _sanitize_text(s: str) -> str:
+    if not isinstance(s, str): return s
+    # V1 flag is required for the [\p{C}--[\n\t]] set subtraction to take effect
+    return re2.sub(r'[\p{C}--[\n\t]]+', '', s, flags=re2.V1)
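Here `\p{C}` matches Unicode control and format characters, and `--[\n\t]` subtracts newline and tab back out so report formatting survives sanitization; the subtraction syntax is a `regex`-module V1 feature, which is why the `flags=re2.V1` argument matters. A minimal, self-contained check of the intended behaviour:

```python
import regex as re2  # pip install regex

def _sanitize_text(s: str) -> str:
    if not isinstance(s, str): return s
    # Strip control/format characters but keep \n and \t (set subtraction needs V1)
    return re2.sub(r"[\p{C}--[\n\t]]+", "", s, flags=re2.V1)

print(repr(_sanitize_text("ok\x00\x07 text\nnext\tcol")))
# 'ok text\nnext\tcol'
```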

 def _create_python_script(user_scenario: str, schema_context: str) -> str:
+    """Uses an LLM to act as an "AI Coder", writing a complete Python script."""
+    # --- THE FINAL PROMPT FIX IS HERE ---
     prompt_for_coder = f"""
+You are an expert Python data scientist. Your sole job is to write a single, complete, and executable Python script to answer the user's request.
+You have access to a list of pandas dataframes loaded into a variable named `dfs`.

+--- DATA SCHEMA ---
 {schema_context}
 --- END SCHEMA ---

+CRITICAL RULE: You MUST use the exact column names provided in the DATA SCHEMA. Column names are case-sensitive. Pay close attention to capitalization (e.g., 'Zone' vs 'zone'). A KeyError will cause a failure.
+
+Based on the user's scenario below, write a single Python script that performs the entire analysis.
+
+RULES FOR YOUR SCRIPT:
+1. **Use the DataFrames:** Your script MUST use the `dfs` list and the exact column names from the schema.
+2. **Print Your Findings:** Use the `print()` function at each step to output the results as a formatted report.
+3. **No Placeholders:** Do not use placeholder data.
+4. **Self-Contained:** The script must be entirely self-contained.
+
+--- USER'S SCENARIO ---
 {user_scenario}

+--- PYTHON SCRIPT ---
+Now, write the complete Python script to be executed. The script should start with `import pandas as pd` and contain all the logic.
+```python
 """
+    generated_text = cohere_chat(prompt_for_coder)
+    match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
+    if match:
+        return match.group(1).strip()
     else:
+        return "print('Error: The AI failed to generate a valid Python script.')"

+def _append_msg(history_messages: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
+    return (history_messages or []) + [{"role": role, "content": content}]

 def ping_cohere() -> str:
     """Lightweight health check against Cohere."""
     try:
         cli = _co_client()
+        if not cli: return "Cohere client not initialized."
         vecs = cohere_embed(["hello", "world"])
+        return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY})" if vecs else "Cohere reachable."
     except Exception as e:
         return f"Cohere ping failed: {e}"

+# --- THE CORE ANALYSIS ENGINE ---

 def handle(user_msg: str, files: list) -> str:
+    """This is the powerful backend engine using the "Coder" pattern."""
     try:
         safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
+        if blocked_in: return refusal_reply(reason_in)

         file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]

         if file_paths:
+            dataframes = []
+            schema_parts = []
             for i, p in enumerate(file_paths):
+                if p.endswith('.csv'):
                     try:
                         df = pd.read_csv(p)
                     except UnicodeDecodeError:
+                        df = pd.read_csv(p, encoding='latin1')
                     dataframes.append(df)
+                    schema_parts.append(f"DataFrame `dfs[{i}]` (from file `{os.path.basename(p)}`):\n{df.head().to_markdown()}\n")
+
+            if not dataframes: return "Please upload at least one CSV file."

             schema_context = "\n".join(schema_parts)
+            analysis_script = _create_python_script(safe_in, schema_context)

+            local_vars = {"dfs": dataframes}
+            python_repl = PythonREPL(_locals=local_vars)  # dfs is seeded into the REPL's exec namespace
             try:
+                res = python_repl.run(command=analysis_script)
+                return _sanitize_text(res)
             except Exception as e:
+                return f"An error occurred executing the script: {e}\n\nGenerated Script:\n```python\n{analysis_script}\n```"
+        else:
+            prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
+            return _sanitize_text(cohere_chat(prompt) or "How can I help further?")

     except Exception as e:
         tb = traceback.format_exc()
         log_event("app_error", None, {"err": str(e), "tb": tb})
         return f"A critical error occurred: {e}"

+# --- PRE-LOAD LEGAL DOCUMENTS ---
 PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
 TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")

+# ---------------- THE PROFESSIONAL UI WITH INTEGRATED LEGAL DOCS ----------------
 with gr.Blocks(theme="soft", css="style.css") as demo:
+    assessment_history = gr.State([])
+
     with gr.Group(visible=False) as privacy_modal:
         with gr.Blocks():
             gr.Markdown(PRIVACY_POLICY_TEXT)
             close_privacy_btn = gr.Button("Close")
+
     with gr.Group(visible=False) as terms_modal:
         with gr.Blocks():
             gr.Markdown(TERMS_OF_SERVICE_TEXT)
             close_terms_btn = gr.Button("Close")

     gr.Markdown("# Universal AI Data Analyst")
     with gr.Row(variant="panel"):
         with gr.Column(scale=1):
             gr.Markdown("## New Assessment")
+            files_input = gr.Files(label="Upload Data Files (.csv)", file_count="multiple", type="filepath", file_types=[".csv"])
+            prompt_input = gr.Textbox(label="Prompt", placeholder="Paste your scenario here.", lines=15)
             with gr.Row():
                 send_btn = gr.Button("▶️ Run Analysis", variant="primary", scale=2)
                 clear_btn = gr.Button("🗑️ Clear")
                 ping_btn = gr.Button("Ping Cohere")
             ping_out = gr.Markdown()
         with gr.Column(scale=2):
             with gr.Tabs():
                 with gr.TabItem("Current Assessment", id=0):
+                    chat_history_output = gr.Chatbot(label="Analysis Output", type="messages", height=600)
                 with gr.TabItem("Assessment History", id=1):
                     gr.Markdown("## Review Past Assessments")
+                    history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
                     history_display = gr.Markdown(label="Selected Assessment Details")
+    with gr.Row(): gr.Markdown("---")
     with gr.Row():
         privacy_link = gr.Button("Privacy Policy", variant="link")
         terms_link = gr.Button("Terms of Service", variant="link")

     def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
         if not prompt or not files:
             gr.Warning("Please provide both a prompt and at least one data file.")
             yield chat_history_list, history_state_list, gr.update()
             return

+        chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
+        thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Generating analysis script... This may take a moment.\n```")
         yield thinking_message, history_state_list, gr.update()
+
         ai_response_text = handle(prompt, files)
+
+        final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        file_names = [os.path.basename(f.name if hasattr(f, 'name') else f) for f in files]
+        new_assessment = {"id": timestamp, "prompt": prompt, "files": file_names, "response": ai_response_text}
+        updated_history = history_state_list + [new_assessment]
         history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
         yield final_chat, updated_history, gr.update(choices=history_labels)
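`run_analysis_wrapper` is a generator, which is how Gradio streams intermediate UI states: each `yield` repaints the bound outputs, so the "thinking" message appears before `handle()` finishes. A stripped-down, standalone sketch of the same pattern (names are hypothetical):

```python
import time
import gradio as gr

def stepwise(msg, history):
    history = (history or []) + [{"role": "user", "content": msg}]
    # First yield: paint an interim assistant message immediately.
    yield history + [{"role": "assistant", "content": "thinking..."}]
    time.sleep(1)  # stand-in for the real analysis call
    # Second yield: replace it with the final answer.
    yield history + [{"role": "assistant", "content": f"done: {msg}"}]

with gr.Blocks() as demo:
    chat = gr.Chatbot(type="messages")
    box = gr.Textbox()
    box.submit(stepwise, inputs=[box, chat], outputs=[chat])

if __name__ == "__main__":
    demo.launch()
```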

     def view_history(selection, history_state_list):
+        if not selection or not history_state_list: return ""
+        selected_id = selection.split(" - ")[0]
         selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
         if selected_assessment:
+            file_list_md = "\n- ".join(selected_assessment['files'])
+            return f"""### Assessment from: {selected_assessment['id']}\n**Files Used:**\n- {file_list_md}\n---\n**Original Prompt:**\n> {selected_assessment['prompt']}\n---\n**AI Generated Response:**\n{selected_assessment['response']}"""
         return "Could not find the selected assessment."

     send_btn.click(
         run_analysis_wrapper,
         inputs=[prompt_input, files_input, chat_history_output, assessment_history],
+        outputs=[chat_history_output, assessment_history, history_dropdown]
     )
     history_dropdown.change(
         view_history,
         inputs=[history_dropdown, assessment_history],
+        outputs=[history_display]
     )
+    clear_btn.click(lambda: (None, None, [], []), outputs=[prompt_input, files_input, chat_history_output, assessment_history])
     ping_btn.click(ping_cohere, outputs=[ping_out])
     privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])
     close_privacy_btn.click(lambda: gr.update(visible=False), outputs=[privacy_modal])
     terms_link.click(lambda: gr.update(visible=True), outputs=[terms_modal])
     close_terms_btn.click(lambda: gr.update(visible=False), outputs=[terms_modal])

 if __name__ == "__main__":
     if not os.getenv("COHERE_API_KEY"):
+        print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
     demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))