Spaces:

VEDAGI1
/

Medica_DecisionSupportAI

Sleeping

App Files Files Community

Rajan Sharma committed on Oct 1

Commit

56f8933

verified ·

1 Parent(s): 64e8c0c

Update app.py

Browse files

Files changed (1) hide show

app.py +188 -186

app.py CHANGED Viewed

@@ -10,9 +10,8 @@ import pandas as pd
 from datetime import datetime
 # --- BACKEND IMPORTS ---
-from langchain.agents.agent_types import AgentType
 from langchain_cohere import ChatCohere
-from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
 # --- LOCAL MODULE IMPORTS ---
 from settings import (
@@ -37,201 +36,204 @@ def _sanitize_text(s: str) -> str:
     if not isinstance(s, str): return s
     return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
-# --- THE FINAL FIX (PART 1): The "Senior Analyst" AI ---
-def _create_enhanced_prompt(user_scenario: str, file_context: str) -> str:
-    """
-    Uses an LLM to act as a "Senior Analyst", breaking the complex user
-    scenario into a clear, step-by-step plan for the agent.
-    """
-    prompt_for_planner = f"""
-You are a Senior Data Analyst. Your job is to create a clear, step-by-step execution plan for a Junior AI Data Analyst.
-The user has provided a complex scenario and a list of data files. The Junior Analyst gets confused by long prompts and can get stuck in loops.
-Your plan must be simple, clear, and sequential.
---- DATA CONTEXT ---
-{file_context}
-The Junior Analyst has access to these files in a list of pandas dataframes (df1, df2, etc.), in the order listed above.
 --- USER'S SCENARIO ---
 {user_scenario}
---- YOUR TASK ---
-Create a "Step-by-Step Execution Plan" for the Junior Analyst. Tell it exactly what to do, one task at a time, referencing the correct dataframe (df1, df2, etc.).
-Instruct it to perform all data preparation first, then the analysis, then the recommendations.
-Tell it that it MUST complete ALL steps in the plan before providing the final report.
-This plan will be given to the Junior Analyst. Make it easy to follow.
-"""
-    structured_brief = cohere_chat(prompt_for_planner)
-    return structured_brief if structured_brief else user_scenario
-def _append_msg(history_messages: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
-    return (history_messages or []) + [{"role": role, "content": content}]
-def ping_cohere() -> str:
-    """Lightweight health check against Cohere."""
     try:
-        cli = _co_client()
-        if not cli: return "Cohere client not initialized. Is COHERE_API_KEY set?"
-        vecs = cohere_embed(["hello", "world"])
-        return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY}, timeout={COHERE_TIMEOUT_S}s)" if vecs else "Cohere reachable."
     except Exception as e:
-        return f"Cohere ping failed: {e}"
-# --- THE CORE ANALYSIS ENGINE ---
 def handle(user_msg: str, files: list) -> str:
-    """This is the powerful backend engine."""
-    try:
-        safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
-        if blocked_in: return refusal_reply(reason_in)
-        file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
-        if file_paths:
-            dataframes = []
-            file_names = []
-            for p in file_paths:
-                if p.endswith('.csv'):
-                    try:
-                        df = pd.read_csv(p)
-                        dataframes.append(df)
-                        file_names.append(os.path.basename(p))
-                    except UnicodeDecodeError:
-                        print(f"Warning: Reading {os.path.basename(p)} with fallback latin1 encoding.")
-                        df = pd.read_csv(p, encoding='latin1')
-                        dataframes.append(df)
-                        file_names.append(os.path.basename(p))
-            if not dataframes: return "Please upload at least one CSV file."
-            file_context_string = "The user has provided the following data files: " + ", ".join(file_names)
-            llm = ChatCohere(model=COHERE_MODEL_PRIMARY, temperature=0)
-            enhanced_prompt = _create_enhanced_prompt(safe_in, file_context_string)
-            # --- THE FINAL FIX (PART 2): Stricter Agent with Error Handling Rule ---
-            AGENT_PREFIX = """
-You are a Junior AI Data Analyst. Your job is to execute the step-by-step plan provided by your Senior Analyst using Python and pandas.
-You have access to dataframes named df1, df2, etc.
-You MUST follow these rules:
-1.  **EXECUTE THE PLAN:** Follow the execution plan exactly, one step at a time.
-2.  **FORMATTING:** Your response MUST be in one of two formats. NEVER mix them.
-    *   **To run code:**
-        Thought: Your reasoning for the code you are about to run to complete the current step.
-        Action: python_repl_ast
-        Action Input: The single line of python code to run.
-    *   **To give the final answer:**
-        Thought: I have finished all steps in the plan and can now provide the final report.
-        Final Answer: The complete, final answer, formatted as a concise report.
-3.  **ERROR HANDLING:** If your code produces an error, DO NOT try the same code again. Analyze the error message and try a DIFFERENT approach to solve the step. If you are stuck, say so.
 """
-            agent = create_pandas_dataframe_agent(
-                llm, dataframes, agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
-                verbose=True, allow_dangerous_code=True, prefix=AGENT_PREFIX, max_iterations=50,
-                handle_parsing_errors=True
-            )
-            result = agent.invoke({"input": enhanced_prompt})
-            return _sanitize_text(result.get("output", "No output generated."))
-        else:
-            prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
-            return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
-    except Exception as e:
-        tb = traceback.format_exc()
-        log_event("app_error", None, {"err": str(e), "tb": tb})
-        return f"A critical error occurred: {e}"
-# --- PRE-LOAD LEGAL DOCUMENTS ---
 PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
 TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
-# ---------------- THE PROFESSIONAL UI WITH INTEGRATED LEGAL DOCS ----------------
 with gr.Blocks(theme="soft", css="style.css") as demo:
-    assessment_history = gr.State([])
-    # ... (The rest of the UI code is identical to the last working version) ...
-    with gr.Group(visible=False) as privacy_modal:
-        with gr.Blocks():
-            gr.Markdown(PRIVACY_POLICY_TEXT)
-            close_privacy_btn = gr.Button("Close")
-    with gr.Group(visible=False) as terms_modal:
-        with gr.Blocks():
-            gr.Markdown(TERMS_OF_SERVICE_TEXT)
-            close_terms_btn = gr.Button("Close")
-    gr.Markdown("# Universal AI Data Analyst")
-    with gr.Row(variant="panel"):
-        with gr.Column(scale=1):
-            gr.Markdown("## New Assessment")
-            files_input = gr.Files(label="Upload Data Files (.csv)", file_count="multiple", type="filepath", file_types=[".csv"])
-            prompt_input = gr.Textbox(label="Prompt", placeholder="Paste your scenario here.", lines=15)
-            with gr.Row():
-                send_btn = gr.Button("▶️ Run Analysis", variant="primary", scale=2)
-                clear_btn = gr.Button("🗑️ Clear")
-            ping_btn = gr.Button("Ping Cohere")
-            ping_out = gr.Markdown()
-        with gr.Column(scale=2):
-            with gr.Tabs():
-                with gr.TabItem("Current Assessment", id=0):
-                    chat_history_output = gr.Chatbot(label="Analysis Output", type="messages", height=600)
-                with gr.TabItem("Assessment History", id=1):
-                    gr.Markdown("## Review Past Assessments")
-                    history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
-                    history_display = gr.Markdown(label="Selected Assessment Details")
-    with gr.Row(): gr.Markdown("---")
-    with gr.Row():
-        privacy_link = gr.Button("Privacy Policy", variant="link")
-        terms_link = gr.Button("Terms of Service", variant="link")
-    def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
-        if not prompt or not files:
-            gr.Warning("Please provide both a prompt and at least one data file.")
-            yield chat_history_list, history_state_list, gr.update()
-            return
-        chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
-        thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Formulating execution plan... Please wait.\n```")
-        yield thinking_message, history_state_list, gr.update()
-        ai_response_text = handle(prompt, files)
-        final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
-        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        file_names = [os.path.basename(f.name if hasattr(f, 'name') else f) for f in files]
-        new_assessment = {"id": timestamp, "prompt": prompt, "files": file_names, "response": ai_response_text}
-        updated_history = history_state_list + [new_assessment]
-        history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
-        yield final_chat, updated_history, gr.update(choices=history_labels)
-    def view_history(selection, history_state_list):
-        if not selection or not history_state_list: return ""
-        selected_id = selection.split(" - ")[0]
-        selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
-        if selected_assessment:
-            file_list_md = "\n- ".join(selected_assessment['files'])
-            return f"""### Assessment from: {selected_assessment['id']}\n**Files Used:**\n- {file_list_md}\n---\n**Original Prompt:**\n> {selected_assessment['prompt']}\n---\n**AI Generated Response:**\n{selected_assessment['response']}"""
-        return "Could not find the selected assessment."
-    send_btn.click(
-        run_analysis_wrapper,
-        inputs=[prompt_input, files_input, chat_history_output, assessment_history],
-        outputs=[chat_history_output, assessment_history, history_dropdown]
-    )
-    history_dropdown.change(
-        view_history,
-        inputs=[history_dropdown, assessment_history],
-        outputs=[history_display]
-    )
-    clear_btn.click(lambda: (None, None, [], []), outputs=[prompt_input, files_input, chat_history_output, assessment_history])
-    ping_btn.click(ping_cohere, outputs=[ping_out])
-    privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])
-    close_privacy_btn.click(lambda: gr.update(visible=False), outputs=[privacy_modal])
-    terms_link.click(lambda: gr.update(visible=True), outputs=[terms_modal])
-    close_terms_btn.click(lambda: gr.update(visible=False), outputs=[terms_modal])
-if __name__ == "__main__":
-    if not os.getenv("COHERE_API_KEY"):
-        print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
-    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))

 from datetime import datetime
 # --- BACKEND IMPORTS ---
 from langchain_cohere import ChatCohere
+from langchain_community.utilities.python import PythonREPL
 # --- LOCAL MODULE IMPORTS ---
 from settings import (
     if not isinstance(s, str): return s
     return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
+def _create_python_script(user_scenario: str, schema_context: str) -> str:
+    """Uses an LLM to act as an "AI Coder", writing a complete Python script."""
+    prompt_for_coder = f"""
+You are an expert Python data scientist. Your sole job is to write a single, complete, and executable Python script to answer the user's request.
+You have access to a list of pandas dataframes loaded into a variable named `dfs`. The first dataframe is `dfs[0]`, the second is `dfs[1]`, and so on.
+CRITICAL CONTEXT: Before writing any code, you MUST first understand the data you have been given. Here is the schema for each dataframe:
+--- DATA SCHEMA ---
+{schema_context}
+--- END SCHEMA ---
+Based on the user's scenario below, write a single Python script that performs the entire analysis.
+RULES FOR YOUR SCRIPT:
+1.  **Use the DataFrames:** Your script MUST use the `dfs` list to access the data.
+2.  **Print Your Findings:** Use the `print()` function at each step of your analysis to output the results. The final output of your script should be the complete, formatted report.
+3.  **No Placeholders:** Do not use placeholder data. Your code must perform the real calculations.
+4.  **Self-Contained:** The script must be entirely self-contained.
 --- USER'S SCENARIO ---
 {user_scenario}
+--- PYTHON SCRIPT ---
+```python
+import pandas as pd
+def analyze_data(dfs):
     try:
+        # Your generated Python code will go here.
+        pass
     except Exception as e:
+        print(f"An error occurred during analysis: {{e}}")
+Now, write the complete Python script inside the try block.
+"""
+    generated_text = cohere_chat(prompt_for_coder)
+    match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
+    if match:
+        script_content = match.group(1).strip()
+        script_content = script_content.replace("def analyze_data(dfs):", "", 1)
+        script_content = "\n".join([line for line in script_content.split('\n') if "pass" not in line])
+        return script_content.strip()
+    else:
+        return "print('Error: The AI failed to generate a valid Python script.')"
+def _append_msg(history_messages: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
+    return (history_messages or []) + [{"role": role, "content": content}]
+def ping_cohere() -> str:
+    """Lightweight health check against Cohere."""
+    try:
+        cli = _co_client()
+        if not cli: return "Cohere client not initialized. Is COHERE_API_KEY set?"
+        vecs = cohere_embed(["hello", "world"])
+        return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY}, timeout={COHERE_TIMEOUT_S}s)" if vecs else "Cohere reachable."
+    except Exception as e:
+        return f"Cohere ping failed: {e}"
+# --- THE CORE ANALYSIS ENGINE ---
 def handle(user_msg: str, files: list) -> str:
+    """This is the powerful backend engine using the "Coder" pattern."""
+    try:
+        safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
+        if blocked_in: return refusal_reply(reason_in)
+        file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
+        if file_paths:
+            dataframes = []
+            schema_parts = []
+            for i, p in enumerate(file_paths):
+                if p.endswith('.csv'):
+                    try:
+                        df = pd.read_csv(p)
+                        dataframes.append(df)
+                        schema_parts.append(f"DataFrame `dfs[{i}]` (from file `{os.path.basename(p)}`):\n{df.head().to_markdown()}\n")
+                    except UnicodeDecodeError:
+                        print(f"Warning: Reading {os.path.basename(p)} with fallback latin1 encoding.")
+                        df = pd.read_csv(p, encoding='latin1')
+                        dataframes.append(df)
+                        schema_parts.append(f"DataFrame `dfs[{i}]` (from file `{os.path.basename(p)}`):\n{df.head().to_markdown()}\n")
+            if not dataframes: return "Please upload at least one CSV file."
+            schema_context = "\n".join(schema_parts)
+            analysis_script_logic = _create_python_script(safe_in, schema_context)
+            python_repl = PythonREPL()
+            full_script_to_run = f"""
+import pandas as pd
+def analyze_data(dfs):
+    try:
+{analysis_script_logic}
+    except Exception as e:
+        print(f"An error occurred during analysis: {{e}}")
+analyze_data(dfs)
 """
+            local_vars = {"dfs": dataframes}
+            try:
+                # --- THE FINAL FIX IS HERE ---
+                res = python_repl.run(command=full_script_to_run, locals=local_vars)
+                return _sanitize_text(res)
+            except Exception as e:
+                return f"An error occurred while executing the AI-generated script: {e}"
+        else:
+            prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
+            return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
+    except Exception as e:
+        tb = traceback.format_exc()
+        log_event("app_error", None, {"err": str(e), "tb": tb})
+        return f"A critical error occurred: {e}"
+# --- PRE-LOAD LEGAL DOCUMENTS ---
 PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
 TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
+# ---------------- THE PROFESSIONAL UI WITH INTEGRATED LEGAL DOCS ----------------
 with gr.Blocks(theme="soft", css="style.css") as demo:
+    assessment_history = gr.State([])
+    with gr.Group(visible=False) as privacy_modal:
+        with gr.Blocks():
+            gr.Markdown(PRIVACY_POLICY_TEXT)
+            close_privacy_btn = gr.Button("Close")
+    with gr.Group(visible=False) as terms_modal:
+        with gr.Blocks():
+            gr.Markdown(TERMS_OF_SERVICE_TEXT)
+            close_terms_btn = gr.Button("Close")
+    gr.Markdown("# Universal AI Data Analyst")
+    with gr.Row(variant="panel"):
+        with gr.Column(scale=1):
+            gr.Markdown("## New Assessment")
+            files_input = gr.Files(label="Upload Data Files (.csv)", file_count="multiple", type="filepath", file_types=[".csv"])
+            prompt_input = gr.Textbox(label="Prompt", placeholder="Paste your scenario here.", lines=15)
+            with gr.Row():
+                send_btn = gr.Button("▶️ Run Analysis", variant="primary", scale=2)
+                clear_btn = gr.Button("🗑️ Clear")
+            ping_btn = gr.Button("Ping Cohere")
+            ping_out = gr.Markdown()
+        with gr.Column(scale=2):
+            with gr.Tabs():
+                with gr.TabItem("Current Assessment", id=0):
+                    chat_history_output = gr.Chatbot(label="Analysis Output", type="messages", height=600)
+                with gr.TabItem("Assessment History", id=1):
+                    gr.Markdown("## Review Past Assessments")
+                    history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
+                    history_display = gr.Markdown(label="Selected Assessment Details")
+    with gr.Row(): gr.Markdown("---")
+    with gr.Row():
+        privacy_link = gr.Button("Privacy Policy", variant="link")
+        terms_link = gr.Button("Terms of Service", variant="link")
+    def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
+        if not prompt or not files:
+            gr.Warning("Please provide both a prompt and at least one data file.")
+            yield chat_history_list, history_state_list, gr.update()
+            return
+        chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
+        thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Generating analysis script... This may take a moment.\n```")
+        yield thinking_message, history_state_list, gr.update()
+        ai_response_text = handle(prompt, files)
+        final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
+        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        file_names = [os.path.basename(f.name if hasattr(f, 'name') else f) for f in files]
+        new_assessment = {"id": timestamp, "prompt": prompt, "files": file_names, "response": ai_response_text}
+        updated_history = history_state_list + [new_assessment]
+        history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
+        yield final_chat, updated_history, gr.update(choices=history_labels)
+    def view_history(selection, history_state_list):
+        if not selection or not history_state_list: return ""
+        selected_id = selection.split(" - ")[0]
+        selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
+        if selected_assessment:
+            file_list_md = "\n- ".join(selected_assessment['files'])
+            return f"""### Assessment from: {selected_assessment['id']}\n**Files Used:**\n- {file_list_md}\n---\n**Original Prompt:**\n> {selected_assessment['prompt']}\n---\n**AI Generated Response:**\n{selected_assessment['response']}"""
+        return "Could not find the selected assessment."
+    send_btn.click(
+        run_analysis_wrapper,
+        inputs=[prompt_input, files_input, chat_history_output, assessment_history],
+        outputs=[chat_history_output, assessment_history, history_dropdown]
+    )
+    history_dropdown.change(
+        view_history,
+        inputs=[history_dropdown, assessment_history],
+        outputs=[history_display]
+    )
+    clear_btn.click(lambda: (None, None, [], []), outputs=[prompt_input, files_input, chat_history_output, assessment_history])
+    ping_btn.click(ping_cohere, outputs=[ping_out])
+    privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])
+    close_privacy_btn.click(lambda: gr.update(visible=False), outputs=[privacy_modal])
+    terms_link.click(lambda: gr.update(visible=True), outputs=[terms_modal])
+    close_terms_btn.click(lambda: gr.update(visible=False), outputs=[terms_modal])
+if __name__ == "__main__":
+    if not os.getenv("COHERE_API_KEY"):
+        print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
+    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))