Spaces:

VEDAGI1
/

Medica_DecisionSupportAI

Sleeping

App Files Community

Rajan Sharma committed on Oct 10

Commit

85429e8

verified ·

1 Parent(s): f68dc31

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -60

app.py CHANGED Viewed

@@ -1,59 +1,70 @@
-from future import annotations
 import os
 import io
 import json
 import traceback
 from contextlib import redirect_stdout
 from typing import List, Dict, Any
 import gradio as gr
 import pandas as pd
 from datetime import datetime
 import regex as re2
 import re
---- BACKEND IMPORTS ---
 from langchain_cohere import ChatCohere
---- LOCAL MODULE IMPORTS ---
 from settings import (
-GENERAL_CONVERSATION_PROMPT,
-COHERE_MODEL_PRIMARY, COHERE_TIMEOUT_S, USE_OPEN_FALLBACKS
 )
 from audit_log import log_event
 from privacy import safety_filter, refusal_reply
 from llm_router import cohere_chat, _co_client, cohere_embed
---- UTILITY FUNCTIONS ---
 def load_markdown_text(filepath: str) -> str:
-"""Safely loads text content from a markdown file."""
-try:
-with open(filepath, 'r', encoding='utf-8') as f: return f.read()
-except FileNotFoundError:
-return f"Error: Document {os.path.basename(filepath)} not found."
 def _sanitize_text(s: str) -> str:
-if not isinstance(s, str): return s
-return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
---- THE "ANALYST-WRITER" PIPELINE ---
 def _create_python_script(user_scenario: str, schema_context: str) -> str:
-"""Asks the AI to write a Python script that outputs raw, structured JSON."""
-code
-Code
-# --- THE FINAL, MOST ROBUST PROMPT ---
-prompt_for_coder = f"""
-You are an expert Python data scientist. Your job is to write a script to extract the data needed to answer the user's request and print the findings as a single JSON object.
---- DATA CONTEXT ---
-The data is pre-loaded into a Python list of pandas DataFrames called dfs.
 {schema_context}
---- END DATA CONTEXT ---
 CRITICAL RULES:
-DO NOT READ FILES: You MUST NOT include pd.read_csv. The data is in the dfs variable.
-JSON OUTPUT ONLY: Your script's ONLY output must be a single JSON object printed to stdout.
-JSON SERIALIZATION (VERY IMPORTANT): The json library can only handle standard Python types. Before creating the final dictionary, ensure all values are standard types. If a value is a pandas/numpy number (like int64), convert it to a standard Python int or float using .item(). If a value is a pandas Series, convert it using .tolist().
-DEFENSIVE CODING (CRITICAL): Before passing a variable to a function, be paranoid. For example, if you write a helper function that expects a dictionary, DO NOT pass it a list. If a function expects a single item, DO NOT pass it a whole dataframe. Always check the data type of your variables. This will prevent AttributeError crashes.
-BE PRECISE: Use the exact, case-sensitive column names from the schema and robustly clean strings (re.sub()) before converting them to numbers.
 --- USER'S SCENARIO ---
 {user_scenario}
 --- PYTHON SCRIPT ---
-Now, write the complete, robust, and defensive Python script that analyzes the dfs variable and prints a single, serializable JSON object.
-code
-Python
 """
     generated_text = cohere_chat(prompt_for_coder)
     match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
@@ -61,7 +72,6 @@ Python
     return "print(json.dumps({'error': 'Failed to generate a valid Python script.'}))"
 def _generate_long_report(prompt: str) -> str:
-    """Calls the Cohere API directly with a much higher max_tokens limit."""
     try:
         client = _co_client()
         if not client: return "Error: Cohere client not initialized."
@@ -76,11 +86,13 @@ def _generate_long_report(prompt: str) -> str:
         return f"Error during final report generation: {e}"
 def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
-    """Asks the AI to act as a consultant and write a polished report from the raw data."""
     prompt_for_writer = f"""
-You are an expert management consultant. A data science script has extracted key findings. Your task is to synthesize these findings into a professional report that answers the user's questions.
---- USER'S ORIGINAL SCENARIO ---
 {user_scenario}
 --- END SCENARIO ---
@@ -92,7 +104,7 @@ Now, write the final, polished report. The report MUST:
 1.  Follow the "Expected Output Format" requested by the user.
 2.  Use tables, bullet points, and DETAILED narrative justifications for each recommendation.
 3.  Synthesize the raw data into actionable insights. Do not just copy the raw numbers; interpret them.
-4.  Ensure you fully address ALL evaluation questions.
 """
     return _generate_long_report(prompt_for_writer)
@@ -100,7 +112,6 @@ def _append_msg(h: List[Dict[str, str]], r: str, c: str) -> List[Dict[str, str]]
     return (h or []) + [{"role": r, "content": c}]
 def ping_cohere() -> str:
-    """Lightweight health check."""
     try:
         cli = _co_client()
         if not cli: return "Cohere client not initialized."
@@ -108,10 +119,7 @@ def ping_cohere() -> str:
         return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY})" if vecs else "Cohere reachable."
     except Exception as e: return f"Cohere ping failed: {e}"
-# --- THE CORE ANALYSIS ENGINE ---
 def handle(user_msg: str, files: list, yield_update) -> str:
-    """Orchestrates the 'Analyst-Writer' pipeline."""
     try:
         safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
         if blocked_in: return refusal_reply(reason_in)
@@ -156,11 +164,9 @@ def handle(user_msg: str, files: list, yield_update) -> str:
         log_event("app_error", None, {"err": str(e), "tb": tb})
         return f"A critical error occurred: {e}"
-# --- PRE-LOAD LEGAL DOCUMENTS ---
 PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
 TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
-# ---------------- THE PROFESSIONAL UI ----------------
 with gr.Blocks(theme="soft", css="style.css") as demo:
     assessment_history = gr.State([])
@@ -219,7 +225,7 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
         if files:
-            file_names = [os.path.basename(fn.name if hasattr(fn, 'name') else fn) for fn in files]
             new_assessment = {"id": timestamp, "prompt": prompt, "files": file_names, "response": ai_response_text}
             updated_history = (history_state_list or []) + [new_assessment]
             history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
@@ -227,17 +233,15 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
         else:
             yield final_chat, history_state_list, gr.update()
-   def view_history(selection, history_state_list):
-    if not selection or not history_state_list:
-        return ""
-    # THE FIX IS HERE: Correctly extract just the timestamp (the first part)
-    selected_id = selection.split(" - ")[0]
-    selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
-    if selected_assessment:
-        file_list_md = "\n- ".join(selected_assessment.get('files', []))
-        return f"""### Assessment from: {selected_assessment['id']}
 **Files Used:**
 - {file_list_md}
 ---
@@ -247,19 +251,21 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
 **AI Generated Response:**
 {selected_assessment['response']}
 """
-    return "Could not find the selected assessment."
     send_btn.click(
         run_analysis_wrapper,
         inputs=[prompt_input, files_input, chat_history_output, assessment_history],
         outputs=[chat_history_output, assessment_history, history_dropdown]
     )
-    history_dropdown.change(view_history, inputs=[history_dropdown, assessment_history], outputs=[history_display])
-   # We remove 'assessment_history' from the list of outputs, so it is no longer cleared.
-    # We remove 'assessment_history' from the list of outputs, so it is no longer cleared.
     clear_btn.click(
-      lambda: (None, None, []),
-      outputs=[prompt_input, files_input, chat_history_output]
     )
     ping_btn.click(ping_cohere, outputs=[ping_out])
     privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])

+from __future__ import annotations
 import os
 import io
 import json
 import traceback
 from contextlib import redirect_stdout
 from typing import List, Dict, Any
 import gradio as gr
 import pandas as pd
 from datetime import datetime
 import regex as re2
 import re
 from langchain_cohere import ChatCohere
 from settings import (
+    GENERAL_CONVERSATION_PROMPT,
+    COHERE_MODEL_PRIMARY, COHERE_TIMEOUT_S, USE_OPEN_FALLBACKS
 )
 from audit_log import log_event
 from privacy import safety_filter, refusal_reply
 from llm_router import cohere_chat, _co_client, cohere_embed
 def load_markdown_text(filepath: str) -> str:
+    try:
+        with open(filepath, 'r', encoding='utf-8') as f:
+            return f.read()
+    except FileNotFoundError:
+        return f"**Error:** Document `{os.path.basename(filepath)}` not found."
 def _sanitize_text(s: str) -> str:
+    if not isinstance(s, str): return s
+    return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
 def _create_python_script(user_scenario: str, schema_context: str) -> str:
+    EXPERT_ANALYTICAL_GUIDELINES = """
+--- EXPERT ANALYTICAL GUIDELINES ---
+When writing your script, you MUST follow these expert business rules:
+1.  **Linking Datasets Rule:** If you need to connect facilities to health zones when the 'zone' column is not in the facility list, you must first identify the high-priority zone from the beds data, then find the major city (by facility count) in the facility list, and *then* assess that city's capacity. Do not try to filter the facility list by a 'zone' column if it does not exist in the schema.
+2.  **Prioritization Rule:** To prioritize locations, you MUST combine the most recent population data with specific high-risk health indicators to create a multi-factor risk score.
+3.  **Capacity Calculation Rule:** For capacity over a 3-month window, assume **60 working days**.
+4.  **Cost Calculation Rule:** Sum 'Startup cost' and 'Ongoing cost' per person before multiplying.
+"""
+    prompt_for_coder = f"""
+You are an expert Python data scientist. Your job is to write a script to extract the data needed to answer the user's request.
+You have dataframes in a list `dfs`.
+{EXPERT_ANALYTICAL_GUIDELINES}
+--- DATA SCHEMA ---
 {schema_context}
+--- END DATA SCHEMA ---
 CRITICAL RULES:
+1.  **DO NOT READ FILES:** You MUST NOT include `pd.read_csv`. The data is ALREADY loaded in the `dfs` variable. You MUST use this variable. Failure to do so will cause a fatal error.
+2.  **JSON OUTPUT ONLY:** Your script's ONLY output must be a single JSON object printed to stdout containing the raw data findings.
+3.  **BE PRECISE:** Use the exact, case-sensitive column names from the schema and robustly clean strings (`re.sub()`) before converting to numbers.
+4.  **JSON SERIALIZATION:** Before adding data to your final dictionary for JSON conversion, you MUST convert any pandas-specific types (like `int64`) to standard Python types using `.item()` for single values or `.tolist()` for lists.
 --- USER'S SCENARIO ---
 {user_scenario}
 --- PYTHON SCRIPT ---
+Now, write the complete Python script that performs the analysis and prints a single, serializable JSON object.
+```python
 """
     generated_text = cohere_chat(prompt_for_coder)
     match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
     return "print(json.dumps({'error': 'Failed to generate a valid Python script.'}))"
 def _generate_long_report(prompt: str) -> str:
     try:
         client = _co_client()
         if not client: return "Error: Cohere client not initialized."
         return f"Error during final report generation: {e}"
 def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
     prompt_for_writer = f"""
+You are an expert management consultant and data analyst.
+A data science script has run to extract key findings. You have the user's original request and the raw JSON data.
+Your task is to synthesize these raw findings into a single, comprehensive, and professional report that directly answers all of the user's questions with detailed justifications.
+--- USER'S ORIGINAL SCENARIO & DELIVERABLES ---
 {user_scenario}
 --- END SCENARIO ---
 1.  Follow the "Expected Output Format" requested by the user.
 2.  Use tables, bullet points, and DETAILED narrative justifications for each recommendation.
 3.  Synthesize the raw data into actionable insights. Do not just copy the raw numbers; interpret them.
+4.  Ensure you fully address ALL evaluation questions, especially the final recommendations.
 """
     return _generate_long_report(prompt_for_writer)
     return (h or []) + [{"role": r, "content": c}]
 def ping_cohere() -> str:
     try:
         cli = _co_client()
         if not cli: return "Cohere client not initialized."
         return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY})" if vecs else "Cohere reachable."
     except Exception as e: return f"Cohere ping failed: {e}"
 def handle(user_msg: str, files: list, yield_update) -> str:
     try:
         safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
         if blocked_in: return refusal_reply(reason_in)
         log_event("app_error", None, {"err": str(e), "tb": tb})
         return f"A critical error occurred: {e}"
 PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
 TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
 with gr.Blocks(theme="soft", css="style.css") as demo:
     assessment_history = gr.State([])
         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
         if files:
+            file_names = [os.path.basename(f.name if hasattr(f, 'name') else f) for f in files]
             new_assessment = {"id": timestamp, "prompt": prompt, "files": file_names, "response": ai_response_text}
             updated_history = (history_state_list or []) + [new_assessment]
             history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
         else:
             yield final_chat, history_state_list, gr.update()
+    def view_history(selection, history_state_list):
+        if not selection or not history_state_list:
+            return ""
+        selected_id = selection.split(" - ")[0]
+        selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
+        if selected_assessment:
+            file_list_md = "\n- ".join(selected_assessment.get('files', []))
+            return f"""### Assessment from: {selected_assessment['id']}
 **Files Used:**
 - {file_list_md}
 ---
 **AI Generated Response:**
 {selected_assessment['response']}
 """
+        return "Could not find the selected assessment."
     send_btn.click(
         run_analysis_wrapper,
         inputs=[prompt_input, files_input, chat_history_output, assessment_history],
         outputs=[chat_history_output, assessment_history, history_dropdown]
     )
+    history_dropdown.change(
+        view_history,
+        inputs=[history_dropdown, assessment_history],
+        outputs=[history_display]
+    )
     clear_btn.click(
+        lambda: (None, None, []),
+        outputs=[prompt_input, files_input, chat_history_output]
     )
     ping_btn.click(ping_cohere, outputs=[ping_out])
     privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])