Spaces:
Sleeping
Sleeping
Rajan Sharma
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,7 +11,7 @@ import gradio as gr
|
|
| 11 |
import pandas as pd
|
| 12 |
from datetime import datetime
|
| 13 |
import regex as re2
|
| 14 |
-
import re
|
| 15 |
|
| 16 |
# --- BACKEND IMPORTS ---
|
| 17 |
from langchain_cohere import ChatCohere
|
|
@@ -44,36 +44,26 @@ def _sanitize_text(s: str) -> str:
|
|
| 44 |
def _create_python_script(user_scenario: str, schema_context: str) -> str:
|
| 45 |
"""Asks the AI to write a Python script that outputs raw, structured JSON."""
|
| 46 |
|
| 47 |
-
# --- THE FINAL
|
| 48 |
-
EXPERT_ANALYTICAL_GUIDELINES = """
|
| 49 |
-
--- EXPERT ANALYTICAL GUIDELINES ---
|
| 50 |
-
When writing your script, you MUST follow these expert business rules:
|
| 51 |
-
1. **Linking Datasets Rule:** If you need to connect facilities to health zones, you cannot assume the zone is in the facility list. You must first identify the high-priority zone from the beds data, and then find the major city (by facility count) in the facility list, and *then* assess that city's capacity. Do not try to filter the facility list by a 'zone' column if it does not exist in the schema.
|
| 52 |
-
2. **Prioritization Rule:** To prioritize locations, you MUST combine the most recent population data with specific high-risk health indicators to create a multi-factor risk score.
|
| 53 |
-
3. **Capacity Calculation Rule:** For capacity over a 3-month window, assume **60 working days**.
|
| 54 |
-
4. **Cost Calculation Rule:** Sum 'Startup cost' and 'Ongoing cost' per person before multiplying.
|
| 55 |
-
"""
|
| 56 |
-
|
| 57 |
prompt_for_coder = f"""
|
| 58 |
-
You are an expert Python data scientist. Your job is to write a script to
|
| 59 |
-
You have dataframes in a list `dfs`.
|
| 60 |
-
|
| 61 |
-
{EXPERT_ANALYTICAL_GUIDELINES}
|
| 62 |
|
| 63 |
-
--- DATA
|
|
|
|
| 64 |
{schema_context}
|
| 65 |
-
--- END
|
| 66 |
|
| 67 |
CRITICAL RULES:
|
| 68 |
-
1.
|
| 69 |
-
2.
|
| 70 |
-
3.
|
|
|
|
| 71 |
|
| 72 |
--- USER'S SCENARIO ---
|
| 73 |
{user_scenario}
|
| 74 |
|
| 75 |
--- PYTHON SCRIPT ---
|
| 76 |
-
Now, write the complete Python script that
|
| 77 |
```python
|
| 78 |
"""
|
| 79 |
generated_text = cohere_chat(prompt_for_coder)
|
|
@@ -101,12 +91,9 @@ def _generate_long_report(prompt: str) -> str:
|
|
| 101 |
def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
|
| 102 |
"""Asks the AI to act as a consultant and write a polished report from the raw data."""
|
| 103 |
prompt_for_writer = f"""
|
| 104 |
-
You are an expert management consultant
|
| 105 |
-
A data science script has run to extract key findings. You have the user's original request and the raw JSON data.
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
--- USER'S ORIGINAL SCENARIO & DELIVERABLES ---
|
| 110 |
{user_scenario}
|
| 111 |
--- END SCENARIO ---
|
| 112 |
|
|
@@ -118,7 +105,7 @@ Now, write the final, polished report. The report MUST:
|
|
| 118 |
1. Follow the "Expected Output Format" requested by the user.
|
| 119 |
2. Use tables, bullet points, and DETAILED narrative justifications for each recommendation.
|
| 120 |
3. Synthesize the raw data into actionable insights. Do not just copy the raw numbers; interpret them.
|
| 121 |
-
4. Ensure you fully address ALL evaluation questions
|
| 122 |
"""
|
| 123 |
return _generate_long_report(prompt_for_writer)
|
| 124 |
|
|
@@ -234,14 +221,12 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
|
|
| 234 |
|
| 235 |
chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
|
| 236 |
|
| 237 |
-
# This is a dummy update function for now, as we're not streaming mid-process
|
| 238 |
def dummy_update(message):
|
| 239 |
pass
|
| 240 |
|
| 241 |
thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Generating and executing analysis... Please wait.\n```")
|
| 242 |
yield thinking_message, history_state_list, gr.update()
|
| 243 |
|
| 244 |
-
# The handle function is now called with the dummy update function
|
| 245 |
ai_response_text = handle(prompt, files, dummy_update)
|
| 246 |
|
| 247 |
final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
|
|
@@ -256,10 +241,9 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
|
|
| 256 |
else:
|
| 257 |
yield final_chat, history_state_list, gr.update()
|
| 258 |
|
| 259 |
-
|
| 260 |
def view_history(selection, history_state_list):
|
| 261 |
if not selection or not history_state_list: return ""
|
| 262 |
-
selected_id = selection.split(" - ")
|
| 263 |
selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
|
| 264 |
if selected_assessment:
|
| 265 |
file_list_md = "\n- ".join(selected_assessment['files'])
|
|
@@ -271,7 +255,7 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
|
|
| 271 |
inputs=[prompt_input, files_input, chat_history_output, assessment_history],
|
| 272 |
outputs=[chat_history_output, assessment_history, history_dropdown]
|
| 273 |
)
|
| 274 |
-
history_dropdown.change(view_history, inputs=[history_dropdown, assessment_history], outputs=[
|
| 275 |
clear_btn.click(lambda: (None, None, [], []), outputs=[prompt_input, files_input, chat_history_output, assessment_history])
|
| 276 |
ping_btn.click(ping_cohere, outputs=[ping_out])
|
| 277 |
privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])
|
|
|
|
| 11 |
import pandas as pd
|
| 12 |
from datetime import datetime
|
| 13 |
import regex as re2
|
| 14 |
+
import re
|
| 15 |
|
| 16 |
# --- BACKEND IMPORTS ---
|
| 17 |
from langchain_cohere import ChatCohere
|
|
|
|
| 44 |
def _create_python_script(user_scenario: str, schema_context: str) -> str:
|
| 45 |
"""Asks the AI to write a Python script that outputs raw, structured JSON."""
|
| 46 |
|
| 47 |
+
# --- THE FINAL, DEFINITIVE PROMPT FIX ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
prompt_for_coder = f"""
|
| 49 |
+
You are an expert Python data scientist. Your job is to write a script to analyze data that has ALREADY been loaded.
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
+
--- DATA CONTEXT ---
|
| 52 |
+
The data has been pre-loaded into a Python list of pandas DataFrames called `dfs`.
|
| 53 |
{schema_context}
|
| 54 |
+
--- END DATA CONTEXT ---
|
| 55 |
|
| 56 |
CRITICAL RULES:
|
| 57 |
+
1. **DO NOT READ FILES:** You MUST NOT include any code that reads files (e.g., `pd.read_csv`). The data is ALREADY loaded in the `dfs` variable. You MUST use this variable as your starting point. Failure to do so will cause a fatal error.
|
| 58 |
+
2. **JSON OUTPUT ONLY:** Your script's ONLY output must be a single JSON object printed to stdout. This JSON must contain the raw data findings for each of the user's tasks.
|
| 59 |
+
3. **BE PRECISE:** Use the exact, case-sensitive column names from the schema and robustly clean strings (`re.sub()`) before converting to numbers.
|
| 60 |
+
4. **JSON SERIALIZATION:** Before adding data to your final dictionary for JSON conversion, you MUST convert any pandas-specific types (like `int64`) to standard Python types using `.item()` for single values or `.tolist()` for lists.
|
| 61 |
|
| 62 |
--- USER'S SCENARIO ---
|
| 63 |
{user_scenario}
|
| 64 |
|
| 65 |
--- PYTHON SCRIPT ---
|
| 66 |
+
Now, write the complete Python script that analyzes the `dfs` variable and prints a single, serializable JSON object.
|
| 67 |
```python
|
| 68 |
"""
|
| 69 |
generated_text = cohere_chat(prompt_for_coder)
|
|
|
|
| 91 |
def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
|
| 92 |
"""Asks the AI to act as a consultant and write a polished report from the raw data."""
|
| 93 |
prompt_for_writer = f"""
|
| 94 |
+
You are an expert management consultant. A data science script has extracted key findings. Your task is to synthesize these findings into a professional report that answers the user's questions.
|
|
|
|
| 95 |
|
| 96 |
+
--- USER'S ORIGINAL SCENARIO ---
|
|
|
|
|
|
|
| 97 |
{user_scenario}
|
| 98 |
--- END SCENARIO ---
|
| 99 |
|
|
|
|
| 105 |
1. Follow the "Expected Output Format" requested by the user.
|
| 106 |
2. Use tables, bullet points, and DETAILED narrative justifications for each recommendation.
|
| 107 |
3. Synthesize the raw data into actionable insights. Do not just copy the raw numbers; interpret them.
|
| 108 |
+
4. Ensure you fully address ALL evaluation questions.
|
| 109 |
"""
|
| 110 |
return _generate_long_report(prompt_for_writer)
|
| 111 |
|
|
|
|
| 221 |
|
| 222 |
chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
|
| 223 |
|
|
|
|
| 224 |
def dummy_update(message):
|
| 225 |
pass
|
| 226 |
|
| 227 |
thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Generating and executing analysis... Please wait.\n```")
|
| 228 |
yield thinking_message, history_state_list, gr.update()
|
| 229 |
|
|
|
|
| 230 |
ai_response_text = handle(prompt, files, dummy_update)
|
| 231 |
|
| 232 |
final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
|
|
|
|
| 241 |
else:
|
| 242 |
yield final_chat, history_state_list, gr.update()
|
| 243 |
|
|
|
|
| 244 |
def view_history(selection, history_state_list):
|
| 245 |
if not selection or not history_state_list: return ""
|
| 246 |
+
selected_id = selection.split(" - ")
|
| 247 |
selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
|
| 248 |
if selected_assessment:
|
| 249 |
file_list_md = "\n- ".join(selected_assessment['files'])
|
|
|
|
| 255 |
inputs=[prompt_input, files_input, chat_history_output, assessment_history],
|
| 256 |
outputs=[chat_history_output, assessment_history, history_dropdown]
|
| 257 |
)
|
| 258 |
+
history_dropdown.change(view_history, inputs=[history_dropdown, assessment_history], outputs=[display_history])
|
| 259 |
clear_btn.click(lambda: (None, None, [], []), outputs=[prompt_input, files_input, chat_history_output, assessment_history])
|
| 260 |
ping_btn.click(ping_cohere, outputs=[ping_out])
|
| 261 |
privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])
|