Rajan Sharma committed on
Commit
47ade56
·
verified ·
1 Parent(s): c578f08

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -32
app.py CHANGED
@@ -11,7 +11,7 @@ import gradio as gr
11
  import pandas as pd
12
  from datetime import datetime
13
  import regex as re2
14
- import re # Standard library regex module
15
 
16
  # --- BACKEND IMPORTS ---
17
  from langchain_cohere import ChatCohere
@@ -44,36 +44,26 @@ def _sanitize_text(s: str) -> str:
44
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
45
  """Asks the AI to write a Python script that outputs raw, structured JSON."""
46
 
47
- # --- THE FINAL ALIGNMENT AND BUG FIX IS HERE ---
48
- EXPERT_ANALYTICAL_GUIDELINES = """
49
- --- EXPERT ANALYTICAL GUIDELINES ---
50
- When writing your script, you MUST follow these expert business rules:
51
- 1. **Linking Datasets Rule:** If you need to connect facilities to health zones, you cannot assume the zone is in the facility list. You must first identify the high-priority zone from the beds data, and then find the major city (by facility count) in the facility list, and *then* assess that city's capacity. Do not try to filter the facility list by a 'zone' column if it does not exist in the schema.
52
- 2. **Prioritization Rule:** To prioritize locations, you MUST combine the most recent population data with specific high-risk health indicators to create a multi-factor risk score.
53
- 3. **Capacity Calculation Rule:** For capacity over a 3-month window, assume **60 working days**.
54
- 4. **Cost Calculation Rule:** Sum 'Startup cost' and 'Ongoing cost' per person before multiplying.
55
- """
56
-
57
  prompt_for_coder = f"""
58
- You are an expert Python data scientist. Your job is to write a script to extract the data needed to answer the user's request.
59
- You have dataframes in a list `dfs`.
60
-
61
- {EXPERT_ANALYTICAL_GUIDELINES}
62
 
63
- --- DATA SCHEMA ---
 
64
  {schema_context}
65
- --- END SCHEMA ---
66
 
67
  CRITICAL RULES:
68
- 1. Your script's ONLY output should be a single JSON object printed to stdout containing the raw data findings.
69
- 2. Use the exact, case-sensitive column names from the schema.
70
- 3. Before converting strings to numbers, you MUST robustly clean them of all non-numeric characters (e.g., $, %, ~) using `re.sub()`.
 
71
 
72
  --- USER'S SCENARIO ---
73
  {user_scenario}
74
 
75
  --- PYTHON SCRIPT ---
76
- Now, write the complete Python script that performs the analysis and prints a single JSON object with the results.
77
  ```python
78
  """
79
  generated_text = cohere_chat(prompt_for_coder)
@@ -101,12 +91,9 @@ def _generate_long_report(prompt: str) -> str:
101
  def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
102
  """Asks the AI to act as a consultant and write a polished report from the raw data."""
103
  prompt_for_writer = f"""
104
- You are an expert management consultant and data analyst.
105
- A data science script has run to extract key findings. You have the user's original request and the raw JSON data.
106
 
107
- Your task is to synthesize these raw findings into a single, comprehensive, and professional report that directly answers all of the user's questions with detailed justifications.
108
-
109
- --- USER'S ORIGINAL SCENARIO & DELIVERABLES ---
110
  {user_scenario}
111
  --- END SCENARIO ---
112
 
@@ -118,7 +105,7 @@ Now, write the final, polished report. The report MUST:
118
  1. Follow the "Expected Output Format" requested by the user.
119
  2. Use tables, bullet points, and DETAILED narrative justifications for each recommendation.
120
  3. Synthesize the raw data into actionable insights. Do not just copy the raw numbers; interpret them.
121
- 4. Ensure you fully address ALL evaluation questions, especially the final recommendations.
122
  """
123
  return _generate_long_report(prompt_for_writer)
124
 
@@ -234,14 +221,12 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
234
 
235
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
236
 
237
- # This is a dummy update function for now, as we're not streaming mid-process
238
  def dummy_update(message):
239
  pass
240
 
241
  thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Generating and executing analysis... Please wait.\n```")
242
  yield thinking_message, history_state_list, gr.update()
243
 
244
- # The handle function is now called with the dummy update function
245
  ai_response_text = handle(prompt, files, dummy_update)
246
 
247
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
@@ -256,10 +241,9 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
256
  else:
257
  yield final_chat, history_state_list, gr.update()
258
 
259
-
260
  def view_history(selection, history_state_list):
261
  if not selection or not history_state_list: return ""
262
- selected_id = selection.split(" - ") # Safer split
263
  selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
264
  if selected_assessment:
265
  file_list_md = "\n- ".join(selected_assessment['files'])
@@ -271,7 +255,7 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
271
  inputs=[prompt_input, files_input, chat_history_output, assessment_history],
272
  outputs=[chat_history_output, assessment_history, history_dropdown]
273
  )
274
- history_dropdown.change(view_history, inputs=[history_dropdown, assessment_history], outputs=[history_display])
275
  clear_btn.click(lambda: (None, None, [], []), outputs=[prompt_input, files_input, chat_history_output, assessment_history])
276
  ping_btn.click(ping_cohere, outputs=[ping_out])
277
  privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])
 
11
  import pandas as pd
12
  from datetime import datetime
13
  import regex as re2
14
+ import re
15
 
16
  # --- BACKEND IMPORTS ---
17
  from langchain_cohere import ChatCohere
 
44
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
45
  """Asks the AI to write a Python script that outputs raw, structured JSON."""
46
 
47
+ # --- THE FINAL, DEFINITIVE PROMPT FIX ---
 
 
 
 
 
 
 
 
 
48
  prompt_for_coder = f"""
49
+ You are an expert Python data scientist. Your job is to write a script to analyze data that has ALREADY been loaded.
 
 
 
50
 
51
+ --- DATA CONTEXT ---
52
+ The data has been pre-loaded into a Python list of pandas DataFrames called `dfs`.
53
  {schema_context}
54
+ --- END DATA CONTEXT ---
55
 
56
  CRITICAL RULES:
57
+ 1. **DO NOT READ FILES:** You MUST NOT include any code that reads files (e.g., `pd.read_csv`). The data is ALREADY loaded in the `dfs` variable. You MUST use this variable as your starting point. Failure to do so will cause a fatal error.
58
+ 2. **JSON OUTPUT ONLY:** Your script's ONLY output must be a single JSON object printed to stdout. This JSON must contain the raw data findings for each of the user's tasks.
59
+ 3. **BE PRECISE:** Use the exact, case-sensitive column names from the schema and robustly clean strings (`re.sub()`) before converting to numbers.
60
+ 4. **JSON SERIALIZATION:** Before adding data to your final dictionary for JSON conversion, you MUST convert any pandas-specific types (like `int64`) to standard Python types using `.item()` for single values or `.tolist()` for lists.
61
 
62
  --- USER'S SCENARIO ---
63
  {user_scenario}
64
 
65
  --- PYTHON SCRIPT ---
66
+ Now, write the complete Python script that analyzes the `dfs` variable and prints a single, serializable JSON object.
67
  ```python
68
  """
69
  generated_text = cohere_chat(prompt_for_coder)
 
91
  def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
92
  """Asks the AI to act as a consultant and write a polished report from the raw data."""
93
  prompt_for_writer = f"""
94
+ You are an expert management consultant. A data science script has extracted key findings. Your task is to synthesize these findings into a professional report that answers the user's questions.
 
95
 
96
+ --- USER'S ORIGINAL SCENARIO ---
 
 
97
  {user_scenario}
98
  --- END SCENARIO ---
99
 
 
105
  1. Follow the "Expected Output Format" requested by the user.
106
  2. Use tables, bullet points, and DETAILED narrative justifications for each recommendation.
107
  3. Synthesize the raw data into actionable insights. Do not just copy the raw numbers; interpret them.
108
+ 4. Ensure you fully address ALL evaluation questions.
109
  """
110
  return _generate_long_report(prompt_for_writer)
111
 
 
221
 
222
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
223
 
 
224
  def dummy_update(message):
225
  pass
226
 
227
  thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Generating and executing analysis... Please wait.\n```")
228
  yield thinking_message, history_state_list, gr.update()
229
 
 
230
  ai_response_text = handle(prompt, files, dummy_update)
231
 
232
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
 
241
  else:
242
  yield final_chat, history_state_list, gr.update()
243
 
 
244
  def view_history(selection, history_state_list):
245
  if not selection or not history_state_list: return ""
246
+ selected_id = selection.split(" - ")
247
  selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
248
  if selected_assessment:
249
  file_list_md = "\n- ".join(selected_assessment['files'])
 
255
  inputs=[prompt_input, files_input, chat_history_output, assessment_history],
256
  outputs=[chat_history_output, assessment_history, history_dropdown]
257
  )
258
+ history_dropdown.change(view_history, inputs=[history_dropdown, assessment_history], outputs=[display_history])
259
  clear_btn.click(lambda: (None, None, [], []), outputs=[prompt_input, files_input, chat_history_output, assessment_history])
260
  ping_btn.click(ping_cohere, outputs=[ping_out])
261
  privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])