Rajan Sharma committed on
Commit
f68dc31
·
verified ·
1 Parent(s): 947666f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -76
app.py CHANGED
@@ -1,79 +1,59 @@
1
- # app.py
2
- from __future__ import annotations
3
  import os
4
  import io
5
  import json
6
  import traceback
7
  from contextlib import redirect_stdout
8
  from typing import List, Dict, Any
9
-
10
  import gradio as gr
11
  import pandas as pd
12
  from datetime import datetime
13
  import regex as re2
14
  import re
15
-
16
- # --- BACKEND IMPORTS ---
17
  from langchain_cohere import ChatCohere
18
-
19
- # --- LOCAL MODULE IMPORTS ---
20
  from settings import (
21
- GENERAL_CONVERSATION_PROMPT,
22
- COHERE_MODEL_PRIMARY, COHERE_TIMEOUT_S, USE_OPEN_FALLBACKS
23
  )
24
  from audit_log import log_event
25
  from privacy import safety_filter, refusal_reply
26
  from llm_router import cohere_chat, _co_client, cohere_embed
27
-
28
- # --- UTILITY FUNCTIONS ---
29
-
30
  def load_markdown_text(filepath: str) -> str:
31
- """Safely loads text content from a markdown file."""
32
- try:
33
- with open(filepath, 'r', encoding='utf-8') as f: return f.read()
34
- except FileNotFoundError:
35
- return f"**Error:** Document `{os.path.basename(filepath)}` not found."
36
-
37
  def _sanitize_text(s: str) -> str:
38
- if not isinstance(s, str): return s
39
- return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
40
-
41
- # --- THE "ANALYST-WRITER" PIPELINE ---
42
-
43
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
44
- """Asks the AI to write a Python script that outputs raw, structured JSON."""
45
-
46
- EXPERT_ANALYTICAL_GUIDELINES = """
47
- --- EXPERT ANALYTICAL GUIDELINES ---
48
- When writing your script, you MUST follow these expert business rules:
49
- 1. **Linking Datasets Rule:** If you need to connect facilities to health zones when the 'zone' column is not in the facility list, you must first identify the high-priority zone from the beds data, then find the major city (by facility count) in the facility list, and *then* assess that city's capacity. Do not try to filter the facility list by a 'zone' column if it does not exist in the schema.
50
- 2. **Prioritization Rule:** To prioritize locations, you MUST combine the most recent population data with specific high-risk health indicators to create a multi-factor risk score.
51
- 3. **Capacity Calculation Rule:** For capacity over a 3-month window, assume **60 working days**.
52
- 4. **Cost Calculation Rule:** Sum 'Startup cost' and 'Ongoing cost' per person before multiplying.
53
- """
54
-
55
- prompt_for_coder = f"""
56
- You are an expert Python data scientist. Your job is to write a script to extract the data needed to answer the user's request.
57
- You have dataframes in a list `dfs`.
58
-
59
- {EXPERT_ANALYTICAL_GUIDELINES}
60
-
61
- --- DATA SCHEMA ---
62
  {schema_context}
63
- --- END DATA SCHEMA ---
64
-
65
  CRITICAL RULES:
66
- 1. **DO NOT READ FILES:** You MUST NOT include `pd.read_csv`. The data is ALREADY loaded in the `dfs` variable.
67
- 2. **JSON OUTPUT ONLY:** Your script's ONLY output must be a single JSON object printed to stdout containing the raw data findings.
68
- 3. **BE PRECISE:** Use the exact, case-sensitive column names from the schema and robustly clean strings (`re.sub()`) before converting to numbers.
69
- 4. **JSON SERIALIZATION:** Before adding data to your final dictionary for JSON conversion, you MUST convert any pandas-specific types (like `int64`) to standard Python types using `.item()` for single values or `.tolist()` for lists.
70
-
71
  --- USER'S SCENARIO ---
72
  {user_scenario}
73
-
74
  --- PYTHON SCRIPT ---
75
- Now, write the complete Python script that performs the analysis and prints a single, serializable JSON object.
76
- ```python
 
77
  """
78
  generated_text = cohere_chat(prompt_for_coder)
79
  match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
@@ -98,12 +78,9 @@ def _generate_long_report(prompt: str) -> str:
98
  def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
99
  """Asks the AI to act as a consultant and write a polished report from the raw data."""
100
  prompt_for_writer = f"""
101
- You are an expert management consultant and data analyst.
102
- A data science script has run to extract key findings. You have the user's original request and the raw JSON data.
103
-
104
- Your task is to synthesize these raw findings into a single, comprehensive, and professional report that directly answers all of the user's questions with detailed justifications.
105
 
106
- --- USER'S ORIGINAL SCENARIO & DELIVERABLES ---
107
  {user_scenario}
108
  --- END SCENARIO ---
109
 
@@ -222,8 +199,6 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
222
  privacy_link = gr.Button("Privacy Policy", variant="link")
223
  terms_link = gr.Button("Terms of Service", variant="link")
224
 
225
- # --- DEFINITIVE FIX: ALL UI LOGIC IS NOW CORRECTLY INDENTED ---
226
-
227
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
228
  if not prompt:
229
  gr.Warning("Please enter a prompt.")
@@ -244,7 +219,7 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
244
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
245
 
246
  if files:
247
- file_names = [os.path.basename(f.name if hasattr(f, 'name') else f) for f in files]
248
  new_assessment = {"id": timestamp, "prompt": prompt, "files": file_names, "response": ai_response_text}
249
  updated_history = (history_state_list or []) + [new_assessment]
250
  history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
@@ -252,15 +227,17 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
252
  else:
253
  yield final_chat, history_state_list, gr.update()
254
 
255
- def view_history(selection, history_state_list):
256
- if not selection or not history_state_list:
257
- return ""
258
- selected_id = selection.split(" - ")
259
- selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
260
-
261
- if selected_assessment:
262
- file_list_md = "\n- ".join(selected_assessment.get('files', []))
263
- return f"""### Assessment from: {selected_assessment['id']}
 
 
264
  **Files Used:**
265
  - {file_list_md}
266
  ---
@@ -270,21 +247,19 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
270
  **AI Generated Response:**
271
  {selected_assessment['response']}
272
  """
273
- return "Could not find the selected assessment."
274
 
275
  send_btn.click(
276
  run_analysis_wrapper,
277
  inputs=[prompt_input, files_input, chat_history_output, assessment_history],
278
  outputs=[chat_history_output, assessment_history, history_dropdown]
279
  )
280
- history_dropdown.change(
281
- view_history,
282
- inputs=[history_dropdown, assessment_history],
283
- outputs=[history_display]
284
- )
285
  clear_btn.click(
286
- lambda: (None, None, []),
287
- outputs=[prompt_input, files_input, chat_history_output]
288
  )
289
  ping_btn.click(ping_cohere, outputs=[ping_out])
290
  privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])
 
1
+ from __future__ import annotations
 
2
  import os
3
  import io
4
  import json
5
  import traceback
6
  from contextlib import redirect_stdout
7
  from typing import List, Dict, Any
 
8
  import gradio as gr
9
  import pandas as pd
10
  from datetime import datetime
11
  import regex as re2
12
  import re
13
+ # --- BACKEND IMPORTS ---
 
14
  from langchain_cohere import ChatCohere
15
+ # --- LOCAL MODULE IMPORTS ---
 
16
  from settings import (
17
+ GENERAL_CONVERSATION_PROMPT,
18
+ COHERE_MODEL_PRIMARY, COHERE_TIMEOUT_S, USE_OPEN_FALLBACKS
19
  )
20
  from audit_log import log_event
21
  from privacy import safety_filter, refusal_reply
22
  from llm_router import cohere_chat, _co_client, cohere_embed
23
+ # --- UTILITY FUNCTIONS ---
 
 
24
  def load_markdown_text(filepath: str) -> str:
25
+ """Safely loads text content from a markdown file."""
26
+ try:
27
+ with open(filepath, 'r', encoding='utf-8') as f: return f.read()
28
+ except FileNotFoundError:
29
+ return f"**Error:** Document `{os.path.basename(filepath)}` not found."
 
30
  def _sanitize_text(s: str) -> str:
31
+ if not isinstance(s, str): return s
32
+ return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
33
+ # --- THE "ANALYST-WRITER" PIPELINE ---
 
 
34
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
35
+ """Asks the AI to write a Python script that outputs raw, structured JSON."""
36
+ code
37
+ Code
38
+ # --- THE FINAL, MOST ROBUST PROMPT ---
39
+ prompt_for_coder = f"""
40
+ You are an expert Python data scientist. Your job is to write a script to extract the data needed to answer the user's request and print the findings as a single JSON object.
41
+ --- DATA CONTEXT ---
42
+ The data is pre-loaded into a Python list of pandas DataFrames called `dfs`.
 
 
 
 
 
 
 
 
 
 
43
  {schema_context}
44
+ --- END DATA CONTEXT ---
 
45
  CRITICAL RULES:
46
+ 1. **DO NOT READ FILES:** You MUST NOT include `pd.read_csv`. The data is in the `dfs` variable.
47
+ 2. **JSON OUTPUT ONLY:** Your script's ONLY output must be a single JSON object printed to stdout.
48
+ 3. **JSON SERIALIZATION (VERY IMPORTANT):** The `json` library can only handle standard Python types. Before creating the final dictionary, ensure all values are standard types. If a value is a pandas/numpy number (like `int64`), convert it to a standard Python int or float using `.item()`. If a value is a pandas Series, convert it using `.tolist()`.
49
+ 4. **DEFENSIVE CODING (CRITICAL):** Before passing a variable to a function, be paranoid. For example, if you write a helper function that expects a dictionary, DO NOT pass it a list. If a function expects a single item, DO NOT pass it a whole dataframe. Always check the data type of your variables. This will prevent `AttributeError` crashes.
50
+ 5. **BE PRECISE:** Use the exact, case-sensitive column names from the schema and robustly clean strings (`re.sub()`) before converting them to numbers.
51
  --- USER'S SCENARIO ---
52
  {user_scenario}
 
53
  --- PYTHON SCRIPT ---
54
+ Now, write the complete, robust, and defensive Python script that analyzes the `dfs` variable and prints a single, serializable JSON object.
55
+ ```python
56
57
  """
58
  generated_text = cohere_chat(prompt_for_coder)
59
  match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
 
78
  def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
79
  """Asks the AI to act as a consultant and write a polished report from the raw data."""
80
  prompt_for_writer = f"""
81
+ You are an expert management consultant. A data science script has extracted key findings. Your task is to synthesize these findings into a professional report that answers the user's questions.
 
 
 
82
 
83
+ --- USER'S ORIGINAL SCENARIO ---
84
  {user_scenario}
85
  --- END SCENARIO ---
86
 
 
199
  privacy_link = gr.Button("Privacy Policy", variant="link")
200
  terms_link = gr.Button("Terms of Service", variant="link")
201
 
 
 
202
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
203
  if not prompt:
204
  gr.Warning("Please enter a prompt.")
 
219
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
220
 
221
  if files:
222
+ file_names = [os.path.basename(fn.name if hasattr(fn, 'name') else fn) for fn in files]
223
  new_assessment = {"id": timestamp, "prompt": prompt, "files": file_names, "response": ai_response_text}
224
  updated_history = (history_state_list or []) + [new_assessment]
225
  history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
 
227
  else:
228
  yield final_chat, history_state_list, gr.update()
229
 
230
+
231
+ def view_history(selection, history_state_list):
232
+ if not selection or not history_state_list:
233
+ return ""
234
+ # THE FIX IS HERE: Correctly extract just the timestamp (the first part)
235
+ selected_id = selection.split(" - ")[0]
236
+ selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
237
+
238
+ if selected_assessment:
239
+ file_list_md = "\n- ".join(selected_assessment.get('files', []))
240
+ return f"""### Assessment from: {selected_assessment['id']}
241
  **Files Used:**
242
  - {file_list_md}
243
  ---
 
247
  **AI Generated Response:**
248
  {selected_assessment['response']}
249
  """
250
+ return "Could not find the selected assessment."
251
 
252
  send_btn.click(
253
  run_analysis_wrapper,
254
  inputs=[prompt_input, files_input, chat_history_output, assessment_history],
255
  outputs=[chat_history_output, assessment_history, history_dropdown]
256
  )
257
+ history_dropdown.change(view_history, inputs=[history_dropdown, assessment_history], outputs=[history_display])
258
+ # We remove 'assessment_history' from the list of outputs, so it is no longer cleared.
259
 
 
260
  clear_btn.click(
261
+ lambda: (None, None, []),
262
+ outputs=[prompt_input, files_input, chat_history_output]
263
  )
264
  ping_btn.click(ping_cohere, outputs=[ping_out])
265
  privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])