VEDAGI1 committed on
Commit
e3931ad
verified
1 Parent(s): 8b54e98

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -190
app.py CHANGED
@@ -1,11 +1,12 @@
 
 
1
  # app.py
2
- #
3
  # Universal AI Data Analyst with:
4
- # - IMPROVED: "Plan-and-Execute" logic for high-accuracy analysis.
5
- # - IMPROVED: Professional, structured report generation.
6
- # - IMPROVED: Enriched schema context for the AI analyst.
7
- # - Unchanged UI, event wiring, and core infrastructure.
8
-
9
  from __future__ import annotations
10
 
11
  import io
@@ -20,28 +21,16 @@ import gradio as gr
20
  import pandas as pd
21
  import regex as re2
22
  import re
23
-
24
  from langchain_cohere import ChatCohere # noqa: F401
25
-
26
  from settings import (
27
  GENERAL_CONVERSATION_PROMPT,
28
  COHERE_MODEL_PRIMARY,
29
- COHERE_TIMEOUT_S,
30
- USE_OPEN_FALLBACKS,
31
  )
32
- from audit_log import log_event
33
- from privacy import safety_filter, refusal_reply
34
- from llm_router import cohere_chat, _co_client, cohere_embed
35
-
36
  # Try to import optional HIPAA flags; fall back to safe defaults if not defined.
37
  try:
38
- from settings import (
39
- PHI_MODE,
40
- PERSIST_HISTORY,
41
- HISTORY_TTL_DAYS,
42
- REDACT_BEFORE_LLM,
43
- ALLOW_EXTERNAL_PHI,
44
- )
45
  except Exception:
46
  PHI_MODE = False
47
  PERSIST_HISTORY = True
@@ -49,8 +38,11 @@ except Exception:
49
  REDACT_BEFORE_LLM = False
50
  ALLOW_EXTERNAL_PHI = True
51
 
 
 
 
52
 
53
- # ---------------------- Helpers (analysis logic selectively improved) ----------------------
54
  def load_markdown_text(filepath: str) -> str:
55
  try:
56
  with open(filepath, "r", encoding="utf-8") as f:
@@ -58,14 +50,12 @@ def load_markdown_text(filepath: str) -> str:
58
  except FileNotFoundError:
59
  return f"**Error:** Document `{os.path.basename(filepath)}` not found."
60
 
61
-
62
  def _sanitize_text(s: str) -> str:
63
  if not isinstance(s, str):
64
  return s
65
  # Remove control characters (except newline and tab)
66
  return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
67
 
68
-
69
  # Conservative PHI redaction patterns (only applied if PHI_MODE & REDACT_BEFORE_LLM are enabled)
70
  PHI_PATTERNS = [
71
  (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),
@@ -77,7 +67,6 @@ PHI_PATTERNS = [
77
  (re.compile(r"\b\d{5}(-\d{4})?\b"), "[REDACTED_ZIP]"),
78
  ]
79
 
80
-
81
  def redact_phi(text: str) -> str:
82
  if not isinstance(text, str):
83
  return text
@@ -86,7 +75,6 @@ def redact_phi(text: str) -> str:
86
  t = pat.sub(repl, t)
87
  return t
88
 
89
-
90
  def safe_log(event_name: str, meta: dict | None = None):
91
  # Avoid logging raw PHI or payloads
92
  try:
@@ -97,93 +85,46 @@ def safe_log(event_name: str, meta: dict | None = None):
97
  # Never raise from logging
98
  pass
99
 
100
-
101
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
102
- """
103
- IMPROVED: Generates a Python script using a universal "Map, Plan, Execute" approach.
104
- The AI first maps user concepts to data columns, then plans and executes the analysis.
105
- This ensures the logic is robust, dynamic, and not hardcoded to a specific dataset.
106
- """
 
 
 
 
 
 
107
  prompt_for_coder = f"""\
108
- You are an expert-level, universal Python data scientist. Your task is to dynamically analyze any provided dataset(s) to answer a user's business request.
 
109
 
110
- --- USER'S SCENARIO ---
111
- {user_scenario}
112
- --- END SCENARIO ---
113
 
114
  --- DATA SCHEMA ---
115
  {schema_context}
116
  --- END DATA SCHEMA ---
117
 
118
- You must follow a rigorous three-step "Map, Plan, Execute" process:
119
-
120
- **Step 1: Map Concepts to Data.**
121
- First, analyze the user's scenario and the provided data schemas. Identify the key business concepts (e.g., "hospitals", "sales", "regions") and metrics (e.g., "wait times", "revenue", "population"). Then, create a logical mapping from these concepts to the actual column names in the provided DataFrames. State these mappings clearly. This is the most critical step to ensure your analysis is relevant.
122
-
123
- **Step 2: Create a Detailed Analysis Plan.**
124
- Based on your mapping, formulate a step-by-step plan. Describe the data cleaning, merging, grouping, and aggregation steps needed to answer the user's request using the columns you identified.
125
-
126
- **Step 3: Write the Python Script.**
127
- Based on your plan, write a complete Python script.
128
-
129
- CRITICAL SCRIPTING RULES:
130
- 1. **DYNAMIC DATAFRAME IDENTIFICATION:** Your script MUST identify the correct DataFrame by checking for the presence of the columns you mapped in Step 1. Do NOT use hardcoded indices like `dfs[0]`.
131
- 2. **ROBUST SUCCESS CHECK (MOST IMPORTANT TO PREVENT AMBIGUITY ERROR):** After attempting to find a DataFrame, you MUST check for success by comparing the result to `None`. Do NOT use `if not my_dataframe:` as this is ambiguous.
132
- ```python
133
- # Good, robust code
134
- def find_df_by_cols(dfs, required_cols):
135
- for df in dfs:
136
- if all(col in df.columns for col in required_cols):
137
- return df
138
- return None
139
-
140
- primary_df = find_df_by_cols(dfs, ['user_id', 'transaction_amount'])
141
-
142
- # This is the correct way to check for failure
143
- if primary_df is None:
144
- raise ValueError("Could not find the primary dataframe based on its columns.")
145
- ```
146
- 3. **VERIFY COLUMN EXISTENCE:** Only use columns that you have explicitly identified and mapped.
147
- 4. **NO FILE READING:** The data is already in the `dfs` list.
148
- 5. **STRICTLY JSON OUTPUT:** The script's ONLY output must be a single JSON object.
149
- 6. **ROBUST & GENERIC:** Write robust code that can handle potential missing data (`errors='coerce'`, checking for `None`).
150
-
151
- Now, provide your response in the following format:
152
-
153
- **ANALYSIS PLAN:**
154
- ```text
155
- **1. Concept-to-Column Mapping:**
156
- - Concept: [e.g., 'Hospitals'] -> Mapped Column: [e.g., `Facility`]
157
- - Concept: [e.g., 'Surgical Wait Time'] -> Mapped Column: [e.g., `Surgery_Median`]
158
-
159
- **2. Step-by-Step Analysis:**
160
- 1. **Data Identification:** [e.g., "Define a helper function to find dataframes by checking for key columns..."]
161
- 2. **Data Cleaning:** [e.g., "Convert metric columns to numeric..."]
162
- 3. **Analysis Step A:** [e.g., "Group the primary dataframe by the 'Facility' column and calculate the mean of the 'Surgery_Median' column..."]
163
- 4. ...
164
-
165
- the final JSON object]
166
-
167
- # Your complete Python script starts here
168
- import pandas as pd
169
- import json
170
- import re
171
 
172
- # Main analysis logic...
173
- # ...
174
- # Final print statement
175
- print(json.dumps(final_data_structure, indent=4))```
176
  """
177
  generated_text = cohere_chat(prompt_for_coder)
178
- # This regex is more robust for extracting the final code block
179
- match = re2.search(r"PYTHON SCRIPT:\s*```python\n(.*?)```", generated_text, re2.DOTALL)
180
  if match:
181
  return match.group(1).strip()
182
- # Fallback if the structured format fails
183
- fallback_match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
184
- if fallback_match:
185
- return fallback_match.group(1).strip()
186
- return "print(json.dumps({'error': 'Failed to generate a valid Python script from the plan.'}))"
187
 
188
 
189
  def _generate_long_report(prompt: str) -> str:
@@ -203,15 +144,13 @@ def _generate_long_report(prompt: str) -> str:
203
 
204
 
205
  def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
206
- """
207
- IMPROVED: Generates a professional, structured report from the JSON data.
208
- The prompt guides the AI to synthesize insights in a standard consulting format,
209
- ensuring a high level of detail and actionable recommendations.
210
- """
211
  prompt_for_writer = f"""\
212
- You are an expert management consultant specializing in data-driven strategy. A Python script has been executed to extract key data points based on a user's request. Your task is to synthesize this raw data into a polished, comprehensive, and actionable report.
 
213
 
214
- --- USER'S ORIGINAL SCENARIO ---
 
 
215
  {user_scenario}
216
  --- END SCENARIO ---
217
 
@@ -219,37 +158,11 @@ You are an expert management consultant specializing in data-driven strategy. A
219
  {raw_data_json}
220
  --- END RAW DATA ---
221
 
222
- CRITICAL INSTRUCTIONS:
223
- You must write a final report that follows this exact structure:
224
-
225
- **### Executive Summary**
226
- - Start with a brief paragraph summarizing the core problem, key findings, and top recommendations. This should be a high-level overview for a leadership audience.
227
-
228
- **### 1. [First Key Finding, e.g., Hospitals with the Longest Wait Times]**
229
- - Present the relevant data in a Markdown table.
230
- - Write a short narrative interpreting the data. What does it mean? Are there any outliers? Why might these facilities have long waits (e.g., specialized care, rural location, capacity issues)?
231
-
232
- **### 2. [Second Key Finding, e.g., Specialties with the Longest Wait Times]**
233
- - Present the relevant data in a Markdown table.
234
- - Interpret the findings. Why are these specialties facing delays (e.g., specialist shortages, equipment needs)?
235
-
236
- **### 3. [Third Key Finding, e.g., Zone-Level Performance]**
237
- - Present the data in a table, including a comparison to a relevant average or baseline.
238
- - Analyze the geographic or systemic issues this data reveals.
239
-
240
- **### 4. [Fourth Key Finding, if applicable, e.g., Geographic Distribution]**
241
- - Synthesize location data with the wait-time findings.
242
- - Discuss the implications for patient equity, travel burdens, and access to care.
243
-
244
- **### 5. Recommendations for Resource Allocation**
245
- - Provide specific, actionable, and justified recommendations.
246
- - Structure them by category (e.g., by facility, by specialty, by zone).
247
- - For each recommendation, provide a clear rationale directly linked to the data findings above (e.g., "Allocate additional resources to Glace Bay Hospital because it is a rural facility in a high-wait zone, suggesting a capacity bottleneck.").
248
-
249
- **### Data Limitations**
250
- - Briefly mention any potential limitations of the analysis (e.g., missing data, use of proxies, case severity not included). This adds credibility to the report.
251
-
252
- Do not just repeat the JSON data. Your value is in interpreting the numbers, connecting the dots between different findings, and providing clear, data-backed strategic advice.
253
  """
254
  return _generate_long_report(prompt_for_writer)
255
 
@@ -284,7 +197,7 @@ def handle(user_msg: str, files: list, yield_update) -> str:
284
  file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
285
 
286
  if file_paths:
287
- # CSV analysis path
288
  dataframes, schema_parts = [], []
289
  for i, p in enumerate(file_paths):
290
  if p.endswith(".csv"):
@@ -293,21 +206,8 @@ def handle(user_msg: str, files: list, yield_update) -> str:
293
  except UnicodeDecodeError:
294
  df = pd.read_csv(p, encoding="latin1")
295
  dataframes.append(df)
296
- # --- IMPROVEMENT: ENRICHED SCHEMA CONTEXT ---
297
- schema_buffer = io.StringIO()
298
- df.info(buf=schema_buffer)
299
- schema_info = schema_buffer.getvalue()
300
  schema_parts.append(
301
- f"""DataFrame `dfs[{i}]` (`{os.path.basename(p)}`):
302
- ### Head
303
- {df.head().to_markdown()}
304
-
305
- ### Schema and Data Types
306
- {schema_info}
307
-
308
- ### Summary Statistics
309
- {df.describe(include='all').to_markdown()}
310
- """
311
  )
312
 
313
  if not dataframes:
@@ -316,13 +216,16 @@ def handle(user_msg: str, files: list, yield_update) -> str:
316
  schema_context = "\n".join(schema_parts)
317
 
318
  # If external PHI is not allowed, use redacted prompt; otherwise use original
319
- prompt_for_code = (
320
- redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
321
- )
322
- yield_update("```\n🤖 Generating aligned analysis script...\n```")
 
323
  analysis_script = _create_python_script(prompt_for_code, schema_context)
324
 
325
- yield_update("```\n⚙️ Executing script to extract raw data...\n```")
 
 
326
  execution_namespace = {"dfs": dataframes, "pd": pd, "re": re, "json": json}
327
  output_buffer = io.StringIO()
328
 
@@ -336,24 +239,21 @@ def handle(user_msg: str, files: list, yield_update) -> str:
336
  f"```python\n{analysis_script}\n```"
337
  )
338
 
339
- yield_update("```\n✍️ Synthesizing final comprehensive report...\n```")
340
- writer_input = (
341
- redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
342
- )
343
  final_report = _generate_final_report(writer_input, raw_data_output)
344
  return _sanitize_text(final_report)
345
  else:
346
  # Pure chat path
347
- chat_input = (
348
- redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
349
- )
350
  prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
351
  return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
352
 
353
  except Exception as e:
354
  tb = traceback.format_exc()
355
  safe_log("app_error", {"err": str(e)})
356
- return ("A critical error occurred. Please contact your administrator." if PHI_MODE else f"A critical error occurred: {e}")
357
 
358
 
359
  PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
@@ -361,6 +261,7 @@ TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
361
 
362
 
363
  # ---------------------- Sleek UI assets (CSS/JS only) ----------------------
 
364
  SLEEK_CSS = """
365
  /* Full-bleed, modern look */
366
  :root, body, #root, .gradio-container { height: 100%; }
@@ -441,7 +342,7 @@ function rs_toggle_stt(elemId){
441
  __rs_rec.onresult = (ev) => {
442
  let t = "";
443
  for (let i = ev.resultIndex; i < ev.results.length; i++){
444
- t += ev.results[i][0].transcript;
445
  }
446
  box.value = (base + " " + t).trim();
447
  box.dispatchEvent(new Event("input", { bubbles: true }));
@@ -454,14 +355,16 @@ function rs_toggle_stt(elemId){
454
 
455
 
456
  # ---------------------- Sleek UI (with fixed State wiring) ----------------------
 
457
  with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
458
  # Persistent in-memory history component (fixes list/_id error)
459
  assessment_history = gr.State([])
460
 
461
  # Header
462
  with gr.Row(elem_classes=["header"]):
463
- gr.Markdown("<h1>Clarity Ops Augmented Decision Support</h1>")
464
- pill = ("PHI Mode ON · history off" if (PHI_MODE and not PERSIST_HISTORY) else "PHI Mode ON" if PHI_MODE else "PHI Mode OFF")
 
465
  gr.Markdown(f"<span class='badge'>{pill}</span>")
466
 
467
  # Main layout
@@ -483,6 +386,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
483
  elem_id="prompt_box",
484
  autofocus=True,
485
  )
 
486
  with gr.Row(elem_classes=["actions"]):
487
  send_btn = gr.Button("▶️ Run Analysis", variant="primary")
488
  clear_btn = gr.Button("🧼 Clear")
@@ -491,66 +395,72 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
491
  gr.Markdown("<div class='voice-hint'>Click Voice to start/stop dictation into the prompt box.</div>")
492
  ping_btn = gr.Button("🔌 Ping Cohere")
493
  ping_out = gr.Markdown()
 
494
  gr.Markdown("<div class='hr'></div>")
495
  if PHI_MODE:
496
  gr.Markdown(
497
  "⚠️ **PHI Mode:** History persistence is disabled by default. Avoid unnecessary identifiers."
498
  )
 
499
  with gr.Accordion("Privacy & Terms", open=False):
500
  gr.Markdown(PRIVACY_POLICY_TEXT)
501
  gr.Markdown("<div class='hr'></div>")
502
  gr.Markdown(TERMS_OF_SERVICE_TEXT)
 
503
  # Right panel
504
  with gr.Column(elem_classes=["right"]):
505
  with gr.Tabs(elem_classes=["tabs"]):
506
  with gr.TabItem("Current Assessment", id=0, elem_classes=["tabitem"]):
507
  with gr.Column(elem_id="chatbot_container"):
508
- chat_history_output = gr.Chatbot(
509
- label="Analysis Output", type="messages"
510
- )
511
  with gr.TabItem("Assessment History", id=1, elem_classes=["tabitem"]):
512
  gr.Markdown("### Review Past Assessments")
513
- history_dropdown = gr.Dropdown(
514
- label="Select an assessment to review", choices=[]
515
- )
516
  history_display = gr.Markdown(label="Selected Assessment Details")
517
 
518
  # Inject voice-to-text helper
519
  gr.HTML(VOICE_STT_HTML)
520
 
521
  # --------- Event logic (unchanged analysis flow) ----------
522
- def run_analysis_wrapper(
523
- prompt, files, chat_history_list, history_state_list
524
- ):
525
  if not prompt:
526
  gr.Warning("Please enter a prompt.")
527
  yield chat_history_list, history_state_list, gr.update()
528
  return
529
 
 
530
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
531
 
532
  # Optional progress callback (not streaming in this UI)
533
  def dummy_update(message: str):
534
  pass
535
 
 
536
  thinking_message = _append_msg(
537
  chat_with_user_msg,
538
  "assistant",
539
- "```\n🤖 Generating and executing analysis... Please wait.\n```",
 
 
540
  )
541
  yield thinking_message, history_state_list, gr.update()
542
 
 
543
  ai_response_text = handle(prompt, files, dummy_update)
544
 
 
545
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
546
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
547
 
 
548
  file_names: List[str] = []
549
  if files:
550
  file_names = [
551
  os.path.basename(f.name if hasattr(f, "name") else f) for f in files
552
  ]
553
 
 
554
  new_entry = {
555
  "id": timestamp,
556
  "prompt": prompt,
@@ -559,17 +469,13 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
559
  "chat_history": final_chat,
560
  }
561
 
 
562
  if PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0)):
563
- updated_history: List[Dict[str, Any]] = (history_state_list or []) + [
564
- new_entry
565
- ]
566
  else:
567
  updated_history = history_state_list or []
568
 
569
- history_labels = [
570
- f"{item['id']} - {item['prompt'][:40]}..."
571
- for item in updated_history
572
- ]
573
 
574
  yield final_chat, updated_history, gr.update(choices=history_labels)
575
 
@@ -577,7 +483,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
577
  if not selection or not history_state_list:
578
  return ""
579
  try:
580
- selected_id = selection.split(" - ", 1)[0]
581
  except Exception:
582
  selected_id = selection
583
 
@@ -633,7 +539,5 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
633
 
634
  if __name__ == "__main__":
635
  if not os.getenv("COHERE_API_KEY"):
636
- print(
637
- "🔴 COHERE_API_KEY environment variable not set. Application may not function correctly."
638
- )
639
  demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))
 
1
+ # working app.py Nov 15th
2
+
3
  # app.py
 
4
  # Universal AI Data Analyst with:
5
+ # - Unchanged analysis & assessment logic
6
+ # - Fixed Gradio event wiring (uses gr.State for history)
7
+ # - Triple-quoted progress strings (no unterminated literals)
8
+ # - Sleek full-width UI and Voice-to-Text (browser Web Speech API)
9
+ # - Optional HIPAA flags (fallback defaults if not present in settings.py)
10
  from __future__ import annotations
11
 
12
  import io
 
21
  import pandas as pd
22
  import regex as re2
23
  import re
 
24
  from langchain_cohere import ChatCohere # noqa: F401
 
25
  from settings import (
26
  GENERAL_CONVERSATION_PROMPT,
27
  COHERE_MODEL_PRIMARY,
28
+ COHERE_TIMEOUT_S, # noqa: F401
29
+ USE_OPEN_FALLBACKS # noqa: F401
30
  )
 
 
 
 
31
  # Try to import optional HIPAA flags; fall back to safe defaults if not defined.
32
  try:
33
+ from settings import PHI_MODE, PERSIST_HISTORY, HISTORY_TTL_DAYS, REDACT_BEFORE_LLM, ALLOW_EXTERNAL_PHI
 
 
 
 
 
 
34
  except Exception:
35
  PHI_MODE = False
36
  PERSIST_HISTORY = True
 
38
  REDACT_BEFORE_LLM = False
39
  ALLOW_EXTERNAL_PHI = True
40
 
41
+ from audit_log import log_event
42
+ from privacy import safety_filter, refusal_reply
43
+ from llm_router import cohere_chat, _co_client, cohere_embed
44
 
45
+ # ---------------------- Helpers (analysis logic unchanged) ----------------------
46
  def load_markdown_text(filepath: str) -> str:
47
  try:
48
  with open(filepath, "r", encoding="utf-8") as f:
 
50
  except FileNotFoundError:
51
  return f"**Error:** Document `{os.path.basename(filepath)}` not found."
52
 
 
53
  def _sanitize_text(s: str) -> str:
54
  if not isinstance(s, str):
55
  return s
56
  # Remove control characters (except newline and tab)
57
  return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
58
 
 
59
  # Conservative PHI redaction patterns (only applied if PHI_MODE & REDACT_BEFORE_LLM are enabled)
60
  PHI_PATTERNS = [
61
  (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),
 
67
  (re.compile(r"\b\d{5}(-\d{4})?\b"), "[REDACTED_ZIP]"),
68
  ]
69
 
 
70
  def redact_phi(text: str) -> str:
71
  if not isinstance(text, str):
72
  return text
 
75
  t = pat.sub(repl, t)
76
  return t
77
 
 
78
  def safe_log(event_name: str, meta: dict | None = None):
79
  # Avoid logging raw PHI or payloads
80
  try:
 
85
  # Never raise from logging
86
  pass
87
 
 
88
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
89
+ EXPERT_ANALYTICAL_GUIDELINES = """
90
+ --- EXPERT ANALYTICAL GUIDELINES ---
91
+ When writing your script, you MUST follow these expert business rules:
92
+ 1. **Linking Datasets Rule:** If you need to connect facilities to health zones when the 'zone' column is not in the facility list,
93
+ you must first identify the high-priority zone from the beds data, then find the major city (by facility count) in the facility list,
94
+ and *then* assess that city's capacity. Do not try to filter the facility list by a 'zone' column if it does not exist in the schema.
95
+ 2. **Prioritization Rule:** To prioritize locations, you MUST combine the most recent population data with specific high-risk health indicators
96
+ to create a multi-factor risk score.
97
+ 3. **Capacity Calculation Rule:** For capacity over a 3-month window, assume **60 working days**.
98
+ 4. **Cost Calculation Rule:** Sum 'Startup cost' and 'Ongoing cost' per person before multiplying.
99
+ """
100
  prompt_for_coder = f"""\
101
+ You are an expert Python data scientist. Your job is to write a script to extract the data needed to answer the user's request.
102
+ You have dataframes in a list `dfs`.
103
 
104
+ {EXPERT_ANALYTICAL_GUIDELINES}
 
 
105
 
106
  --- DATA SCHEMA ---
107
  {schema_context}
108
  --- END DATA SCHEMA ---
109
 
110
+ CRITICAL RULES:
111
+ 1. **DO NOT READ FILES:** You MUST NOT include `pd.read_csv`. The data is ALREADY loaded in the `dfs` variable. You MUST use this variable. Failure to do so will cause a fatal error.
112
+ 2. **JSON OUTPUT ONLY:** Your script's ONLY output must be a single JSON object printed to stdout containing the raw data findings.
113
+ 3. **BE PRECISE:** Use the exact, case-sensitive column names from the schema and robustly clean strings (`re.sub()`) before converting to numbers.
114
+ 4. **JSON SERIALIZATION:** Before adding data to your final dictionary for JSON conversion, you MUST convert any pandas-specific types (like `int64`) to standard Python types using `.item()` for single values or `.tolist()` for lists.
115
+
116
+ --- USER'S SCENARIO ---
117
+ {user_scenario}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
+ --- PYTHON SCRIPT ---
120
+ Now, write the complete Python script that performs the analysis and prints a single, serializable JSON object.
121
+ ```python
 
122
  """
123
  generated_text = cohere_chat(prompt_for_coder)
124
+ match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
 
125
  if match:
126
  return match.group(1).strip()
127
+ return "print(json.dumps({'error': 'Failed to generate a valid Python script.'}))"
 
 
 
 
128
 
129
 
130
  def _generate_long_report(prompt: str) -> str:
 
144
 
145
 
146
  def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
 
 
 
 
 
147
  prompt_for_writer = f"""\
148
+ You are an expert management consultant and data analyst.
149
+ A data science script has run to extract key findings. You have the user's original request and the raw JSON data.
150
 
151
+ Your task is to synthesize these raw findings into a single, comprehensive, and professional report that directly answers all of the user's questions with detailed justifications.
152
+
153
+ --- USER'S ORIGINAL SCENARIO & DELIVERABLES ---
154
  {user_scenario}
155
  --- END SCENARIO ---
156
 
 
158
  {raw_data_json}
159
  --- END RAW DATA ---
160
 
161
+ Now, write the final, polished report. The report MUST:
162
+ 1. Follow the "Expected Output Format" requested by the user.
163
+ 2. Use tables, bullet points, and DETAILED narrative justifications for each recommendation.
164
+ 3. Synthesize the raw data into actionable insights. Do not just copy the raw numbers; interpret them.
165
+ 4. Ensure you fully address ALL evaluation questions, especially the final recommendations.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  """
167
  return _generate_long_report(prompt_for_writer)
168
 
 
197
  file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
198
 
199
  if file_paths:
200
+ # CSV analysis path (unchanged)
201
  dataframes, schema_parts = [], []
202
  for i, p in enumerate(file_paths):
203
  if p.endswith(".csv"):
 
206
  except UnicodeDecodeError:
207
  df = pd.read_csv(p, encoding="latin1")
208
  dataframes.append(df)
 
 
 
 
209
  schema_parts.append(
210
+ f"DataFrame `dfs[{i}]` (`{os.path.basename(p)}`):\n{df.head().to_markdown()}\n"
 
 
 
 
 
 
 
 
 
211
  )
212
 
213
  if not dataframes:
 
216
  schema_context = "\n".join(schema_parts)
217
 
218
  # If external PHI is not allowed, use redacted prompt; otherwise use original
219
+ prompt_for_code = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
220
+
221
+ yield_update("""```
222
+ 馃 Generating aligned analysis script...
223
+ ```""")
224
  analysis_script = _create_python_script(prompt_for_code, schema_context)
225
 
226
+ yield_update("""```
227
+ ⚙️ Executing script to extract raw data...
228
+ ```""")
229
  execution_namespace = {"dfs": dataframes, "pd": pd, "re": re, "json": json}
230
  output_buffer = io.StringIO()
231
 
 
239
  f"```python\n{analysis_script}\n```"
240
  )
241
 
242
+ yield_update("""```
243
+ ✍️ Synthesizing final comprehensive report...```""")
244
+ writer_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
 
245
  final_report = _generate_final_report(writer_input, raw_data_output)
246
  return _sanitize_text(final_report)
247
  else:
248
  # Pure chat path
249
+ chat_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
 
 
250
  prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
251
  return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
252
 
253
  except Exception as e:
254
  tb = traceback.format_exc()
255
  safe_log("app_error", {"err": str(e)})
256
+ return "A critical error occurred. Please contact your administrator." if PHI_MODE else f"A critical error occurred: {e}"
257
 
258
 
259
  PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
 
261
 
262
 
263
  # ---------------------- Sleek UI assets (CSS/JS only) ----------------------
264
+
265
  SLEEK_CSS = """
266
  /* Full-bleed, modern look */
267
  :root, body, #root, .gradio-container { height: 100%; }
 
342
  __rs_rec.onresult = (ev) => {
343
  let t = "";
344
  for (let i = ev.resultIndex; i < ev.results.length; i++){
345
+ t += ev.results[i][0].transcript;
346
  }
347
  box.value = (base + " " + t).trim();
348
  box.dispatchEvent(new Event("input", { bubbles: true }));
 
355
 
356
 
357
  # ---------------------- Sleek UI (with fixed State wiring) ----------------------
358
+
359
  with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
360
  # Persistent in-memory history component (fixes list/_id error)
361
  assessment_history = gr.State([])
362
 
363
  # Header
364
  with gr.Row(elem_classes=["header"]):
365
+ gr.Markdown("<h1>Clarity Ops Augmented Decision Support</h1>")
366
+ pill = "PHI Mode ON · history off" if (PHI_MODE and not PERSIST_HISTORY) else \
367
+ "PHI Mode ON" if PHI_MODE else "PHI Mode OFF"
368
  gr.Markdown(f"<span class='badge'>{pill}</span>")
369
 
370
  # Main layout
 
386
  elem_id="prompt_box",
387
  autofocus=True,
388
  )
389
+
390
  with gr.Row(elem_classes=["actions"]):
391
  send_btn = gr.Button("鈻讹笍 Run Analysis", variant="primary")
392
  clear_btn = gr.Button("馃Ч Clear")
 
395
  gr.Markdown("<div class='voice-hint'>Click Voice to start/stop dictation into the prompt box.</div>")
396
  ping_btn = gr.Button("🔌 Ping Cohere")
397
  ping_out = gr.Markdown()
398
+
399
  gr.Markdown("<div class='hr'></div>")
400
  if PHI_MODE:
401
  gr.Markdown(
402
  "⚠️ **PHI Mode:** History persistence is disabled by default. Avoid unnecessary identifiers."
403
  )
404
+
405
  with gr.Accordion("Privacy & Terms", open=False):
406
  gr.Markdown(PRIVACY_POLICY_TEXT)
407
  gr.Markdown("<div class='hr'></div>")
408
  gr.Markdown(TERMS_OF_SERVICE_TEXT)
409
+
410
  # Right panel
411
  with gr.Column(elem_classes=["right"]):
412
  with gr.Tabs(elem_classes=["tabs"]):
413
  with gr.TabItem("Current Assessment", id=0, elem_classes=["tabitem"]):
414
  with gr.Column(elem_id="chatbot_container"):
415
+ chat_history_output = gr.Chatbot(label="Analysis Output", type="messages")
 
 
416
  with gr.TabItem("Assessment History", id=1, elem_classes=["tabitem"]):
417
  gr.Markdown("### Review Past Assessments")
418
+ history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
 
 
419
  history_display = gr.Markdown(label="Selected Assessment Details")
420
 
421
  # Inject voice-to-text helper
422
  gr.HTML(VOICE_STT_HTML)
423
 
424
  # --------- Event logic (unchanged analysis flow) ----------
425
+
426
+ def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
 
427
  if not prompt:
428
  gr.Warning("Please enter a prompt.")
429
  yield chat_history_list, history_state_list, gr.update()
430
  return
431
 
432
+ # Append user's message
433
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
434
 
435
  # Optional progress callback (not streaming in this UI)
436
  def dummy_update(message: str):
437
  pass
438
 
439
+ # Thinking bubble
440
  thinking_message = _append_msg(
441
  chat_with_user_msg,
442
  "assistant",
443
+ """```
444
+ 馃 Generating and executing analysis... Please wait.
445
+ ```""",
446
  )
447
  yield thinking_message, history_state_list, gr.update()
448
 
449
+ # Run analysis/chat
450
  ai_response_text = handle(prompt, files, dummy_update)
451
 
452
+ # Append final assistant response
453
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
454
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
455
 
456
+ # Capture filenames (if any)
457
  file_names: List[str] = []
458
  if files:
459
  file_names = [
460
  os.path.basename(f.name if hasattr(f, "name") else f) for f in files
461
  ]
462
 
463
+ # Build history record
464
  new_entry = {
465
  "id": timestamp,
466
  "prompt": prompt,
 
469
  "chat_history": final_chat,
470
  }
471
 
472
+ # Respect PHI/history flags
473
  if PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0)):
474
+ updated_history: List[Dict[str, Any]] = (history_state_list or []) + [new_entry]
 
 
475
  else:
476
  updated_history = history_state_list or []
477
 
478
+ history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
 
 
 
479
 
480
  yield final_chat, updated_history, gr.update(choices=history_labels)
481
 
 
483
  if not selection or not history_state_list:
484
  return ""
485
  try:
486
+ selected_id = selection.split(" - ", 1)[0]
487
  except Exception:
488
  selected_id = selection
489
 
 
539
 
540
  if __name__ == "__main__":
541
  if not os.getenv("COHERE_API_KEY"):
542
+ print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
 
 
543
  demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))