Rajan Sharma commited on
Commit
4f6005b
·
verified ·
1 Parent(s): 48963bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -402
app.py CHANGED
@@ -1,66 +1,60 @@
1
  # app.py
2
-
3
- from __future__ import annotations
4
- import os
5
- import io
6
- import json
7
- import traceback
8
- from contextlib import redirect_stdout
9
- from typing import List, Dict, Any
10
-
11
- import gradio as gr
12
- import pandas a# app.py
13
  #
14
- # This file defines a Gradio-based AI data analyst application with
15
- # support for maintaining a persistent chat and assessment history.
16
- # The history feature preserves each chat/assessment session (prompt,
17
- # associated files, generated response, and full conversation) so that
18
- # users can revisit past analyses without losing any existing
19
- # functionality. A dropdown selector in the "Assessment History" tab
20
- # allows users to select and review previous sessions, including the
21
- # complete chat transcript.
22
 
23
  from __future__ import annotations
24
- import os
25
  import io
26
  import json
 
27
  import traceback
28
  from contextlib import redirect_stdout
29
- from typing import List, Dict, Any
 
30
 
31
  import gradio as gr
32
  import pandas as pd
33
- from datetime import datetime
34
  import regex as re2
35
  import re
36
 
37
- from langchain_cohere import ChatCohere
38
 
39
  from settings import (
40
  GENERAL_CONVERSATION_PROMPT,
41
- COHERE_MODEL_PRIMARY, COHERE_TIMEOUT_S, USE_OPEN_FALLBACKS
 
 
42
  )
43
  from audit_log import log_event
44
  from privacy import safety_filter, refusal_reply
45
  from llm_router import cohere_chat, _co_client, cohere_embed
46
 
 
47
  def load_markdown_text(filepath: str) -> str:
48
  try:
49
- with open(filepath, 'r', encoding='utf-8') as f:
50
  return f.read()
51
  except FileNotFoundError:
52
  return f"**Error:** Document `{os.path.basename(filepath)}` not found."
53
 
 
54
  def _sanitize_text(s: str) -> str:
55
- if not isinstance(s, str): return s
56
- return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
 
 
 
57
 
58
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
59
  EXPERT_ANALYTICAL_GUIDELINES = """
60
  --- EXPERT ANALYTICAL GUIDELINES ---
61
  When writing your script, you MUST follow these expert business rules:
62
- 1. **Linking Datasets Rule:** If you need to connect facilities to health zones when the 'zone' column is not in the facility list, you must first identify the high-priority zone from the beds data, then find the major city (by facility count) in the facility list, and *then* assess that city's capacity. Do not try to filter the facility list by a 'zone' column if it does not exist in the schema.
63
- 2. **Prioritization Rule:** To prioritize locations, you MUST combine the most recent population data with specific high-risk health indicators to create a multi-factor risk score.
 
 
 
64
  3. **Capacity Calculation Rule:** For capacity over a 3-month window, assume **60 working days**.
65
  4. **Cost Calculation Rule:** Sum 'Startup cost' and 'Ongoing cost' per person before multiplying.
66
  """
@@ -90,23 +84,27 @@ Now, write the complete Python script that performs the analysis and prints a si
90
  """
91
  generated_text = cohere_chat(prompt_for_coder)
92
  match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
93
- if match: return match.group(1).strip()
 
94
  return "print(json.dumps({'error': 'Failed to generate a valid Python script.'}))"
95
 
 
96
  def _generate_long_report(prompt: str) -> str:
97
  try:
98
  client = _co_client()
99
- if not client: return "Error: Cohere client not initialized."
 
100
  response = client.chat(
101
  model=COHERE_MODEL_PRIMARY,
102
  message=prompt,
103
- max_tokens=4096
104
  )
105
  return response.text
106
  except Exception as e:
107
  log_event("cohere_chat_error", None, {"err": str(e)})
108
  return f"Error during final report generation: {e}"
109
 
 
110
  def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
111
  prompt_for_writer = f"""\
112
  You are an expert management consultant and data analyst.
@@ -130,57 +128,72 @@ Now, write the final, polished report. The report MUST:
130
  """
131
  return _generate_long_report(prompt_for_writer)
132
 
 
133
  def _append_msg(h: List[Dict[str, str]], r: str, c: str) -> List[Dict[str, str]]:
134
  return (h or []) + [{"role": r, "content": c}]
135
 
 
136
  def ping_cohere() -> str:
137
  try:
138
  cli = _co_client()
139
- if not cli: return "Cohere client not initialized."
 
140
  vecs = cohere_embed(["hello", "world"])
141
  return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY})" if vecs else "Cohere reachable."
142
- except Exception as e: return f"Cohere ping failed: {e}"
 
 
143
 
144
  def handle(user_msg: str, files: list, yield_update) -> str:
145
  try:
146
  safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
147
- if blocked_in: return refusal_reply(reason_in)
 
148
 
149
  file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
150
 
151
  if file_paths:
152
  dataframes, schema_parts = [], []
153
  for i, p in enumerate(file_paths):
154
- if p.endswith('.csv'):
155
- try: df = pd.read_csv(p)
156
- except UnicodeDecodeError: df = pd.read_csv(p, encoding='latin1')
 
 
157
  dataframes.append(df)
158
- schema_parts.append(f"DataFrame `dfs[{i}]` (`{os.path.basename(p)}`):\n{df.head().to_markdown()}\n")
 
 
159
 
160
- if not dataframes: return "Please upload at least one CSV file."
 
161
 
162
  schema_context = "\n".join(schema_parts)
163
 
164
- yield_update("```
165
  🧠 Generating aligned analysis script...
166
- ```" )
167
  analysis_script = _create_python_script(safe_in, schema_context)
168
 
169
- yield_update("```
170
  ⚙️ Executing script to extract raw data...
171
- ```" )
172
  execution_namespace = {"dfs": dataframes, "pd": pd, "re": re, "json": json}
173
  output_buffer = io.StringIO()
174
 
175
  try:
176
- with redirect_stdout(output_buffer): exec(analysis_script, execution_namespace)
 
177
  raw_data_output = output_buffer.getvalue()
178
  except Exception as e:
179
- return f"An error occurred executing the script: {e}\n\nGenerated Script:\n```python\n{analysis_script}\n```"
 
 
 
180
 
181
- yield_update("```
182
  ✍️ Synthesizing final comprehensive report...
183
- ```" )
184
  final_report = _generate_final_report(safe_in, raw_data_output)
185
  return _sanitize_text(final_report)
186
  else:
@@ -192,19 +205,21 @@ def handle(user_msg: str, files: list, yield_update) -> str:
192
  log_event("app_error", None, {"err": str(e), "tb": tb})
193
  return f"A critical error occurred: {e}"
194
 
 
195
  PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
196
  TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
197
 
198
  with gr.Blocks(theme="soft", css="style.css") as demo:
199
- # Maintain a persistent history of past assessments or chat sessions.
200
- # Each entry in `assessment_history` is a dictionary containing:
201
- # - id: timestamp of the session (string)
202
- # - prompt: the initial user prompt (string)
203
- # - files: list of filenames uploaded by the user (list of str)
204
- # - response: the assistant's final response text (string)
205
- # - chat_history: the full chat transcript as a list of message dictionaries
206
  assessment_history = gr.State([])
207
 
 
208
  with gr.Group(visible=False) as privacy_modal:
209
  with gr.Blocks():
210
  gr.Markdown(PRIVACY_POLICY_TEXT)
@@ -215,13 +230,25 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
215
  gr.Markdown(TERMS_OF_SERVICE_TEXT)
216
  close_terms_btn = gr.Button("Close")
217
 
 
218
  gr.Markdown("# Universal AI Data Analyst")
219
  with gr.Row(variant="panel"):
220
  with gr.Column(scale=1):
221
  gr.Markdown("## New Assessment")
222
- gr.Markdown("<p style='font-size:0.9rem; color: #6C757D;'>Upload CSVs for data analysis, or just enter a prompt to chat.</p>")
223
- files_input = gr.Files(label="Upload Data Files (.csv)", file_count="multiple", type="filepath", file_types=[".csv"])
224
- prompt_input = gr.Textbox(label="Prompt", placeholder="Paste your scenario or question here.", lines=15)
 
 
 
 
 
 
 
 
 
 
 
225
  with gr.Row():
226
  send_btn = gr.Button("▶️ Send / Run Analysis", variant="primary", scale=2)
227
  clear_btn = gr.Button("🗑️ Clear")
@@ -230,67 +257,66 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
230
  with gr.Column(scale=2):
231
  with gr.Tabs():
232
  with gr.TabItem("Current Assessment", id=0):
233
- chat_history_output = gr.Chatbot(label="Analysis Output", type="messages", height=600)
 
 
234
  with gr.TabItem("Assessment History", id=1):
235
  gr.Markdown("## Review Past Assessments")
236
- history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
237
- # Use Markdown to display details of the selected assessment, including chat transcript.
 
238
  history_display = gr.Markdown(label="Selected Assessment Details")
239
- with gr.Row(): gr.Markdown("---")
 
 
 
240
  with gr.Row():
241
  privacy_link = gr.Button("Privacy Policy", variant="link")
242
  terms_link = gr.Button("Terms of Service", variant="link")
243
 
 
 
244
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
245
- """Handle a new user prompt and update chat and assessment history.
246
-
247
- This wrapper manages the entire lifecycle of a chat or data analysis:
248
- 1. Append the user's message to the ongoing conversation.
249
- 2. Dispatch the request to the AI handler and receive a response.
250
- 3. Construct a new session entry (with timestamp, prompt, files, response and full chat).
251
- 4. Update the persistent history and dropdown choices.
252
-
253
- Args:
254
- prompt (str): The current user prompt.
255
- files (list): List of file paths selected by the user.
256
- chat_history_list (list): Current chat conversation as a list of message dicts.
257
- history_state_list (list): List of past assessment/chat sessions.
258
-
259
- Returns:
260
- tuple: Updated chat history list, updated history list, and updated dropdown choices.
261
  """
262
  if not prompt:
263
  gr.Warning("Please enter a prompt.")
264
  yield chat_history_list, history_state_list, gr.update()
265
  return
266
 
267
- # Append the user's message to the existing chat history
268
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
269
 
270
- # Provide immediate feedback to the user that analysis is in progress
271
- def dummy_update(message):
272
- # This callback is intentionally left blank; progress messages are not streamed here
273
  pass
274
 
275
- thinking_message = _append_msg(chat_with_user_msg, "assistant", "```
 
 
 
 
276
  🧠 Generating and executing analysis... Please wait.
277
- ```" )
278
- # Yield intermediate state showing a thinking message
279
  yield thinking_message, history_state_list, gr.update()
280
 
281
- # Run the AI handler (analysis or chat) to get the assistant's response
282
  ai_response_text = handle(prompt, files, dummy_update)
283
 
284
- # Append the assistant's final response to the chat conversation
285
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
286
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
287
 
288
- # Capture uploaded filenames (if any)
289
  file_names: List[str] = []
290
  if files:
291
- file_names = [os.path.basename(f.name if hasattr(f, 'name') else f) for f in files]
 
 
292
 
293
- # Build a new entry for the assessment/chat history
294
  new_entry = {
295
  "id": timestamp,
296
  "prompt": prompt,
@@ -299,324 +325,45 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
299
  "chat_history": final_chat,
300
  }
301
 
302
- # Update the history state (initialize if necessary)
303
  updated_history: List[Dict[str, Any]] = (history_state_list or []) + [new_entry]
 
 
 
304
 
305
- # Build dropdown labels showing timestamp and a snippet of the prompt
306
- history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
307
-
308
- # Return the final chat, updated history, and updated dropdown choices
309
  yield final_chat, updated_history, gr.update(choices=history_labels)
310
 
311
  def view_history(selection: str, history_state_list: List[Dict[str, Any]]) -> str:
312
- """Render the details of a selected past assessment or chat session.
313
-
314
- The selection string contains the timestamp and prompt snippet separated by ' - '.
315
- This function locates the corresponding history entry and returns a formatted
316
- Markdown string with all relevant details, including the full chat transcript.
317
-
318
- Args:
319
- selection (str): The selected dropdown label of the form 'timestamp - prompt...'.
320
- history_state_list (list): The list of stored history entries.
321
-
322
- Returns:
323
- str: Markdown-formatted details of the selected session.
324
  """
325
  if not selection or not history_state_list:
326
  return ""
327
- # Extract the unique ID (timestamp) from the dropdown label
328
- # The dropdown label is of the form "timestamp - snippet..."
329
  try:
330
  selected_id = selection.split(" - ", 1)[0]
331
  except Exception:
332
  selected_id = selection
333
- # Find the matching session in the history
334
- selected_assessment = next((item for item in history_state_list if item.get("id") == selected_id), None)
335
-
336
- if selected_assessment:
337
- # Prepare file list display
338
- file_list = selected_assessment.get('files', [])
339
- file_list_md = "\n- ".join(file_list) if file_list else "*(no files uploaded)*"
340
-
341
- # Prepare chat history display: show each role/message pair on its own line
342
- chat_entries = selected_assessment.get("chat_history", [])
343
- chat_md_lines = []
344
- for msg in chat_entries:
345
- role = msg.get("role", "").capitalize()
346
- content = msg.get("content", "")
347
- chat_md_lines.append(f"**{role}:** {content}")
348
- chat_md = "\n\n".join(chat_md_lines)
349
-
350
- return f"""### Assessment from: {selected_assessment['id']}
351
- **Files Used:**\n- {file_list_md}
352
- ---
353
- **Original Prompt:**\n> {selected_assessment['prompt']}
354
- ---
355
- **AI Generated Response:**\n{selected_assessment['response']}
356
- ---
357
- **Chat Transcript:**\n{chat_md}
358
- """
359
- return "Could not find the selected assessment."
360
-
361
- # Register interaction handlers
362
- send_btn.click(
363
- run_analysis_wrapper,
364
- inputs=[prompt_input, files_input, chat_history_output, assessment_history],
365
- outputs=[chat_history_output, assessment_history, history_dropdown]
366
- )
367
- history_dropdown.change(
368
- view_history,
369
- inputs=[history_dropdown, assessment_history],
370
- outputs=[history_display]
371
- )
372
- clear_btn.click(
373
- lambda: (None, None, []),
374
- outputs=[prompt_input, files_input, chat_history_output]
375
- )
376
- ping_btn.click(ping_cohere, outputs=[ping_out])
377
- privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])
378
- close_privacy_btn.click(lambda: gr.update(visible=False), outputs=[privacy_modal])
379
- terms_link.click(lambda: gr.update(visible=True), outputs=[terms_modal])
380
- close_terms_btn.click(lambda: gr.update(visible=False), outputs=[terms_modal])
381
-
382
- if __name__ == "__main__":
383
- if not os.getenv("COHERE_API_KEY"):
384
- print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
385
- demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))s pd
386
- from datetime import datetime
387
- import regex as re2
388
- import re
389
-
390
- from langchain_cohere import ChatCohere
391
-
392
- from settings import (
393
- GENERAL_CONVERSATION_PROMPT,
394
- COHERE_MODEL_PRIMARY, COHERE_TIMEOUT_S, USE_OPEN_FALLBACKS
395
- )
396
- from audit_log import log_event
397
- from privacy import safety_filter, refusal_reply
398
- from llm_router import cohere_chat, _co_client, cohere_embed
399
-
400
- def load_markdown_text(filepath: str) -> str:
401
- try:
402
- with open(filepath, 'r', encoding='utf-8') as f:
403
- return f.read()
404
- except FileNotFoundError:
405
- return f"**Error:** Document `{os.path.basename(filepath)}` not found."
406
-
407
- def _sanitize_text(s: str) -> str:
408
- if not isinstance(s, str): return s
409
- return re2.sub(r'[\p{C}--[\n\t]]+', '', s)
410
-
411
- def _create_python_script(user_scenario: str, schema_context: str) -> str:
412
- EXPERT_ANALYTICAL_GUIDELINES = """
413
- --- EXPERT ANALYTICAL GUIDELINES ---
414
- When writing your script, you MUST follow these expert business rules:
415
- 1. **Linking Datasets Rule:** If you need to connect facilities to health zones when the 'zone' column is not in the facility list, you must first identify the high-priority zone from the beds data, then find the major city (by facility count) in the facility list, and *then* assess that city's capacity. Do not try to filter the facility list by a 'zone' column if it does not exist in the schema.
416
- 2. **Prioritization Rule:** To prioritize locations, you MUST combine the most recent population data with specific high-risk health indicators to create a multi-factor risk score.
417
- 3. **Capacity Calculation Rule:** For capacity over a 3-month window, assume **60 working days**.
418
- 4. **Cost Calculation Rule:** Sum 'Startup cost' and 'Ongoing cost' per person before multiplying.
419
- """
420
-
421
- prompt_for_coder = f"""
422
- You are an expert Python data scientist. Your job is to write a script to extract the data needed to answer the user's request.
423
- You have dataframes in a list `dfs`.
424
-
425
- {EXPERT_ANALYTICAL_GUIDELINES}
426
 
427
- --- DATA SCHEMA ---
428
- {schema_context}
429
- --- END DATA SCHEMA ---
430
-
431
- CRITICAL RULES:
432
- 1. **DO NOT READ FILES:** You MUST NOT include `pd.read_csv`. The data is ALREADY loaded in the `dfs` variable. You MUST use this variable. Failure to do so will cause a fatal error.
433
- 2. **JSON OUTPUT ONLY:** Your script's ONLY output must be a single JSON object printed to stdout containing the raw data findings.
434
- 3. **BE PRECISE:** Use the exact, case-sensitive column names from the schema and robustly clean strings (`re.sub()`) before converting to numbers.
435
- 4. **JSON SERIALIZATION:** Before adding data to your final dictionary for JSON conversion, you MUST convert any pandas-specific types (like `int64`) to standard Python types using `.item()` for single values or `.tolist()` for lists.
436
-
437
- --- USER'S SCENARIO ---
438
- {user_scenario}
439
-
440
- --- PYTHON SCRIPT ---
441
- Now, write the complete Python script that performs the analysis and prints a single, serializable JSON object.
442
- ```python
443
- """
444
- generated_text = cohere_chat(prompt_for_coder)
445
- match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
446
- if match: return match.group(1).strip()
447
- return "print(json.dumps({'error': 'Failed to generate a valid Python script.'}))"
448
-
449
- def _generate_long_report(prompt: str) -> str:
450
- try:
451
- client = _co_client()
452
- if not client: return "Error: Cohere client not initialized."
453
- response = client.chat(
454
- model=COHERE_MODEL_PRIMARY,
455
- message=prompt,
456
- max_tokens=4096
457
  )
458
- return response.text
459
- except Exception as e:
460
- log_event("cohere_chat_error", None, {"err": str(e)})
461
- return f"Error during final report generation: {e}"
462
-
463
- def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
464
- prompt_for_writer = f"""
465
- You are an expert management consultant and data analyst.
466
- A data science script has run to extract key findings. You have the user's original request and the raw JSON data.
467
-
468
- Your task is to synthesize these raw findings into a single, comprehensive, and professional report that directly answers all of the user's questions with detailed justifications.
469
-
470
- --- USER'S ORIGINAL SCENARIO & DELIVERABLES ---
471
- {user_scenario}
472
- --- END SCENARIO ---
473
-
474
- --- RAW DATA FINDINGS (JSON) ---
475
- {raw_data_json}
476
- --- END RAW DATA ---
477
-
478
- Now, write the final, polished report. The report MUST:
479
- 1. Follow the "Expected Output Format" requested by the user.
480
- 2. Use tables, bullet points, and DETAILED narrative justifications for each recommendation.
481
- 3. Synthesize the raw data into actionable insights. Do not just copy the raw numbers; interpret them.
482
- 4. Ensure you fully address ALL evaluation questions, especially the final recommendations.
483
- """
484
- return _generate_long_report(prompt_for_writer)
485
-
486
- def _append_msg(h: List[Dict[str, str]], r: str, c: str) -> List[Dict[str, str]]:
487
- return (h or []) + [{"role": r, "content": c}]
488
-
489
- def ping_cohere() -> str:
490
- try:
491
- cli = _co_client()
492
- if not cli: return "Cohere client not initialized."
493
- vecs = cohere_embed(["hello", "world"])
494
- return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY})" if vecs else "Cohere reachable."
495
- except Exception as e: return f"Cohere ping failed: {e}"
496
-
497
- def handle(user_msg: str, files: list, yield_update) -> str:
498
- try:
499
- safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
500
- if blocked_in: return refusal_reply(reason_in)
501
-
502
- file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
503
-
504
- if file_paths:
505
- dataframes, schema_parts = [], []
506
- for i, p in enumerate(file_paths):
507
- if p.endswith('.csv'):
508
- try: df = pd.read_csv(p)
509
- except UnicodeDecodeError: df = pd.read_csv(p, encoding='latin1')
510
- dataframes.append(df)
511
- schema_parts.append(f"DataFrame `dfs[{i}]` (`{os.path.basename(p)}`):\n{df.head().to_markdown()}\n")
512
-
513
- if not dataframes: return "Please upload at least one CSV file."
514
-
515
- schema_context = "\n".join(schema_parts)
516
-
517
- yield_update("```\n🧠 Generating aligned analysis script...\n```")
518
- analysis_script = _create_python_script(safe_in, schema_context)
519
-
520
- yield_update("```\n⚙️ Executing script to extract raw data...\n```")
521
- execution_namespace = {"dfs": dataframes, "pd": pd, "re": re, "json": json}
522
- output_buffer = io.StringIO()
523
-
524
- try:
525
- with redirect_stdout(output_buffer): exec(analysis_script, execution_namespace)
526
- raw_data_output = output_buffer.getvalue()
527
- except Exception as e:
528
- return f"An error occurred executing the script: {e}\n\nGenerated Script:\n```python\n{analysis_script}\n```"
529
-
530
- yield_update("```\n✍️ Synthesizing final comprehensive report...\n```")
531
- final_report = _generate_final_report(safe_in, raw_data_output)
532
- return _sanitize_text(final_report)
533
- else:
534
- prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
535
- return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
536
-
537
- except Exception as e:
538
- tb = traceback.format_exc()
539
- log_event("app_error", None, {"err": str(e), "tb": tb})
540
- return f"A critical error occurred: {e}"
541
-
542
- PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
543
- TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
544
-
545
- with gr.Blocks(theme="soft", css="style.css") as demo:
546
- assessment_history = gr.State([])
547
-
548
- with gr.Group(visible=False) as privacy_modal:
549
- with gr.Blocks():
550
- gr.Markdown(PRIVACY_POLICY_TEXT)
551
- close_privacy_btn = gr.Button("Close")
552
-
553
- with gr.Group(visible=False) as terms_modal:
554
- with gr.Blocks():
555
- gr.Markdown(TERMS_OF_SERVICE_TEXT)
556
- close_terms_btn = gr.Button("Close")
557
-
558
- gr.Markdown("# Universal AI Data Analyst")
559
- with gr.Row(variant="panel"):
560
- with gr.Column(scale=1):
561
- gr.Markdown("## New Assessment")
562
- gr.Markdown("<p style='font-size:0.9rem; color: #6C757D;'>Upload CSVs for data analysis, or just enter a prompt to chat.</p>")
563
- files_input = gr.Files(label="Upload Data Files (.csv)", file_count="multiple", type="filepath", file_types=[".csv"])
564
- prompt_input = gr.Textbox(label="Prompt", placeholder="Paste your scenario or question here.", lines=15)
565
- with gr.Row():
566
- send_btn = gr.Button("▶️ Send / Run Analysis", variant="primary", scale=2)
567
- clear_btn = gr.Button("🗑️ Clear")
568
- ping_btn = gr.Button("Ping Cohere")
569
- ping_out = gr.Markdown()
570
- with gr.Column(scale=2):
571
- with gr.Tabs():
572
- with gr.TabItem("Current Assessment", id=0):
573
- chat_history_output = gr.Chatbot(label="Analysis Output", type="messages", height=600)
574
- with gr.TabItem("Assessment History", id=1):
575
- gr.Markdown("## Review Past Assessments")
576
- history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
577
- history_display = gr.Markdown(label="Selected Assessment Details")
578
- with gr.Row(): gr.Markdown("---")
579
- with gr.Row():
580
- privacy_link = gr.Button("Privacy Policy", variant="link")
581
- terms_link = gr.Button("Terms of Service", variant="link")
582
-
583
- def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
584
- if not prompt:
585
- gr.Warning("Please enter a prompt.")
586
- yield chat_history_list, history_state_list, gr.update()
587
- return
588
-
589
- chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
590
-
591
- def dummy_update(message):
592
- pass
593
-
594
- thinking_message = _append_msg(chat_with_user_msg, "assistant", "```\n🧠 Generating and executing analysis... Please wait.\n```")
595
- yield thinking_message, history_state_list, gr.update()
596
-
597
- ai_response_text = handle(prompt, files, dummy_update)
598
-
599
- final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
600
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
601
-
602
- if files:
603
- file_names = [os.path.basename(f.name if hasattr(f, 'name') else f) for f in files]
604
- new_assessment = {"id": timestamp, "prompt": prompt, "files": file_names, "response": ai_response_text}
605
- updated_history = (history_state_list or []) + [new_assessment]
606
- history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
607
- yield final_chat, updated_history, gr.update(choices=history_labels)
608
- else:
609
- yield final_chat, history_state_list, gr.update()
610
-
611
- def view_history(selection, history_state_list):
612
- if not selection or not history_state_list:
613
- return ""
614
- selected_id = selection.split(" - ")
615
- selected_assessment = next((item for item in history_state_list if item["id"] == selected_id), None)
616
-
617
- if selected_assessment:
618
- file_list_md = "\n- ".join(selected_assessment.get('files', []))
619
- return f"""### Assessment from: {selected_assessment['id']}
620
  **Files Used:**
621
  - {file_list_md}
622
  ---
@@ -625,22 +372,23 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
625
  ---
626
  **AI Generated Response:**
627
  {selected_assessment['response']}
 
 
 
628
  """
629
- return "Could not find the selected assessment."
630
 
 
631
  send_btn.click(
632
  run_analysis_wrapper,
633
  inputs=[prompt_input, files_input, chat_history_output, assessment_history],
634
- outputs=[chat_history_output, assessment_history, history_dropdown]
635
  )
636
  history_dropdown.change(
637
- view_history,
638
- inputs=[history_dropdown, assessment_history],
639
- outputs=[history_display]
640
  )
641
  clear_btn.click(
642
- lambda: (None, None, []),
643
- outputs=[prompt_input, files_input, chat_history_output]
644
  )
645
  ping_btn.click(ping_cohere, outputs=[ping_out])
646
  privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])
@@ -651,4 +399,4 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
651
  if __name__ == "__main__":
652
  if not os.getenv("COHERE_API_KEY"):
653
  print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
654
- demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))
 
1
  # app.py
 
 
 
 
 
 
 
 
 
 
 
2
  #
3
+ # Gradio-based AI data analyst app with persistent chat & assessment history.
4
+ # Each session stores: timestamp, prompt, files (if any), final response, and full chat transcript.
 
 
 
 
 
 
5
 
6
  from __future__ import annotations
7
+
8
  import io
9
  import json
10
+ import os
11
  import traceback
12
  from contextlib import redirect_stdout
13
+ from datetime import datetime
14
+ from typing import Any, Dict, List
15
 
16
  import gradio as gr
17
  import pandas as pd
 
18
  import regex as re2
19
  import re
20
 
21
+ from langchain_cohere import ChatCohere # noqa: F401
22
 
23
  from settings import (
24
  GENERAL_CONVERSATION_PROMPT,
25
+ COHERE_MODEL_PRIMARY,
26
+ COHERE_TIMEOUT_S, # noqa: F401
27
+ USE_OPEN_FALLBACKS, # noqa: F401
28
  )
29
  from audit_log import log_event
30
  from privacy import safety_filter, refusal_reply
31
  from llm_router import cohere_chat, _co_client, cohere_embed
32
 
33
+
34
  def load_markdown_text(filepath: str) -> str:
35
  try:
36
+ with open(filepath, "r", encoding="utf-8") as f:
37
  return f.read()
38
  except FileNotFoundError:
39
  return f"**Error:** Document `{os.path.basename(filepath)}` not found."
40
 
41
+
42
  def _sanitize_text(s: str) -> str:
43
+ if not isinstance(s, str):
44
+ return s
45
+ # Remove control characters (except newline and tab)
46
+ return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
47
+
48
 
49
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
50
  EXPERT_ANALYTICAL_GUIDELINES = """
51
  --- EXPERT ANALYTICAL GUIDELINES ---
52
  When writing your script, you MUST follow these expert business rules:
53
+ 1. **Linking Datasets Rule:** If you need to connect facilities to health zones when the 'zone' column is not in the facility list,
54
+ you must first identify the high-priority zone from the beds data, then find the major city (by facility count) in the facility list,
55
+ and *then* assess that city's capacity. Do not try to filter the facility list by a 'zone' column if it does not exist in the schema.
56
+ 2. **Prioritization Rule:** To prioritize locations, you MUST combine the most recent population data with specific high-risk health indicators
57
+ to create a multi-factor risk score.
58
  3. **Capacity Calculation Rule:** For capacity over a 3-month window, assume **60 working days**.
59
  4. **Cost Calculation Rule:** Sum 'Startup cost' and 'Ongoing cost' per person before multiplying.
60
  """
 
84
  """
85
  generated_text = cohere_chat(prompt_for_coder)
86
  match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
87
+ if match:
88
+ return match.group(1).strip()
89
  return "print(json.dumps({'error': 'Failed to generate a valid Python script.'}))"
90
 
91
+
92
  def _generate_long_report(prompt: str) -> str:
93
  try:
94
  client = _co_client()
95
+ if not client:
96
+ return "Error: Cohere client not initialized."
97
  response = client.chat(
98
  model=COHERE_MODEL_PRIMARY,
99
  message=prompt,
100
+ max_tokens=4096,
101
  )
102
  return response.text
103
  except Exception as e:
104
  log_event("cohere_chat_error", None, {"err": str(e)})
105
  return f"Error during final report generation: {e}"
106
 
107
+
108
  def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
109
  prompt_for_writer = f"""\
110
  You are an expert management consultant and data analyst.
 
128
  """
129
  return _generate_long_report(prompt_for_writer)
130
 
131
+
132
  def _append_msg(h: List[Dict[str, str]], r: str, c: str) -> List[Dict[str, str]]:
133
  return (h or []) + [{"role": r, "content": c}]
134
 
135
+
136
  def ping_cohere() -> str:
137
  try:
138
  cli = _co_client()
139
+ if not cli:
140
+ return "Cohere client not initialized."
141
  vecs = cohere_embed(["hello", "world"])
142
  return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY})" if vecs else "Cohere reachable."
143
+ except Exception as e:
144
+ return f"Cohere ping failed: {e}"
145
+
146
 
147
  def handle(user_msg: str, files: list, yield_update) -> str:
148
  try:
149
  safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
150
+ if blocked_in:
151
+ return refusal_reply(reason_in)
152
 
153
  file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
154
 
155
  if file_paths:
156
  dataframes, schema_parts = [], []
157
  for i, p in enumerate(file_paths):
158
+ if p.endswith(".csv"):
159
+ try:
160
+ df = pd.read_csv(p)
161
+ except UnicodeDecodeError:
162
+ df = pd.read_csv(p, encoding="latin1")
163
  dataframes.append(df)
164
+ schema_parts.append(
165
+ f"DataFrame `dfs[{i}]` (`{os.path.basename(p)}`):\n{df.head().to_markdown()}\n"
166
+ )
167
 
168
+ if not dataframes:
169
+ return "Please upload at least one CSV file."
170
 
171
  schema_context = "\n".join(schema_parts)
172
 
173
+ yield_update("""```
174
  🧠 Generating aligned analysis script...
175
+ ```""")
176
  analysis_script = _create_python_script(safe_in, schema_context)
177
 
178
+ yield_update("""```
179
  ⚙️ Executing script to extract raw data...
180
+ ```""")
181
  execution_namespace = {"dfs": dataframes, "pd": pd, "re": re, "json": json}
182
  output_buffer = io.StringIO()
183
 
184
  try:
185
+ with redirect_stdout(output_buffer):
186
+ exec(analysis_script, execution_namespace)
187
  raw_data_output = output_buffer.getvalue()
188
  except Exception as e:
189
+ return (
190
+ f"An error occurred executing the script: {e}\n\nGenerated Script:\n"
191
+ f"```python\n{analysis_script}\n```"
192
+ )
193
 
194
+ yield_update("""```
195
  ✍️ Synthesizing final comprehensive report...
196
+ ```""")
197
  final_report = _generate_final_report(safe_in, raw_data_output)
198
  return _sanitize_text(final_report)
199
  else:
 
205
  log_event("app_error", None, {"err": str(e), "tb": tb})
206
  return f"A critical error occurred: {e}"
207
 
208
+
209
  PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
210
  TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
211
 
212
  with gr.Blocks(theme="soft", css="style.css") as demo:
213
+ # Persistent history of past assessments / chat sessions
214
+ # Each entry:
215
+ # - id: timestamp
216
+ # - prompt: original prompt
217
+ # - files: list of uploaded filenames
218
+ # - response: final response text
219
+ # - chat_history: full transcript (list of {role, content})
220
  assessment_history = gr.State([])
221
 
222
+ # Modals
223
  with gr.Group(visible=False) as privacy_modal:
224
  with gr.Blocks():
225
  gr.Markdown(PRIVACY_POLICY_TEXT)
 
230
  gr.Markdown(TERMS_OF_SERVICE_TEXT)
231
  close_terms_btn = gr.Button("Close")
232
 
233
+ # UI
234
  gr.Markdown("# Universal AI Data Analyst")
235
  with gr.Row(variant="panel"):
236
  with gr.Column(scale=1):
237
  gr.Markdown("## New Assessment")
238
+ gr.Markdown(
239
+ "<p style='font-size:0.9rem; color: #6C757D;'>Upload CSVs for data analysis, or just enter a prompt to chat.</p>"
240
+ )
241
+ files_input = gr.Files(
242
+ label="Upload Data Files (.csv)",
243
+ file_count="multiple",
244
+ type="filepath",
245
+ file_types=[".csv"],
246
+ )
247
+ prompt_input = gr.Textbox(
248
+ label="Prompt",
249
+ placeholder="Paste your scenario or question here.",
250
+ lines=15,
251
+ )
252
  with gr.Row():
253
  send_btn = gr.Button("▶️ Send / Run Analysis", variant="primary", scale=2)
254
  clear_btn = gr.Button("🗑️ Clear")
 
257
  with gr.Column(scale=2):
258
  with gr.Tabs():
259
  with gr.TabItem("Current Assessment", id=0):
260
+ chat_history_output = gr.Chatbot(
261
+ label="Analysis Output", type="messages", height=600
262
+ )
263
  with gr.TabItem("Assessment History", id=1):
264
  gr.Markdown("## Review Past Assessments")
265
+ history_dropdown = gr.Dropdown(
266
+ label="Select an assessment to review", choices=[]
267
+ )
268
  history_display = gr.Markdown(label="Selected Assessment Details")
269
+
270
+ with gr.Row():
271
+ gr.Markdown("---")
272
+
273
  with gr.Row():
274
  privacy_link = gr.Button("Privacy Policy", variant="link")
275
  terms_link = gr.Button("Terms of Service", variant="link")
276
 
277
+ # Logic
278
+
279
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
280
+ """
281
+ Handle a new user prompt and update chat & assessment history.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  """
283
  if not prompt:
284
  gr.Warning("Please enter a prompt.")
285
  yield chat_history_list, history_state_list, gr.update()
286
  return
287
 
288
+ # Append user's message
289
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
290
 
291
+ # Optional streaming update callback (unused here)
292
+ def dummy_update(message: str):
 
293
  pass
294
 
295
+ # Show thinking message
296
+ thinking_message = _append_msg(
297
+ chat_with_user_msg,
298
+ "assistant",
299
+ """```
300
  🧠 Generating and executing analysis... Please wait.
301
+ ```""",
302
+ )
303
  yield thinking_message, history_state_list, gr.update()
304
 
305
+ # Run analysis/chat
306
  ai_response_text = handle(prompt, files, dummy_update)
307
 
308
+ # Append final assistant response
309
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
310
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
311
 
312
+ # Capture filenames (if any)
313
  file_names: List[str] = []
314
  if files:
315
+ file_names = [
316
+ os.path.basename(f.name if hasattr(f, "name") else f) for f in files
317
+ ]
318
 
319
+ # Create a new history record (always, even for chat-only)
320
  new_entry = {
321
  "id": timestamp,
322
  "prompt": prompt,
 
325
  "chat_history": final_chat,
326
  }
327
 
 
328
  updated_history: List[Dict[str, Any]] = (history_state_list or []) + [new_entry]
329
+ history_labels = [
330
+ f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history
331
+ ]
332
 
 
 
 
 
333
  yield final_chat, updated_history, gr.update(choices=history_labels)
334
 
335
  def view_history(selection: str, history_state_list: List[Dict[str, Any]]) -> str:
336
+ """
337
+ Render details for a selected past assessment/chat session.
 
 
 
 
 
 
 
 
 
 
338
  """
339
  if not selection or not history_state_list:
340
  return ""
341
+ # Selection label format: "timestamp - prompt..."
 
342
  try:
343
  selected_id = selection.split(" - ", 1)[0]
344
  except Exception:
345
  selected_id = selection
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
 
347
+ selected_assessment = next(
348
+ (item for item in history_state_list if item.get("id") == selected_id),
349
+ None,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
  )
351
+ if not selected_assessment:
352
+ return "Could not find the selected assessment."
353
+
354
+ file_list = selected_assessment.get("files", [])
355
+ file_list_md = "\n- ".join(file_list) if file_list else "*(no files uploaded)*"
356
+
357
+ # Chat transcript (role + content)
358
+ chat_entries = selected_assessment.get("chat_history", [])
359
+ chat_md_lines = []
360
+ for msg in chat_entries:
361
+ role = msg.get("role", "").capitalize()
362
+ content = msg.get("content", "")
363
+ chat_md_lines.append(f"**{role}:** {content}")
364
+ chat_md = "\n\n".join(chat_md_lines)
365
+
366
+ return f"""### Assessment from: {selected_assessment['id']}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
  **Files Used:**
368
  - {file_list_md}
369
  ---
 
372
  ---
373
  **AI Generated Response:**
374
  {selected_assessment['response']}
375
+ ---
376
+ **Chat Transcript:**
377
+ {chat_md}
378
  """
 
379
 
380
+ # Wire up UI events
381
  send_btn.click(
382
  run_analysis_wrapper,
383
  inputs=[prompt_input, files_input, chat_history_output, assessment_history],
384
+ outputs=[chat_history_output, assessment_history, history_dropdown],
385
  )
386
  history_dropdown.change(
387
+ view_history, inputs=[history_dropdown, assessment_history], outputs=[history_display]
 
 
388
  )
389
  clear_btn.click(
390
+ lambda: (None, None, []), # clear prompt, files, and chat
391
+ outputs=[prompt_input, files_input, chat_history_output],
392
  )
393
  ping_btn.click(ping_cohere, outputs=[ping_out])
394
  privacy_link.click(lambda: gr.update(visible=True), outputs=[privacy_modal])
 
399
  if __name__ == "__main__":
400
  if not os.getenv("COHERE_API_KEY"):
401
  print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
402
+ demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))