VEDAGI1 commited on
Commit
5dbf496
·
verified ·
1 Parent(s): dc12e99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +436 -480
app.py CHANGED
@@ -1,13 +1,13 @@
1
- # app.py
2
- #
3
- # Universal AI Data Analyst with:
4
- # - IMPROVED: "Plan-and-Execute" logic for high-accuracy analysis.
5
- # - IMPROVED: Professional, structured report generation.
6
- # - IMPROVED: Enriched schema context for the AI analyst.
7
- # - Unchanged UI, event wiring, and core infrastructure.
8
-
9
- from __future__ import annotations
10
-
11
  import io
12
  import json
13
  import os
@@ -15,561 +15,517 @@ import traceback
15
  from contextlib import redirect_stdout
16
  from datetime import datetime
17
  from typing import Any, Dict, List
18
-
19
  import gradio as gr
20
  import pandas as pd
21
  import regex as re2
22
  import re
23
-
24
  from langchain_cohere import ChatCohere # noqa: F401
25
  from settings import (
26
- GENERAL_CONVERSATION_PROMPT,
27
- COHERE_MODEL_PRIMARY,
28
- COHERE_TIMEOUT_S,
29
- USE_OPEN_FALLBACKS,
30
  )
 
 
 
 
 
 
 
 
 
31
  from audit_log import log_event
32
  from privacy import safety_filter, refusal_reply
33
  from llm_router import cohere_chat, _co_client, cohere_embed
34
-
35
- # Try to import optional HIPAA flags; fall back to safe defaults if not defined.
36
- try:
37
- from settings import (
38
- PHI_MODE,
39
- PERSIST_HISTORY,
40
- HISTORY_TTL_DAYS,
41
- REDACT_BEFORE_LLM,
42
- ALLOW_EXTERNAL_PHI,
43
- )
44
- except Exception:
45
- PHI_MODE = False
46
- PERSIST_HISTORY = True
47
- HISTORY_TTL_DAYS = 365
48
- REDACT_BEFORE_LLM = False
49
- ALLOW_EXTERNAL_PHI = True
50
-
51
-
52
- # ---------------------- Helpers (analysis logic selectively improved) ----------------------
53
  def load_markdown_text(filepath: str) -> str:
54
- try:
55
- with open(filepath, "r", encoding="utf-8") as f:
56
- return f.read()
57
- except FileNotFoundError:
58
- return f"**Error:** Document `{os.path.basename(filepath)}` not found."
59
-
60
-
61
  def _sanitize_text(s: str) -> str:
62
- if not isinstance(s, str):
63
- return s
64
- # Remove control characters (except newline and tab)
65
- return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
66
-
67
-
68
- # Conservative PHI redaction patterns (only applied if PHI_MODE & REDACT_BEFORE_LLM are enabled)
69
  PHI_PATTERNS = [
70
- (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),
71
- (re.compile(r"\b\d{9}\b"), "[REDACTED_MRN]"),
72
- (re.compile(r"\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b"), "[REDACTED_PHONE]"),
73
- (re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"), "[REDACTED_EMAIL]"),
74
- (re.compile(r"\b(19|20)\d{2}-\d{2}-\d{2}\b"), "[REDACTED_DOB]"),
75
- (re.compile(r"\b\d{2}/\d{2}/(19|20)\d{2}\b"), "[REDACTED_DOB]"),
76
- (re.compile(r"\b\d{5}(-\d{4})?\b"), "[REDACTED_ZIP]"),
77
  ]
78
-
79
-
80
  def redact_phi(text: str) -> str:
81
- if not isinstance(text, str):
82
- return text
83
- t = text
84
- for pat, repl in PHI_PATTERNS:
85
- t = pat.sub(repl, t)
86
- return t
87
-
88
-
89
  def safe_log(event_name: str, meta: dict | None = None):
90
- # Avoid logging raw PHI or payloads
91
- try:
92
- meta = (meta or {}).copy()
93
- meta.pop("raw", None)
94
- log_event(event_name, None, meta)
95
- except Exception:
96
- # Never raise from logging
97
- pass
98
-
99
-
100
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
101
- """
102
- IMPROVED: Generates a Python script using a "Plan-and-Execute" approach.
103
- The AI first creates a step-by-step plan, then writes code to execute it.
104
- This ensures the analysis is logical, correctly aggregated, and aligned with the user's goal.
105
- """
106
- prompt_for_coder = f"""\
107
- You are an expert-level Python data scientist acting as a consultant. Your task is to analyze data to answer a user's business request.
108
-
109
- --- USER'S SCENARIO ---
110
- {user_scenario}
111
- --- END SCENARIO ---
112
-
 
 
 
113
  --- DATA SCHEMA ---
114
  {schema_context}
115
  --- END DATA SCHEMA ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
- You must follow a rigorous two-step process:
118
-
119
- **Step 1: Create a Detailed Analysis Plan.**
120
- First, think step-by-step. Deconstruct the user's request into a clear, logical plan.
121
- The plan must identify the key metrics, necessary data manipulations (cleaning, grouping, aggregation), and the final outputs required.
122
-
123
- - **CRITICAL for aggregation:** If the user asks for analysis by category (e.g., "specialty," "department"), you MUST identify the correct high-level categorical column for grouping. DO NOT aggregate by granular, free-text procedure descriptions unless explicitly asked. Your goal is to find meaningful, strategic trends.
124
-
125
- **Step 2: Write the Python Script.**
126
- Based on your plan, write a complete Python script.
127
-
128
- CRITICAL SCRIPTING RULES:
129
- 1. **NO FILE READING:** The data is already loaded into a list of pandas DataFrames called `dfs`. You MUST use this variable. Do not include `pd.read_csv`.
130
- 2. **STRICTLY JSON OUTPUT:** The script's ONLY output to stdout MUST be a single, well-structured JSON object containing all the raw data findings from your plan.
131
- 3. **ROBUST DATA CLEANING:** Before performing calculations, clean data robustly. Convert numeric columns to numbers using `pd.to_numeric(..., errors='coerce')`. Handle missing values (`NaN`) appropriately (e.g., by excluding them from averages).
132
- 4. **JSON SERIALIZATION:** Ensure all data in the final dictionary is JSON-serializable. Use `.item()` for single numpy values and `.tolist()` for arrays/series.
133
 
134
- Now, provide your response in the following format:
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
- **ANALYSIS PLAN:**
137
 
138
- Objective: [Briefly state the main goal]
139
- Data Cleaning: [Describe steps to clean and prepare the data]
140
- Analysis Step A: [e.g., "Calculate average wait times per hospital by grouping dfs[0] by 'Facility' and averaging 'Surgery_Median'."]
141
- Analysis Step B: [e.g., "Identify top 5 specialties by grouping dfs[0] by the 'Specialty' column and calculating the mean of 'Surgery_Median'."]
142
- Analysis Step C: [e.g., "Determine zone-level performance by grouping by 'Zone' and comparing to the overall provincial average."]
143
- JSON Output Structure: [Describe the keys and values of the final JSON object]
144
 
145
- text**PYTHON SCRIPT:**
146
- ```python
147
- # Your complete Python script starts here
148
- import pandas as pd
149
- import json
150
- import re
151
 
152
- # Main analysis logic...
153
- # ...
154
- # Final print statement
155
- print(json.dumps(final_data_structure, indent=4))
156
- """
157
- generated_text = cohere_chat(prompt_for_coder)
158
- This regex is more robust for extracting the final code block
159
- match = re2.search(r"PYTHON SCRIPT:\s*python\n(.*?)", generated_text, re2.DOTALL)
160
- if match:
161
- return match.group(1).strip()
162
- Fallback if the structured format fails
163
- fallback_match = re2.search(r"python\n(.*?)", generated_text, re2.DOTALL)
164
- if fallback_match:
165
- return fallback_match.group(1).strip()
166
- return "print(json.dumps({'error': 'Failed to generate a valid Python script from the plan.'}))"
167
- def _generate_long_report(prompt: str) -> str:
168
- try:
169
- client = _co_client()
170
- if not client:
171
- return "Error: Cohere client not initialized."
172
- response = client.chat(
173
- model=COHERE_MODEL_PRIMARY,
174
- message=prompt,
175
- max_tokens=4096,
176
- )
177
- return response.text
178
- except Exception as e:
179
- safe_log("cohere_chat_error", {"err": str(e)})
180
- return f"Error during final report generation: {e}"
181
- def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
182
- """
183
- IMPROVED: Generates a professional, structured report from the JSON data.
184
- The prompt guides the AI to synthesize insights in a standard consulting format,
185
- ensuring a high level of detail and actionable recommendations.
186
- """
187
- prompt_for_writer = f"""\
188
- You are an expert management consultant specializing in data-driven strategy. A Python script has been executed to extract key data points based on a user's request. Your task is to synthesize this raw data into a polished, comprehensive, and actionable report.
189
- --- USER'S ORIGINAL SCENARIO ---
190
  {user_scenario}
191
  --- END SCENARIO ---
 
192
  --- RAW DATA FINDINGS (JSON) ---
193
  {raw_data_json}
194
  --- END RAW DATA ---
195
- CRITICAL INSTRUCTIONS:
196
- You must write a final report that follows this exact structure:
197
- Executive Summary
198
-
199
- Start with a brief paragraph summarizing the core problem, key findings, and top recommendations. This should be a high-level overview for a leadership audience.
200
 
201
- 1. [First Key Finding, e.g., Hospitals with the Longest Wait Times]
202
-
203
- Present the relevant data in a Markdown table.
204
- Write a short narrative interpreting the data. What does it mean? Are there any outliers? Why might these facilities have long waits (e.g., specialized care, rural location, capacity issues)?
 
 
 
205
 
206
- 2. [Second Key Finding, e.g., Specialties with the Longest Wait Times]
207
 
208
- Present the relevant data in a Markdown table.
209
- Interpret the findings. Why are these specialties facing delays (e.g., specialist shortages, equipment needs)?
210
 
211
- 3. [Third Key Finding, e.g., Zone-Level Performance]
212
 
213
- Present the data in a table, including a comparison to a relevant average or baseline.
214
- Analyze the geographic or systemic issues this data reveals.
 
 
 
 
 
 
 
215
 
216
- 4. [Fourth Key Finding, if applicable, e.g., Geographic Distribution]
217
 
218
- Synthesize location data with the wait-time findings.
219
- Discuss the implications for patient equity, travel burdens, and access to care.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
- 5. Recommendations for Resource Allocation
222
 
223
- Provide specific, actionable, and justified recommendations.
224
- Structure them by category (e.g., by facility, by specialty, by zone).
225
- For each recommendation, provide a clear rationale directly linked to the data findings above (e.g., "Allocate additional resources to Glace Bay Hospital because it is a rural facility in a high-wait zone, suggesting a capacity bottleneck.").
226
 
227
- Data Limitations
228
 
229
- Briefly mention any potential limitations of the analysis (e.g., missing data, use of proxies, case severity not included). This adds credibility to the report.
230
 
231
- Do not just repeat the JSON data. Your value is in interpreting the numbers, connecting the dots between different findings, and providing clear, data-backed strategic advice.
232
- """
233
- return _generate_long_report(prompt_for_writer)
234
- def _append_msg(h: List[Dict[str, str]], r: str, c: str) -> List[Dict[str, str]]:
235
- return (h or []) + [{"role": r, "content": c}]
236
- def ping_cohere() -> str:
237
- try:
238
- cli = _co_client()
239
- if not cli:
240
- return "Cohere client not initialized."
241
- vecs = cohere_embed(["hello", "world"])
242
- return f"Cohere OK (model={COHERE_MODEL_PRIMARY})" if vecs else "Cohere reachable."
243
- except Exception as e:
244
- return f"Cohere ping failed: {e}"
245
- def handle(user_msg: str, files: list, yield_update) -> str:
246
- try:
247
- Safety filter on incoming message
248
- safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
249
- if blocked_in:
250
- return refusal_reply(reason_in)
251
- Optional PHI redaction for prompts sent to an external LLM
252
- redacted_in = safe_in
253
- if PHI_MODE and REDACT_BEFORE_LLM:
254
- redacted_in = redact_phi(safe_in)
255
- file_paths: List[str] = [
256
- getattr(f, "name", None) or f for f in (files or [])
257
- ]
258
- if file_paths:
259
- CSV analysis path
260
- dataframes, schema_parts = [], []
261
- for i, p in enumerate(file_paths):
262
- if p.endswith(".csv"):
263
- try:
264
- df = pd.read_csv(p)
265
- except UnicodeDecodeError:
266
- df = pd.read_csv(p, encoding="latin1")
267
- dataframes.append(df)
268
- --- IMPROVEMENT: ENRICHED SCHEMA CONTEXT ---
269
- schema_buffer = io.StringIO()
270
- df.info(buf=schema_buffer)
271
- schema_info = schema_buffer.getvalue()
272
- schema_parts.append(
273
- f"""DataFrame dfs[{i}] ({os.path.basename(p)}):\n\nHead\n{df.head().to_markdown()}\n\nSchema and Data Types\n\n{schema_info}\n\n\nSummary Statistics\n{df.describe(include='all').to_markdown()}\n"""
274
- )
275
- if not dataframes:
276
- return "Please upload at least one CSV file."
277
- schema_context = "\n".join(schema_parts)
278
- If external PHI is not allowed, use redacted prompt; otherwise use original
279
- prompt_for_code = (
280
- redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
281
- )
282
- yield_update("Generating aligned analysis script...")
283
- analysis_script = _create_python_script(prompt_for_code, schema_context)
284
- yield_update("Executing script to extract raw data...")
285
- execution_namespace = {"dfs": dataframes, "pd": pd, "re": re, "json": json}
286
- output_buffer = io.StringIO()
287
- try:
288
- with redirect_stdout(output_buffer):
289
- exec(analysis_script, execution_namespace)
290
- raw_data_output = output_buffer.getvalue()
291
- except Exception as e:
292
- return (
293
- f"An error occurred executing the script: {e}\n\nGenerated Script:\n"
294
- f"python\n{analysis_script}\n"
295
- )
296
- yield_update("Synthesizing final comprehensive report...")
297
- writer_input = (
298
- redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
299
- )
300
- final_report = _generate_final_report(writer_input, raw_data_output)
301
- return _sanitize_text(final_report)
302
- else:
303
- Pure chat path
304
- chat_input = (
305
- redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
306
- )
307
- prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
308
- return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
309
- except Exception as e:
310
- tb = traceback.format_exc()
311
- safe_log("app_error", {"err": str(e)})
312
- return "A critical error occurred. Please contact your administrator." if PHI_MODE else f"A critical error occurred: {e}"
313
- PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
314
- TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
315
- ---------------------- Sleek UI assets (CSS/JS only) ----------------------
316
  SLEEK_CSS = """
317
  /* Full-bleed, modern look */
318
  :root, body, #root, .gradio-container { height: 100%; }
319
  .gradio-container { padding: 0 !important; }
320
  .block { padding: 0 !important; }
 
321
  /* Header */
322
  .header {
323
- padding: 20px 28px;
324
- background: linear-gradient(135deg, #0e1726, #1d2a44 60%, #243a5e);
325
- color: #fff;
326
- display: flex; align-items: center; justify-content: space-between;
327
- gap: 16px;
328
  }
329
  .header h1 { margin: 0; font-size: 22px; letter-spacing: 0.3px; font-weight: 600; }
330
  .header .badge { font-size: 12px; opacity: 0.9; background:#ffffff22; padding:6px 10px; border-radius: 999px; }
 
331
  /* Main layout */
332
  .main {
333
- display: grid;
334
- grid-template-columns: 420px 1fr;
335
- gap: 16px;
336
- padding: 16px;
337
- height: calc(100vh - 72px);
338
- box-sizing: border-box;
339
  }
340
  .left, .right {
341
- background: #0b1020;
342
- color: #e9edf3;
343
- border-radius: 16px;
344
- border: 1px solid #1c2642;
345
  }
346
  .left { padding: 16px; display: flex; flex-direction: column; gap: 12px; }
347
  .right { padding: 0; display: flex; flex-direction: column; }
 
348
  /* Panels */
349
  .panel-title { font-size: 14px; font-weight: 600; color: #aeb8cc; margin-bottom: 6px; }
350
  .helper { font-size: 12px; color: #97a3bb; margin-bottom: 8px; }
 
351
  /* Sticky actions */
352
  .actions {
353
- display: flex; gap: 8px; align-items: center; justify-content: stretch;
354
  }
355
  .actions .gr-button { flex: 1; }
 
356
  /* Tabs full height */
357
  .right .tabs { height: 100%; display: flex; flex-direction: column; }
358
  .right .tabitem { flex: 1; display: flex; flex-direction: column; }
359
  #chatbot_container { flex: 1; }
360
  #chatbot_container .gr-chatbot { height: 100%; }
 
361
  /* Tiny separators */
362
  .hr { height: 1px; background: #16203b; margin: 10px 0; }
 
363
  /* Voice hint */
364
  .voice-hint { font-size: 12px; color:#9fb0cc; margin-top: 4px; }
365
  """
 
366
  VOICE_STT_HTML = """
367
  <script>
368
  let __rs_rec = null;
369
  function rs_toggle_stt(elemId){
370
- const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
371
- if (!SpeechRecognition){
372
- alert("This browser does not support Speech Recognition. Try Chrome or Edge.");
373
- return;
374
- }
375
- if (__rs_rec){ __rs_rec.stop(); __rs_rec = null; return; }
376
- __rs_rec = new SpeechRecognition();
377
- __rs_rec.lang = "en-US";
378
- __rs_rec.interimResults = true;
379
- __rs_rec.continuous = true;
380
-
381
- const box = document.querySelector(`#${elemId} textarea`);
382
- if (!box){ alert("Prompt box not found."); return; }
383
- let base = box.value || "";
384
-
385
- __rs_rec.onresult = (ev) => {
386
- let t = "";
387
- for (let i = ev.resultIndex; i < ev.results.length; i++){
388
- t += ev.results[i][0].transcript;
389
- }
390
- box.value = (base + " " + t).trim();
391
- box.dispatchEvent(new Event("input", { bubbles: true }));
392
- };
393
- __rs_rec.onend = () => { __rs_rec = null; };
394
- __rs_rec.start();
395
  }
396
  </script>
397
  """
398
- ---------------------- Sleek UI (with fixed State wiring) ----------------------
 
 
 
399
  with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
400
- Persistent in-memory history component (fixes list/_id error)
401
- assessment_history = gr.State([])
402
- Header
403
- with gr.Row(elem_classes=["header"]):
404
- gr.Markdown("Clarity Ops Augmented Decision Support")
405
- pill = (
406
- "PHI Mode ON · history off"
407
- if (PHI_MODE and not PERSIST_HISTORY)
408
- else "PHI Mode ON"
409
- if PHI_MODE
410
- else "PHI Mode OFF"
411
- )
412
- gr.Markdown(f"{pill}")
413
- Main layout
414
- with gr.Row(elem_classes=["main"]):
415
- Left panel
416
- with gr.Column(elem_classes=["left"]):
417
- gr.Markdown("New Assessment")
418
- gr.Markdown(
419
- "Upload CSVs for analysis, or enter a prompt. Voice works in modern browsers."
420
- )
421
- files_input = gr.Files(
422
- label="Upload Data Files (.csv)",
423
- file_count="multiple",
424
- type="filepath",
425
- file_types=[".csv"],
426
- )
427
- prompt_input = gr.Textbox(
428
- label="Prompt",
429
- placeholder="Paste your scenario or question here...",
430
- lines=12,
431
- elem_id="prompt_box",
432
- autofocus=True,
433
- )
434
- with gr.Row(elem_classes=["actions"]):
435
- send_btn = gr.Button("Run Analysis", variant="primary")
436
- clear_btn = gr.Button("Clear")
437
- voice_btn = gr.Button("Voice")
438
- gr.Markdown(
439
- "Click Voice to start/stop dictation into the prompt box."
440
- )
441
- ping_btn = gr.Button("Ping Cohere")
442
- ping_out = gr.Markdown()
443
- gr.Markdown("")
444
- if PHI_MODE:
445
- gr.Markdown(
446
- "Warning: PHI Mode: History persistence is disabled by default. Avoid unnecessary identifiers."
447
- )
448
- with gr.Accordion("Privacy & Terms", open=False):
449
- gr.Markdown(PRIVACY_POLICY_TEXT)
450
- gr.Markdown("")
451
- gr.Markdown(TERMS_OF_SERVICE_TEXT)
452
- Right panel
453
- with gr.Column(elem_classes=["right"]):
454
- with gr.Tabs(elem_classes=["tabs"]):
455
- with gr.TabItem("Current Assessment", id=0, elem_classes=["tabitem"]):
456
- with gr.Column(elem_id="chatbot_container"):
457
- chat_history_output = gr.Chatbot(
458
- label="Analysis Output", type="messages"
459
- )
460
- with gr.TabItem("Assessment History", id=1, elem_classes=["tabitem"]):
461
- gr.Markdown("### Review Past Assessments")
462
- history_dropdown = gr.Dropdown(
463
- label="Select an assessment to review", choices=[]
464
- )
465
- history_display = gr.Markdown(label="Selected Assessment Details")
466
- Inject voice-to-text helper
467
- gr.HTML(VOICE_STT_HTML)
468
- --------- Event logic (unchanged analysis flow) ----------
469
- def run_analysis_wrapper(
470
- prompt, files, chat_history_list, history_state_list
471
- ):
472
- if not prompt:
473
- gr.Warning("Please enter a prompt.")
474
- yield chat_history_list, history_state_list, gr.update()
475
- return
476
- Append user's message
477
- chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
478
- Thinking bubble
479
- thinking_message = _append_msg(
480
- chat_with_user_msg,
481
- "assistant",
482
- "Generating and executing analysis... Please wait.",
483
- )
484
- yield thinking_message, history_state_list, gr.update()
485
- Run analysis/chat
486
- def dummy_update(message: str):
487
- pass
488
- ai_response_text = handle(prompt, files, dummy_update)
489
- Append final assistant response
490
- final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
491
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
492
- Capture filenames (if any)
493
- file_names: List[str] = []
494
- if files:
495
- file_names = [
496
- os.path.basename(f.name if hasattr(f, "name") else f) for f in files
497
- ]
498
- Build history record
499
- new_entry = {
500
- "id": timestamp,
501
- "prompt": prompt,
502
- "files": file_names,
503
- "response": ai_response_text,
504
- "chat_history": final_chat,
505
- }
506
- Respect PHI/history flags
507
- if PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0)):
508
- updated_history: List[Dict[str, Any]] = (history_state_list or []) + [
509
- new_entry
510
- ]
511
- else:
512
- updated_history = history_state_list or []
513
- history_labels = [
514
- f"{item['id']} - {item['prompt'][:40]}..."
515
- for item in updated_history
516
- ]
517
- yield final_chat, updated_history, gr.update(choices=history_labels)
518
- def view_history(selection: str, history_state_list: List[Dict[str, Any]]) -> str:
519
- if not selection or not history_state_list:
520
- return ""
521
- try:
522
- selected_id = selection.split(" - ", 1)[0]
523
- except Exception:
524
- selected_id = selection
525
- selected_assessment = next(
526
- (item for item in history_state_list if item.get("id") == selected_id), None
527
- )
528
- if not selected_assessment:
529
- return "Could not find the selected assessment."
530
- file_list = selected_assessment.get("files", [])
531
- file_list_md = "\n- ".join(file_list) if file_list else "(no files uploaded)"
532
- chat_entries = selected_assessment.get("chat_history", [])
533
- chat_md_lines = []
534
- for msg in chat_entries:
535
- role = msg.get("role", "").capitalize()
536
- content = msg.get("content", "")
537
- chat_md_lines.append(f"{role}: {content}")
538
- chat_md = "\n\n".join(chat_md_lines)
539
- return f"""### Assessment from: {selected_assessment['id']}
540
- Files Used:
541
-
542
- {file_list_md}
543
-
544
-
545
- Original Prompt:
546
- {selected_assessment['prompt']}
547
-
548
- AI Generated Response:
 
 
 
 
 
 
549
  {selected_assessment['response']}
550
- Chat Transcript:
 
551
  {chat_md}
552
  """
553
- Wire events (using proper gr.State component for history)
554
- send_btn.click(
555
- run_analysis_wrapper,
556
- inputs=[prompt_input, files_input, chat_history_output, assessment_history],
557
- outputs=[chat_history_output, assessment_history, history_dropdown],
558
- )
559
- history_dropdown.change(
560
- view_history,
561
- inputs=[history_dropdown, assessment_history],
562
- outputs=[history_display],
563
- )
564
- clear_btn.click(
565
- lambda: (None, None, []),
566
- outputs=[prompt_input, files_input, chat_history_output],
567
- )
568
- ping_btn.click(ping_cohere, outputs=[ping_out])
569
- voice_btn.click(None, [], [], js="rs_toggle_stt('prompt_box')")
570
- if name == "main":
571
- if not os.getenv("COHERE_API_KEY"):
572
- print(
573
- "COHERE_API_KEY environment variable not set. Application may not function correctly."
574
- )
575
- demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))
 
 
1
+ this works nov 9th 2025
2
+
3
+ app.py
4
+ Universal AI Data Analyst with:
5
+ - Unchanged analysis & assessment logic
6
+ - Fixed Gradio event wiring (uses gr.State for history)
7
+ - Triple-quoted progress strings (no unterminated literals)
8
+ - Sleek full-width UI and Voice-to-Text (browser Web Speech API)
9
+ - Optional HIPAA flags (fallback defaults if not present in settings.py)
10
+ from future import annotations
11
  import io
12
  import json
13
  import os
 
15
  from contextlib import redirect_stdout
16
  from datetime import datetime
17
  from typing import Any, Dict, List
 
18
  import gradio as gr
19
  import pandas as pd
20
  import regex as re2
21
  import re
 
22
  from langchain_cohere import ChatCohere # noqa: F401
23
  from settings import (
24
+ GENERAL_CONVERSATION_PROMPT,
25
+ COHERE_MODEL_PRIMARY,
26
+ COHERE_TIMEOUT_S, # noqa: F401
27
+ USE_OPEN_FALLBACKS # noqa: F401
28
  )
29
+ Try to import optional HIPAA flags; fall back to safe defaults if not defined.
30
+ try:
31
+ from settings import PHI_MODE, PERSIST_HISTORY, HISTORY_TTL_DAYS, REDACT_BEFORE_LLM, ALLOW_EXTERNAL_PHI
32
+ except Exception:
33
+ PHI_MODE = False
34
+ PERSIST_HISTORY = True
35
+ HISTORY_TTL_DAYS = 365
36
+ REDACT_BEFORE_LLM = False
37
+ ALLOW_EXTERNAL_PHI = True
38
  from audit_log import log_event
39
  from privacy import safety_filter, refusal_reply
40
  from llm_router import cohere_chat, _co_client, cohere_embed
41
+ ---------------------- Helpers (analysis logic unchanged) ----------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def load_markdown_text(filepath: str) -> str:
43
+ try:
44
+ with open(filepath, "r", encoding="utf-8") as f:
45
+ return f.read()
46
+ except FileNotFoundError:
47
+ return f"Error: Document {os.path.basename(filepath)} not found."
 
 
48
  def _sanitize_text(s: str) -> str:
49
+ if not isinstance(s, str):
50
+ return s
51
+ # Remove control characters (except newline and tab)
52
+ return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
53
+ Conservative PHI redaction patterns (only applied if PHI_MODE & REDACT_BEFORE_LLM are enabled)
 
 
54
  PHI_PATTERNS = [
55
+ (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),
56
+ (re.compile(r"\b\d{9}\b"), "[REDACTED_MRN]"),
57
+ (re.compile(r"\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b"), "[REDACTED_PHONE]"),
58
+ (re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+.[A-Za-z]{2,}"), "[REDACTED_EMAIL]"),
59
+ (re.compile(r"\b(19|20)\d{2}-\d{2}-\d{2}\b"), "[REDACTED_DOB]"),
60
+ (re.compile(r"\b\d{2}/\d{2}/(19|20)\d{2}\b"), "[REDACTED_DOB]"),
61
+ (re.compile(r"\b\d{5}(-\d{4})?\b"), "[REDACTED_ZIP]"),
62
  ]
 
 
63
  def redact_phi(text: str) -> str:
64
+ if not isinstance(text, str):
65
+ return text
66
+ t = text
67
+ for pat, repl in PHI_PATTERNS:
68
+ t = pat.sub(repl, t)
69
+ return t
 
 
70
  def safe_log(event_name: str, meta: dict | None = None):
71
+ # Avoid logging raw PHI or payloads
72
+ try:
73
+ meta = (meta or {}).copy()
74
+ meta.pop("raw", None)
75
+ log_event(event_name, None, meta)
76
+ except Exception:
77
+ # Never raise from logging
78
+ pass
 
 
79
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
80
+ EXPERT_ANALYTICAL_GUIDELINES = """
81
+ --- EXPERT ANALYTICAL GUIDELINES ---
82
+ When writing your script, you MUST follow these expert business rules:
83
+ Linking Datasets Rule: If you need to connect facilities to health zones when the 'zone' column is not in the facility list,
84
+ you must first identify the high-priority zone from the beds data, then find the major city (by facility count) in the facility list,
85
+ and then assess that city's capacity. Do not try to filter the facility list by a 'zone' column if it does not exist in the schema.
86
+ Prioritization Rule: To prioritize locations, you MUST combine the most recent population data with specific high-risk health indicators
87
+ to create a multi-factor risk score.
88
+ Capacity Calculation Rule: For capacity over a 3-month window, assume 60 working days.
89
+ Cost Calculation Rule: Sum 'Startup cost' and 'Ongoing cost' per person before multiplying.
90
+ """
91
+ prompt_for_coder = f"""
92
+ You are an expert Python data scientist. Your job is to write a script to extract the data needed to answer the user's request.
93
+ You have dataframes in a list dfs.
94
+ {EXPERT_ANALYTICAL_GUIDELINES}
95
  --- DATA SCHEMA ---
96
  {schema_context}
97
  --- END DATA SCHEMA ---
98
+ CRITICAL RULES:
99
+ DO NOT READ FILES: You MUST NOT include pd.read_csv. The data is ALREADY loaded in the dfs variable. You MUST use this variable. Failure to do so will cause a fatal error.
100
+ JSON OUTPUT ONLY: Your script's ONLY output must be a single JSON object printed to stdout containing the raw data findings.
101
+ BE PRECISE: Use the exact, case-sensitive column names from the schema and robustly clean strings (re.sub()) before converting to numbers.
102
+ JSON SERIALIZATION: Before adding data to your final dictionary for JSON conversion, you MUST convert any pandas-specific types (like int64) to standard Python types using .item() for single values or .tolist() for lists.
103
+ --- USER'S SCENARIO ---
104
+ {user_scenario}
105
+ --- PYTHON SCRIPT ---
106
+ Now, write the complete Python script that performs the analysis and prints a single, serializable JSON object.
107
+ code
108
+ Python
109
+ """
110
+ generated_text = cohere_chat(prompt_for_coder)
111
+ match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
112
+ if match:
113
+ return match.group(1).strip()
114
+ return "print(json.dumps({'error': 'Failed to generate a valid Python script.'}))"
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
+ def _generate_long_report(prompt: str) -> str:
118
+ try:
119
+ client = _co_client()
120
+ if not client:
121
+ return "Error: Cohere client not initialized."
122
+ response = client.chat(
123
+ model=COHERE_MODEL_PRIMARY,
124
+ message=prompt,
125
+ max_tokens=4096,
126
+ )
127
+ return response.text
128
+ except Exception as e:
129
+ safe_log("cohere_chat_error", {"err": str(e)})
130
+ return f"Error during final report generation: {e}"
131
 
 
132
 
133
+ def _generate_final_report(user_scenario: str, raw_data_json: str) -> str:
134
+ prompt_for_writer = f"""\
135
+ You are an expert management consultant and data analyst.
136
+ A data science script has run to extract key findings. You have the user's original request and the raw JSON data.
 
 
137
 
138
+ Your task is to synthesize these raw findings into a single, comprehensive, and professional report that directly answers all of the user's questions with detailed justifications.
 
 
 
 
 
139
 
140
+ --- USER'S ORIGINAL SCENARIO & DELIVERABLES ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  {user_scenario}
142
  --- END SCENARIO ---
143
+
144
  --- RAW DATA FINDINGS (JSON) ---
145
  {raw_data_json}
146
  --- END RAW DATA ---
 
 
 
 
 
147
 
148
+ Now, write the final, polished report. The report MUST:
149
+ 1. Follow the "Expected Output Format" requested by the user.
150
+ 2. Use tables, bullet points, and DETAILED narrative justifications for each recommendation.
151
+ 3. Synthesize the raw data into actionable insights. Do not just copy the raw numbers; interpret them.
152
+ 4. Ensure you fully address ALL evaluation questions, especially the final recommendations.
153
+ """
154
+ return _generate_long_report(prompt_for_writer)
155
 
 
156
 
157
+ def _append_msg(h: List[Dict[str, str]], r: str, c: str) -> List[Dict[str, str]]:
158
+ return (h or []) + [{"role": r, "content": c}]
159
 
 
160
 
161
def ping_cohere() -> str:
    """Lightweight connectivity check: embed two tokens and report the outcome."""
    try:
        client = _co_client()
        if not client:
            return "Cohere client not initialized."
        # Embedding two short strings exercises the API without a chat call.
        if cohere_embed(["hello", "world"]):
            return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY})"
        return "Cohere reachable."
    except Exception as exc:
        return f"Cohere ping failed: {exc}"
170
 
 
171
 
172
def handle(user_msg: str, files: list, yield_update) -> str:
    """Main request router: CSV analysis pipeline or plain chat.

    user_msg:     the raw user prompt (safety-filtered below).
    files:        uploaded file objects or path strings; only .csv is used.
    yield_update: progress callback taking a status string (may be a no-op).
    Returns the final assistant text, or a refusal/error message.
    """
    try:
        # Safety filter on incoming message; refuse outright if blocked.
        safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
        if blocked_in:
            return refusal_reply(reason_in)

        # Optional PHI redaction for prompts sent to an external LLM.
        # NOTE(review): redact_phi is defined elsewhere in this file/module.
        redacted_in = safe_in
        if PHI_MODE and REDACT_BEFORE_LLM:
            redacted_in = redact_phi(safe_in)

        # Gradio file objects expose .name; plain strings pass through as-is.
        file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]

        if file_paths:
            # ---- CSV analysis path ----
            dataframes, schema_parts = [], []
            for i, p in enumerate(file_paths):
                # Only .csv is handled; other uploads are silently skipped.
                # NOTE(review): case-sensitive — ".CSV" would be ignored.
                if p.endswith(".csv"):
                    try:
                        df = pd.read_csv(p)
                    except UnicodeDecodeError:
                        # Fall back for non-UTF-8 exports (e.g. Excel latin1).
                        df = pd.read_csv(p, encoding="latin1")
                    dataframes.append(df)
                    # Head preview gives the code generator the column schema.
                    schema_parts.append(
                        f"DataFrame `dfs[{i}]` (`{os.path.basename(p)}`):\n{df.head().to_markdown()}\n"
                    )

            if not dataframes:
                return "Please upload at least one CSV file."

            schema_context = "\n".join(schema_parts)

            # If external PHI is not allowed, send the redacted prompt instead.
            prompt_for_code = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in

            yield_update("""```
🧠 Generating aligned analysis script...
```""")
            # NOTE(review): _create_python_script is defined elsewhere in this file.
            analysis_script = _create_python_script(prompt_for_code, schema_context)

            yield_update("""```
⚙️ Executing script to extract raw data...
```""")
            # The generated script sees the uploaded frames as `dfs` plus a
            # few safe modules; its printed output is captured as findings.
            execution_namespace = {"dfs": dataframes, "pd": pd, "re": re, "json": json}
            output_buffer = io.StringIO()

            try:
                # SECURITY: exec() runs LLM-generated code with full process
                # privileges — consider sandboxing/allow-listing this.
                with redirect_stdout(output_buffer):
                    exec(analysis_script, execution_namespace)
                raw_data_output = output_buffer.getvalue()
            except Exception as e:
                # Surface the generated script so the user can debug it.
                return (
                    f"An error occurred executing the script: {e}\n\nGenerated Script:\n"
                    f"```python\n{analysis_script}\n```"
                )

            yield_update("""```
✍️ Synthesizing final comprehensive report...
```""")
            writer_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
            final_report = _generate_final_report(writer_input, raw_data_output)
            return _sanitize_text(final_report)
        else:
            # ---- Pure chat path (no files uploaded) ----
            chat_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
            prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
            return _sanitize_text(cohere_chat(prompt) or "How can I help further?")

    except Exception as e:
        # NOTE(review): tb is computed but never used (only str(e) is logged).
        tb = traceback.format_exc()
        safe_log("app_error", {"err": str(e)})
        # PHI mode hides exception details from end users.
        return "A critical error occurred. Please contact your administrator." if PHI_MODE else f"A critical error occurred: {e}"
245
 
 
246
 
247
# Legal copy rendered in the "Privacy & Terms" accordion; load_markdown_text
# returns an inline error string if either file is missing.
PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
 
249
 
 
250
 
251
+ # ---------------------- Sleek UI assets (CSS/JS only) ----------------------
252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
# Custom stylesheet injected into gr.Blocks(css=...): dark theme, full-bleed
# two-column grid (420px left control rail, flexible right output pane).
SLEEK_CSS = """
/* Full-bleed, modern look */
:root, body, #root, .gradio-container { height: 100%; }
.gradio-container { padding: 0 !important; }
.block { padding: 0 !important; }

/* Header */
.header {
  padding: 20px 28px;
  background: linear-gradient(135deg, #0e1726, #1d2a44 60%, #243a5e);
  color: #fff;
  display: flex; align-items: center; justify-content: space-between;
  gap: 16px;
}
.header h1 { margin: 0; font-size: 22px; letter-spacing: 0.3px; font-weight: 600; }
.header .badge { font-size: 12px; opacity: 0.9; background:#ffffff22; padding:6px 10px; border-radius: 999px; }

/* Main layout */
.main {
  display: grid;
  grid-template-columns: 420px 1fr;
  gap: 16px;
  padding: 16px;
  height: calc(100vh - 72px);
  box-sizing: border-box;
}
.left, .right {
  background: #0b1020;
  color: #e9edf3;
  border-radius: 16px;
  border: 1px solid #1c2642;
}
.left { padding: 16px; display: flex; flex-direction: column; gap: 12px; }
.right { padding: 0; display: flex; flex-direction: column; }

/* Panels */
.panel-title { font-size: 14px; font-weight: 600; color: #aeb8cc; margin-bottom: 6px; }
.helper { font-size: 12px; color: #97a3bb; margin-bottom: 8px; }

/* Sticky actions */
.actions {
  display: flex; gap: 8px; align-items: center; justify-content: stretch;
}
.actions .gr-button { flex: 1; }

/* Tabs full height */
.right .tabs { height: 100%; display: flex; flex-direction: column; }
.right .tabitem { flex: 1; display: flex; flex-direction: column; }
#chatbot_container { flex: 1; }
#chatbot_container .gr-chatbot { height: 100%; }

/* Tiny separators */
.hr { height: 1px; background: #16203b; margin: 10px 0; }

/* Voice hint */
.voice-hint { font-size: 12px; color:#9fb0cc; margin-top: 4px; }
"""
310
+
311
# Client-side speech-to-text helper injected via gr.HTML. rs_toggle_stt(id)
# toggles a Web Speech API recognizer (Chrome/Edge only) and streams interim
# transcripts into the <textarea> inside the element with the given id,
# dispatching an "input" event so Gradio picks up the new value.
VOICE_STT_HTML = """
<script>
let __rs_rec = null;
function rs_toggle_stt(elemId){
  const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
  if (!SpeechRecognition){
    alert("This browser does not support Speech Recognition. Try Chrome or Edge.");
    return;
  }
  if (__rs_rec){ __rs_rec.stop(); __rs_rec = null; return; }
  __rs_rec = new SpeechRecognition();
  __rs_rec.lang = "en-US";
  __rs_rec.interimResults = true;
  __rs_rec.continuous = true;

  const box = document.querySelector(`#${elemId} textarea`);
  if (!box){ alert("Prompt box not found."); return; }
  let base = box.value || "";

  __rs_rec.onresult = (ev) => {
    let t = "";
    for (let i = ev.resultIndex; i < ev.results.length; i++){
      t += ev.results[i][0].transcript;
    }
    box.value = (base + " " + t).trim();
    box.dispatchEvent(new Event("input", { bubbles: true }));
  };
  __rs_rec.onend = () => { __rs_rec = null; };
  __rs_rec.start();
}
</script>
"""
343
+
344
+
345
# ---------------------- Sleek UI (with fixed State wiring) ----------------------

with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
    # Persistent in-memory history component (a proper gr.State, not a bare
    # Python list, which previously caused the list/_id error).
    assessment_history = gr.State([])

    # Header: app title plus a pill summarizing PHI/history configuration.
    with gr.Row(elem_classes=["header"]):
        # NOTE(review): "Augemented" is a typo in the displayed title.
        gr.Markdown("<h1>Clarity Ops Augemented Decision Support</h1>")
        pill = "PHI Mode ON · history off" if (PHI_MODE and not PERSIST_HISTORY) else \
            "PHI Mode ON" if PHI_MODE else "PHI Mode OFF"
        gr.Markdown(f"<span class='badge'>{pill}</span>")

    # Main layout: left column = inputs/actions, right column = output tabs.
    with gr.Row(elem_classes=["main"]):
        # Left panel: file upload, prompt, action buttons, legal accordion.
        with gr.Column(elem_classes=["left"]):
            gr.Markdown("<div class='panel-title'>New Assessment</div>")
            gr.Markdown("<div class='helper'>Upload CSVs for analysis, or enter a prompt. Voice works in modern browsers.</div>")
            files_input = gr.Files(
                label="Upload Data Files (.csv)",
                file_count="multiple",
                type="filepath",
                file_types=[".csv"],
            )
            # elem_id="prompt_box" is referenced by the voice JS helper.
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Paste your scenario or question here...",
                lines=12,
                elem_id="prompt_box",
                autofocus=True,
            )

            with gr.Row(elem_classes=["actions"]):
                send_btn = gr.Button("▶️ Run Analysis", variant="primary")
                clear_btn = gr.Button("🧹 Clear")
                voice_btn = gr.Button("🎙️ Voice")

            gr.Markdown("<div class='voice-hint'>Click Voice to start/stop dictation into the prompt box.</div>")
            ping_btn = gr.Button("🔌 Ping Cohere")
            ping_out = gr.Markdown()

            gr.Markdown("<div class='hr'></div>")
            # PHI warning is only built into the UI when PHI_MODE was set at
            # import time (not toggleable at runtime).
            if PHI_MODE:
                gr.Markdown(
                    "⚠️ **PHI Mode:** History persistence is disabled by default. Avoid unnecessary identifiers."
                )

            with gr.Accordion("Privacy & Terms", open=False):
                gr.Markdown(PRIVACY_POLICY_TEXT)
                gr.Markdown("<div class='hr'></div>")
                gr.Markdown(TERMS_OF_SERVICE_TEXT)

        # Right panel: live chat output plus a history review tab.
        with gr.Column(elem_classes=["right"]):
            with gr.Tabs(elem_classes=["tabs"]):
                with gr.TabItem("Current Assessment", id=0, elem_classes=["tabitem"]):
                    with gr.Column(elem_id="chatbot_container"):
                        # type="messages" -> list of {"role", "content"} dicts.
                        chat_history_output = gr.Chatbot(label="Analysis Output", type="messages")
                with gr.TabItem("Assessment History", id=1, elem_classes=["tabitem"]):
                    gr.Markdown("### Review Past Assessments")
                    history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
                    history_display = gr.Markdown(label="Selected Assessment Details")

    # Inject the browser-side voice-to-text helper script.
    gr.HTML(VOICE_STT_HTML)

    # --------- Event logic (unchanged analysis flow) ----------
+
414
    def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
        """Generator handler for the Run button.

        Yields (chat messages, history state, dropdown update) tuples: first a
        "thinking" bubble, then the final answer plus an updated history list.
        """
        if not prompt:
            gr.Warning("Please enter a prompt.")
            # Echo inputs back unchanged so the UI is not cleared.
            yield chat_history_list, history_state_list, gr.update()
            return

        # Append the user's message to the visible chat.
        chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)

        # Progress callback required by handle(); this UI does not stream.
        def dummy_update(message: str):
            pass

        # Temporary "thinking" bubble shown while handle() runs. It is later
        # replaced because final_chat extends chat_with_user_msg, not this list.
        thinking_message = _append_msg(
            chat_with_user_msg,
            "assistant",
            """```
🧠 Generating and executing analysis... Please wait.
```""",
        )
        yield thinking_message, history_state_list, gr.update()

        # Run the (blocking) analysis or chat pipeline.
        ai_response_text = handle(prompt, files, dummy_update)

        # Append final assistant response in place of the thinking bubble.
        final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
        # Timestamp doubles as the unique id for the history record.
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Capture uploaded filenames (if any) for the history record.
        file_names: List[str] = []
        if files:
            file_names = [
                os.path.basename(f.name if hasattr(f, "name") else f) for f in files
            ]

        # Build the history record.
        new_entry = {
            "id": timestamp,
            "prompt": prompt,
            "files": file_names,
            "response": ai_response_text,
            "chat_history": final_chat,
        }

        # Respect PHI/history flags: only persist when allowed.
        if PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0)):
            updated_history: List[Dict[str, Any]] = (history_state_list or []) + [new_entry]
        else:
            updated_history = history_state_list or []

        # Dropdown labels: "<timestamp> - <first 40 chars of prompt>..."
        # (view_history splits on " - " to recover the id).
        history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]

        yield final_chat, updated_history, gr.update(choices=history_labels)
469
+
470
    def view_history(selection: str, history_state_list: List[Dict[str, Any]]) -> str:
        """Render the dropdown-selected past assessment as a Markdown page."""
        if not selection or not history_state_list:
            return ""
        # Labels are "<timestamp> - <prompt prefix>..."; the timestamp format
        # contains no " - ", so the first split recovers the id.
        try:
            selected_id = selection.split(" - ", 1)[0]
        except Exception:
            selected_id = selection

        selected_assessment = next(
            (item for item in history_state_list if item.get("id") == selected_id), None
        )
        if not selected_assessment:
            return "Could not find the selected assessment."

        file_list = selected_assessment.get("files", [])
        file_list_md = "\n- ".join(file_list) if file_list else "*(no files uploaded)*"

        # Flatten the stored chat transcript into Markdown lines.
        chat_entries = selected_assessment.get("chat_history", [])
        chat_md_lines = []
        for msg in chat_entries:
            role = msg.get("role", "").capitalize()
            content = msg.get("content", "")
            chat_md_lines.append(f"**{role}:** {content}")
        chat_md = "\n\n".join(chat_md_lines)

        return f"""### Assessment from: {selected_assessment['id']}
**Files Used:**
- {file_list_md}
---
**Original Prompt:**
> {selected_assessment['prompt']}
---
**AI Generated Response:**
{selected_assessment['response']}
---
**Chat Transcript:**
{chat_md}
"""
508
+
509
    # Wire events (using the proper gr.State component for history).
    send_btn.click(
        run_analysis_wrapper,
        inputs=[prompt_input, files_input, chat_history_output, assessment_history],
        outputs=[chat_history_output, assessment_history, history_dropdown],
    )
    history_dropdown.change(
        view_history,
        inputs=[history_dropdown, assessment_history],
        outputs=[history_display],
    )
    # Clear resets prompt, file picker, and visible chat; stored assessment
    # history is deliberately preserved.
    clear_btn.click(
        lambda: (None, None, []),
        outputs=[prompt_input, files_input, chat_history_output],
    )
    ping_btn.click(ping_cohere, outputs=[ping_out])
    # Voice button runs client-side JS only; no Python callback involved.
    voice_btn.click(None, [], [], js="rs_toggle_stt('prompt_box')")
526
+
527
+
528
if __name__ == "__main__":
    # Warn early when the API key is missing; the server still starts so the
    # UI can render, but all Cohere calls will fail.
    if not os.getenv("COHERE_API_KEY"):
        print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
    # Bind to all interfaces; PORT env var overrides the default 7860.
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))