Rajan Sharma committed on
Commit
3d2ccd6
·
verified ·
1 Parent(s): a8e6509

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -35
app.py CHANGED
@@ -1,7 +1,15 @@
1
- # app.py
 
 
 
 
 
 
 
 
2
  #
3
- # Gradio-based AI data analyst app with persistent chat & assessment history.
4
- # Each session stores: timestamp, prompt, files (if any), final response, and full chat transcript.
5
 
6
  from __future__ import annotations
7
 
@@ -25,7 +33,14 @@ from settings import (
25
  COHERE_MODEL_PRIMARY,
26
  COHERE_TIMEOUT_S, # noqa: F401
27
  USE_OPEN_FALLBACKS, # noqa: F401
 
 
 
 
 
 
28
  )
 
29
  from audit_log import log_event
30
  from privacy import safety_filter, refusal_reply
31
  from llm_router import cohere_chat, _co_client, cohere_embed
@@ -46,6 +61,40 @@ def _sanitize_text(s: str) -> str:
46
  return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
47
 
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
50
  EXPERT_ANALYTICAL_GUIDELINES = """
51
  --- EXPERT ANALYTICAL GUIDELINES ---
@@ -101,7 +150,7 @@ def _generate_long_report(prompt: str) -> str:
101
  )
102
  return response.text
103
  except Exception as e:
104
- log_event("cohere_chat_error", None, {"err": str(e)})
105
  return f"Error during final report generation: {e}"
106
 
107
 
@@ -146,13 +195,22 @@ def ping_cohere() -> str:
146
 
147
  def handle(user_msg: str, files: list, yield_update) -> str:
148
  try:
 
149
  safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
150
  if blocked_in:
151
  return refusal_reply(reason_in)
152
 
 
 
 
 
 
 
153
  file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
154
 
155
  if file_paths:
 
 
156
  dataframes, schema_parts = [], []
157
  for i, p in enumerate(file_paths):
158
  if p.endswith(".csv"):
@@ -170,10 +228,13 @@ def handle(user_msg: str, files: list, yield_update) -> str:
170
 
171
  schema_context = "\n".join(schema_parts)
172
 
 
 
 
173
  yield_update("""```
174
  🧠 Generating aligned analysis script...
175
  ```""")
176
- analysis_script = _create_python_script(safe_in, schema_context)
177
 
178
  yield_update("""```
179
  ⚙️ Executing script to extract raw data...
@@ -194,31 +255,37 @@ def handle(user_msg: str, files: list, yield_update) -> str:
194
  yield_update("""```
195
  ✍️ Synthesizing final comprehensive report...
196
  ```""")
197
- final_report = _generate_final_report(safe_in, raw_data_output)
 
 
198
  return _sanitize_text(final_report)
199
  else:
200
- prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
 
 
201
  return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
202
 
203
  except Exception as e:
204
  tb = traceback.format_exc()
205
- log_event("app_error", None, {"err": str(e), "tb": tb})
206
- return f"A critical error occurred: {e}"
 
207
 
208
 
209
  PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
210
  TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
211
 
212
  with gr.Blocks(theme="soft", css="style.css") as demo:
213
- # Persistent history of past assessments / chat sessions
214
- # Each entry:
215
- # - id: timestamp
216
- # - prompt: original prompt
217
- # - files: list of uploaded filenames
218
- # - response: final response text
219
- # - chat_history: full transcript (list of {role, content})
220
  assessment_history = gr.State([])
221
 
 
 
 
 
 
 
 
222
  # Modals
223
  with gr.Group(visible=False) as privacy_modal:
224
  with gr.Blocks():
@@ -245,9 +312,7 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
245
  file_types=[".csv"],
246
  )
247
  prompt_input = gr.Textbox(
248
- label="Prompt",
249
- placeholder="Paste your scenario or question here.",
250
- lines=15,
251
  )
252
  with gr.Row():
253
  send_btn = gr.Button("▶️ Send / Run Analysis", variant="primary", scale=2)
@@ -277,22 +342,19 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
277
  # Logic
278
 
279
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
280
- """
281
- Handle a new user prompt and update chat & assessment history.
282
- """
283
  if not prompt:
284
  gr.Warning("Please enter a prompt.")
285
  yield chat_history_list, history_state_list, gr.update()
286
  return
287
 
288
- # Append user's message
289
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
290
 
291
- # Optional streaming update callback (unused here)
292
  def dummy_update(message: str):
293
  pass
294
 
295
- # Show thinking message
296
  thinking_message = _append_msg(
297
  chat_with_user_msg,
298
  "assistant",
@@ -305,18 +367,18 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
305
  # Run analysis/chat
306
  ai_response_text = handle(prompt, files, dummy_update)
307
 
308
- # Append final assistant response
309
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
310
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
311
 
312
- # Capture filenames (if any)
313
  file_names: List[str] = []
314
  if files:
315
  file_names = [
316
  os.path.basename(f.name if hasattr(f, "name") else f) for f in files
317
  ]
318
 
319
- # Create a new history record (always, even for chat-only)
320
  new_entry = {
321
  "id": timestamp,
322
  "prompt": prompt,
@@ -325,7 +387,12 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
325
  "chat_history": final_chat,
326
  }
327
 
328
- updated_history: List[Dict[str, Any]] = (history_state_list or []) + [new_entry]
 
 
 
 
 
329
  history_labels = [
330
  f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history
331
  ]
@@ -333,12 +400,8 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
333
  yield final_chat, updated_history, gr.update(choices=history_labels)
334
 
335
  def view_history(selection: str, history_state_list: List[Dict[str, Any]]) -> str:
336
- """
337
- Render details for a selected past assessment/chat session.
338
- """
339
  if not selection or not history_state_list:
340
  return ""
341
- # Selection label format: "timestamp - prompt..."
342
  try:
343
  selected_id = selection.split(" - ", 1)[0]
344
  except Exception:
@@ -354,7 +417,6 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
354
  file_list = selected_assessment.get("files", [])
355
  file_list_md = "\n- ".join(file_list) if file_list else "*(no files uploaded)*"
356
 
357
- # Chat transcript (role + content)
358
  chat_entries = selected_assessment.get("chat_history", [])
359
  chat_md_lines = []
360
  for msg in chat_entries:
@@ -377,7 +439,7 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
377
  {chat_md}
378
  """
379
 
380
- # Wire up UI events
381
  send_btn.click(
382
  run_analysis_wrapper,
383
  inputs=[prompt_input, files_input, chat_history_output, assessment_history],
 
1
+
2
+ # app_phi.py
3
+ #
4
+ # HIPAA-aware wrapper of the existing app. This keeps the analysis and assessment
5
+ # capabilities intact while adding PHI safeguards:
6
+ # - PHI_MODE flags with opt-in persistence
7
+ # - Redaction before sending content to any external LLM
8
+ # - Safer logging (no raw PHI)
9
+ # - Optional banner and history controls
10
  #
11
+ # NOTE: This file is designed to be a drop-in alternative to app.py.
12
+ # It preserves existing analysis logic and UI while adding HIPAA toggles.
13
 
14
  from __future__ import annotations
15
 
 
33
  COHERE_MODEL_PRIMARY,
34
  COHERE_TIMEOUT_S, # noqa: F401
35
  USE_OPEN_FALLBACKS, # noqa: F401
36
+ # HIPAA flags
37
+ PHI_MODE,
38
+ PERSIST_HISTORY,
39
+ HISTORY_TTL_DAYS,
40
+ REDACT_BEFORE_LLM,
41
+ ALLOW_EXTERNAL_PHI,
42
  )
43
+
44
  from audit_log import log_event
45
  from privacy import safety_filter, refusal_reply
46
  from llm_router import cohere_chat, _co_client, cohere_embed
 
61
  return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
62
 
63
 
64
+ # ---------------------- HIPAA helpers ----------------------
65
+
66
# Very conservative redaction table (risk reduction; not a full
# de-identification program). Applied top-to-bottom, so the more specific
# numeric shapes (SSN) are consumed before the broader ones (phone, ZIP).
PHI_PATTERNS = [
    (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),  # US SSN
    (re.compile(r"\b\d{9}\b"), "[REDACTED_MRN]"),  # 9-digit MRN (example)
    (re.compile(r"\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b"), "[REDACTED_PHONE]"),
    (re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"), "[REDACTED_EMAIL]"),
    (re.compile(r"\b(19|20)\d{2}-\d{2}-\d{2}\b"), "[REDACTED_DOB]"),  # YYYY-MM-DD
    (re.compile(r"\b\d{2}/\d{2}/(19|20)\d{2}\b"), "[REDACTED_DOB]"),  # MM/DD/YYYY
    (re.compile(r"\b\d{5}(-\d{4})?\b"), "[REDACTED_ZIP]"),  # ZIP (US)
]


def redact_phi(text: str) -> str:
    """Run every PHI_PATTERNS substitution over *text* and return the result.

    Non-string inputs are returned unchanged so callers can pass arbitrary
    values through without a type check of their own.
    """
    if not isinstance(text, str):
        return text
    redacted = text
    for pattern, replacement in PHI_PATTERNS:
        redacted = pattern.sub(replacement, redacted)
    return redacted
84
+
85
def safe_log(event_name: str, meta: dict | None = None):
    """Best-effort audit logging that never raises and never forwards raw payloads.

    The "raw" key is stripped from a copy of *meta* before the event reaches
    log_event, so the caller's dict is left untouched and raw PHI content is
    kept out of the audit trail.
    """
    try:
        scrubbed = dict(meta or {})
        scrubbed.pop("raw", None)  # drop raw payloads so no PHI reaches the log
        log_event(event_name, None, scrubbed)
    except Exception:
        # Logging must never break request handling in a PHI context.
        pass
94
+
95
+
96
+ # ---------------------- Original analysis helpers (unchanged) ----------------------
97
+
98
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
99
  EXPERT_ANALYTICAL_GUIDELINES = """
100
  --- EXPERT ANALYTICAL GUIDELINES ---
 
150
  )
151
  return response.text
152
  except Exception as e:
153
+ safe_log("cohere_chat_error", {"err": str(e)})
154
  return f"Error during final report generation: {e}"
155
 
156
 
 
195
 
196
  def handle(user_msg: str, files: list, yield_update) -> str:
197
  try:
198
+ # Run app safety filter
199
  safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
200
  if blocked_in:
201
  return refusal_reply(reason_in)
202
 
203
+ # If PHI mode is enabled and we are not permitted to send PHI externally,
204
+ # redact the content before any LLM calls.
205
+ redacted_in = safe_in
206
+ if PHI_MODE and REDACT_BEFORE_LLM:
207
+ redacted_in = redact_phi(safe_in)
208
+
209
  file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
210
 
211
  if file_paths:
212
+ # CSV analysis path preserved; we only use redacted_in in PROMPTS sent to the LLM.
213
+ # CSV data itself is processed locally as before.
214
  dataframes, schema_parts = [], []
215
  for i, p in enumerate(file_paths):
216
  if p.endswith(".csv"):
 
228
 
229
  schema_context = "\n".join(schema_parts)
230
 
231
+ # If PHI is not allowed externally and PHI_MODE is on, we will use the redacted prompt.
232
+ prompt_for_code = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
233
+
234
  yield_update("""```
235
  🧠 Generating aligned analysis script...
236
  ```""")
237
+ analysis_script = _create_python_script(prompt_for_code, schema_context)
238
 
239
  yield_update("""```
240
  ⚙️ Executing script to extract raw data...
 
255
  yield_update("""```
256
  ✍️ Synthesizing final comprehensive report...
257
  ```""")
258
+ # For the final narrative, also route based on PHI policy
259
+ writer_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
260
+ final_report = _generate_final_report(writer_input, raw_data_output)
261
  return _sanitize_text(final_report)
262
  else:
263
+ # Pure chat path: redact if PHI_MODE and external is not allowed
264
+ chat_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
265
+ prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
266
  return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
267
 
268
  except Exception as e:
269
  tb = traceback.format_exc()
270
+ safe_log("app_error", {"err": str(e)})
271
+ # Do not leak stack traces to UI in PHI mode
272
+ return "A critical error occurred. Please contact your administrator." if PHI_MODE else f"A critical error occurred: {e}"
273
 
274
 
275
  PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
276
  TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
277
 
278
  with gr.Blocks(theme="soft", css="style.css") as demo:
279
+ # Persistent history state (in-memory). PHI mode defaults to no persistence.
 
 
 
 
 
 
280
  assessment_history = gr.State([])
281
 
282
+ # Optional PHI banner
283
+ if PHI_MODE:
284
+ gr.Markdown(
285
+ "⚠️ **PHI Mode Enabled**: Protected Health Information safeguards are active. "
286
+ "History persistence is disabled by default. Avoid unnecessary identifiers."
287
+ )
288
+
289
  # Modals
290
  with gr.Group(visible=False) as privacy_modal:
291
  with gr.Blocks():
 
312
  file_types=[".csv"],
313
  )
314
  prompt_input = gr.Textbox(
315
+ label="Prompt", placeholder="Paste your scenario or question here.", lines=15
 
 
316
  )
317
  with gr.Row():
318
  send_btn = gr.Button("▶️ Send / Run Analysis", variant="primary", scale=2)
 
342
  # Logic
343
 
344
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
 
 
 
345
  if not prompt:
346
  gr.Warning("Please enter a prompt.")
347
  yield chat_history_list, history_state_list, gr.update()
348
  return
349
 
350
+ # Append user message
351
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
352
 
353
+ # Placeholder for streamed updates (unused)
354
  def dummy_update(message: str):
355
  pass
356
 
357
+ # Thinking message
358
  thinking_message = _append_msg(
359
  chat_with_user_msg,
360
  "assistant",
 
367
  # Run analysis/chat
368
  ai_response_text = handle(prompt, files, dummy_update)
369
 
370
+ # Append assistant response
371
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
372
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
373
 
374
+ # Filenames
375
  file_names: List[str] = []
376
  if files:
377
  file_names = [
378
  os.path.basename(f.name if hasattr(f, "name") else f) for f in files
379
  ]
380
 
381
+ # Construct history entry
382
  new_entry = {
383
  "id": timestamp,
384
  "prompt": prompt,
 
387
  "chat_history": final_chat,
388
  }
389
 
390
+ # Persist only if allowed
391
+ if PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0)):
392
+ updated_history: List[Dict[str, Any]] = (history_state_list or []) + [new_entry]
393
+ else:
394
+ updated_history = history_state_list or []
395
+
396
  history_labels = [
397
  f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history
398
  ]
 
400
  yield final_chat, updated_history, gr.update(choices=history_labels)
401
 
402
  def view_history(selection: str, history_state_list: List[Dict[str, Any]]) -> str:
 
 
 
403
  if not selection or not history_state_list:
404
  return ""
 
405
  try:
406
  selected_id = selection.split(" - ", 1)[0]
407
  except Exception:
 
417
  file_list = selected_assessment.get("files", [])
418
  file_list_md = "\n- ".join(file_list) if file_list else "*(no files uploaded)*"
419
 
 
420
  chat_entries = selected_assessment.get("chat_history", [])
421
  chat_md_lines = []
422
  for msg in chat_entries:
 
439
  {chat_md}
440
  """
441
 
442
+ # Wire up UI
443
  send_btn.click(
444
  run_analysis_wrapper,
445
  inputs=[prompt_input, files_input, chat_history_output, assessment_history],