Rajan Sharma committed on
Commit
56288ba
·
verified ·
1 Parent(s): 99d5da9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -47
app.py CHANGED
@@ -1,8 +1,11 @@
1
- # app_phi_sleek.py
2
  #
3
- # Sleek UI + Voice-to-Text for the HIPAA-aware app.
4
- # IMPORTANT: All analysis/assessment logic is preserved exactly.
5
- # Changes are limited to the UI layout, CSS, and a client-side STT helper.
 
 
 
6
 
7
  from __future__ import annotations
8
 
@@ -24,22 +27,25 @@ from langchain_cohere import ChatCohere # noqa: F401
24
  from settings import (
25
  GENERAL_CONVERSATION_PROMPT,
26
  COHERE_MODEL_PRIMARY,
27
- COHERE_TIMEOUT_S, # noqa: F401
28
- USE_OPEN_FALLBACKS, # noqa: F401
29
- # HIPAA flags
30
- PHI_MODE,
31
- PERSIST_HISTORY,
32
- HISTORY_TTL_DAYS,
33
- REDACT_BEFORE_LLM,
34
- ALLOW_EXTERNAL_PHI,
35
  )
 
 
 
 
 
 
 
 
 
36
 
37
  from audit_log import log_event
38
  from privacy import safety_filter, refusal_reply
39
  from llm_router import cohere_chat, _co_client, cohere_embed
40
 
41
 
42
- # ---------------------- helpers (unchanged logic) ----------------------
43
 
44
  def load_markdown_text(filepath: str) -> str:
45
  try:
@@ -52,10 +58,11 @@ def load_markdown_text(filepath: str) -> str:
52
  def _sanitize_text(s: str) -> str:
53
  if not isinstance(s, str):
54
  return s
 
55
  return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
56
 
57
 
58
- # Very conservative PHI redaction (unchanged idea)
59
  PHI_PATTERNS = [
60
  (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),
61
  (re.compile(r"\b\d{9}\b"), "[REDACTED_MRN]"),
@@ -75,11 +82,13 @@ def redact_phi(text: str) -> str:
75
  return t
76
 
77
  def safe_log(event_name: str, meta: dict | None = None):
 
78
  try:
79
  meta = (meta or {}).copy()
80
  meta.pop("raw", None)
81
  log_event(event_name, None, meta)
82
  except Exception:
 
83
  pass
84
 
85
 
@@ -183,11 +192,12 @@ def ping_cohere() -> str:
183
 
184
  def handle(user_msg: str, files: list, yield_update) -> str:
185
  try:
 
186
  safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
187
  if blocked_in:
188
  return refusal_reply(reason_in)
189
 
190
- # Redact prompt if PHI_MODE and external PHI not allowed
191
  redacted_in = safe_in
192
  if PHI_MODE and REDACT_BEFORE_LLM:
193
  redacted_in = redact_phi(safe_in)
@@ -195,6 +205,7 @@ def handle(user_msg: str, files: list, yield_update) -> str:
195
  file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
196
 
197
  if file_paths:
 
198
  dataframes, schema_parts = [], []
199
  for i, p in enumerate(file_paths):
200
  if p.endswith(".csv"):
@@ -211,6 +222,8 @@ def handle(user_msg: str, files: list, yield_update) -> str:
211
  return "Please upload at least one CSV file."
212
 
213
  schema_context = "\n".join(schema_parts)
 
 
214
  prompt_for_code = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
215
 
216
  yield_update("""```
@@ -241,6 +254,7 @@ def handle(user_msg: str, files: list, yield_update) -> str:
241
  final_report = _generate_final_report(writer_input, raw_data_output)
242
  return _sanitize_text(final_report)
243
  else:
 
244
  chat_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
245
  prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
246
  return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
@@ -254,7 +268,8 @@ def handle(user_msg: str, files: list, yield_update) -> str:
254
  PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
255
  TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
256
 
257
- # ---------------------- Sleek UI Layer only ----------------------
 
258
 
259
  SLEEK_CSS = """
260
  /* Full-bleed, modern look */
@@ -347,7 +362,13 @@ function rs_toggle_stt(elemId){
347
  </script>
348
  """
349
 
 
 
 
350
  with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
 
 
 
351
  # Header
352
  with gr.Row(elem_classes=["header"]):
353
  gr.Markdown("<h1>Universal AI Data Analyst</h1>")
@@ -355,7 +376,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
355
  "PHI Mode ON" if PHI_MODE else "PHI Mode OFF"
356
  gr.Markdown(f"<span class='badge'>{pill}</span>")
357
 
358
- # Main
359
  with gr.Row(elem_classes=["main"]):
360
  # Left panel
361
  with gr.Column(elem_classes=["left"]):
@@ -387,12 +408,10 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
387
  gr.Markdown("<div class='hr'></div>")
388
  if PHI_MODE:
389
  gr.Markdown(
390
- "⚠️ **PHI Mode:** History persistence is disabled by default. Avoid unnecessary identifiers.",
391
  )
392
 
393
  with gr.Accordion("Privacy & Terms", open=False):
394
- PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
395
- TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
396
  gr.Markdown(PRIVACY_POLICY_TEXT)
397
  gr.Markdown("<div class='hr'></div>")
398
  gr.Markdown(TERMS_OF_SERVICE_TEXT)
@@ -402,20 +421,16 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
402
  with gr.Tabs(elem_classes=["tabs"]):
403
  with gr.TabItem("Current Assessment", id=0, elem_classes=["tabitem"]):
404
  with gr.Column(elem_id="chatbot_container"):
405
- chat_history_output = gr.Chatbot(
406
- label="Analysis Output", type="messages"
407
- )
408
  with gr.TabItem("Assessment History", id=1, elem_classes=["tabitem"]):
409
  gr.Markdown("### Review Past Assessments")
410
- history_dropdown = gr.Dropdown(
411
- label="Select an assessment to review", choices=[]
412
- )
413
  history_display = gr.Markdown(label="Selected Assessment Details")
414
 
415
- # Inject STT helper
416
  gr.HTML(VOICE_STT_HTML)
417
 
418
- # --------- Logic (unchanged analysis flow) ----------
419
 
420
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
421
  if not prompt:
@@ -423,11 +438,14 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
423
  yield chat_history_list, history_state_list, gr.update()
424
  return
425
 
 
426
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
427
 
 
428
  def dummy_update(message: str):
429
  pass
430
 
 
431
  thinking_message = _append_msg(
432
  chat_with_user_msg,
433
  "assistant",
@@ -437,17 +455,21 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
437
  )
438
  yield thinking_message, history_state_list, gr.update()
439
 
 
440
  ai_response_text = handle(prompt, files, dummy_update)
441
 
 
442
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
443
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
444
 
 
445
  file_names: List[str] = []
446
  if files:
447
  file_names = [
448
  os.path.basename(f.name if hasattr(f, "name") else f) for f in files
449
  ]
450
 
 
451
  new_entry = {
452
  "id": timestamp,
453
  "prompt": prompt,
@@ -456,15 +478,13 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
456
  "chat_history": final_chat,
457
  }
458
 
459
- # Respect PHI history rules exactly as before
460
  if PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0)):
461
  updated_history: List[Dict[str, Any]] = (history_state_list or []) + [new_entry]
462
  else:
463
  updated_history = history_state_list or []
464
 
465
- history_labels = [
466
- f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history
467
- ]
468
 
469
  yield final_chat, updated_history, gr.update(choices=history_labels)
470
 
@@ -477,8 +497,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
477
  selected_id = selection
478
 
479
  selected_assessment = next(
480
- (item for item in history_state_list if item.get("id") == selected_id),
481
- None,
482
  )
483
  if not selected_assessment:
484
  return "Could not find the selected assessment."
@@ -508,30 +527,24 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
508
  {chat_md}
509
  """
510
 
511
- # Wire events
512
  send_btn.click(
513
  run_analysis_wrapper,
514
- inputs=[prompt_input, files_input, chat_history_output, []], # local state
515
- outputs=[chat_history_output, [], history_dropdown],
516
  )
517
- # Use a hidden State for history to avoid changing logic; or substitute your existing State variable.
518
- # If you want to persist in-memory between runs, replace [] with a gr.State([]) you manage outside.
519
-
520
  history_dropdown.change(
521
- view_history, inputs=[history_dropdown, []], outputs=[history_display]
 
 
522
  )
523
-
524
  clear_btn.click(
525
  lambda: (None, None, []),
526
  outputs=[prompt_input, files_input, chat_history_output],
527
  )
528
-
529
  ping_btn.click(ping_cohere, outputs=[ping_out])
 
530
 
531
- # Voice button (client-side only)
532
- voice_btn.click(
533
- None, [], [], js="rs_toggle_stt('prompt_box')"
534
- )
535
 
536
  if __name__ == "__main__":
537
  if not os.getenv("COHERE_API_KEY"):
 
1
+ # app.py
2
  #
3
+ # Universal AI Data Analyst with:
4
+ # - Unchanged analysis & assessment logic
5
+ # - Fixed Gradio event wiring (uses gr.State for history)
6
+ # - Triple-quoted progress strings (no unterminated literals)
7
+ # - Sleek full-width UI and Voice-to-Text (browser Web Speech API)
8
+ # - Optional HIPAA flags (fallback defaults if not present in settings.py)
9
 
10
  from __future__ import annotations
11
 
 
27
  from settings import (
28
  GENERAL_CONVERSATION_PROMPT,
29
  COHERE_MODEL_PRIMARY,
30
+ COHERE_TIMEOUT_S, # noqa: F401
31
+ USE_OPEN_FALLBACKS # noqa: F401
 
 
 
 
 
 
32
  )
33
+ # Try to import optional HIPAA flags; fall back to safe defaults if not defined.
34
+ try:
35
+ from settings import PHI_MODE, PERSIST_HISTORY, HISTORY_TTL_DAYS, REDACT_BEFORE_LLM, ALLOW_EXTERNAL_PHI
36
+ except Exception:
37
+ PHI_MODE = False
38
+ PERSIST_HISTORY = True
39
+ HISTORY_TTL_DAYS = 365
40
+ REDACT_BEFORE_LLM = False
41
+ ALLOW_EXTERNAL_PHI = True
42
 
43
  from audit_log import log_event
44
  from privacy import safety_filter, refusal_reply
45
  from llm_router import cohere_chat, _co_client, cohere_embed
46
 
47
 
48
+ # ---------------------- Helpers (analysis logic unchanged) ----------------------
49
 
50
  def load_markdown_text(filepath: str) -> str:
51
  try:
 
58
  def _sanitize_text(s: str) -> str:
59
  if not isinstance(s, str):
60
  return s
61
+ # Remove control characters (except newline and tab)
62
  return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
63
 
64
 
65
+ # Conservative PHI redaction patterns (only applied if PHI_MODE & REDACT_BEFORE_LLM are enabled)
66
  PHI_PATTERNS = [
67
  (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),
68
  (re.compile(r"\b\d{9}\b"), "[REDACTED_MRN]"),
 
82
  return t
83
 
84
  def safe_log(event_name: str, meta: dict | None = None):
85
+ # Avoid logging raw PHI or payloads
86
  try:
87
  meta = (meta or {}).copy()
88
  meta.pop("raw", None)
89
  log_event(event_name, None, meta)
90
  except Exception:
91
+ # Never raise from logging
92
  pass
93
 
94
 
 
192
 
193
  def handle(user_msg: str, files: list, yield_update) -> str:
194
  try:
195
+ # Safety filter on incoming message
196
  safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
197
  if blocked_in:
198
  return refusal_reply(reason_in)
199
 
200
+ # Optional PHI redaction for prompts sent to an external LLM
201
  redacted_in = safe_in
202
  if PHI_MODE and REDACT_BEFORE_LLM:
203
  redacted_in = redact_phi(safe_in)
 
205
  file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
206
 
207
  if file_paths:
208
+ # CSV analysis path (unchanged)
209
  dataframes, schema_parts = [], []
210
  for i, p in enumerate(file_paths):
211
  if p.endswith(".csv"):
 
222
  return "Please upload at least one CSV file."
223
 
224
  schema_context = "\n".join(schema_parts)
225
+
226
+ # If external PHI is not allowed, use redacted prompt; otherwise use original
227
  prompt_for_code = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
228
 
229
  yield_update("""```
 
254
  final_report = _generate_final_report(writer_input, raw_data_output)
255
  return _sanitize_text(final_report)
256
  else:
257
+ # Pure chat path
258
  chat_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
259
  prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
260
  return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
 
268
  PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
269
  TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
270
 
271
+
272
+ # ---------------------- Sleek UI assets (CSS/JS only) ----------------------
273
 
274
  SLEEK_CSS = """
275
  /* Full-bleed, modern look */
 
362
  </script>
363
  """
364
 
365
+
366
+ # ---------------------- Sleek UI (with fixed State wiring) ----------------------
367
+
368
  with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
369
+ # Persistent in-memory history component (fixes list/_id error)
370
+ assessment_history = gr.State([])
371
+
372
  # Header
373
  with gr.Row(elem_classes=["header"]):
374
  gr.Markdown("<h1>Universal AI Data Analyst</h1>")
 
376
  "PHI Mode ON" if PHI_MODE else "PHI Mode OFF"
377
  gr.Markdown(f"<span class='badge'>{pill}</span>")
378
 
379
+ # Main layout
380
  with gr.Row(elem_classes=["main"]):
381
  # Left panel
382
  with gr.Column(elem_classes=["left"]):
 
408
  gr.Markdown("<div class='hr'></div>")
409
  if PHI_MODE:
410
  gr.Markdown(
411
+ "⚠️ **PHI Mode:** History persistence is disabled by default. Avoid unnecessary identifiers."
412
  )
413
 
414
  with gr.Accordion("Privacy & Terms", open=False):
 
 
415
  gr.Markdown(PRIVACY_POLICY_TEXT)
416
  gr.Markdown("<div class='hr'></div>")
417
  gr.Markdown(TERMS_OF_SERVICE_TEXT)
 
421
  with gr.Tabs(elem_classes=["tabs"]):
422
  with gr.TabItem("Current Assessment", id=0, elem_classes=["tabitem"]):
423
  with gr.Column(elem_id="chatbot_container"):
424
+ chat_history_output = gr.Chatbot(label="Analysis Output", type="messages")
 
 
425
  with gr.TabItem("Assessment History", id=1, elem_classes=["tabitem"]):
426
  gr.Markdown("### Review Past Assessments")
427
+ history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
 
 
428
  history_display = gr.Markdown(label="Selected Assessment Details")
429
 
430
+ # Inject voice-to-text helper
431
  gr.HTML(VOICE_STT_HTML)
432
 
433
+ # --------- Event logic (unchanged analysis flow) ----------
434
 
435
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
436
  if not prompt:
 
438
  yield chat_history_list, history_state_list, gr.update()
439
  return
440
 
441
+ # Append user's message
442
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
443
 
444
+ # Optional progress callback (not streaming in this UI)
445
  def dummy_update(message: str):
446
  pass
447
 
448
+ # Thinking bubble
449
  thinking_message = _append_msg(
450
  chat_with_user_msg,
451
  "assistant",
 
455
  )
456
  yield thinking_message, history_state_list, gr.update()
457
 
458
+ # Run analysis/chat
459
  ai_response_text = handle(prompt, files, dummy_update)
460
 
461
+ # Append final assistant response
462
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
463
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
464
 
465
+ # Capture filenames (if any)
466
  file_names: List[str] = []
467
  if files:
468
  file_names = [
469
  os.path.basename(f.name if hasattr(f, "name") else f) for f in files
470
  ]
471
 
472
+ # Build history record
473
  new_entry = {
474
  "id": timestamp,
475
  "prompt": prompt,
 
478
  "chat_history": final_chat,
479
  }
480
 
481
+ # Respect PHI/history flags
482
  if PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0)):
483
  updated_history: List[Dict[str, Any]] = (history_state_list or []) + [new_entry]
484
  else:
485
  updated_history = history_state_list or []
486
 
487
+ history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
 
 
488
 
489
  yield final_chat, updated_history, gr.update(choices=history_labels)
490
 
 
497
  selected_id = selection
498
 
499
  selected_assessment = next(
500
+ (item for item in history_state_list if item.get("id") == selected_id), None
 
501
  )
502
  if not selected_assessment:
503
  return "Could not find the selected assessment."
 
527
  {chat_md}
528
  """
529
 
530
+ # Wire events (using proper gr.State component for history)
531
  send_btn.click(
532
  run_analysis_wrapper,
533
+ inputs=[prompt_input, files_input, chat_history_output, assessment_history],
534
+ outputs=[chat_history_output, assessment_history, history_dropdown],
535
  )
 
 
 
536
  history_dropdown.change(
537
+ view_history,
538
+ inputs=[history_dropdown, assessment_history],
539
+ outputs=[history_display],
540
  )
 
541
  clear_btn.click(
542
  lambda: (None, None, []),
543
  outputs=[prompt_input, files_input, chat_history_output],
544
  )
 
545
  ping_btn.click(ping_cohere, outputs=[ping_out])
546
+ voice_btn.click(None, [], [], js="rs_toggle_stt('prompt_box')")
547
 
 
 
 
 
548
 
549
  if __name__ == "__main__":
550
  if not os.getenv("COHERE_API_KEY"):