Rajan Sharma committed
Update app.py
app.py
CHANGED
@@ -1,7 +1,15 @@
-
+
+# app_phi.py
+#
+# HIPAA-aware wrapper of the existing app. This keeps the analysis and assessment
+# capabilities intact while adding PHI safeguards:
+# - PHI_MODE flags with opt-in persistence
+# - Redaction before sending content to any external LLM
+# - Safer logging (no raw PHI)
+# - Optional banner and history controls
 #
-#
-#
+# NOTE: This file is designed to be a drop-in alternative to app.py.
+# It preserves existing analysis logic and UI while adding HIPAA toggles.
 
 from __future__ import annotations
 
@@ -25,7 +33,14 @@ from settings import (
     COHERE_MODEL_PRIMARY,
     COHERE_TIMEOUT_S,  # noqa: F401
     USE_OPEN_FALLBACKS,  # noqa: F401
+    # HIPAA flags
+    PHI_MODE,
+    PERSIST_HISTORY,
+    HISTORY_TTL_DAYS,
+    REDACT_BEFORE_LLM,
+    ALLOW_EXTERNAL_PHI,
 )
+
 from audit_log import log_event
 from privacy import safety_filter, refusal_reply
 from llm_router import cohere_chat, _co_client, cohere_embed
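The six HIPAA flags imported here are defined in `settings.py`, which this commit does not show. A minimal sketch of what those definitions might look like, assuming environment-variable wiring with PHI-safe defaults (the env-var names and defaults are assumptions, not part of the commit):

```python
# Hypothetical settings.py additions. The diff imports these names but does not
# show their definitions; the env-var scheme and defaults here are assumptions.
import os

def _env_bool(name: str, default: bool) -> bool:
    # Treat "1", "true", "yes" (any case) as true; a missing var keeps the default.
    val = os.getenv(name)
    return default if val is None else val.strip().lower() in {"1", "true", "yes"}

PHI_MODE = _env_bool("PHI_MODE", False)                      # master PHI switch
PERSIST_HISTORY = _env_bool("PERSIST_HISTORY", True)         # keep session history
HISTORY_TTL_DAYS = int(os.getenv("HISTORY_TTL_DAYS", "0"))   # 0 = no TTL opt-in
REDACT_BEFORE_LLM = _env_bool("REDACT_BEFORE_LLM", True)     # scrub prompts pre-LLM
ALLOW_EXTERNAL_PHI = _env_bool("ALLOW_EXTERNAL_PHI", False)  # raw PHI never leaves by default
```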
@@ -46,6 +61,40 @@ def _sanitize_text(s: str) -> str:
     return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
 
 
+# ---------------------- HIPAA helpers ----------------------
+
+# Very conservative redaction (risk reduction; not a full de-identification program).
+PHI_PATTERNS = [
+    (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),  # US SSN
+    (re.compile(r"\b\d{9}\b"), "[REDACTED_MRN]"),  # 9-digit MRN (example)
+    (re.compile(r"\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b"), "[REDACTED_PHONE]"),
+    (re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"), "[REDACTED_EMAIL]"),
+    (re.compile(r"\b(19|20)\d{2}-\d{2}-\d{2}\b"), "[REDACTED_DOB]"),  # YYYY-MM-DD
+    (re.compile(r"\b\d{2}/\d{2}/(19|20)\d{2}\b"), "[REDACTED_DOB]"),  # MM/DD/YYYY
+    (re.compile(r"\b\d{5}(-\d{4})?\b"), "[REDACTED_ZIP]"),  # ZIP (US)
+]
+
+def redact_phi(text: str) -> str:
+    if not isinstance(text, str):
+        return text
+    t = text
+    for pat, repl in PHI_PATTERNS:
+        t = pat.sub(repl, t)
+    return t
+
+def safe_log(event_name: str, meta: dict | None = None):
+    # Avoid logging raw PHI or payloads
+    try:
+        meta = (meta or {}).copy()
+        meta.pop("raw", None)
+        log_event(event_name, None, meta)
+    except Exception:
+        # Never raise from logging in PHI context
+        pass
+
+
+# ---------------------- Original analysis helpers (unchanged) ----------------------
+
 def _create_python_script(user_scenario: str, schema_context: str) -> str:
     EXPERT_ANALYTICAL_GUIDELINES = """
 --- EXPERT ANALYTICAL GUIDELINES ---
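The redaction is deliberately blunt: the bare 5-digit (ZIP) and 9-digit (MRN) patterns will also hit non-identifiers such as order numbers, which matches the "risk reduction, not de-identification" framing in the comment above. A self-contained demo of the new helper (patterns copied from the diff; the sample note and printed result are illustrative):

```python
# Demo of the redact_phi helper added in this commit. PHI_PATTERNS is copied
# verbatim from the diff; the sample note below is made up for illustration.
import re

PHI_PATTERNS = [
    (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),
    (re.compile(r"\b\d{9}\b"), "[REDACTED_MRN]"),
    (re.compile(r"\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b"), "[REDACTED_PHONE]"),
    (re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"), "[REDACTED_EMAIL]"),
    (re.compile(r"\b(19|20)\d{2}-\d{2}-\d{2}\b"), "[REDACTED_DOB]"),
    (re.compile(r"\b\d{2}/\d{2}/(19|20)\d{2}\b"), "[REDACTED_DOB]"),
    (re.compile(r"\b\d{5}(-\d{4})?\b"), "[REDACTED_ZIP]"),
]

def redact_phi(text: str) -> str:
    if not isinstance(text, str):
        return text
    for pat, repl in PHI_PATTERNS:
        text = pat.sub(repl, text)
    return text

note = "SSN 123-45-6789, phone 555-123-4567, email jane@example.org, DOB 1980-01-02, ZIP 02139"
print(redact_phi(note))
# SSN [REDACTED_SSN], phone [REDACTED_PHONE], email [REDACTED_EMAIL],
# DOB [REDACTED_DOB], ZIP [REDACTED_ZIP]
```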
@@ -101,7 +150,7 @@ def _generate_long_report(prompt: str) -> str:
         )
         return response.text
     except Exception as e:
-
+        safe_log("cohere_chat_error", {"err": str(e)})
         return f"Error during final report generation: {e}"
 
 
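`safe_log`, used in this except-branch, has a two-part contract: strip any `raw` payload before it reaches the audit log, and never propagate a logging failure. A standalone sketch with a stubbed `log_event` (the stub stands in for `audit_log.log_event`; its `(event, user, meta)` shape is inferred from the call in this diff):

```python
# Sketch of the safe_log contract. log_event here is a failing stub standing in
# for audit_log.log_event, so we can show that safe_log swallows the error.
def log_event(event_name, user, meta):
    raise RuntimeError("simulated logger outage")

def safe_log(event_name: str, meta: dict | None = None):
    try:
        meta = (meta or {}).copy()
        meta.pop("raw", None)  # never ship raw PHI payloads to the audit log
        log_event(event_name, None, meta)
    except Exception:
        pass  # a logging failure must not break PHI-mode request handling

safe_log("cohere_chat_error", {"err": "timeout", "raw": "PHI-laden payload"})
print("safe_log swallowed the logger failure")
```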
@@ -146,13 +195,22 @@ def ping_cohere() -> str:
 
 def handle(user_msg: str, files: list, yield_update) -> str:
     try:
+        # Run app safety filter
         safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
         if blocked_in:
             return refusal_reply(reason_in)
 
+        # If PHI mode is enabled and we are not permitted to send PHI externally,
+        # redact the content before any LLM calls.
+        redacted_in = safe_in
+        if PHI_MODE and REDACT_BEFORE_LLM:
+            redacted_in = redact_phi(safe_in)
+
         file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
 
         if file_paths:
+            # CSV analysis path preserved; we only use redacted_in in PROMPTS sent to the LLM.
+            # CSV data itself is processed locally as before.
             dataframes, schema_parts = [], []
             for i, p in enumerate(file_paths):
                 if p.endswith(".csv"):
@@ -170,10 +228,13 @@ def handle(user_msg: str, files: list, yield_update) -> str:
 
             schema_context = "\n".join(schema_parts)
 
+            # If PHI is not allowed externally and PHI_MODE is on, we will use the redacted prompt.
+            prompt_for_code = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
+
             yield_update("""```
 🧠 Generating aligned analysis script...
 ```""")
-            analysis_script = _create_python_script(
+            analysis_script = _create_python_script(prompt_for_code, schema_context)
 
             yield_update("""```
 ⚙️ Executing script to extract raw data...
@@ -194,31 +255,37 @@ def handle(user_msg: str, files: list, yield_update) -> str:
             yield_update("""```
 ✍️ Synthesizing final comprehensive report...
 ```""")
-
+            # For the final narrative, also route based on PHI policy
+            writer_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
+            final_report = _generate_final_report(writer_input, raw_data_output)
             return _sanitize_text(final_report)
         else:
-
+            # Pure chat path: redact if PHI_MODE and external is not allowed
+            chat_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
+            prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
             return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
 
     except Exception as e:
         tb = traceback.format_exc()
-
-
+        safe_log("app_error", {"err": str(e)})
+        # Do not leak stack traces to UI in PHI mode
+        return "A critical error occurred. Please contact your administrator." if PHI_MODE else f"A critical error occurred: {e}"
 
 
 PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
 TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
 
 with gr.Blocks(theme="soft", css="style.css") as demo:
-    # Persistent history
-    # Each entry:
-    # - id: timestamp
-    # - prompt: original prompt
-    # - files: list of uploaded filenames
-    # - response: final response text
-    # - chat_history: full transcript (list of {role, content})
+    # Persistent history state (in-memory). PHI mode defaults to no persistence.
     assessment_history = gr.State([])
 
+    # Optional PHI banner
+    if PHI_MODE:
+        gr.Markdown(
+            "⚠️ **PHI Mode Enabled**: Protected Health Information safeguards are active. "
+            "History persistence is disabled by default. Avoid unnecessary identifiers."
+        )
+
     # Modals
     with gr.Group(visible=False) as privacy_modal:
         with gr.Blocks():
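One caveat in the routing above: `redacted_in` is only actually redacted when `REDACT_BEFORE_LLM` is true, so the combination `PHI_MODE=True, REDACT_BEFORE_LLM=False, ALLOW_EXTERNAL_PHI=False` still sends unredacted text to the LLM. The ternary `redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in` also appears three times in `handle` (script generation, report writing, plain chat); a hypothetical helper, not part of this commit, could centralize it:

```python
# Hypothetical refactor (not in this commit): one place for the PHI routing rule
# currently duplicated as prompt_for_code, writer_input, and chat_input.
from settings import PHI_MODE, ALLOW_EXTERNAL_PHI  # same flags this commit imports

def llm_input(safe_text: str, redacted_text: str) -> str:
    # External LLMs get redacted text unless PHI is explicitly allowed to leave.
    if PHI_MODE and not ALLOW_EXTERNAL_PHI:
        return redacted_text
    return safe_text

# usage inside handle():
#   prompt_for_code = llm_input(safe_in, redacted_in)
#   writer_input    = llm_input(safe_in, redacted_in)
#   chat_input      = llm_input(safe_in, redacted_in)
```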
@@ -245,9 +312,7 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
                 file_types=[".csv"],
             )
             prompt_input = gr.Textbox(
-                label="Prompt",
-                placeholder="Paste your scenario or question here.",
-                lines=15,
+                label="Prompt", placeholder="Paste your scenario or question here.", lines=15
             )
             with gr.Row():
                 send_btn = gr.Button("▶️ Send / Run Analysis", variant="primary", scale=2)
@@ -277,22 +342,19 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
     # Logic
 
     def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
-        """
-        Handle a new user prompt and update chat & assessment history.
-        """
         if not prompt:
             gr.Warning("Please enter a prompt.")
             yield chat_history_list, history_state_list, gr.update()
             return
 
-        # Append user
+        # Append user message
         chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
 
-        #
+        # Placeholder for streamed updates (unused)
        def dummy_update(message: str):
            pass
 
-        #
+        # Thinking message
         thinking_message = _append_msg(
             chat_with_user_msg,
             "assistant",
@@ -305,18 +367,18 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
         # Run analysis/chat
         ai_response_text = handle(prompt, files, dummy_update)
 
-        # Append
+        # Append assistant response
         final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
         timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 
-        #
+        # Filenames
         file_names: List[str] = []
         if files:
             file_names = [
                 os.path.basename(f.name if hasattr(f, "name") else f) for f in files
             ]
 
-        #
+        # Construct history entry
         new_entry = {
             "id": timestamp,
             "prompt": prompt,
@@ -325,7 +387,12 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
             "chat_history": final_chat,
         }
 
-
+        # Persist only if allowed
+        if PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0)):
+            updated_history: List[Dict[str, Any]] = (history_state_list or []) + [new_entry]
+        else:
+            updated_history = history_state_list or []
+
         history_labels = [
             f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history
         ]
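The guard `PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0))` can be read more simply: inside the `or`, `PHI_MODE` is already known to be true, so it reduces to `PERSIST_HISTORY and (not PHI_MODE or HISTORY_TTL_DAYS > 0)`. A quick exhaustive check (illustrative only; flag names mirror the settings imports in this diff):

```python
# Exhaustively verify that the commit's persistence guard equals the simpler form.
from itertools import product

def guard(persist: bool, phi: bool, ttl_days: int) -> bool:
    return persist and (not phi or (phi and ttl_days > 0))

def simplified(persist: bool, phi: bool, ttl_days: int) -> bool:
    return persist and (not phi or ttl_days > 0)

for persist, phi, ttl in product([False, True], [False, True], [0, 7]):
    assert guard(persist, phi, ttl) == simplified(persist, phi, ttl)
print("PHI-mode history persists only with an explicit TTL opt-in")
```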
@@ -333,12 +400,8 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
         yield final_chat, updated_history, gr.update(choices=history_labels)
 
     def view_history(selection: str, history_state_list: List[Dict[str, Any]]) -> str:
-        """
-        Render details for a selected past assessment/chat session.
-        """
         if not selection or not history_state_list:
             return ""
-        # Selection label format: "timestamp - prompt..."
         try:
             selected_id = selection.split(" - ", 1)[0]
         except Exception:
@@ -354,7 +417,6 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
         file_list = selected_assessment.get("files", [])
         file_list_md = "\n- ".join(file_list) if file_list else "*(no files uploaded)*"
 
-        # Chat transcript (role + content)
         chat_entries = selected_assessment.get("chat_history", [])
         chat_md_lines = []
         for msg in chat_entries:
@@ -377,7 +439,7 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
 {chat_md}
 """
 
-    # Wire up UI
+    # Wire up UI
     send_btn.click(
         run_analysis_wrapper,
         inputs=[prompt_input, files_input, chat_history_output, assessment_history],