Rajan Sharma committed on
Commit
3d2ccd6
·
verified ·
1 Parent(s): a8e6509

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -35
app.py CHANGED
@@ -1,7 +1,15 @@
1
- # app.py
 
 
 
 
 
 
 
 
2
  #
3
- # Gradio-based AI data analyst app with persistent chat & assessment history.
4
- # Each session stores: timestamp, prompt, files (if any), final response, and full chat transcript.
5
 
6
  from __future__ import annotations
7
 
@@ -25,7 +33,14 @@ from settings import (
25
  COHERE_MODEL_PRIMARY,
26
  COHERE_TIMEOUT_S, # noqa: F401
27
  USE_OPEN_FALLBACKS, # noqa: F401
 
 
 
 
 
 
28
  )
 
29
  from audit_log import log_event
30
  from privacy import safety_filter, refusal_reply
31
  from llm_router import cohere_chat, _co_client, cohere_embed
@@ -46,6 +61,40 @@ def _sanitize_text(s: str) -> str:
46
  return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
47
 
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
50
  EXPERT_ANALYTICAL_GUIDELINES = """
51
  --- EXPERT ANALYTICAL GUIDELINES ---
@@ -101,7 +150,7 @@ def _generate_long_report(prompt: str) -> str:
101
  )
102
  return response.text
103
  except Exception as e:
104
- log_event("cohere_chat_error", None, {"err": str(e)})
105
  return f"Error during final report generation: {e}"
106
 
107
 
@@ -146,13 +195,22 @@ def ping_cohere() -> str:
146
 
147
  def handle(user_msg: str, files: list, yield_update) -> str:
148
  try:
 
149
  safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
150
  if blocked_in:
151
  return refusal_reply(reason_in)
152
 
 
 
 
 
 
 
153
  file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
154
 
155
  if file_paths:
 
 
156
  dataframes, schema_parts = [], []
157
  for i, p in enumerate(file_paths):
158
  if p.endswith(".csv"):
@@ -170,10 +228,13 @@ def handle(user_msg: str, files: list, yield_update) -> str:
170
 
171
  schema_context = "\n".join(schema_parts)
172
 
 
 
 
173
  yield_update("""```
174
  🧠 Generating aligned analysis script...
175
  ```""")
176
- analysis_script = _create_python_script(safe_in, schema_context)
177
 
178
  yield_update("""```
179
  ⚙️ Executing script to extract raw data...
@@ -194,31 +255,37 @@ def handle(user_msg: str, files: list, yield_update) -> str:
194
  yield_update("""```
195
  ✍️ Synthesizing final comprehensive report...
196
  ```""")
197
- final_report = _generate_final_report(safe_in, raw_data_output)
 
 
198
  return _sanitize_text(final_report)
199
  else:
200
- prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {safe_in}\nAssistant:"
 
 
201
  return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
202
 
203
  except Exception as e:
204
  tb = traceback.format_exc()
205
- log_event("app_error", None, {"err": str(e), "tb": tb})
206
- return f"A critical error occurred: {e}"
 
207
 
208
 
209
  PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
210
  TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
211
 
212
  with gr.Blocks(theme="soft", css="style.css") as demo:
213
- # Persistent history of past assessments / chat sessions
214
- # Each entry:
215
- # - id: timestamp
216
- # - prompt: original prompt
217
- # - files: list of uploaded filenames
218
- # - response: final response text
219
- # - chat_history: full transcript (list of {role, content})
220
  assessment_history = gr.State([])
221
 
 
 
 
 
 
 
 
222
  # Modals
223
  with gr.Group(visible=False) as privacy_modal:
224
  with gr.Blocks():
@@ -245,9 +312,7 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
245
  file_types=[".csv"],
246
  )
247
  prompt_input = gr.Textbox(
248
- label="Prompt",
249
- placeholder="Paste your scenario or question here.",
250
- lines=15,
251
  )
252
  with gr.Row():
253
  send_btn = gr.Button("▶️ Send / Run Analysis", variant="primary", scale=2)
@@ -277,22 +342,19 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
277
  # Logic
278
 
279
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
280
- """
281
- Handle a new user prompt and update chat & assessment history.
282
- """
283
  if not prompt:
284
  gr.Warning("Please enter a prompt.")
285
  yield chat_history_list, history_state_list, gr.update()
286
  return
287
 
288
- # Append user's message
289
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
290
 
291
- # Optional streaming update callback (unused here)
292
  def dummy_update(message: str):
293
  pass
294
 
295
- # Show thinking message
296
  thinking_message = _append_msg(
297
  chat_with_user_msg,
298
  "assistant",
@@ -305,18 +367,18 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
305
  # Run analysis/chat
306
  ai_response_text = handle(prompt, files, dummy_update)
307
 
308
- # Append final assistant response
309
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
310
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
311
 
312
- # Capture filenames (if any)
313
  file_names: List[str] = []
314
  if files:
315
  file_names = [
316
  os.path.basename(f.name if hasattr(f, "name") else f) for f in files
317
  ]
318
 
319
- # Create a new history record (always, even for chat-only)
320
  new_entry = {
321
  "id": timestamp,
322
  "prompt": prompt,
@@ -325,7 +387,12 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
325
  "chat_history": final_chat,
326
  }
327
 
328
- updated_history: List[Dict[str, Any]] = (history_state_list or []) + [new_entry]
 
 
 
 
 
329
  history_labels = [
330
  f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history
331
  ]
@@ -333,12 +400,8 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
333
  yield final_chat, updated_history, gr.update(choices=history_labels)
334
 
335
  def view_history(selection: str, history_state_list: List[Dict[str, Any]]) -> str:
336
- """
337
- Render details for a selected past assessment/chat session.
338
- """
339
  if not selection or not history_state_list:
340
  return ""
341
- # Selection label format: "timestamp - prompt..."
342
  try:
343
  selected_id = selection.split(" - ", 1)[0]
344
  except Exception:
@@ -354,7 +417,6 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
354
  file_list = selected_assessment.get("files", [])
355
  file_list_md = "\n- ".join(file_list) if file_list else "*(no files uploaded)*"
356
 
357
- # Chat transcript (role + content)
358
  chat_entries = selected_assessment.get("chat_history", [])
359
  chat_md_lines = []
360
  for msg in chat_entries:
@@ -377,7 +439,7 @@ with gr.Blocks(theme="soft", css="style.css") as demo:
377
  {chat_md}
378
  """
379
 
380
- # Wire up UI events
381
  send_btn.click(
382
  run_analysis_wrapper,
383
  inputs=[prompt_input, files_input, chat_history_output, assessment_history],
 
1
+
2
+ # app_phi.py
3
+ #
4
+ # HIPAA-aware wrapper of the existing app. This keeps the analysis and assessment
5
+ # capabilities intact while adding PHI safeguards:
6
+ # - PHI_MODE flags with opt-in persistence
7
+ # - Redaction before sending content to any external LLM
8
+ # - Safer logging (no raw PHI)
9
+ # - Optional banner and history controls
10
  #
11
+ # NOTE: This file is designed to be a drop-in alternative to app.py.
12
+ # It preserves existing analysis logic and UI while adding HIPAA toggles.
13
 
14
  from __future__ import annotations
15
 
 
33
  COHERE_MODEL_PRIMARY,
34
  COHERE_TIMEOUT_S, # noqa: F401
35
  USE_OPEN_FALLBACKS, # noqa: F401
36
+ # HIPAA flags
37
+ PHI_MODE,
38
+ PERSIST_HISTORY,
39
+ HISTORY_TTL_DAYS,
40
+ REDACT_BEFORE_LLM,
41
+ ALLOW_EXTERNAL_PHI,
42
  )
43
+
44
  from audit_log import log_event
45
  from privacy import safety_filter, refusal_reply
46
  from llm_router import cohere_chat, _co_client, cohere_embed
 
61
  return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
62
 
63
 
64
+ # ---------------------- HIPAA helpers ----------------------
65
+
66
# Very conservative redaction table (risk reduction; not a full
# de-identification program). Applied top-to-bottom, so the more specific
# numeric shapes (SSN) are consumed before the broader ones (phone, ZIP).
PHI_PATTERNS = [
    (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),  # US SSN
    (re.compile(r"\b\d{9}\b"), "[REDACTED_MRN]"),  # 9-digit MRN (example)
    (re.compile(r"\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b"), "[REDACTED_PHONE]"),
    (re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"), "[REDACTED_EMAIL]"),
    (re.compile(r"\b(19|20)\d{2}-\d{2}-\d{2}\b"), "[REDACTED_DOB]"),  # YYYY-MM-DD
    (re.compile(r"\b\d{2}/\d{2}/(19|20)\d{2}\b"), "[REDACTED_DOB]"),  # MM/DD/YYYY
    (re.compile(r"\b\d{5}(-\d{4})?\b"), "[REDACTED_ZIP]"),  # ZIP (US)
]


def redact_phi(text: str) -> str:
    """Run every PHI_PATTERNS substitution over *text* and return the result.

    Non-string inputs are returned unchanged so callers can pass arbitrary
    values through without a type check of their own.
    """
    if not isinstance(text, str):
        return text
    redacted = text
    for pattern, replacement in PHI_PATTERNS:
        redacted = pattern.sub(replacement, redacted)
    return redacted
84
+
85
def safe_log(event_name: str, meta: dict | None = None):
    """Best-effort audit logging that never raises and never forwards raw payloads.

    The "raw" key is stripped from a copy of *meta* before the event reaches
    log_event, so the caller's dict is left untouched and raw PHI content is
    kept out of the audit trail.
    """
    try:
        scrubbed = dict(meta or {})
        scrubbed.pop("raw", None)  # drop raw payloads so no PHI reaches the log
        log_event(event_name, None, scrubbed)
    except Exception:
        # Logging must never break request handling in a PHI context.
        pass
94
+
95
+
96
+ # ---------------------- Original analysis helpers (unchanged) ----------------------
97
+
98
  def _create_python_script(user_scenario: str, schema_context: str) -> str:
99
  EXPERT_ANALYTICAL_GUIDELINES = """
100
  --- EXPERT ANALYTICAL GUIDELINES ---
 
150
  )
151
  return response.text
152
  except Exception as e:
153
+ safe_log("cohere_chat_error", {"err": str(e)})
154
  return f"Error during final report generation: {e}"
155
 
156
 
 
195
 
196
  def handle(user_msg: str, files: list, yield_update) -> str:
197
  try:
198
+ # Run app safety filter
199
  safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
200
  if blocked_in:
201
  return refusal_reply(reason_in)
202
 
203
+ # If PHI mode is enabled and we are not permitted to send PHI externally,
204
+ # redact the content before any LLM calls.
205
+ redacted_in = safe_in
206
+ if PHI_MODE and REDACT_BEFORE_LLM:
207
+ redacted_in = redact_phi(safe_in)
208
+
209
  file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
210
 
211
  if file_paths:
212
+ # CSV analysis path preserved; we only use redacted_in in PROMPTS sent to the LLM.
213
+ # CSV data itself is processed locally as before.
214
  dataframes, schema_parts = [], []
215
  for i, p in enumerate(file_paths):
216
  if p.endswith(".csv"):
 
228
 
229
  schema_context = "\n".join(schema_parts)
230
 
231
+ # If PHI is not allowed externally and PHI_MODE is on, we will use the redacted prompt.
232
+ prompt_for_code = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
233
+
234
  yield_update("""```
235
  🧠 Generating aligned analysis script...
236
  ```""")
237
+ analysis_script = _create_python_script(prompt_for_code, schema_context)
238
 
239
  yield_update("""```
240
  ⚙️ Executing script to extract raw data...
 
255
  yield_update("""```
256
  ✍️ Synthesizing final comprehensive report...
257
  ```""")
258
+ # For the final narrative, also route based on PHI policy
259
+ writer_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
260
+ final_report = _generate_final_report(writer_input, raw_data_output)
261
  return _sanitize_text(final_report)
262
  else:
263
+ # Pure chat path: redact if PHI_MODE and external is not allowed
264
+ chat_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
265
+ prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
266
  return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
267
 
268
  except Exception as e:
269
  tb = traceback.format_exc()
270
+ safe_log("app_error", {"err": str(e)})
271
+ # Do not leak stack traces to UI in PHI mode
272
+ return "A critical error occurred. Please contact your administrator." if PHI_MODE else f"A critical error occurred: {e}"
273
 
274
 
275
  PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
276
  TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
277
 
278
  with gr.Blocks(theme="soft", css="style.css") as demo:
279
+ # Persistent history state (in-memory). PHI mode defaults to no persistence.
 
 
 
 
 
 
280
  assessment_history = gr.State([])
281
 
282
+ # Optional PHI banner
283
+ if PHI_MODE:
284
+ gr.Markdown(
285
+ "⚠️ **PHI Mode Enabled**: Protected Health Information safeguards are active. "
286
+ "History persistence is disabled by default. Avoid unnecessary identifiers."
287
+ )
288
+
289
  # Modals
290
  with gr.Group(visible=False) as privacy_modal:
291
  with gr.Blocks():
 
312
  file_types=[".csv"],
313
  )
314
  prompt_input = gr.Textbox(
315
+ label="Prompt", placeholder="Paste your scenario or question here.", lines=15
 
 
316
  )
317
  with gr.Row():
318
  send_btn = gr.Button("▶️ Send / Run Analysis", variant="primary", scale=2)
 
342
  # Logic
343
 
344
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
 
 
 
345
  if not prompt:
346
  gr.Warning("Please enter a prompt.")
347
  yield chat_history_list, history_state_list, gr.update()
348
  return
349
 
350
+ # Append user message
351
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
352
 
353
+ # Placeholder for streamed updates (unused)
354
  def dummy_update(message: str):
355
  pass
356
 
357
+ # Thinking message
358
  thinking_message = _append_msg(
359
  chat_with_user_msg,
360
  "assistant",
 
367
  # Run analysis/chat
368
  ai_response_text = handle(prompt, files, dummy_update)
369
 
370
+ # Append assistant response
371
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
372
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
373
 
374
+ # Filenames
375
  file_names: List[str] = []
376
  if files:
377
  file_names = [
378
  os.path.basename(f.name if hasattr(f, "name") else f) for f in files
379
  ]
380
 
381
+ # Construct history entry
382
  new_entry = {
383
  "id": timestamp,
384
  "prompt": prompt,
 
387
  "chat_history": final_chat,
388
  }
389
 
390
+ # Persist only if allowed
391
+ if PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0)):
392
+ updated_history: List[Dict[str, Any]] = (history_state_list or []) + [new_entry]
393
+ else:
394
+ updated_history = history_state_list or []
395
+
396
  history_labels = [
397
  f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history
398
  ]
 
400
  yield final_chat, updated_history, gr.update(choices=history_labels)
401
 
402
  def view_history(selection: str, history_state_list: List[Dict[str, Any]]) -> str:
 
 
 
403
  if not selection or not history_state_list:
404
  return ""
 
405
  try:
406
  selected_id = selection.split(" - ", 1)[0]
407
  except Exception:
 
417
  file_list = selected_assessment.get("files", [])
418
  file_list_md = "\n- ".join(file_list) if file_list else "*(no files uploaded)*"
419
 
 
420
  chat_entries = selected_assessment.get("chat_history", [])
421
  chat_md_lines = []
422
  for msg in chat_entries:
 
439
  {chat_md}
440
  """
441
 
442
+ # Wire up UI
443
  send_btn.click(
444
  run_analysis_wrapper,
445
  inputs=[prompt_input, files_input, chat_history_output, assessment_history],