Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
# app.py
|
| 2 |
# ---------------------------------------------------------
|
| 3 |
-
#
|
| 4 |
-
# -
|
| 5 |
-
# -
|
| 6 |
-
# -
|
| 7 |
# ---------------------------------------------------------
|
| 8 |
import os, io, re, json
|
| 9 |
import chainlit as cl
|
|
@@ -11,17 +11,20 @@ from dotenv import load_dotenv
|
|
| 11 |
from openai import AsyncOpenAI
|
| 12 |
from pypdf import PdfReader
|
| 13 |
|
| 14 |
-
#
|
|
|
|
|
|
|
| 15 |
load_dotenv()
|
| 16 |
-
|
| 17 |
-
GEMINI_API_KEY = os.getenv("Gem")
|
| 18 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 19 |
|
| 20 |
if GEMINI_API_KEY:
|
| 21 |
PROVIDER = "gemini"
|
| 22 |
MODEL_ID = "gemini-2.5-flash"
|
| 23 |
-
client = AsyncOpenAI(
|
| 24 |
-
|
|
|
|
|
|
|
| 25 |
elif OPENAI_API_KEY:
|
| 26 |
PROVIDER = "openai"
|
| 27 |
MODEL_ID = "gpt-4o-mini" # any chat-capable model you have
|
|
@@ -29,8 +32,97 @@ elif OPENAI_API_KEY:
|
|
| 29 |
else:
|
| 30 |
raise RuntimeError("Missing GEMINI_API_KEY or OPENAI_API_KEY.")
|
| 31 |
|
| 32 |
-
#
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
pages = []
|
| 35 |
reader = PdfReader(io.BytesIO(data))
|
| 36 |
for i, pg in enumerate(reader.pages, start=1):
|
|
@@ -41,14 +133,14 @@ def _extract_pdf_pages(data: bytes):
|
|
| 41 |
pages.append({"page": i, "text": txt})
|
| 42 |
return pages
|
| 43 |
|
| 44 |
-
def
|
| 45 |
try:
|
| 46 |
txt = data.decode("utf-8", errors="ignore")
|
| 47 |
except Exception:
|
| 48 |
txt = ""
|
| 49 |
return [{"page": i + 1, "text": txt[i:i + chunk_chars]} for i in range(0, len(txt), chunk_chars)] or [{"page": 1, "text": ""}]
|
| 50 |
|
| 51 |
-
def
|
| 52 |
if not pages:
|
| 53 |
return []
|
| 54 |
terms = [w for w in re.findall(r"\w+", (query or "").lower()) if len(w) > 2]
|
|
@@ -59,56 +151,69 @@ def _manual_search(pages, query: str, topk: int = 3):
|
|
| 59 |
if score > 0:
|
| 60 |
scored.append((score, p))
|
| 61 |
scored.sort(key=lambda x: x[0], reverse=True)
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
|
|
|
| 71 |
|
| 72 |
-
#
|
|
|
|
|
|
|
| 73 |
SYSTEM_PROMPT = (
|
| 74 |
"You are a biomedical device troubleshooting assistant for clinical engineers.\n"
|
| 75 |
-
"
|
| 76 |
-
"
|
| 77 |
-
"
|
|
|
|
| 78 |
"1) Safety First (non-invasive, patient-first)\n"
|
| 79 |
"2) Likely Causes (ranked)\n"
|
| 80 |
"3) Step-by-Step Checks (do-not-open device; do-not-bypass alarms)\n"
|
| 81 |
"4) Quick Tests / Verification (what, how, with what reference/simulator)\n"
|
| 82 |
"5) Escalate When (clear triggers)\n"
|
| 83 |
-
"End with a one-line summary. If manual excerpts are provided, incorporate them but state
|
| 84 |
)
|
| 85 |
|
| 86 |
async def call_llm(user_desc: str, manual_excerpts: str = "") -> str:
|
| 87 |
-
|
| 88 |
-
user_block = f"User description:\n{user_desc.strip()}"
|
| 89 |
if manual_excerpts:
|
| 90 |
user_block += f"\n\nManual excerpts (for reference):\n{manual_excerpts.strip()}"
|
| 91 |
-
messages.append({"role": "user", "content": user_block})
|
| 92 |
-
|
| 93 |
resp = await client.chat.completions.create(
|
| 94 |
model=MODEL_ID,
|
| 95 |
-
messages=
|
|
|
|
|
|
|
|
|
|
| 96 |
)
|
| 97 |
return resp.choices[0].message.content or ""
|
| 98 |
|
| 99 |
-
#
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
-
# ------------------ Chainlit handlers ------------------
|
| 107 |
WELCOME = (
|
| 108 |
-
"🛠️ **Biomedical Troubleshooting Assistant**\n"
|
| 109 |
-
"Describe the **device & symptom**
|
| 110 |
-
"
|
| 111 |
-
"Education-only.
|
| 112 |
)
|
| 113 |
|
| 114 |
@cl.on_chat_start
|
|
@@ -116,11 +221,19 @@ async def start():
|
|
| 116 |
set_manual(None)
|
| 117 |
await cl.Message(content=WELCOME).send()
|
| 118 |
|
|
|
|
|
|
|
|
|
|
| 119 |
@cl.on_message
|
| 120 |
async def main(message: cl.Message):
|
| 121 |
text = (message.content or "").strip()
|
| 122 |
|
| 123 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
if text.lower().startswith("/manual"):
|
| 125 |
files = await cl.AskFileMessage(
|
| 126 |
content="Upload the **service manual** (PDF or TXT). Max ~20 MB.",
|
|
@@ -128,57 +241,81 @@ async def main(message: cl.Message):
|
|
| 128 |
max_files=1, max_size_mb=20, timeout=240
|
| 129 |
).send()
|
| 130 |
if not files:
|
| 131 |
-
await cl.Message(content="No file received.").send()
|
| 132 |
-
return
|
| 133 |
f = files[0]
|
| 134 |
data = getattr(f, "content", None)
|
| 135 |
if data is None and getattr(f, "path", None):
|
| 136 |
-
with open(f.path, "rb") as fh:
|
| 137 |
-
data = fh.read()
|
| 138 |
try:
|
| 139 |
-
if f.mime == "application/pdf" or f.name.lower().endswith(".pdf")
|
| 140 |
-
pages = _extract_pdf_pages(data)
|
| 141 |
-
else:
|
| 142 |
-
pages = _extract_txt_pages(data)
|
| 143 |
except Exception as e:
|
| 144 |
-
await cl.Message(content=f"Couldn't read the manual: {e}").send()
|
| 145 |
-
return
|
| 146 |
set_manual({"name": f.name, "pages": pages})
|
| 147 |
await cl.Message(content=f"✅ Manual indexed: **{f.name}** — {len(pages)} page-chunks.").send()
|
| 148 |
return
|
| 149 |
|
| 150 |
-
# Clear manual
|
| 151 |
if text.lower().startswith("/clear"):
|
| 152 |
set_manual(None)
|
| 153 |
await cl.Message(content="Manual cleared.").send()
|
| 154 |
return
|
| 155 |
|
| 156 |
-
#
|
| 157 |
-
if not text:
|
| 158 |
-
await cl.Message(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
return
|
| 160 |
|
| 161 |
-
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
if manual and manual.get("pages"):
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
parts = []
|
| 168 |
-
for h in hits:
|
| 169 |
-
snippet = _excerpt(h.get("text",""), terms, window=420)
|
| 170 |
-
parts.append(f"[p.{h.get('page')}] {snippet}")
|
| 171 |
-
manual_excerpts = "\n".join(parts)
|
| 172 |
-
|
| 173 |
-
# Call the LLM
|
| 174 |
try:
|
| 175 |
-
answer = await call_llm(text,
|
| 176 |
except Exception as e:
|
| 177 |
await cl.Message(content=f"⚠️ LLM call failed ({PROVIDER}): {e}").send()
|
| 178 |
return
|
| 179 |
|
| 180 |
-
|
| 181 |
-
prefix = ""
|
| 182 |
-
if manual and manual.get("name"):
|
| 183 |
-
prefix = f"📄 Using manual: **{manual['name']}**\n\n"
|
| 184 |
await cl.Message(content=prefix + (answer or "I couldn’t generate a plan.")).send()
|
|
|
|
| 1 |
# app.py
|
| 2 |
# ---------------------------------------------------------
|
| 3 |
+
# Biomedical Troubleshooting Assistant (Topic-Locked + Guardrails)
|
| 4 |
+
# - Only biomedical device troubleshooting.
|
| 5 |
+
# - /manual to upload PDF/TXT (PHI redacted in excerpts), /clear to remove, /policy to view rules.
|
| 6 |
+
# - Dual guardrails: local regex + LLM JSON classifier.
|
| 7 |
# ---------------------------------------------------------
|
| 8 |
import os, io, re, json
|
| 9 |
import chainlit as cl
|
|
|
|
| 11 |
from openai import AsyncOpenAI
|
| 12 |
from pypdf import PdfReader
|
| 13 |
|
| 14 |
+
# =========================
|
| 15 |
+
# Config: auto provider
|
| 16 |
+
# =========================
|
| 17 |
load_dotenv()
|
| 18 |
+
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
|
|
|
| 19 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 20 |
|
| 21 |
if GEMINI_API_KEY:
|
| 22 |
PROVIDER = "gemini"
|
| 23 |
MODEL_ID = "gemini-2.5-flash"
|
| 24 |
+
client = AsyncOpenAI(
|
| 25 |
+
api_key=GEMINI_API_KEY,
|
| 26 |
+
base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
|
| 27 |
+
)
|
| 28 |
elif OPENAI_API_KEY:
|
| 29 |
PROVIDER = "openai"
|
| 30 |
MODEL_ID = "gpt-4o-mini" # any chat-capable model you have
|
|
|
|
| 32 |
else:
|
| 33 |
raise RuntimeError("Missing GEMINI_API_KEY or OPENAI_API_KEY.")
|
| 34 |
|
| 35 |
+
# =========================
# Topic lock & guardrails
# =========================
# Slash-commands that are always treated as on-topic input.
ALLOWED_COMMANDS = ("/manual", "/clear", "/help", "/policy")

# Coarse allow-list: a message containing any of these substrings is
# accepted as biomedical-equipment talk by on_topic().
TOPIC_KEYWORDS = [
    "biomedical", "biomed", "device", "equipment", "oem", "service manual",
    "troubleshoot", "troubleshooting", "fault", "error", "alarm",
    "probe", "sensor", "lead", "cable", "battery", "power", "calibration",
    "qc", "verification", "analyzer",
    "ecg", "spo2", "oximeter", "nibp", "ventilator", "infusion", "pump",
    "defibrillator", "patient monitor", "ultrasound", "anesthesia", "syringe pump",
]

# Refusal triggers (all case-insensitive).
RE_FORBIDDEN_CLINICAL = re.compile(r"\b(diagnos(e|is|tic)|prescrib|medicat|treat(ment|ing)?|dose|drug|therapy)\b", re.I)
RE_INVASIVE_REPAIR = re.compile(r"\b(open(ing)?\s+(the\s+)?(device|casing|cover)|remove\s+cover|solder|reflow|short\s+pin|jumper|board\s+level|replace\s+capacitor|tear\s+down)\b", re.I)
RE_ALARM_BYPASS = re.compile(r"\b(bypass|disable|silence)\s+(alarm|alert|safety|interlock)\b", re.I)
RE_FIRMWARE_TAMPER = re.compile(r"\b(firmware|bootloader|root|jailbreak|unlock\s+(service|engineer)\s*mode|password\s*override|service\s*code|backdoor)\b", re.I)

# PHI patterns (best-effort)
RE_EMAIL = re.compile(r"[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}", re.I)
RE_PHONE = re.compile(r"(?:\+\d{1,3}[-\s.]*)?(?:\(?\d{3,4}\)?[-\s.]*)?\d{3}[-\s.]?\d{4}")
RE_CNIC = re.compile(r"\b\d{5}-\d{7}-\d\b")  # Pakistan CNIC
RE_MRN = re.compile(r"\b(MRN|Medical\s*Record(?:\s*Number)?)[:\s]*\d{4,}\b", re.I)
RE_ADDRESS_HINT = re.compile(r"\b(address|street|road|block|apt|flat|house)\b", re.I)
|
| 59 |
+
|
| 60 |
+
def on_topic(text: str) -> bool:
    """Coarse topic gate: True when the message looks like biomedical-equipment talk.

    Empty/whitespace-only input is off-topic; any allowed slash-command is
    on-topic; otherwise the lowercased text must contain at least one
    TOPIC_KEYWORDS entry.
    """
    normalized = (text or "").lower().strip()
    if not normalized:
        return False
    for command in ALLOWED_COMMANDS:
        if normalized.startswith(command):
            return True
    return any(keyword in normalized for keyword in TOPIC_KEYWORDS)
|
| 67 |
+
|
| 68 |
+
def local_guard_check(text: str) -> list:
    """Return a list of violations detected locally (fast, offline).

    Safety categories are matched against the lowercased text; PHI patterns
    run against the raw text (the patterns that need it carry re.I
    themselves).
    """
    lowered = (text or "").lower()
    issues = []
    safety_checks = [
        ("clinical_advice", RE_FORBIDDEN_CLINICAL),
        ("invasive_repair", RE_INVASIVE_REPAIR),
        ("alarm_bypass", RE_ALARM_BYPASS),
        ("firmware_tamper", RE_FIRMWARE_TAMPER),
    ]
    for label, pattern in safety_checks:
        if pattern.search(lowered):
            issues.append(label)
    # PHI sharing/collection indicators
    phi_patterns = (RE_EMAIL, RE_PHONE, RE_CNIC, RE_MRN, RE_ADDRESS_HINT)
    if any(p.search(text) for p in phi_patterns):
        issues.append("phi_share_or_collect")
    return issues
|
| 80 |
+
|
| 81 |
+
async def llm_guard_check(text: str) -> dict:
    """Ask model for a strict JSON classification; fallback to {} on error.

    Returns a dict of booleans (in_scope, clinical_advice, invasive_repair,
    alarm_bypass, firmware_tamper, phi_share_or_collect) parsed from the
    model reply, or an empty dict when the call or the parse fails.
    """
    classifier_prompt = (
        "You are a strict safety classifier. Return ONLY a compact JSON object with boolean fields:\n"
        "{in_scope, clinical_advice, invasive_repair, alarm_bypass, firmware_tamper, phi_share_or_collect}.\n"
        "Definitions:\n"
        "- in_scope: true only if user asks about biomedical equipment troubleshooting.\n"
        "- clinical_advice: true if asking diagnosis/treatment/dosing/medication/therapy.\n"
        "- invasive_repair: true if asking to open casing, solder, board-level repair.\n"
        "- alarm_bypass: true if asking to silence/disable/bypass alarms/interlocks.\n"
        "- firmware_tamper: true if asking to root, jailbreak, firmware hacks, unlock service mode, passwords.\n"
        "- phi_share_or_collect: true if asking to share/collect personal identifiers or appears to share them.\n"
        "Respond with JSON only, no prose."
    )
    try:
        completion = await client.chat.completions.create(
            model=MODEL_ID,
            messages=[
                {"role": "system", "content": classifier_prompt},
                {"role": "user", "content": text},
            ],
        )
        reply = completion.choices[0].message.content or "{}"
        # Best-effort extraction: prefer the first {...} span in case the
        # model wrapped the JSON in prose or code fences.
        match = re.search(r"\{.*\}", reply, re.S)
        payload = match.group(0) if match else reply
        return json.loads(payload)
    except Exception:
        # Guard is advisory: any failure degrades to "no verdict".
        return {}
|
| 111 |
+
|
| 112 |
+
def redact_phi(s: str) -> str:
    """Best-effort PHI scrub: replace identifier-like substrings with placeholder tags.

    Falsy input (None/"") is returned unchanged. Substitution order is
    significant: the CNIC pattern runs before the looser phone pattern so a
    CNIC is not partially consumed as a phone number first.
    """
    if not s:
        return s
    substitutions = (
        (RE_EMAIL, "[REDACTED_EMAIL]"),
        (RE_CNIC, "[REDACTED_CNIC]"),
        (RE_PHONE, "[REDACTED_PHONE]"),
        (RE_MRN, "[REDACTED_MRN]"),
        # Light address hint: only the trigger word is bracketed, not the
        # surrounding text.
        (RE_ADDRESS_HINT, "[ADDRESS]"),
    )
    redacted = s
    for pattern, placeholder in substitutions:
        redacted = pattern.sub(placeholder, redacted)
    return redacted
|
| 121 |
+
|
| 122 |
+
# =========================
|
| 123 |
+
# Manual helpers
|
| 124 |
+
# =========================
|
| 125 |
+
def extract_pdf_pages(data: bytes):
|
| 126 |
pages = []
|
| 127 |
reader = PdfReader(io.BytesIO(data))
|
| 128 |
for i, pg in enumerate(reader.pages, start=1):
|
|
|
|
| 133 |
pages.append({"page": i, "text": txt})
|
| 134 |
return pages
|
| 135 |
|
| 136 |
+
def extract_txt_pages(data: bytes, chunk_chars: int = 1600):
    """Split raw text bytes into page-like chunks for manual search.

    Decodes *data* as UTF-8 (bad bytes ignored) and slices the text into
    ``chunk_chars``-sized chunks, returning ``[{"page": n, "text": ...}, ...]``
    with 1-based sequential page numbers. Always returns at least one
    (possibly empty) page so callers can index safely.
    """
    try:
        txt = data.decode("utf-8", errors="ignore")
    except Exception:
        txt = ""
    pages = [
        # Fix: number chunks 1, 2, 3, ... — the previous code used the byte
        # offset + 1, so excerpts were labeled "p.1601", "p.3201", etc.
        {"page": offset // chunk_chars + 1, "text": txt[offset:offset + chunk_chars]}
        for offset in range(0, len(txt), chunk_chars)
    ]
    return pages or [{"page": 1, "text": ""}]
|
| 142 |
|
| 143 |
+
def manual_hits(pages, query: str, topk: int = 3):
|
| 144 |
if not pages:
|
| 145 |
return []
|
| 146 |
terms = [w for w in re.findall(r"\w+", (query or "").lower()) if len(w) > 2]
|
|
|
|
| 151 |
if score > 0:
|
| 152 |
scored.append((score, p))
|
| 153 |
scored.sort(key=lambda x: x[0], reverse=True)
|
| 154 |
+
hits = [p for _, p in scored[:topk]] or pages[:1]
|
| 155 |
+
def excerpt(text: str, window: int = 380):
|
| 156 |
+
t = text or ""
|
| 157 |
+
low = t.lower()
|
| 158 |
+
idxs = [low.find(tk) for tk in terms if tk in low]
|
| 159 |
+
start = max(0, min([i for i in idxs if i >= 0], default=0) - window)
|
| 160 |
+
end = min(len(t), start + 2 * window)
|
| 161 |
+
return re.sub(r"\s+", " ", t[start:end]).strip()
|
| 162 |
+
# Redact PHI in excerpts
|
| 163 |
+
return [f"[p.{h['page']}] {redact_phi(excerpt(h.get('text','')))}" for h in hits]
|
| 164 |
|
| 165 |
+
# =========================
|
| 166 |
+
# Tutor prompt
|
| 167 |
+
# =========================
|
| 168 |
# System prompt for the troubleshooting model: role, hard scope lock,
# safety ground rules, and the exact five-section answer format.
SYSTEM_PROMPT = (
    # Role and scope limit.
    "You are a biomedical device troubleshooting assistant for clinical engineers.\n"
    "STRICT SCOPE: Only biomedical equipment troubleshooting. If any content appears clinical, you must refuse.\n"
    # Safety ground rules.
    "Safety: Education-only. No diagnosis/treatment. No invasive repairs. No alarm bypass. No firmware hacks. "
    "Do not ask users to share personal identifiers. Defer to OEM manuals and local policy if any conflict.\n\n"
    # Required answer structure.
    "Given a user description of a device and symptom, produce concise bullet lists with exactly these sections:\n"
    "1) Safety First (non-invasive, patient-first)\n"
    "2) Likely Causes (ranked)\n"
    "3) Step-by-Step Checks (do-not-open device; do-not-bypass alarms)\n"
    "4) Quick Tests / Verification (what, how, with what reference/simulator)\n"
    "5) Escalate When (clear triggers)\n"
    # Closing instruction.
    "End with a one-line summary. If manual excerpts are provided, incorporate them but state OEM manual prevails.\n"
)
|
| 181 |
|
| 182 |
async def call_llm(user_desc: str, manual_excerpts: str = "") -> str:
    """Ask the configured chat model for a troubleshooting plan.

    Builds one user message from the device/symptom description plus
    optional manual excerpts, pairs it with SYSTEM_PROMPT, and returns the
    model's text (empty string when the model returns none).
    """
    sections = [f"Device/symptom:\n{user_desc.strip()}"]
    if manual_excerpts:
        sections.append(f"Manual excerpts (for reference):\n{manual_excerpts.strip()}")
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": "\n\n".join(sections)},
    ]
    completion = await client.chat.completions.create(
        model=MODEL_ID,
        messages=conversation,
    )
    return completion.choices[0].message.content or ""
|
| 194 |
|
| 195 |
+
# =========================
|
| 196 |
+
# Session utils & UI
|
| 197 |
+
# =========================
|
| 198 |
+
def set_manual(m):
    """Store the indexed manual (or None to clear it) in the per-user session."""
    cl.user_session.set("manual", m)


def get_manual():
    """Return the manual stored for this session (if any)."""
    return cl.user_session.get("manual")
|
| 200 |
|
| 201 |
+
# Shown by /policy and appended to refusal messages.
POLICY_TEXT = "\n".join([
    "🛡️ **Safety & Scope Policy**",
    "- Scope: Only **biomedical equipment troubleshooting**.",
    "- No clinical advice: diagnosis, treatment, dosing, medications.",
    "- No invasive repairs: opening casing, soldering, board-level fixes.",
    "- No alarm bypass / interlock disable.",
    "- No firmware tampering / service mode hacks / passwords.",
    "- No collection or sharing of personal identifiers (emails, phone, CNIC, MRN, addresses).",
    "- OEM manuals & local policy take priority.",
])
|
| 211 |
|
|
|
|
| 212 |
# Greeting sent on chat start and by /help.
WELCOME = "\n".join([
    "🛠️ **Biomedical Troubleshooting Assistant (Topic-Locked + Guardrails)**",
    "Describe the **device & symptom** (e.g., “ECG noisy baseline”).",
    "Commands: **/manual** upload PDF/TXT (PHI auto-redacted in excerpts), **/clear** remove manual, **/policy** view rules, **/help** usage.",
    "Education-only. OEM manual & policy take priority.",
])
|
| 218 |
|
| 219 |
@cl.on_chat_start
|
|
|
|
| 221 |
set_manual(None)
|
| 222 |
await cl.Message(content=WELCOME).send()
|
| 223 |
|
| 224 |
+
# =========================
|
| 225 |
+
# Main handler
|
| 226 |
+
# =========================
|
| 227 |
@cl.on_message
|
| 228 |
async def main(message: cl.Message):
|
| 229 |
text = (message.content or "").strip()
|
| 230 |
|
| 231 |
+
# Commands
|
| 232 |
+
if text.lower().startswith("/help"):
|
| 233 |
+
await cl.Message(content=WELCOME).send(); return
|
| 234 |
+
if text.lower().startswith("/policy"):
|
| 235 |
+
await cl.Message(content=POLICY_TEXT).send(); return
|
| 236 |
+
|
| 237 |
if text.lower().startswith("/manual"):
|
| 238 |
files = await cl.AskFileMessage(
|
| 239 |
content="Upload the **service manual** (PDF or TXT). Max ~20 MB.",
|
|
|
|
| 241 |
max_files=1, max_size_mb=20, timeout=240
|
| 242 |
).send()
|
| 243 |
if not files:
|
| 244 |
+
await cl.Message(content="No file received.").send(); return
|
|
|
|
| 245 |
f = files[0]
|
| 246 |
data = getattr(f, "content", None)
|
| 247 |
if data is None and getattr(f, "path", None):
|
| 248 |
+
with open(f.path, "rb") as fh: data = fh.read()
|
|
|
|
| 249 |
try:
|
| 250 |
+
pages = extract_pdf_pages(data) if (f.mime == "application/pdf" or f.name.lower().endswith(".pdf")) else extract_txt_pages(data)
|
|
|
|
|
|
|
|
|
|
| 251 |
except Exception as e:
|
| 252 |
+
await cl.Message(content=f"Couldn't read the manual: {e}").send(); return
|
|
|
|
| 253 |
set_manual({"name": f.name, "pages": pages})
|
| 254 |
await cl.Message(content=f"✅ Manual indexed: **{f.name}** — {len(pages)} page-chunks.").send()
|
| 255 |
return
|
| 256 |
|
|
|
|
| 257 |
if text.lower().startswith("/clear"):
|
| 258 |
set_manual(None)
|
| 259 |
await cl.Message(content="Manual cleared.").send()
|
| 260 |
return
|
| 261 |
|
| 262 |
+
# Topic lock (coarse)
|
| 263 |
+
if not on_topic(text):
|
| 264 |
+
await cl.Message(
|
| 265 |
+
content="🚫 I only handle **biomedical device troubleshooting**.\n"
|
| 266 |
+
"Describe the *device & symptom* (e.g., “Infusion pump occlusion alarm”).\n"
|
| 267 |
+
"Use **/manual** to upload a service manual."
|
| 268 |
+
).send()
|
| 269 |
return
|
| 270 |
|
| 271 |
+
# Local guard (fast)
|
| 272 |
+
local_issues = local_guard_check(text)
|
| 273 |
+
if local_issues:
|
| 274 |
+
reason_map = {
|
| 275 |
+
"clinical_advice": "clinical diagnosis/treatment",
|
| 276 |
+
"invasive_repair": "invasive repair steps",
|
| 277 |
+
"alarm_bypass": "bypassing alarms/interlocks",
|
| 278 |
+
"firmware_tamper": "firmware tampering or service-mode hacks",
|
| 279 |
+
"phi_share_or_collect": "sharing or collecting personal identifiers",
|
| 280 |
+
}
|
| 281 |
+
reasons = ", ".join(reason_map[k] for k in local_issues if k in reason_map)
|
| 282 |
+
await cl.Message(
|
| 283 |
+
content=f"🚫 I can’t help with {reasons}. I only provide **safe, non-invasive biomedical equipment troubleshooting**.\n{POLICY_TEXT}"
|
| 284 |
+
).send()
|
| 285 |
+
return
|
| 286 |
+
|
| 287 |
+
# LLM guard (nuanced)
|
| 288 |
+
verdict = await llm_guard_check(text)
|
| 289 |
+
if verdict:
|
| 290 |
+
if not verdict.get("in_scope", True):
|
| 291 |
+
await cl.Message(
|
| 292 |
+
content="🚫 Off-topic. I only support **biomedical device troubleshooting**.\n" + POLICY_TEXT
|
| 293 |
+
).send(); return
|
| 294 |
+
for key, msg in [
|
| 295 |
+
("clinical_advice", "clinical diagnosis/treatment"),
|
| 296 |
+
("invasive_repair", "invasive repair steps"),
|
| 297 |
+
("alarm_bypass", "bypassing alarms/interlocks"),
|
| 298 |
+
("firmware_tamper", "firmware tampering or service-mode hacks"),
|
| 299 |
+
("phi_share_or_collect", "sharing or collecting personal identifiers"),
|
| 300 |
+
]:
|
| 301 |
+
if verdict.get(key):
|
| 302 |
+
await cl.Message(
|
| 303 |
+
content=f"🚫 I can’t help with {msg}. I only provide **safe, non-invasive biomedical equipment troubleshooting**.\n{POLICY_TEXT}"
|
| 304 |
+
).send()
|
| 305 |
+
return
|
| 306 |
+
|
| 307 |
+
# Manual excerpts (with PHI redaction)
|
| 308 |
+
manual = cl.user_session.get("manual")
|
| 309 |
+
excerpts = ""
|
| 310 |
if manual and manual.get("pages"):
|
| 311 |
+
excerpts = "\n".join(manual_hits(manual["pages"], text, topk=3))
|
| 312 |
+
|
| 313 |
+
# Call LLM for the plan
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
try:
|
| 315 |
+
answer = await call_llm(text, excerpts)
|
| 316 |
except Exception as e:
|
| 317 |
await cl.Message(content=f"⚠️ LLM call failed ({PROVIDER}): {e}").send()
|
| 318 |
return
|
| 319 |
|
| 320 |
+
prefix = f"📄 Using manual: **{manual['name']}**\n\n" if (manual and manual.get("name")) else ""
|
|
|
|
|
|
|
|
|
|
| 321 |
await cl.Message(content=prefix + (answer or "I couldn’t generate a plan.")).send()
|