Spaces:
Running
Running
| """LangGraph agent (Gemini + Tavily) wrapped by NoteGuard de-identification. | |
| Guarantee enforced in-graph: the LLM and the Tavily tool only ever receive | |
| DE-IDENTIFIED text. Real identifiers are restored only in the final, | |
| clinician-facing answer (reidentify_out). | |
| Run locally: langgraph dev (serves the `noteguard` graph for Agent Chat UI) | |
| Trace: set LANGSMITH_TRACING=true + LANGSMITH_API_KEY (runs auto-trace) | |
| Graph flow: | |
| deidentify_in -> agent -> reidentify_out -> compute_trust | |
| Version note: import names track LangGraph v1 / LangChain v0.3+. If your installed | |
| versions differ, adjust the two prebuilt imports and the create_react_agent call. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import os | |
| import re | |
| from dotenv import load_dotenv | |
| load_dotenv(override=True) # pick up .env before any os.getenv / API-key validation | |
| from langchain.chat_models import init_chat_model | |
| from langchain_core.messages import AIMessage, HumanMessage | |
| from langgraph.graph import END, START, MessagesState, StateGraph | |
| from langgraph.prebuilt import create_react_agent | |
| try: | |
| from langchain_tavily import TavilySearch | |
| except ImportError: # older package name | |
| from langchain_community.tools.tavily_search import TavilySearchResults as TavilySearch | |
| from src.deid import NoteGuard | |
| SYSTEM = """\ | |
| You are a clinical documentation assistant for NHS clinicians. | |
| Your ONLY output is a compact discharge-summary card. Reproduce EXACTLY the format below — \ | |
| no headings, no bullets, no preamble, no sign-off. | |
| ## De-identification rules — NEVER violate | |
| You only ever see DE-IDENTIFIED text. Patient identifiers — names, NHS numbers, dates of birth, | |
| addresses, GP names, consultant names — have been replaced with surrogate tokens such as | |
| [PERSON_1], [NHS_1], [DOB_1], [ADDRESS_1], [DATE_1]. | |
| Preserve every surrogate token exactly as given. A re-identification step restores real values | |
| for the clinician after you respond. Never invent, guess, or expand a surrogate into a real value. | |
| Never write the literal text of a surrogate token (e.g. [PERSON_1]) in the title line — \ | |
| use {{PATIENT}} there instead (see below). | |
| ## Output format — four elements, blank line between each | |
| {{PATIENT}} — discharge summary | |
| Admitted [DATE_X] after <reason>. Background: <key conditions/meds>. <what was done>. <key finding>. | |
| Follow-up: <GP action> · <action 2> · <action 3> | |
| Grounded: <source name 1>, <source name 2> · via Tavily | |
| ## Rules for each element | |
| **Title line:** write exactly `{{PATIENT}} — discharge summary`. \ | |
| The placeholder {{PATIENT}} is resolved to the real patient name by the system — \ | |
| you must never write a real name, a surrogate token, or any other identifier there. | |
| **Narrative paragraph:** plain clinical prose, max 4 sentences. Include only facts stated in \ | |
| the source note — never invent investigations, doses, dates, or diagnoses. \ | |
| Surrogate tokens ([DATE_1], [PERSON_1], etc.) may appear here and will be restored. \ | |
| Drop a sentence entirely when there is nothing to say (e.g. no imaging → omit that sentence). | |
| **Follow-up line:** items separated by " · " (middle dot U+00B7). \ | |
| Always include the GP action as the first item. | |
| **Grounded line:** list only the public guidance sources (short readable names, not URLs) \ | |
| actually returned by the Tavily search tool this run. \ | |
| If Tavily returned no results, omit the Grounded line entirely — never fabricate citations. | |
| **Search tool:** use only for public NICE/NHS clinical guidance. \ | |
| Never send patient text or surrogate tokens to the search tool. | |
| """ | |
| class State(MessagesState): | |
| forward: dict | |
| reverse: dict | |
| clinician_answer: str | |
| person_name: str # resolved from person_id — fills {{PATIENT}} in title | |
| # --- trust panel fields --- | |
| deid_text: str # de-identified note text (what the AI saw) | |
| identifiers_removed: int # identifiers replaced in this turn | |
| residual_count: int # known identifiers that survived de-id (pre-model) | |
| leaked_tokens: list # tokens/patterns that slipped through (post-model) | |
| faithfulness_score: float # LLM-as-judge: 0–1 | |
| sources: list # Tavily URLs cited in the answer | |
| def build_graph(known: dict | None = None): | |
| model = init_chat_model(os.getenv("NOTEGUARD_MODEL", "google_genai:gemini-2.5-flash")) | |
| tools = [TavilySearch(max_results=3)] | |
| react = create_react_agent(model, tools, prompt=SYSTEM) | |
| def deidentify_in(state: State): | |
| prior_n = len(state.get("forward") or {}) | |
| ng = NoteGuard(known=known, forward=state.get("forward"), reverse=state.get("reverse")) | |
| last = state["messages"][-1] | |
| if not isinstance(last, HumanMessage): | |
| return {"forward": ng.forward, "reverse": ng.reverse} | |
| res = ng.deidentify(last.content) | |
| ng.assert_clean(res.clean_text) # hard guarantee before model/tool see anything | |
| cleaned = HumanMessage(content=res.clean_text, id=last.id) | |
| return { | |
| "messages": [cleaned], | |
| "forward": ng.forward, | |
| "reverse": ng.reverse, | |
| "deid_text": res.clean_text, | |
| "identifiers_removed": len(ng.forward) - prior_n, | |
| "residual_count": len(res.residual), | |
| } | |
| def run_agent(state: State): | |
| out = react.invoke({"messages": state["messages"]}) | |
| return {"messages": out["messages"][len(state["messages"]) :]} | |
| def reidentify_out(state: State): | |
| ng = NoteGuard(reverse=state.get("reverse")) | |
| last = state["messages"][-1] | |
| if not isinstance(last, AIMessage): | |
| return {"clinician_answer": "", "leaked_tokens": []} | |
| content = last.content | |
| # Gemini can return content as a list of blocks [{type, text}, ...] | |
| if isinstance(content, list): | |
| raw_text = " ".join( | |
| block.get("text", "") if isinstance(block, dict) else str(block) for block in content | |
| ).strip() | |
| else: | |
| raw_text = content or "" | |
| # Check model output for orphaned tokens BEFORE reidentify restores known ones | |
| reverse = state.get("reverse") or {} | |
| leaked: list[str] = [] | |
| for m in re.finditer(r"\[[A-Z]+_\d+\]", raw_text): | |
| tok = m.group(0) | |
| if tok not in reverse: | |
| leaked.append(f"unmapped_token:{tok}") | |
| # Restore known surrogates | |
| restored = ng.reidentify(raw_text) | |
| # Replace {{PATIENT}} with the structured patient name (never from model) | |
| person_name = state.get("person_name") or "Patient" | |
| restored = restored.replace("{{PATIENT}}", person_name) | |
| # Replace any remaining [LABEL_n] that reidentify couldn't resolve — flag each | |
| def _replace_leftover(m: re.Match) -> str: | |
| tok = m.group(0) | |
| leaked.append(f"unresolved_token:{tok}") | |
| return "[redacted]" | |
| restored = re.sub(r"\[[A-Z]+_\d+\]", _replace_leftover, restored) | |
| return {"clinician_answer": restored, "leaked_tokens": leaked} | |
| def compute_trust(state: State): | |
| """Extract Tavily sources and compute faithfulness (LLM-as-judge). | |
| The faithfulness judge compares the de-identified AI answer against | |
| the de-identified source note (deid_text) — it never sees PHI. | |
| """ | |
| # --- Tavily sources from ToolMessages --- | |
| sources: list[str] = [] | |
| for msg in state["messages"]: | |
| content = getattr(msg, "content", None) | |
| if not content: | |
| continue | |
| try: | |
| items = json.loads(content) if isinstance(content, str) else content | |
| if isinstance(items, list): | |
| for item in items: | |
| if isinstance(item, dict) and item.get("url"): | |
| sources.append(item["url"]) | |
| except (json.JSONDecodeError, TypeError): | |
| pass | |
| # --- faithfulness: judge de-identified answer vs de-identified source note --- | |
| score = 0.0 | |
| last_ai = next((m for m in reversed(state["messages"]) if isinstance(m, AIMessage)), None) | |
| context = state.get("deid_text") or "" | |
| if last_ai and context: | |
| ai_content = last_ai.content | |
| if isinstance(ai_content, list): | |
| ai_content = " ".join( | |
| b.get("text", "") if isinstance(b, dict) else str(b) for b in ai_content | |
| ).strip() | |
| prompt = ( | |
| f"CONTEXT (de-identified source note):\n{context}\n\n" | |
| f"ANSWER:\n{ai_content}\n\n" | |
| "Is every clinical claim in ANSWER supported by CONTEXT? " | |
| "Reply with a single number between 0 and 1." | |
| ) | |
| try: | |
| raw = model.invoke(prompt).content | |
| if isinstance(raw, list): | |
| raw = " ".join(b.get("text", "") if isinstance(b, dict) else str(b) for b in raw) | |
| score = max(0.0, min(1.0, float(raw.strip().split()[0]))) | |
| except Exception: | |
| score = 0.0 | |
| # Merge leaked_tokens from reidentify_out with any new findings | |
| leaked = list(state.get("leaked_tokens") or []) | |
| residual_extra = state.get("residual_count", 0) | |
| return { | |
| "sources": list(dict.fromkeys(filter(None, sources))), | |
| "faithfulness_score": score, | |
| "leaked_tokens": leaked, | |
| # Bump residual_count so the API's risk calculation sees the truth | |
| "residual_count": residual_extra + len(leaked), | |
| } | |
| g = StateGraph(State) | |
| g.add_node("deidentify_in", deidentify_in) | |
| g.add_node("agent", run_agent) | |
| g.add_node("reidentify_out", reidentify_out) | |
| g.add_node("compute_trust", compute_trust) | |
| g.add_edge(START, "deidentify_in") | |
| g.add_edge("deidentify_in", "agent") | |
| g.add_edge("agent", "reidentify_out") | |
| g.add_edge("reidentify_out", "compute_trust") | |
| g.add_edge("compute_trust", END) | |
| return g.compile() | |
| # Demo vault — seeds the known-identifier set so `langgraph dev` / the web UI | |
| # resolve surrogates consistently on startup. | |
| _DEMO_KNOWN = {"PERSON": ["Margaret Okafor"], "NHS": ["485 777 3456"]} | |
| graph = build_graph(known=_DEMO_KNOWN) | |