"""LangGraph agent (Gemini + Tavily) wrapped by NoteGuard de-identification.

Guarantee enforced in-graph: the LLM and the Tavily tool only ever receive
DE-IDENTIFIED text. Real identifiers are restored only in the final,
clinician-facing answer (reidentify_out).

Run locally:  langgraph dev  (serves the `noteguard` graph for Agent Chat UI)
Trace:        set LANGSMITH_TRACING=true + LANGSMITH_API_KEY (runs auto-trace)

Graph flow:
    deidentify_in -> agent -> reidentify_out -> compute_trust

Version note: import names track LangGraph v1 / LangChain v0.3+. If your
installed versions differ, adjust the two prebuilt imports and the
create_react_agent call.
"""
from __future__ import annotations

import json
import logging
import math
import os
import re

from dotenv import load_dotenv

load_dotenv(override=True)  # pick up .env before any os.getenv / API-key validation

from langchain.chat_models import init_chat_model
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from langgraph.graph import END, START, MessagesState, StateGraph
from langgraph.prebuilt import create_react_agent

try:
    from langchain_tavily import TavilySearch
except ImportError:  # older package name
    from langchain_community.tools.tavily_search import TavilySearchResults as TavilySearch

from src.deid import NoteGuard

logger = logging.getLogger(__name__)

# Shape of a surrogate token as emitted by the de-identifier, e.g. [PERSON_1].
_SURROGATE_RE = re.compile(r"\[[A-Z]+_\d+\]")

# NOTE(review): the "Output format" template below reads as if its per-slot
# placeholders were lost at some point (dangling "after .", "Follow-up: · ·").
# The wording is kept verbatim here — confirm against the canonical prompt.
SYSTEM = """\
You are a clinical documentation assistant for NHS clinicians.
Your ONLY output is a compact discharge-summary card. Reproduce EXACTLY the format below — \
no headings, no bullets, no preamble, no sign-off.

## De-identification rules — NEVER violate
You only ever see DE-IDENTIFIED text. Patient identifiers — names, NHS numbers, dates of birth,
addresses, GP names, consultant names — have been replaced with surrogate tokens such as
[PERSON_1], [NHS_1], [DOB_1], [ADDRESS_1], [DATE_1]. Preserve every surrogate token exactly as
given. A re-identification step restores real values for the clinician after you respond.
Never invent, guess, or expand a surrogate into a real value.
Never write the literal text of a surrogate token (e.g. [PERSON_1]) in the title line — \
use {{PATIENT}} there instead (see below).

## Output format — four elements, blank line between each

{{PATIENT}} — discharge summary

Admitted [DATE_X] after . Background: . . .

Follow-up: · ·

Grounded: , · via Tavily

## Rules for each element

**Title line:** write exactly `{{PATIENT}} — discharge summary`. \
The placeholder {{PATIENT}} is resolved to the real patient name by the system — \
you must never write a real name, a surrogate token, or any other identifier there.

**Narrative paragraph:** plain clinical prose, max 4 sentences. Include only facts stated in \
the source note — never invent investigations, doses, dates, or diagnoses. \
Surrogate tokens ([DATE_1], [PERSON_1], etc.) may appear here and will be restored. \
Drop a sentence entirely when there is nothing to say (e.g. no imaging → omit that sentence).

**Follow-up line:** items separated by " · " (middle dot U+00B7). \
Always include the GP action as the first item.

**Grounded line:** list only the public guidance sources (short readable names, not URLs) \
actually returned by the Tavily search tool this run. \
If Tavily returned no results, omit the Grounded line entirely — never fabricate citations.

**Search tool:** use only for public NICE/NHS clinical guidance. \
Never send patient text or surrogate tokens to the search tool.
"""


class State(MessagesState):
    """Graph state: the message history plus de-identification and trust-panel data."""

    forward: dict
    reverse: dict
    clinician_answer: str
    person_name: str  # resolved from person_id — fills {{PATIENT}} in title
    # --- trust panel fields ---
    deid_text: str  # de-identified note text (what the AI saw)
    identifiers_removed: int  # identifiers replaced in this turn
    residual_count: int  # known identifiers that survived de-id (pre-model)
    leaked_tokens: list  # tokens/patterns that slipped through (post-model)
    faithfulness_score: float  # LLM-as-judge: 0–1
    sources: list  # Tavily result URLs returned by the search tool this turn


def _content_text(content) -> str:
    """Flatten message content to plain text.

    Content may be a plain string or a list of blocks ([{type, text}, ...], as
    Gemini can return). Dict blocks contribute their "text" value, any other
    block is stringified, and the pieces are joined with single spaces.
    Falsy content yields "".
    """
    if isinstance(content, list):
        return " ".join(
            str(block.get("text") or "") if isinstance(block, dict) else str(block)
            for block in content
        ).strip()
    return content or ""


def _urls_from_tool_payload(payload) -> list[str]:
    """Return the result URLs found in one search-tool payload, in order.

    Accepts either shape the two Tavily integrations produce: a bare list of
    result dicts (legacy TavilySearchResults) or a dict carrying that list
    under "results" (langchain_tavily.TavilySearch). A string payload is
    parsed as JSON first; anything unparseable or unexpected yields [].
    """
    if isinstance(payload, str):
        try:
            payload = json.loads(payload)
        except json.JSONDecodeError:
            # Tool error text or other non-JSON output: no sources to report.
            return []
    if isinstance(payload, dict):
        payload = payload.get("results")
    if not isinstance(payload, list):
        return []
    return [item["url"] for item in payload if isinstance(item, dict) and item.get("url")]


def _parse_score(reply: str) -> float:
    """Parse the judge's reply into a faithfulness score clamped to [0, 1].

    Only the first whitespace-separated token is considered, after trimming
    markdown/bracket wrappers and trailing punctuation. Replies that do not
    start with a finite number score 0.0, so "nan" / "inf" can never be
    clamped up to a perfect score.
    """
    tokens = reply.split()
    if not tokens:
        return 0.0
    candidate = tokens[0].strip("*_`'\"()[]").rstrip(".,;:")
    try:
        value = float(candidate)
    except ValueError:
        return 0.0
    if not math.isfinite(value):
        return 0.0
    return max(0.0, min(1.0, value))


def build_graph(known: dict | None = None):
    """Build and compile the NoteGuard graph.

    Flow: deidentify_in -> agent -> reidentify_out -> compute_trust.

    Args:
        known: optional known-identifier set handed to NoteGuard, e.g.
            {"PERSON": [...], "NHS": [...]}.

    Returns:
        The compiled LangGraph graph.
    """
    model = init_chat_model(os.getenv("NOTEGUARD_MODEL", "google_genai:gemini-2.5-flash"))
    tools = [TavilySearch(max_results=3)]
    react = create_react_agent(model, tools, prompt=SYSTEM)

    def deidentify_in(state: State):
        """De-identify the newest human message before the model or tools see it."""
        prior_n = len(state.get("forward") or {})
        ng = NoteGuard(known=known, forward=state.get("forward"), reverse=state.get("reverse"))
        messages = state.get("messages") or []
        last = messages[-1] if messages else None
        if not isinstance(last, HumanMessage):
            return {"forward": ng.forward, "reverse": ng.reverse}
        # NOTE(review): only the newest human message is processed; earlier
        # human messages are assumed to have been cleaned on previous turns.
        res = ng.deidentify(_content_text(last.content))
        ng.assert_clean(res.clean_text)  # hard guarantee before model/tool see anything
        # Re-using the original id makes the message reducer replace the raw
        # message with the cleaned one instead of appending a second copy.
        cleaned = HumanMessage(content=res.clean_text, id=last.id)
        return {
            "messages": [cleaned],
            "forward": ng.forward,
            "reverse": ng.reverse,
            "deid_text": res.clean_text,
            "identifiers_removed": len(ng.forward) - prior_n,
            "residual_count": len(res.residual),
        }

    def run_agent(state: State):
        """Run the ReAct agent and return only the messages it added."""
        history = state["messages"]
        out = react.invoke({"messages": history})
        return {"messages": out["messages"][len(history):]}

    def reidentify_out(state: State):
        """Restore real identifiers in the model's answer and flag stray tokens."""
        ng = NoteGuard(reverse=state.get("reverse"))
        messages = state.get("messages") or []
        last = messages[-1] if messages else None
        if not isinstance(last, AIMessage):
            return {"clinician_answer": "", "leaked_tokens": []}

        raw_text = _content_text(last.content)

        # Check model output for orphaned tokens BEFORE reidentify restores known ones
        reverse = state.get("reverse") or {}
        leaked: list[str] = [
            f"unmapped_token:{m.group(0)}"
            for m in _SURROGATE_RE.finditer(raw_text)
            if m.group(0) not in reverse
        ]

        # Restore known surrogates
        restored = ng.reidentify(raw_text)

        # Replace {{PATIENT}} with the structured patient name (never from model)
        person_name = state.get("person_name") or "Patient"
        restored = restored.replace("{{PATIENT}}", person_name)

        # Replace any remaining [LABEL_n] that reidentify couldn't resolve — flag each.
        # NOTE(review): an orphaned token is usually recorded under both the
        # unmapped_ and unresolved_ prefixes — confirm that is intended.
        def _replace_leftover(m: re.Match) -> str:
            leaked.append(f"unresolved_token:{m.group(0)}")
            return "[redacted]"

        restored = _SURROGATE_RE.sub(_replace_leftover, restored)
        return {"clinician_answer": restored, "leaked_tokens": leaked}

    def compute_trust(state: State):
        """Extract Tavily sources and compute faithfulness (LLM-as-judge).

        Sources come from the ToolMessages produced since the latest human
        message, so earlier turns do not leak into this turn's panel.

        The faithfulness judge compares the de-identified AI answer against
        the de-identified source note (deid_text) — it never sees PHI.
        """
        messages = state.get("messages") or []

        # --- Tavily sources from this turn's ToolMessages ---
        turn_tool_messages = []
        for msg in reversed(messages):
            if isinstance(msg, HumanMessage):
                break
            if isinstance(msg, ToolMessage):
                turn_tool_messages.append(msg)
        sources: list[str] = []
        for msg in reversed(turn_tool_messages):  # back to chronological order
            sources.extend(_urls_from_tool_payload(msg.content))

        # --- faithfulness: judge de-identified answer vs de-identified source note ---
        score = 0.0
        last_ai = next((m for m in reversed(messages) if isinstance(m, AIMessage)), None)
        context = state.get("deid_text") or ""
        if last_ai is not None and context:
            ai_content = _content_text(last_ai.content)
            prompt = (
                f"CONTEXT (de-identified source note):\n{context}\n\n"
                f"ANSWER:\n{ai_content}\n\n"
                "Is every clinical claim in ANSWER supported by CONTEXT? "
                "Reply with a single number between 0 and 1."
            )
            try:
                reply = model.invoke(prompt).content
            except Exception:
                # Best-effort metric: a failed judge call must not fail the turn,
                # but it should be visible in the logs rather than vanish.
                logger.warning("Faithfulness judge call failed; scoring 0.0", exc_info=True)
            else:
                score = _parse_score(_content_text(reply))

        # Merge leaked_tokens from reidentify_out with any new findings
        leaked = list(state.get("leaked_tokens") or [])
        residual = state.get("residual_count") or 0
        return {
            "sources": list(dict.fromkeys(sources)),  # de-duplicate, keep order
            "faithfulness_score": score,
            "leaked_tokens": leaked,
            # Bump residual_count so the API's risk calculation sees the truth
            "residual_count": residual + len(leaked),
        }

    g = StateGraph(State)
    g.add_node("deidentify_in", deidentify_in)
    g.add_node("agent", run_agent)
    g.add_node("reidentify_out", reidentify_out)
    g.add_node("compute_trust", compute_trust)
    g.add_edge(START, "deidentify_in")
    g.add_edge("deidentify_in", "agent")
    g.add_edge("agent", "reidentify_out")
    g.add_edge("reidentify_out", "compute_trust")
    g.add_edge("compute_trust", END)
    return g.compile()


# Demo vault — seeds the known-identifier set so `langgraph dev` / the web UI
# resolve surrogates consistently on startup.
_DEMO_KNOWN = {"PERSON": ["Margaret Okafor"], "NHS": ["485 777 3456"]}

graph = build_graph(known=_DEMO_KNOWN)