Ryan2219 committed on
Commit
20eb3d7
·
verified ·
1 Parent(s): 2ffd324

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +883 -628
app.py CHANGED
@@ -1,628 +1,883 @@
1
- """Streamlit UI for NYC Code Compliance Bot — with agent discussion panel."""
2
- from __future__ import annotations
3
-
4
- import json
5
- import logging
6
- import tempfile
7
- from pathlib import Path
8
- import os
9
- import sys
10
- import streamlit as st
11
- from PIL import Image
12
-
13
- from config import MAX_INVESTIGATION_ROUNDS
14
- from graph import compile_compliance_graph
15
- from tools.chroma_tools import warmup_collection, is_warmed_up
16
- from tools.crop_cache import CropCache
17
- from tools.image_store import ImageStore
18
- from tools.metadata_cache import MetadataState, get_cached_metadata
19
- from tools.pdf_processor import render_pages
20
-
21
- logger = logging.getLogger(__name__)
22
-
23
- # ---------------------------------------------------------------------------
24
- # Page config
25
- # ---------------------------------------------------------------------------
26
- st.set_page_config(
27
- page_title="NYC Code Compliance Bot",
28
- page_icon=":building_construction:",
29
- layout="wide",
30
- )
31
-
32
- # ---------------------------------------------------------------------------
33
- # Custom CSS for agent discussion panel
34
- # ---------------------------------------------------------------------------
35
- st.markdown("""
36
- <style>
37
- .agent-msg {
38
- padding: 8px 12px;
39
- margin: 4px 0;
40
- border-radius: 8px;
41
- font-size: 0.9em;
42
- }
43
- .agent-planner {
44
- background-color: #e3f2fd;
45
- border-left: 4px solid #1565c0;
46
- }
47
- .agent-code_analyst {
48
- background-color: #fff3e0;
49
- border-left: 4px solid #e65100;
50
- }
51
- .agent-compliance_analyst {
52
- background-color: #e8f5e9;
53
- border-left: 4px solid #2e7d32;
54
- }
55
- .agent-reviewer {
56
- background-color: #f3e5f5;
57
- border-left: 4px solid #6a1b9a;
58
- }
59
- .agent-icon {
60
- font-weight: bold;
61
- margin-right: 6px;
62
- }
63
- .agent-timestamp {
64
- color: #666;
65
- font-size: 0.8em;
66
- }
67
- </style>
68
- """, unsafe_allow_html=True)
69
-
70
- AGENT_ICONS = {
71
- "planner": "\U0001f4cb",
72
- "code_analyst": "\u2696\ufe0f",
73
- "compliance_analyst": "\U0001f50d",
74
- "reviewer": "\U0001f91d",
75
- }
76
-
77
- AGENT_LABELS = {
78
- "planner": "Planner",
79
- "code_analyst": "Code Analyst",
80
- "compliance_analyst": "Compliance Analyst",
81
- "reviewer": "Reviewer",
82
- }
83
-
84
- # ---------------------------------------------------------------------------
85
- # Session state defaults
86
- # ---------------------------------------------------------------------------
87
- if "pdf_loaded" not in st.session_state:
88
- st.session_state.pdf_loaded = False
89
- if "chat_history" not in st.session_state:
90
- st.session_state.chat_history = []
91
- if "image_store" not in st.session_state:
92
- st.session_state.image_store = None
93
- if "ingest_state" not in st.session_state:
94
- st.session_state.ingest_state = {}
95
- if "pdf_bytes" not in st.session_state:
96
- st.session_state.pdf_bytes = None
97
- if "metadata_state" not in st.session_state:
98
- st.session_state.metadata_state = MetadataState()
99
- if "crop_cache" not in st.session_state:
100
- st.session_state.crop_cache = CropCache()
101
- if "discussion_log" not in st.session_state:
102
- st.session_state.discussion_log = []
103
- if "code_report" not in st.session_state:
104
- st.session_state.code_report = ""
105
- if "code_sections" not in st.session_state:
106
- st.session_state.code_sections = []
107
- if "image_refs" not in st.session_state:
108
- st.session_state.image_refs = []
109
- if "db_ready" not in st.session_state:
110
- st.session_state.db_ready = False
111
-
112
- # ---------------------------------------------------------------------------
113
- # Startup: warm up embedding model + ChromaDB
114
- # ---------------------------------------------------------------------------
115
- if not st.session_state.db_ready:
116
- with st.status("Loading NYC Code Database...", expanded=True) as _db_status:
117
- st.write(":brain: Loading embedding model (bge-large-en-v1.5)...")
118
- st.write("_This is a one-time download (~1.3 GB) on first run._")
119
- ok = warmup_collection()
120
- if ok:
121
- st.session_state.db_ready = True
122
- _db_status.update(label="NYC Code Database ready", state="complete")
123
- else:
124
- _db_status.update(
125
- label="NYC Code Database not available — code lookup will be disabled",
126
- state="error",
127
- )
128
- st.session_state.db_ready = False
129
- if st.session_state.db_ready:
130
- st.rerun()
131
-
132
- # ---------------------------------------------------------------------------
133
- # Sidebar
134
- # ---------------------------------------------------------------------------
135
- with st.sidebar:
136
- st.title(":building_construction: NYC Code Compliance Bot")
137
- st.markdown(
138
- "Upload a construction drawing PDF and ask compliance questions. "
139
- "The system uses **agentic vision** + **NYC code database** to "
140
- "verify code compliance."
141
- )
142
-
143
- st.divider()
144
-
145
- # PDF upload
146
- uploaded_file = st.file_uploader("Upload Drawing PDF", type=["pdf"])
147
-
148
- # Default drawing button
149
- _DEFAULT_PDF = Path(__file__).parent / "NorthMaconPark.pdf"
150
- if _DEFAULT_PDF.exists() and not st.session_state.pdf_loaded:
151
- st.markdown("**— or —**")
152
- if st.button("Use Default Drawing", use_container_width=True):
153
- st.session_state._use_default_pdf = True
154
- st.rerun()
155
-
156
- st.divider()
157
-
158
- # Settings
159
- st.subheader("Settings")
160
- enable_consensus = st.checkbox(
161
- "Enable peer review (Gemini + GPT)",
162
- value=False,
163
- help="GPT reviews Gemini's compliance analysis. Slower but more thorough.",
164
- )
165
- enable_annotation = st.checkbox(
166
- "Enable annotation",
167
- value=False,
168
- help="Annotate crops with numbered highlights before analysis.",
169
- )
170
- max_rounds = st.slider(
171
- "Max investigation rounds",
172
- min_value=1,
173
- max_value=5,
174
- value=MAX_INVESTIGATION_ROUNDS,
175
- help="Maximum crop-analyze loops before forcing a final verdict.",
176
- )
177
-
178
- st.divider()
179
- st.caption("Powered by LangGraph + Gemini + GPT + ChromaDB")
180
-
181
- # ---------------------------------------------------------------------------
182
- # PDF ingestion Phase A: render pages
183
- # ---------------------------------------------------------------------------
184
- # Determine if we have a PDF to process (uploaded or default)
185
- _pending_pdf: tuple[str, bytes] | None = None
186
- if not st.session_state.pdf_loaded:
187
- if uploaded_file is not None:
188
- _pending_pdf = (uploaded_file.name, uploaded_file.getvalue())
189
- elif st.session_state.get("_use_default_pdf"):
190
- _default = Path(__file__).parent / "NorthMaconPark.pdf"
191
- if _default.exists():
192
- _pending_pdf = (_default.name, _default.read_bytes())
193
-
194
- if _pending_pdf is not None and not st.session_state.pdf_loaded:
195
- pdf_name, pdf_bytes = _pending_pdf
196
- with st.status("Converting PDF to images...", expanded=True) as status:
197
- tmp_dir = tempfile.mkdtemp(prefix="compliance_bot_")
198
- pdf_path = Path(tmp_dir) / pdf_name
199
- pdf_path.write_bytes(pdf_bytes)
200
-
201
- st.session_state.pdf_bytes = pdf_bytes
202
- st.session_state.crop_cache = CropCache()
203
-
204
- image_store = ImageStore(str(Path(tmp_dir) / "images"))
205
- st.session_state.image_store = image_store
206
- page_image_dir = str(image_store._pages_dir)
207
-
208
- # Check for cached metadata
209
- cached = get_cached_metadata(pdf_bytes)
210
- if cached is not None:
211
- st.session_state.metadata_state.set_ready(json.dumps(cached, indent=2))
212
- st.write("Page index loaded from cache")
213
-
214
- st.write("Rendering pages...")
215
- num_pages = render_pages(str(pdf_path), page_image_dir)
216
-
217
- st.session_state.ingest_state = {
218
- "pdf_path": str(pdf_path),
219
- "page_image_dir": page_image_dir,
220
- "num_pages": num_pages,
221
- }
222
- st.session_state.pdf_loaded = True
223
- st.session_state.pop("_use_default_pdf", None)
224
- st.write(f"Converted {num_pages} pages to images.")
225
- status.update(label=f"PDF ready: {num_pages} pages", state="complete")
226
- st.rerun()
227
-
228
- # ---------------------------------------------------------------------------
229
- # PDF ingestion — Phase B: generate page index
230
- # ---------------------------------------------------------------------------
231
- if st.session_state.pdf_loaded:
232
- meta = st.session_state.metadata_state
233
- if meta.status == "not_started":
234
- if st.session_state.pdf_bytes is not None:
235
- with st.expander(":page_facing_up: PDF Viewer", expanded=False):
236
- st.pdf(st.session_state.pdf_bytes, height=400)
237
-
238
- ingest = st.session_state.ingest_state
239
- num_pages = ingest["num_pages"]
240
-
241
- st.write("**Generating page index...**")
242
- progress_bar = st.progress(0, text="Analyzing pages to build searchable index...")
243
-
244
- def _index_progress(completed: int, total: int, label: str):
245
- pct = completed / total
246
- progress_bar.progress(pct, text=f"Indexing: {label} ({completed}/{total} batches)")
247
-
248
- meta.generate_sync(
249
- ingest["pdf_path"],
250
- num_pages,
251
- st.session_state.pdf_bytes,
252
- progress_callback=_index_progress,
253
- )
254
- if meta.is_ready:
255
- progress_bar.progress(1.0, text="Page index ready!")
256
- else:
257
- progress_bar.progress(1.0, text="Indexing failed — using full PDF mode")
258
- st.rerun()
259
-
260
- # ---------------------------------------------------------------------------
261
- # Main layout (pre-upload welcome)
262
- # ---------------------------------------------------------------------------
263
- if not st.session_state.pdf_loaded:
264
- _left, center, _right = st.columns([1, 2, 1])
265
- with center:
266
- st.markdown(
267
- "<h1 style='text-align: center;'>:building_construction: NYC Code Compliance Bot</h1>",
268
- unsafe_allow_html=True,
269
- )
270
- st.markdown(
271
- "<p style='text-align: center; color: grey;'>"
272
- "Upload a construction drawing PDF in the sidebar to get started.<br>"
273
- "This tool uses <b>agentic vision</b> and the <b>NYC Building Code database</b> "
274
- "to verify code compliance in your drawings."
275
- "</p>",
276
- unsafe_allow_html=True,
277
- )
278
- st.stop()
279
-
280
- # ---------------------------------------------------------------------------
281
- # PDF viewer
282
- # ---------------------------------------------------------------------------
283
- if st.session_state.pdf_bytes is not None:
284
- with st.expander(":page_facing_up: PDF Viewer", expanded=False):
285
- st.pdf(st.session_state.pdf_bytes, height=400)
286
-
287
- # ---------------------------------------------------------------------------
288
- # Three-column layout: chat | discussion | images+code
289
- # ---------------------------------------------------------------------------
290
- chat_col, discuss_col, evidence_col = st.columns([2, 2, 2])
291
-
292
- # ---------------------------------------------------------------------------
293
- # Discussion panel (agent conversation)
294
- # ---------------------------------------------------------------------------
295
- def render_discussion_log(container, discussion_log: list[dict]):
296
- """Render the agent discussion log with styled messages."""
297
- with container:
298
- for msg in discussion_log:
299
- agent = msg.get("agent", "unknown")
300
- icon = AGENT_ICONS.get(agent, "\U0001f916")
301
- label = AGENT_LABELS.get(agent, agent)
302
- css_class = f"agent-{agent}"
303
-
304
- st.markdown(
305
- f'<div class="agent-msg {css_class}">'
306
- f'<span class="agent-timestamp">[{msg.get("timestamp", "")}]</span> '
307
- f'<span class="agent-icon">{icon} {label}</span><br>'
308
- f'{msg.get("summary", "")}'
309
- f'</div>',
310
- unsafe_allow_html=True,
311
- )
312
-
313
- # ---------------------------------------------------------------------------
314
- # Chat history display
315
- # ---------------------------------------------------------------------------
316
- with chat_col:
317
- st.subheader(":speech_balloon: Chat")
318
-
319
- meta = st.session_state.metadata_state
320
- if meta.is_ready:
321
- st.caption("Page index ready — fast planning enabled")
322
- elif meta.status == "failed":
323
- st.caption("Page indexing failed — using full PDF mode")
324
-
325
- for role, content, _refs in st.session_state.chat_history:
326
- with st.chat_message(role):
327
- st.markdown(content)
328
-
329
- question = st.chat_input("Ask a compliance question about the drawing...")
330
-
331
- # ---------------------------------------------------------------------------
332
- # Discussion panel
333
- # ---------------------------------------------------------------------------
334
- with discuss_col:
335
- st.subheader(":busts_in_silhouette: Agent Discussion")
336
- discussion_container = st.container()
337
-
338
- if st.session_state.discussion_log:
339
- render_discussion_log(discussion_container, st.session_state.discussion_log)
340
- else:
341
- st.info("Agent discussions will appear here during analysis.")
342
-
343
- # ---------------------------------------------------------------------------
344
- # Evidence panel (images + code)
345
- # ---------------------------------------------------------------------------
346
- with evidence_col:
347
- st.subheader(":framed_picture: Evidence")
348
-
349
- evidence_tabs = st.tabs(["Drawing Crops", "Code Sections"])
350
-
351
- with evidence_tabs[0]:
352
- if st.session_state.image_refs:
353
- for ref in st.session_state.image_refs:
354
- try:
355
- img = Image.open(ref["path"])
356
- st.image(img, caption=ref["label"], use_container_width=True)
357
- except Exception:
358
- st.warning(f"Could not load: {ref['label']}")
359
- elif st.session_state.chat_history:
360
- st.info("No images for this question.")
361
- else:
362
- st.info("Ask a question to see drawing crops here.")
363
-
364
- with evidence_tabs[1]:
365
- if st.session_state.code_sections:
366
- for sec in st.session_state.code_sections:
367
- with st.expander(
368
- f":balance_scale: {sec.get('code_type', '?')} §{sec.get('section_full', '?')}",
369
- expanded=False,
370
- ):
371
- if sec.get("relevance"):
372
- st.caption(sec["relevance"])
373
- st.markdown(sec.get("text", "")[:1500])
374
- if st.session_state.code_report:
375
- with st.expander(":page_facing_up: Full Code Report", expanded=False):
376
- st.markdown(st.session_state.code_report[:5000])
377
- else:
378
- st.info("Code sections retrieved during analysis will appear here.")
379
-
380
-
381
- # ---------------------------------------------------------------------------
382
- # Question processing
383
- # ---------------------------------------------------------------------------
384
- if question:
385
- # Add user message to history
386
- st.session_state.chat_history.append(("user", question, []))
387
- st.session_state.discussion_log = [] # Reset discussion for new question
388
- st.session_state.code_report = "" # Reset code report for new question
389
- st.session_state.code_sections = [] # Reset code sections for new question
390
- st.session_state.image_refs = [] # Reset image refs for new question
391
-
392
- with chat_col:
393
- with st.chat_message("user"):
394
- st.markdown(question)
395
-
396
- # Build initial state
397
- ingest = st.session_state.ingest_state
398
- image_store = st.session_state.image_store
399
-
400
- meta = st.session_state.metadata_state
401
- metadata_json = meta.data_json if meta.is_ready else ""
402
-
403
- question_state = {
404
- "messages": [],
405
- "question": question,
406
- "pdf_path": ingest.get("pdf_path", ""),
407
- "page_image_dir": ingest.get("page_image_dir", ""),
408
- "num_pages": ingest.get("num_pages", 0),
409
- "page_metadata_json": metadata_json,
410
- "legend_pages": [],
411
- "target_pages": [],
412
- "crop_tasks": [],
413
- "code_queries": [],
414
- "image_refs": [],
415
- "code_sections": [],
416
- "code_report": "",
417
- "code_chapters_fetched": [],
418
- "compliance_analysis": "",
419
- "reviewer_analysis": "",
420
- "final_verdict": "",
421
- "discussion_log": [],
422
- "additional_crop_tasks": [],
423
- "additional_code_queries": [],
424
- "needs_more_investigation": False,
425
- "investigation_round": 0,
426
- "max_rounds": max_rounds,
427
- "enable_consensus": enable_consensus,
428
- "enable_annotation": enable_annotation,
429
- "status_message": [],
430
- }
431
-
432
- # ------------------------------------------------------------------
433
- # Live progress
434
- # ------------------------------------------------------------------
435
- crop_cache = st.session_state.crop_cache
436
-
437
- with evidence_col:
438
- with evidence_tabs[0]:
439
- crop_counter_placeholder = st.empty()
440
- crop_image_container = st.container()
441
-
442
- def on_crop_progress(
443
- completed_ref, crop_task, source: str, completed_count: int, total_count: int,
444
- ) -> None:
445
- source_tag = " (cached)" if source == "cached" else ""
446
- crop_counter_placeholder.markdown(
447
- f"**Crop {completed_count}/{total_count}**{source_tag} \n"
448
- f"Latest: *{crop_task.get('label', 'Crop')}*"
449
- )
450
- with crop_image_container:
451
- try:
452
- img = Image.open(completed_ref["path"])
453
- caption = completed_ref["label"]
454
- if source == "cached":
455
- caption += " (cached)"
456
- st.image(img, caption=caption, use_container_width=True)
457
- except Exception:
458
- st.warning(f"Could not load: {completed_ref['label']}")
459
-
460
- # Compile graph
461
- compliance_graph = compile_compliance_graph(image_store, crop_cache, on_crop_progress)
462
-
463
- # Node progress labels
464
- PROGRESS_LABELS = {
465
- "compliance_planner": "Planning investigation...",
466
- "execute_crops": "Cropping drawing images...",
467
- "annotate_crops": "Annotating crops...",
468
- "initial_code_lookup": "Searching NYC code database...",
469
- "compliance_analyst": "Analyzing compliance...",
470
- "targeted_code_lookup": "Follow-up code search...",
471
- "deliberation": "Running peer review...",
472
- "final_verdict": "Synthesizing verdict...",
473
- }
474
-
475
- with chat_col:
476
- with st.status("Investigating compliance...", expanded=True) as status:
477
- all_image_refs: list[dict] = []
478
- all_discussion: list[dict] = []
479
- final_verdict_text = ""
480
- code_report_text = ""
481
-
482
- st.write(PROGRESS_LABELS["compliance_planner"])
483
-
484
- # Placeholder for parallel-branch status (updated after planner completes)
485
- parallel_status = st.empty()
486
-
487
- for event in compliance_graph.stream(question_state, stream_mode="updates"):
488
- node_name = list(event.keys())[0]
489
- update = event[node_name]
490
-
491
- # Status messages (list, since parallel nodes can both emit)
492
- status_msgs = update.get("status_message", [])
493
- for status_msg in status_msgs:
494
- if status_msg:
495
- st.write(f":white_check_mark: {status_msg}")
496
-
497
- # Collect discussion messages
498
- new_discussion = update.get("discussion_log", [])
499
- if new_discussion:
500
- all_discussion.extend(new_discussion)
501
- st.session_state.discussion_log = all_discussion
502
- # Re-render discussion panel
503
- render_discussion_log(discussion_container, all_discussion)
504
-
505
- # Node-specific handling
506
- if node_name == "compliance_planner":
507
- target_pages = update.get("target_pages", [])
508
- crop_tasks = update.get("crop_tasks", [])
509
- code_queries = update.get("code_queries", [])
510
-
511
- with st.expander(":clipboard: Investigation Plan", expanded=True):
512
- if target_pages:
513
- st.markdown(f"**Target pages:** {', '.join(str(p + 1) for p in target_pages)}")
514
- if crop_tasks:
515
- st.markdown(f"**Image crops ({len(crop_tasks)}):**")
516
- for i, task in enumerate(crop_tasks, 1):
517
- display_page = task.get("page_num", 0) + 1
518
- st.markdown(f" {i}. {task.get('label', 'Crop')} (p.{display_page})")
519
- if code_queries:
520
- st.markdown(f"**Code queries ({len(code_queries)}):**")
521
- for i, q in enumerate(code_queries, 1):
522
- st.markdown(f" {i}. [{q.get('focus_area', '?')}] {q.get('query', '')[:80]}...")
523
-
524
- if crop_tasks:
525
- crop_counter_placeholder.markdown(f"**Crop 0/{len(crop_tasks)}** starting...")
526
-
527
- # Show parallel execution message (this appears while both branches run)
528
- parallel_status.info(
529
- ":arrows_counterclockwise: Running in parallel: "
530
- f"**Cropping {len(crop_tasks)} images** + "
531
- f"**Searching {len(code_queries)} code queries**. "
532
- "This may take 30-60 seconds..."
533
- )
534
-
535
- elif node_name in ("initial_code_lookup", "execute_crops"):
536
- # Clear the parallel status once a branch finishes
537
- parallel_status.empty()
538
-
539
- if node_name in ("initial_code_lookup", "targeted_code_lookup"):
540
- report = update.get("code_report", "")
541
- new_sections = update.get("code_sections", [])
542
- if report:
543
- code_report_text = report
544
- st.session_state.code_report = report
545
- if new_sections:
546
- st.session_state.code_sections.extend(new_sections)
547
- # Render each new section in the evidence panel in real-time
548
- with evidence_col:
549
- with evidence_tabs[1]:
550
- for sec in new_sections:
551
- with st.expander(
552
- f":balance_scale: {sec.get('code_type', '?')} "
553
- f"§{sec.get('section_full', '?')}",
554
- expanded=False,
555
- ):
556
- if sec.get("relevance"):
557
- st.caption(sec["relevance"])
558
- st.markdown(sec.get("text", "")[:1500])
559
-
560
- elif node_name == "compliance_analyst":
561
- analysis = update.get("compliance_analysis", "")
562
- needs_more = update.get("needs_more_investigation", False)
563
- round_num = update.get("investigation_round", 1)
564
-
565
- if analysis:
566
- label = f":mag: Compliance Analysis (Round {round_num})"
567
- if needs_more:
568
- label += " — requesting more evidence"
569
- with st.expander(label, expanded=False):
570
- st.markdown(analysis[:5000])
571
-
572
- elif node_name == "deliberation":
573
- review = update.get("reviewer_analysis", "")
574
- if review:
575
- with st.expander(":handshake: Peer Review", expanded=False):
576
- st.markdown(review[:3000])
577
-
578
- # Collect images persist to session state and render in evidence panel
579
- new_refs = update.get("image_refs", [])
580
- if new_refs:
581
- all_image_refs.extend(new_refs)
582
- st.session_state.image_refs.extend(new_refs)
583
- # Render each new crop in the evidence panel in real-time
584
- with evidence_col:
585
- with evidence_tabs[0]:
586
- for ref in new_refs:
587
- try:
588
- img = Image.open(ref["path"])
589
- st.image(img, caption=ref["label"], use_container_width=True)
590
- except Exception:
591
- st.warning(f"Could not load: {ref['label']}")
592
-
593
- # Capture final verdict
594
- if "final_verdict" in update and update["final_verdict"]:
595
- final_verdict_text = update["final_verdict"]
596
-
597
- # Show next step label
598
- if node_name in PROGRESS_LABELS:
599
- next_labels = {
600
- "compliance_planner": ["execute_crops", "initial_code_lookup"],
601
- "execute_crops": ["compliance_analyst"],
602
- "annotate_crops": ["compliance_analyst"],
603
- "initial_code_lookup": ["compliance_analyst"],
604
- "compliance_analyst": ["final_verdict"],
605
- "targeted_code_lookup": ["compliance_analyst"],
606
- "deliberation": ["final_verdict"],
607
- }
608
- for next_node in next_labels.get(node_name, []):
609
- if next_node in PROGRESS_LABELS:
610
- st.write(PROGRESS_LABELS[next_node])
611
-
612
- if crop_cache.size > 0:
613
- st.caption(f":file_folder: {crop_cache.stats}")
614
- status.update(label="Compliance investigation complete", state="complete")
615
-
616
- # Display final verdict
617
- if final_verdict_text:
618
- with chat_col:
619
- with st.chat_message("assistant"):
620
- st.markdown(final_verdict_text)
621
-
622
- st.session_state.chat_history[-1] = ("user", question, [])
623
- st.session_state.chat_history.append(("assistant", final_verdict_text, all_image_refs))
624
- else:
625
- with chat_col:
626
- st.error("No verdict was generated. Please try again.")
627
-
628
- st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Streamlit UI for NYC Code Compliance Bot — with agent discussion panel."""
2
+ from __future__ import annotations
3
+
4
+ import base64
5
+ import hashlib
6
+ import hmac
7
+ import json
8
+ import logging
9
+ import os
10
+ import tempfile
11
+ import time
12
+ from pathlib import Path
13
+
14
+ import pandas as pd
15
+ import requests
16
+ import streamlit as st
17
+ from dotenv import load_dotenv
18
+ from huggingface_hub import HfApi, hf_hub_download
19
+ from huggingface_hub.utils import EntryNotFoundError
20
+ from PIL import Image
21
+
22
+ from config import MAX_INVESTIGATION_ROUNDS
23
+ from graph import compile_compliance_graph
24
+ from tools.chroma_tools import warmup_collection, is_warmed_up
25
+ from tools.crop_cache import CropCache
26
+ from tools.image_store import ImageStore
27
+ from tools.metadata_cache import MetadataState, get_cached_metadata
28
+ from tools.pdf_processor import render_pages
29
+
30
+ load_dotenv()
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+ # =============================================================================
35
+ # HF OAuth + Usage Quota Configuration
36
+ # =============================================================================
37
+
38
+ USAGE_DATASET_REPO = "NYSERDA-CRE-Working-Group/nyserda_demo_useage_store"
39
+ USAGE_FILENAME = "usage.csv"
40
+ MAX_RUNS_PER_USER = 10
41
+ HF_TOKEN = os.environ.get("HF_TOKEN")
42
+
43
+ OAUTH_CLIENT_ID = os.environ.get("OAUTH_CLIENT_ID")
44
+ OAUTH_CLIENT_SECRET = os.environ.get("OAUTH_CLIENT_SECRET")
45
+ OPENID_PROVIDER_URL = os.environ.get("OPENID_PROVIDER_URL", "https://huggingface.co")
46
+ SPACE_HOST = os.environ.get("SPACE_HOST", "")
47
+
48
+ _hf_api = HfApi(token=HF_TOKEN) if HF_TOKEN else None
49
+
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # OAuth helpers
53
+ # ---------------------------------------------------------------------------
54
+
55
+ def _get_oauth_state() -> str:
56
+ """Generate a deterministic OAuth state string for CSRF protection."""
57
+ secret = (OAUTH_CLIENT_SECRET or "fallback-secret").encode()
58
+ return hmac.new(secret, b"hf-oauth-state", hashlib.sha256).hexdigest()[:32]
59
+
60
+
61
+ def _get_redirect_uri() -> str:
62
+ """Build the OAuth redirect URI pointing back to this Space."""
63
+ if SPACE_HOST:
64
+ return f"https://{SPACE_HOST}"
65
+ return "http://localhost:7860"
66
+
67
+
68
+ def _exchange_code_for_user(code: str) -> str | None:
69
+ """Exchange the OAuth authorization code for an access token, then fetch username."""
70
+ if not OAUTH_CLIENT_ID or not OAUTH_CLIENT_SECRET:
71
+ st.error("OAuth is not configured. Set hf_oauth: true in your Space's README.md")
72
+ return None
73
+
74
+ redirect_uri = _get_redirect_uri()
75
+ token_url = f"{OPENID_PROVIDER_URL}/oauth/token"
76
+ credentials = base64.b64encode(
77
+ f"{OAUTH_CLIENT_ID}:{OAUTH_CLIENT_SECRET}".encode()
78
+ ).decode()
79
+
80
+ try:
81
+ resp = requests.post(
82
+ token_url,
83
+ headers={"Authorization": f"Basic {credentials}"},
84
+ data={
85
+ "grant_type": "authorization_code",
86
+ "code": code,
87
+ "redirect_uri": redirect_uri,
88
+ "client_id": OAUTH_CLIENT_ID,
89
+ },
90
+ timeout=10,
91
+ )
92
+ resp.raise_for_status()
93
+ token_data = resp.json()
94
+ except Exception as e:
95
+ st.error(f"Token exchange failed: {e}")
96
+ return None
97
+
98
+ access_token = token_data.get("access_token")
99
+ if not access_token:
100
+ st.error("No access token in OAuth response.")
101
+ return None
102
+
103
+ try:
104
+ userinfo_resp = requests.get(
105
+ f"{OPENID_PROVIDER_URL}/oauth/userinfo",
106
+ headers={"Authorization": f"Bearer {access_token}"},
107
+ timeout=10,
108
+ )
109
+ userinfo_resp.raise_for_status()
110
+ userinfo = userinfo_resp.json()
111
+ except Exception as e:
112
+ st.error(f"Failed to fetch user info: {e}")
113
+ return None
114
+
115
+ username = userinfo.get("preferred_username") or userinfo.get("sub")
116
+ if username:
117
+ return username.strip().lower()
118
+ return None
119
+
120
+
121
+ def get_hf_user() -> str | None:
122
+ """Check if user is logged in via HF OAuth (authorization code flow)."""
123
+ if "hf_user" in st.session_state:
124
+ return st.session_state["hf_user"]
125
+
126
+ params = st.query_params
127
+ code = params.get("code")
128
+ returned_state = params.get("state")
129
+
130
+ if code:
131
+ expected_state = _get_oauth_state()
132
+ if returned_state == expected_state:
133
+ user = _exchange_code_for_user(code)
134
+ if user:
135
+ st.session_state["hf_user"] = user
136
+ st.query_params.clear()
137
+ st.rerun()
138
+ else:
139
+ st.error("OAuth login failed could not retrieve user info.")
140
+ st.query_params.clear()
141
+ else:
142
+ st.error("OAuth state mismatch — please try signing in again.")
143
+ st.query_params.clear()
144
+
145
+ return None
146
+
147
+
148
+ def login_button():
149
+ """Show a 'Sign in with Hugging Face' button that starts the OAuth flow."""
150
+ if not OAUTH_CLIENT_ID:
151
+ st.error(
152
+ "OAuth is not configured for this Space. "
153
+ "Add `hf_oauth: true` to your README.md metadata."
154
+ )
155
+ return
156
+
157
+ if st.button("🤗 Sign in with Hugging Face", use_container_width=True, type="primary"):
158
+ state = _get_oauth_state()
159
+ redirect_uri = _get_redirect_uri()
160
+ auth_url = (
161
+ f"{OPENID_PROVIDER_URL}/oauth/authorize"
162
+ f"?client_id={OAUTH_CLIENT_ID}"
163
+ f"&redirect_uri={redirect_uri}"
164
+ f"&scope=openid%20profile"
165
+ f"&response_type=code"
166
+ f"&state={state}"
167
+ )
168
+ st.markdown(
169
+ f'<meta http-equiv="refresh" content="0;url={auth_url}">',
170
+ unsafe_allow_html=True,
171
+ )
172
+ st.stop()
173
+
174
+
175
+ # ---------------------------------------------------------------------------
176
+ # Usage quota helpers
177
+ # ---------------------------------------------------------------------------
178
+
179
+ def _load_usage_df() -> pd.DataFrame:
180
+ """Load the usage CSV from HF dataset repo."""
181
+ if not _hf_api or not USAGE_DATASET_REPO:
182
+ return pd.DataFrame(columns=["user_id", "runs", "first_seen", "last_seen"])
183
+ try:
184
+ local_path = hf_hub_download(
185
+ repo_id=USAGE_DATASET_REPO,
186
+ repo_type="dataset",
187
+ filename=USAGE_FILENAME,
188
+ token=HF_TOKEN,
189
+ )
190
+ return pd.read_csv(local_path)
191
+ except EntryNotFoundError:
192
+ return pd.DataFrame(columns=["user_id", "runs", "first_seen", "last_seen"])
193
+
194
+
195
+ def _save_usage_df(df: pd.DataFrame, commit_message: str) -> None:
196
+ """Save the usage CSV back to HF dataset repo."""
197
+ if not _hf_api or not USAGE_DATASET_REPO:
198
+ return
199
+ tmp_path = "/tmp/usage.csv"
200
+ df.to_csv(tmp_path, index=False)
201
+ _hf_api.upload_file(
202
+ path_or_fileobj=tmp_path,
203
+ path_in_repo=USAGE_FILENAME,
204
+ repo_id=USAGE_DATASET_REPO,
205
+ repo_type="dataset",
206
+ commit_message=commit_message,
207
+ )
208
+
209
+
210
def check_and_increment_quota(user_id: str) -> tuple[bool, int]:
    """Check whether *user_id* has remaining quota and record one run.

    Returns ``(allowed, remaining_runs)``. If ``USAGE_DATASET_REPO`` is not
    configured, tracking is disabled and every call is allowed (unlimited).

    NOTE(review): the load→modify→save round-trip against the dataset repo
    is not atomic; two concurrent sessions can read the same count. Fine
    for a soft quota — confirm before relying on it as a hard limit.
    """
    if not USAGE_DATASET_REPO:
        return True, 999

    now = int(time.time())
    df = _load_usage_df()

    # First-time users get a zeroed row so the increment path below is
    # shared instead of duplicated per branch (the old new-user branch
    # re-implemented the quota check against a hard-coded runs == 0).
    if df.empty or not (df["user_id"] == user_id).any():
        new_row = {
            "user_id": user_id,
            "runs": 0,
            "first_seen": now,
            "last_seen": now,
        }
        df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

    idx = df.index[df["user_id"] == user_id][0]
    runs = int(df.loc[idx, "runs"])

    if runs >= MAX_RUNS_PER_USER:
        return False, 0

    runs += 1
    df.loc[idx, "runs"] = runs
    df.loc[idx, "last_seen"] = now

    _save_usage_df(df, commit_message=f"usage: increment {user_id} to {runs}")
    return True, MAX_RUNS_PER_USER - runs
248
+
249
+
250
# ---------------------------------------------------------------------------
# Page config
# ---------------------------------------------------------------------------
# Must be the first Streamlit command executed on the page.
st.set_page_config(
    page_title="NYC Code Compliance Bot",
    page_icon=":building_construction:",
    layout="wide",
)
258
+
259
# ---------------------------------------------------------------------------
# Authentication gate — must sign in before using the app
# ---------------------------------------------------------------------------
# `uid` is reused later for quota accounting and the sidebar caption.
uid = get_hf_user()
if not uid:
    # Centered landing card with a Hugging Face sign-in button; the script
    # stops here so no app UI is built for anonymous visitors.
    _left, center, _right = st.columns([1, 2, 1])
    with center:
        st.markdown(
            "<h1 style='text-align: center;'>🏗️ NYC Code Compliance Bot</h1>",
            unsafe_allow_html=True,
        )
        st.markdown(
            "<p style='text-align: center; color: grey;'>"
            "Sign in with your Hugging Face account to get started."
            "</p>",
            unsafe_allow_html=True,
        )
        login_button()
    st.stop()
278
+
279
# ---------------------------------------------------------------------------
# Custom CSS for agent discussion panel
# ---------------------------------------------------------------------------
# Injects one `.agent-<name>` class per agent; render_discussion_log builds
# matching class names, so new agents need a new rule here.
st.markdown("""
<style>
.agent-msg {
    padding: 8px 12px;
    margin: 4px 0;
    border-radius: 8px;
    font-size: 0.9em;
}
.agent-planner {
    background-color: #e3f2fd;
    border-left: 4px solid #1565c0;
}
.agent-code_analyst {
    background-color: #fff3e0;
    border-left: 4px solid #e65100;
}
.agent-compliance_analyst {
    background-color: #e8f5e9;
    border-left: 4px solid #2e7d32;
}
.agent-reviewer {
    background-color: #f3e5f5;
    border-left: 4px solid #6a1b9a;
}
.agent-icon {
    font-weight: bold;
    margin-right: 6px;
}
.agent-timestamp {
    color: #666;
    font-size: 0.8em;
}
</style>
""", unsafe_allow_html=True)
316
+
317
# Emoji icon per agent role (written as escapes to keep the file ASCII-safe).
AGENT_ICONS = {
    "planner": "\U0001f4cb",             # 📋 clipboard
    "code_analyst": "\u2696\ufe0f",      # ⚖️ balance scale
    "compliance_analyst": "\U0001f50d",  # 🔍 magnifying glass
    "reviewer": "\U0001f91d",            # 🤝 handshake
}

# Display name per agent role, used alongside the icon in the discussion log.
AGENT_LABELS = {
    "planner": "Planner",
    "code_analyst": "Code Analyst",
    "compliance_analyst": "Compliance Analyst",
    "reviewer": "Reviewer",
}
330
+
331
# ---------------------------------------------------------------------------
# Session state defaults
# ---------------------------------------------------------------------------
# Seed every key the app reads before first use. Values are factories so
# objects like MetadataState/CropCache are only constructed when the key
# is actually missing (i.e. on the first run of a session).
_SESSION_DEFAULTS = {
    "pdf_loaded": lambda: False,
    "chat_history": lambda: [],
    "image_store": lambda: None,
    "ingest_state": lambda: {},
    "pdf_bytes": lambda: None,
    "metadata_state": MetadataState,
    "crop_cache": CropCache,
    "discussion_log": lambda: [],
    "code_report": lambda: "",
    "code_sections": lambda: [],
    "image_refs": lambda: [],
    "db_ready": lambda: False,
}
for _key, _factory in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _factory()
358
+
359
# ---------------------------------------------------------------------------
# Startup: warm up embedding model + ChromaDB
# ---------------------------------------------------------------------------
if not st.session_state.db_ready:
    with st.status("Loading NYC Code Database...", expanded=True) as _db_status:
        st.write(":brain: Loading embedding model (bge-large-en-v1.5)...")
        st.write("_This is a one-time download (~1.3 GB) on first run._")
        ok = warmup_collection()
        if ok:
            st.session_state.db_ready = True
            _db_status.update(label="NYC Code Database ready", state="complete")
        else:
            # App keeps running without code lookup; db_ready stays False so
            # this status box is re-shown (and warmup retried) on next rerun.
            _db_status.update(
                label="NYC Code Database not available — code lookup will be disabled",
                state="error",
            )
            st.session_state.db_ready = False
    # Rerun only on success so the completed status box is cleared away.
    if st.session_state.db_ready:
        st.rerun()
378
+
379
# ---------------------------------------------------------------------------
# Sidebar
# ---------------------------------------------------------------------------
# Produces the module-level widgets the rest of the script reads:
# uploaded_file, enable_consensus, enable_annotation, max_rounds.
with st.sidebar:
    st.title(":building_construction: NYC Code Compliance Bot")
    st.markdown(
        "Upload a construction drawing PDF and ask compliance questions. "
        "The system uses **agentic vision** + **NYC code database** to "
        "verify code compliance."
    )

    st.divider()

    # PDF upload
    uploaded_file = st.file_uploader("Upload Drawing PDF", type=["pdf"])

    # Default drawing button — only offered before any PDF has been loaded;
    # the flag is picked up by the Phase A ingestion block after the rerun.
    _DEFAULT_PDF = Path(__file__).parent / "NorthMaconPark.pdf"
    if _DEFAULT_PDF.exists() and not st.session_state.pdf_loaded:
        st.markdown("**— or —**")
        if st.button("Use Default Drawing", use_container_width=True):
            st.session_state._use_default_pdf = True
            st.rerun()

    st.divider()

    # Settings that feed the LangGraph initial state for each question.
    st.subheader("Settings")
    enable_consensus = st.checkbox(
        "Enable peer review (Gemini + GPT)",
        value=False,
        help="GPT reviews Gemini's compliance analysis. Slower but more thorough.",
    )
    enable_annotation = st.checkbox(
        "Enable annotation",
        value=False,
        help="Annotate crops with numbered highlights before analysis.",
    )
    max_rounds = st.slider(
        "Max investigation rounds",
        min_value=1,
        max_value=5,
        value=MAX_INVESTIGATION_ROUNDS,
        help="Maximum crop-analyze loops before forcing a final verdict.",
    )

    st.divider()

    # --- User info + sign out ---
    # Dropping "hf_user" invalidates the auth gate's cached identity.
    st.caption(f"Signed in as: **{uid}**")
    if st.button("Sign out", use_container_width=True):
        st.session_state.pop("hf_user", None)
        st.rerun()

    st.divider()
    st.caption("Powered by LangGraph + Gemini + GPT + ChromaDB")
435
+
436
# ---------------------------------------------------------------------------
# PDF ingestion — Phase A: render pages
# ---------------------------------------------------------------------------
# Resolve the pending PDF from either the uploader or the default-drawing
# flag set in the sidebar; (name, raw bytes) pairs keep both paths uniform.
_pending_pdf: tuple[str, bytes] | None = None
if not st.session_state.pdf_loaded:
    if uploaded_file is not None:
        _pending_pdf = (uploaded_file.name, uploaded_file.getvalue())
    elif st.session_state.get("_use_default_pdf"):
        _default = Path(__file__).parent / "NorthMaconPark.pdf"
        if _default.exists():
            _pending_pdf = (_default.name, _default.read_bytes())

if _pending_pdf is not None and not st.session_state.pdf_loaded:
    pdf_name, pdf_bytes = _pending_pdf
    with st.status("Converting PDF to images...", expanded=True) as status:
        # Work in a fresh temp dir per upload; the PDF is written to disk
        # because downstream tools take a file path.
        tmp_dir = tempfile.mkdtemp(prefix="compliance_bot_")
        pdf_path = Path(tmp_dir) / pdf_name
        pdf_path.write_bytes(pdf_bytes)

        st.session_state.pdf_bytes = pdf_bytes
        # New document invalidates any crops cached for the previous one.
        st.session_state.crop_cache = CropCache()

        image_store = ImageStore(str(Path(tmp_dir) / "images"))
        st.session_state.image_store = image_store
        # NOTE(review): reaches into ImageStore's private `_pages_dir` —
        # a public accessor on ImageStore would be cleaner; confirm API.
        page_image_dir = str(image_store._pages_dir)

        # Reuse a previously generated page index for identical PDF bytes.
        cached = get_cached_metadata(pdf_bytes)
        if cached is not None:
            st.session_state.metadata_state.set_ready(json.dumps(cached, indent=2))
            st.write("Page index loaded from cache")

        st.write("Rendering pages...")
        num_pages = render_pages(str(pdf_path), page_image_dir)

        st.session_state.ingest_state = {
            "pdf_path": str(pdf_path),
            "page_image_dir": page_image_dir,
            "num_pages": num_pages,
        }
        st.session_state.pdf_loaded = True
        st.session_state.pop("_use_default_pdf", None)
        st.write(f"Converted {num_pages} pages to images.")
        status.update(label=f"PDF ready: {num_pages} pages", state="complete")
    st.rerun()
480
+
481
# ---------------------------------------------------------------------------
# PDF ingestion — Phase B: generate page index
# ---------------------------------------------------------------------------
# Runs once per document: only when the metadata state is still untouched
# (a cache hit in Phase A moves it straight to "ready" and skips this).
if st.session_state.pdf_loaded:
    meta = st.session_state.metadata_state
    if meta.status == "not_started":
        # Show the PDF while indexing so the user has something to look at.
        if st.session_state.pdf_bytes is not None:
            with st.expander(":page_facing_up: PDF Viewer", expanded=False):
                st.pdf(st.session_state.pdf_bytes, height=400)

        ingest = st.session_state.ingest_state
        num_pages = ingest["num_pages"]

        st.write("**Generating page index...**")
        progress_bar = st.progress(0, text="Analyzing pages to build searchable index...")

        def _index_progress(completed: int, total: int, label: str):
            # Callback passed into generate_sync; drives the progress bar.
            pct = completed / total
            progress_bar.progress(pct, text=f"Indexing: {label} ({completed}/{total} batches)")

        meta.generate_sync(
            ingest["pdf_path"],
            num_pages,
            st.session_state.pdf_bytes,
            progress_callback=_index_progress,
        )
        # Indexing failure is non-fatal: the app falls back to full-PDF mode.
        if meta.is_ready:
            progress_bar.progress(1.0, text="Page index ready!")
        else:
            progress_bar.progress(1.0, text="Indexing failed — using full PDF mode")
        st.rerun()
512
+
513
# ---------------------------------------------------------------------------
# Main layout (pre-upload welcome)
# ---------------------------------------------------------------------------
# Signed in but no PDF yet: show a centered welcome card and stop so the
# chat/discussion/evidence layout below is never built without a document.
if not st.session_state.pdf_loaded:
    _left, center, _right = st.columns([1, 2, 1])
    with center:
        st.markdown(
            "<h1 style='text-align: center;'>🏗️ NYC Code Compliance Bot</h1>",
            unsafe_allow_html=True,
        )
        st.markdown(
            "<p style='text-align: center; color: grey;'>"
            "Upload a construction drawing PDF in the sidebar to get started.<br>"
            "This tool uses <b>agentic vision</b> and the <b>NYC Building Code database</b> "
            "to verify code compliance in your drawings."
            "</p>",
            unsafe_allow_html=True,
        )
    st.stop()
532
+
533
# ---------------------------------------------------------------------------
# PDF viewer
# ---------------------------------------------------------------------------
if st.session_state.pdf_bytes is not None:
    with st.expander(":page_facing_up: PDF Viewer", expanded=False):
        st.pdf(st.session_state.pdf_bytes, height=400)

# ---------------------------------------------------------------------------
# Three-column layout: chat | discussion | images+code
# ---------------------------------------------------------------------------
# These column handles are used throughout the question-processing section.
chat_col, discuss_col, evidence_col = st.columns([2, 2, 2])
544
+
545
# ---------------------------------------------------------------------------
# Discussion panel (agent conversation)
# ---------------------------------------------------------------------------
def render_discussion_log(container, discussion_log: list[dict]):
    """Render the agent discussion log with styled messages.

    Each entry is expected to carry ``agent``, ``timestamp`` and ``summary``
    keys; unknown agents fall back to a robot icon and their raw name.
    """
    with container:
        for msg in discussion_log:
            agent = msg.get("agent", "unknown")
            icon = AGENT_ICONS.get(agent, "\U0001f916")  # 🤖 fallback icon
            label = AGENT_LABELS.get(agent, agent)
            # Must match a `.agent-<name>` rule in the CSS injected at page load.
            css_class = f"agent-{agent}"

            # NOTE(review): timestamp/summary are interpolated into raw HTML
            # under unsafe_allow_html — LLM-produced text containing markup
            # renders as-is. Consider html.escape if that becomes a problem.
            st.markdown(
                f'<div class="agent-msg {css_class}">'
                f'<span class="agent-timestamp">[{msg.get("timestamp", "")}]</span> '
                f'<span class="agent-icon">{icon} {label}</span><br>'
                f'{msg.get("summary", "")}'
                f'</div>',
                unsafe_allow_html=True,
            )
565
+
566
# ---------------------------------------------------------------------------
# Chat history display
# ---------------------------------------------------------------------------
with chat_col:
    st.subheader(":speech_balloon: Chat")

    # Surface the page-index status so users know which planning mode runs.
    meta = st.session_state.metadata_state
    if meta.is_ready:
        st.caption("Page index ready — fast planning enabled")
    elif meta.status == "failed":
        st.caption("Page indexing failed — using full PDF mode")

    # History entries are (role, content, image_refs) triples; only the
    # text is replayed here, the refs feed the evidence panel.
    for role, content, _refs in st.session_state.chat_history:
        with st.chat_message(role):
            st.markdown(content)

    # Returns the submitted text for this run, or None when idle.
    question = st.chat_input("Ask a compliance question about the drawing...")
583
+
584
# ---------------------------------------------------------------------------
# Discussion panel
# ---------------------------------------------------------------------------
with discuss_col:
    st.subheader(":busts_in_silhouette: Agent Discussion")
    # Container handle is reused by the streaming loop for live updates.
    discussion_container = st.container()

    if st.session_state.discussion_log:
        render_discussion_log(discussion_container, st.session_state.discussion_log)
    else:
        st.info("Agent discussions will appear here during analysis.")
595
+
596
# ---------------------------------------------------------------------------
# Evidence panel (images + code)
# ---------------------------------------------------------------------------
with evidence_col:
    st.subheader(":framed_picture: Evidence")

    # Tab handles are reused later by the streaming loop to append evidence.
    evidence_tabs = st.tabs(["Drawing Crops", "Code Sections"])

    with evidence_tabs[0]:
        if st.session_state.image_refs:
            for ref in st.session_state.image_refs:
                try:
                    img = Image.open(ref["path"])
                    st.image(img, caption=ref["label"], use_container_width=True)
                except Exception:
                    # Temp crop files can vanish between reruns; degrade softly.
                    st.warning(f"Could not load: {ref['label']}")
        elif st.session_state.chat_history:
            st.info("No images for this question.")
        else:
            st.info("Ask a question to see drawing crops here.")

    with evidence_tabs[1]:
        if st.session_state.code_sections:
            for sec in st.session_state.code_sections:
                with st.expander(
                    f":balance_scale: {sec.get('code_type', '?')} §{sec.get('section_full', '?')}",
                    expanded=False,
                ):
                    if sec.get("relevance"):
                        st.caption(sec["relevance"])
                    st.markdown(sec.get("text", "")[:1500])
        # NOTE(review): this `else` pairs with the code_report check, so the
        # placeholder info can appear under listed sections when no report
        # was produced — confirm that is the intended layout.
        if st.session_state.code_report:
            with st.expander(":page_facing_up: Full Code Report", expanded=False):
                st.markdown(st.session_state.code_report[:5000])
        else:
            st.info("Code sections retrieved during analysis will appear here.")
632
+
633
+
634
# ---------------------------------------------------------------------------
# Question processing
# ---------------------------------------------------------------------------
if question:
    # === QUOTA CHECK ===
    # Increments the user's run count up front; denied users are stopped
    # before any model work happens.
    allowed, remaining = check_and_increment_quota(uid)
    if not allowed:
        with chat_col:
            st.error(
                f"Usage limit reached: {MAX_RUNS_PER_USER} queries per user. "
                "Please contact the admin for additional access."
            )
        st.stop()

    if remaining <= 2:
        with chat_col:
            st.warning(f"⚠️ Only {remaining} query(ies) left!")
    # === END QUOTA CHECK ===

    # Record the user turn and clear per-question panels from the last run.
    st.session_state.chat_history.append(("user", question, []))
    st.session_state.discussion_log = []
    st.session_state.code_report = ""
    st.session_state.code_sections = []
    st.session_state.image_refs = []

    # Echo the question immediately; the history loop above already ran.
    with chat_col:
        with st.chat_message("user"):
            st.markdown(question)
663
+
664
    # Build initial state for the LangGraph run: ingestion artifacts plus
    # empty accumulators every node expects to find in the state dict.
    ingest = st.session_state.ingest_state
    image_store = st.session_state.image_store

    # Pass the page index only when it finished building; an empty string
    # signals full-PDF planning mode downstream.
    meta = st.session_state.metadata_state
    metadata_json = meta.data_json if meta.is_ready else ""

    question_state = {
        "messages": [],
        "question": question,
        "pdf_path": ingest.get("pdf_path", ""),
        "page_image_dir": ingest.get("page_image_dir", ""),
        "num_pages": ingest.get("num_pages", 0),
        "page_metadata_json": metadata_json,
        "legend_pages": [],
        "target_pages": [],
        "crop_tasks": [],
        "code_queries": [],
        "image_refs": [],
        "code_sections": [],
        "code_report": "",
        "code_chapters_fetched": [],
        "compliance_analysis": "",
        "reviewer_analysis": "",
        "final_verdict": "",
        "discussion_log": [],
        "additional_crop_tasks": [],
        "additional_code_queries": [],
        "needs_more_investigation": False,
        "investigation_round": 0,
        # Sidebar settings captured at submission time.
        "max_rounds": max_rounds,
        "enable_consensus": enable_consensus,
        "enable_annotation": enable_annotation,
        "status_message": [],
    }
699
+
700
    # ------------------------------------------------------------------
    # Live progress
    # ------------------------------------------------------------------
    crop_cache = st.session_state.crop_cache

    # Placeholders in the "Drawing Crops" tab that the crop callback fills
    # as individual crops complete.
    with evidence_col:
        with evidence_tabs[0]:
            crop_counter_placeholder = st.empty()
            crop_image_container = st.container()

    def on_crop_progress(
        completed_ref, crop_task, source: str, completed_count: int, total_count: int,
    ) -> None:
        # Per-crop callback invoked by the graph's crop executor; updates
        # the counter line and appends the finished crop image.
        source_tag = " (cached)" if source == "cached" else ""
        crop_counter_placeholder.markdown(
            f"**Crop {completed_count}/{total_count}**{source_tag} \n"
            f"Latest: *{crop_task.get('label', 'Crop')}*"
        )
        with crop_image_container:
            try:
                img = Image.open(completed_ref["path"])
                caption = completed_ref["label"]
                if source == "cached":
                    caption += " (cached)"
                st.image(img, caption=caption, use_container_width=True)
            except Exception:
                st.warning(f"Could not load: {completed_ref['label']}")

    # Compile graph for this question (wires in the image store, crop cache
    # and the progress callback above).
    compliance_graph = compile_compliance_graph(image_store, crop_cache, on_crop_progress)
730
+
731
+ # Node progress labels
732
+ PROGRESS_LABELS = {
733
+ "compliance_planner": "Planning investigation...",
734
+ "execute_crops": "Cropping drawing images...",
735
+ "annotate_crops": "Annotating crops...",
736
+ "initial_code_lookup": "Searching NYC code database...",
737
+ "compliance_analyst": "Analyzing compliance...",
738
+ "targeted_code_lookup": "Follow-up code search...",
739
+ "deliberation": "Running peer review...",
740
+ "final_verdict": "Synthesizing verdict...",
741
+ }
742
+
743
    with chat_col:
        with st.status("Investigating compliance...", expanded=True) as status:
            # Accumulators for this run; image refs also feed chat history
            # at the end, the rest mirror into session state as they arrive.
            all_image_refs: list[dict] = []
            all_discussion: list[dict] = []
            final_verdict_text = ""
            code_report_text = ""

            st.write(PROGRESS_LABELS["compliance_planner"])

            # Banner shown while cropping and code search run concurrently.
            parallel_status = st.empty()

            # Consume graph events incrementally; each event is a one-key
            # dict mapping the node name to its state update.
            for event in compliance_graph.stream(question_state, stream_mode="updates"):
                node_name = list(event.keys())[0]
                update = event[node_name]

                # Generic status lines any node may emit.
                status_msgs = update.get("status_message", [])
                for status_msg in status_msgs:
                    if status_msg:
                        st.write(f":white_check_mark: {status_msg}")

                # Mirror new agent-discussion entries into the live panel.
                new_discussion = update.get("discussion_log", [])
                if new_discussion:
                    all_discussion.extend(new_discussion)
                    st.session_state.discussion_log = all_discussion
                    render_discussion_log(discussion_container, all_discussion)

                if node_name == "compliance_planner":
                    # Planner output: show the investigation plan up front.
                    target_pages = update.get("target_pages", [])
                    crop_tasks = update.get("crop_tasks", [])
                    code_queries = update.get("code_queries", [])

                    with st.expander(":clipboard: Investigation Plan", expanded=True):
                        if target_pages:
                            # Pages are 0-based internally; display 1-based.
                            st.markdown(f"**Target pages:** {', '.join(str(p + 1) for p in target_pages)}")
                        if crop_tasks:
                            st.markdown(f"**Image crops ({len(crop_tasks)}):**")
                            for i, task in enumerate(crop_tasks, 1):
                                display_page = task.get("page_num", 0) + 1
                                st.markdown(f" {i}. {task.get('label', 'Crop')} (p.{display_page})")
                        if code_queries:
                            st.markdown(f"**Code queries ({len(code_queries)}):**")
                            for i, q in enumerate(code_queries, 1):
                                st.markdown(f" {i}. [{q.get('focus_area', '?')}] {q.get('query', '')[:80]}...")

                    if crop_tasks:
                        crop_counter_placeholder.markdown(f"**Crop 0/{len(crop_tasks)}** — starting...")

                    parallel_status.info(
                        ":arrows_counterclockwise: Running in parallel: "
                        f"**Cropping {len(crop_tasks)} images** + "
                        f"**Searching {len(code_queries)} code queries**. "
                        "This may take 30-60 seconds..."
                    )

                elif node_name in ("initial_code_lookup", "execute_crops"):
                    # Either parallel branch finishing clears the banner.
                    parallel_status.empty()

                # Code-lookup nodes: mirror report + sections into the
                # "Code Sections" evidence tab as they arrive.
                if node_name in ("initial_code_lookup", "targeted_code_lookup"):
                    report = update.get("code_report", "")
                    new_sections = update.get("code_sections", [])
                    if report:
                        code_report_text = report
                        st.session_state.code_report = report
                    if new_sections:
                        st.session_state.code_sections.extend(new_sections)
                        with evidence_col:
                            with evidence_tabs[1]:
                                for sec in new_sections:
                                    with st.expander(
                                        f":balance_scale: {sec.get('code_type', '?')} "
                                        f"§{sec.get('section_full', '?')}",
                                        expanded=False,
                                    ):
                                        if sec.get("relevance"):
                                            st.caption(sec["relevance"])
                                        st.markdown(sec.get("text", "")[:1500])

                elif node_name == "compliance_analyst":
                    analysis = update.get("compliance_analysis", "")
                    needs_more = update.get("needs_more_investigation", False)
                    round_num = update.get("investigation_round", 1)

                    if analysis:
                        label = f":mag: Compliance Analysis (Round {round_num})"
                        if needs_more:
                            label += " — requesting more evidence"
                        with st.expander(label, expanded=False):
                            st.markdown(analysis[:5000])

                elif node_name == "deliberation":
                    review = update.get("reviewer_analysis", "")
                    if review:
                        with st.expander(":handshake: Peer Review", expanded=False):
                            st.markdown(review[:3000])

                # Any node may attach new crop images; mirror them into the
                # "Drawing Crops" tab and keep them for chat history.
                new_refs = update.get("image_refs", [])
                if new_refs:
                    all_image_refs.extend(new_refs)
                    st.session_state.image_refs.extend(new_refs)
                    with evidence_col:
                        with evidence_tabs[0]:
                            for ref in new_refs:
                                try:
                                    img = Image.open(ref["path"])
                                    st.image(img, caption=ref["label"], use_container_width=True)
                                except Exception:
                                    st.warning(f"Could not load: {ref['label']}")

                if "final_verdict" in update and update["final_verdict"]:
                    final_verdict_text = update["final_verdict"]

                # After a known node completes, pre-announce the node(s)
                # expected to run next so the status log reads as a timeline.
                if node_name in PROGRESS_LABELS:
                    next_labels = {
                        "compliance_planner": ["execute_crops", "initial_code_lookup"],
                        "execute_crops": ["compliance_analyst"],
                        "annotate_crops": ["compliance_analyst"],
                        "initial_code_lookup": ["compliance_analyst"],
                        "compliance_analyst": ["final_verdict"],
                        "targeted_code_lookup": ["compliance_analyst"],
                        "deliberation": ["final_verdict"],
                    }
                    for next_node in next_labels.get(node_name, []):
                        if next_node in PROGRESS_LABELS:
                            st.write(PROGRESS_LABELS[next_node])

            if crop_cache.size > 0:
                st.caption(f":file_folder: {crop_cache.stats}")
            status.update(label="Compliance investigation complete", state="complete")
871
+
872
+ if final_verdict_text:
873
+ with chat_col:
874
+ with st.chat_message("assistant"):
875
+ st.markdown(final_verdict_text)
876
+
877
+ st.session_state.chat_history[-1] = ("user", question, [])
878
+ st.session_state.chat_history.append(("assistant", final_verdict_text, all_image_refs))
879
+ else:
880
+ with chat_col:
881
+ st.error("No verdict was generated. Please try again.")
882
+
883
+ st.rerun()