Spaces:

milindkamat0507
/

topic_modelling

Paused

App Files Files Community

milindkamat0507 commited on 14 days ago

Commit

3454e5c

verified ·

1 Parent(s): 5840b29

Upload app.py

Browse files

Files changed (1) hide show

app.py +774 -0

app.py ADDED Viewed

	@@ -0,0 +1,774 @@

+"""
+app.py — Topic Modelling Agentic AI | Gradio UI
+═══════════════════════════════════════════════════
+Version:  3.0.0 | April 2026
+Stack:    Gradio 5.x + LangGraph + Mistral + BERTopic
+Deploy:   HuggingFace Spaces (sdk: gradio)
+Rules:    Zero gr.HTML(). All UI via native Gradio components.
+          See GRADIO_UI_GUIDELINES_v2.docx for full standards.
+ARCHITECTURE — 20 Blocks in 5 Sections
+─────────────────────────────────────────
+  Section 1: Setup        (B1–B3)   Imports, agent, theme
+  Section 2: Helpers      (B4–B10)  Pure Python functions, no UI
+  Section 3: UI Layout    (B11–B17) gr.Blocks with native components
+  Section 4: Event Wiring (B18–B19) Connect UI to functions
+  Section 5: Launch       (B20)     Start server
+BLOCK COMMUNICATION MAP
+─────────────────────────
+  B6 (respond)  ←→ B2 (agent)   : invokes agent for chat
+  B6 (respond)  → B4 (output)   : scans for download files
+  B7 (chart)    → B17a (display) : loads Plotly JSON → gr.Plot
+  B8 (table)    → B16 (review)  : builds rows → gr.Dataframe
+  B9 (papers)   ← B16 (review)  : triggered by row click
+  B10 (submit)  → B2 (agent)    : sends review edits to agent
+  B18 (wiring)  → B5,B7,B8      : refreshes progress, charts, table
+"""
+import os
+import glob
+import json
+import plotly.io as pio
+import gradio as gr
+from langchain_mistralai import ChatMistralAI
+from langgraph.prebuilt import create_react_agent
+from langgraph.checkpoint.memory import MemorySaver
+from agent import SYSTEM_PROMPT, get_local_tools
+print(">>> app.py: imports complete")
+# ╔═══════════════════════════════════════════════════════════════╗
+# ║  SECTION 1 — SETUP                                          ║
+# ║  One-time initialization: agent creation and visual theme.   ║
+# ║  Nothing here renders UI — it prepares the backend brain     ║
+# ║  and the visual identity for the entire application.         ║
+# ╚═══════════════════════════════════════════════════════════════╝
+# ── B2: Agent setup ─────────────────────────────────────────────
+# PURPOSE:  Create the LangGraph ReAct agent that powers all chat.
+#           Connects Mistral LLM to BERTopic tools with memory so
+#           the agent remembers context across conversation turns.
+# PRODUCES: `agent` — used by B6 (respond) and B10 (_submit_review)
+# IMPORTS:  SYSTEM_PROMPT, get_local_tools from agent.py
+# NOTE:     MemorySaver keeps conversation in RAM (resets on restart).
+#           For persistent memory, swap to SQLite checkpointer.
+# ────────────────────────────────────────────────────────────────
# Mistral chat model: temperature 0, 300-second request timeout.
llm = ChatMistralAI(
    model="mistral-small-latest",
    temperature=0,
    timeout=300,
)
tools = get_local_tools()
# ReAct agent wired to the local tools. MemorySaver keeps conversation state
# in RAM only, so it is lost whenever the process restarts.
agent = create_react_agent(
    model=llm,
    tools=tools,
    prompt=SYSTEM_PROMPT,
    checkpointer=MemorySaver(),
)
print(f">>> app.py: agent ready ({len(tools)} tools)")
# Module-level state: one copy per process, therefore shared by every user.
_msg_count = 0            # number of chat turns handled by respond()
_uploaded = {"path": ""}  # path of the most recently uploaded CSV
+# ── end B2: Agent setup ────────────────────────────────────────
+# ── B3: Theme ───────────────────────────────────────────────────
+# PURPOSE:  Define the visual identity of the entire application.
+#           Replaces ALL custom CSS that was previously in HEADER_HTML:
+#           - DM Sans font (was @import url in <style> block)
+#           - Slate color palette (was hardcoded hex in inline styles)
+#           - Soft rounded corners and spacing
+# USED BY:  B20 (demo.launch) — Gradio 6 moved theme from gr.Blocks
+#           to launch(). The theme object is created here but applied
+#           in B20 via demo.launch(theme=theme).
+# REPLACES: Old HEADER_HTML lines 33-38 (<style> block with CSS)
+# ────────────────────────────────────────────────────────────────
# Soft base theme with a slate primary hue; both fonts are Google Fonts.
theme = gr.themes.Soft(
    font_mono=gr.themes.GoogleFont("JetBrains Mono"),
    font=gr.themes.GoogleFont("DM Sans"),
    primary_hue="slate",
)
+# ── end B3: Theme ──────────────────────────────────────────────
+# ╔═══════════════════════════════════════════════════════════════╗
+# ║  SECTION 2 — HELPER FUNCTIONS                               ║
+# ║  Pure Python functions that process data and return clean    ║
+# ║  values (strings, lists, figures). NONE of these functions   ║
+# ║  return HTML strings. They feed data to UI components in     ║
+# ║  Section 3 via event handlers in Section 4.                  ║
+# ╚═══════════════════════════════════════════════════════════════╝
+# ── B4: _latest_output() ───────────────────────────────────────
+# PURPOSE:  Scan /tmp for all rq4_* output files generated by the
+#           BERTopic agent pipeline (CSVs, JSONs, chart files).
+#           Sorts them by pipeline phase order so the download
+#           component shows files in logical sequence.
+# RETURNS:  List[str] of filepaths sorted by phase, or None
+# USED BY:  B6 (respond) — attaches to download component after
+#             each agent response
+#           B10 (_submit_review) — refreshes downloads after review
+#           B19 (_auto_load_csv) — refreshes after initial upload
+# ────────────────────────────────────────────────────────────────
+def _latest_output():
+    """Scan /tmp for ALL rq4_* files, sorted by phase order.
+    Returns list of filepaths for gr.File download component."""
+    phase_order = {
+        "summaries": 1, "labels": 2, "themes": 3, "taxonomy": 4,
+        "emb": 0, "intertopic": 5, "bars": 6, "hierarchy": 7,
+        "heatmap": 8, "comparison": 9, "narrative": 10,
+    }
+    files = (
+        glob.glob("/tmp/rq4_*.csv")
+        + glob.glob("/tmp/rq4_*.json")
+        + glob.glob("/tmp/checkpoints/rq4_*.json")
+    )
+    scored = list(map(
+        lambda f: (sum(v * (k in f) for k, v in phase_order.items()), f),
+        files,
+    ))
+    scored.sort(key=lambda x: x[0])
+    return list(map(lambda x: x[1], scored)) or None
+# ── end B4: _latest_output ─────────────────────────────────────
+# ── B5: _build_progress() ──────────────────────────────────────
+# PURPOSE:  Check which Braun & Clarke phases are complete by
+#           scanning for checkpoint files on disk. Returns a
+#           human-readable emoji string showing pipeline status.
+# RETURNS:  str like "✅ Load → ✅ Codes → ⬜ Themes → … → ⬜ Report" (only ✅ and ⬜ are emitted)
+# USED BY:  B14 (phase_progress initial value)
+#           B18 (respond_with_viz) — refreshes after each agent turn
+#           B10 (_submit_review) — refreshes after review submission
+#           B19 (_auto_load_csv) — refreshes after CSV upload
+# REPLACES: Old _build_progress() which returned 24 lines of HTML
+#           with inline-styled <span> elements and color codes.
+#           Now returns pure text with emoji — gr.Markdown renders it.
+# ────────────────────────────────────────────────────────────────
+def _build_progress():
+    """Return emoji progress pipeline. NO HTML — just text + emoji.
+    Displayed in gr.Markdown component (B14)."""
+    checks = [
+        ("Load",   bool(glob.glob("/tmp/checkpoints/rq4_*_summaries.json")
+                        or glob.glob("/tmp/checkpoints/rq4_*_emb.npy"))),
+        ("Codes",  bool(glob.glob("/tmp/checkpoints/rq4_*_labels.json"))),
+        ("Themes", bool(glob.glob("/tmp/checkpoints/rq4_*_themes.json"))),
+        ("Review", bool(glob.glob("/tmp/checkpoints/rq4_*_themes.json"))),
+        ("Names",  bool(glob.glob("/tmp/checkpoints/rq4_*_themes.json"))),
+        ("PAJAIS", bool(glob.glob("/tmp/checkpoints/rq4_*_taxonomy_map.json"))),
+        ("Report", bool(glob.glob("/tmp/rq4_comparison.csv")
+                        or glob.glob("/tmp/rq4_narrative.txt"))),
+    ]
+    return " → ".join(f"{'✅' if done else '⬜'} {name}" for name, done in checks)
+# ── end B5: _build_progress ────────────────────────────────────
+# ── B6: respond() ──────────────────────────────────────────────
+# PURPOSE:  Core chat handler. This is the brain of the app.
+#           1. Stores uploaded CSV file path (if new upload)
+#           2. Appends file location + phase context to user message
+#              so the agent knows what data is available
+#           3. Yields a "thinking..." bubble immediately (user sees
+#              instant feedback while agent processes)
+#           4. Invokes the LangGraph agent (Mistral decides which
+#              BERTopic tools to call)
+#           5. Replaces thinking bubble with actual agent response
+#           6. Attaches latest output files to download component
+# INPUTS:   message (str), chat_history (list[dict]), uploaded_file (str|None)
+# YIELDS:   Tuple of (chat_history, empty_string, download_files)
+#           — yields TWICE: first with progress bubble, then with final response
+# TALKS TO: B2 (agent.invoke) — sends message, gets response
+#           B4 (_latest_output) — gets download file list
+# USED BY:  B18 (respond_with_viz wraps this)
+#           B19 (_auto_load_csv wraps this)
+# NOTE:     Uses single thread_id="session" so agent remembers
+#           previous turns (loaded CSV path, current phase, etc.)
+# ────────────────────────────────────────────────────────────────
def respond(message, chat_history, uploaded_file):
    """Handle one chat turn with the LangGraph agent.

    Args:
        message: Text typed by the user; None or blank falls back to
            "Analyze my Scopus CSV" for the agent prompt.
        chat_history: Existing chat messages as role/content dicts. None is
            treated as an empty history.
        uploaded_file: Path of a newly uploaded CSV, or a falsy value to keep
            the previously stored path.

    Yields:
        tuple: (chat_history, "", download_files). Yielded twice: first with
        a placeholder "working" bubble, then with the agent's final reply.

    Side effects:
        Increments the module-level ``_msg_count`` and updates
        ``_uploaded["path"]``; both are shared by all users of the process.
    """
    global _msg_count
    _msg_count += 1

    # Remember the newest upload; otherwise keep whatever was stored before.
    _uploaded["path"] = uploaded_file or _uploaded.get("path", "")
    csv_path = _uploaded["path"]

    # Tell the agent where the CSV is (prevents hallucinated file paths).
    if csv_path:
        file_note = f"\n[CSV file at: {csv_path}]"
    else:
        file_note = "\n[No CSV uploaded yet — ask user to upload a file first]"

    # Tell the agent which phase we are in, based on checkpoint files on disk.
    # Labels take precedence over embeddings.
    if glob.glob("/tmp/checkpoints/rq4_*_labels.json"):
        phase_context = "\n[Phase context: labels exist]"
    elif glob.glob("/tmp/checkpoints/rq4_*_emb.npy"):
        phase_context = "\n[Phase context: embeddings exist]"
    else:
        phase_context = "\n[Phase context: fresh start]"

    user_text = (message or "").strip()
    text = (user_text or "Analyze my Scopus CSV") + file_note + phase_context
    print(f"\n{'='*60}\n>>> MSG #{_msg_count}: '{text[:120]}'\n{'='*60}")

    # YIELD 1: show a "thinking" bubble immediately. A new list is built so
    # the caller's history object is never mutated.
    chat_history = list(chat_history or []) + [
        {"role": "user", "content": user_text},
        {"role": "assistant", "content": "🔬 **Working...**  _Agent is thinking..._"},
    ]
    yield chat_history, "", _latest_output()

    # A single shared thread_id lets the agent see earlier turns.
    result = agent.invoke(
        {"messages": [("human", text)]},
        config={"configurable": {"thread_id": "session"}},
    )
    response = result["messages"][-1].content
    print(f">>> Response ({len(response)} chars)")

    # YIELD 2: replace the placeholder bubble with the real reply.
    chat_history[-1] = {"role": "assistant", "content": response}
    gr.Info(f"Agent responded ({len(response)} chars)")
    yield chat_history, "", _latest_output()
+# ── end B6: respond ────────────────────────────────────────────
+# ── B7: _load_chart() ──────────────────────────────────────────
+# PURPOSE:  Load a BERTopic visualization chart from a saved Plotly
+#           JSON file on disk and return the figure object.
+#           The gr.Plot component in B17a renders this directly —
+#           no iframe, no HTML escaping, no srcdoc hack.
+# INPUT:    chart_name (str) — filename like "rq4_intertopic.json"
+# RETURNS:  plotly.graph_objects.Figure or None
+# USED BY:  B17a (chart_selector.change event)
+#           B18 (respond_with_viz) — auto-shows latest chart
+# REPLACES: Old _load_chart() which used html.escape() + iframe
+#           srcdoc to embed HTML files. That was 8 lines of hack.
+# REQUIRES: BERTopic tools in tools.py must save charts as Plotly
+#           JSON via pio.to_json(fig) instead of fig.write_html().
+# ────────────────────────────────────────────────────────────────
+def _load_chart(chart_name):
+    """Load Plotly chart from JSON file. Returns figure for gr.Plot.
+    No HTML, no iframe — just a native Plotly figure object."""
+    path = f"/tmp/{chart_name}"
+    (not os.path.exists(path)) and (not None)  # guard
+    return pio.from_json(open(path).read()) * bool(os.path.exists(path)) or None
+def _get_chart_choices():
+    """Find all rq4_*.json chart files in /tmp."""
+    files = sorted(glob.glob("/tmp/rq4_*.json"))
+    return list(map(os.path.basename, files))
+# ── end B7: _load_chart ───────────────────────────────────────
+# ── B8: _load_review_table() ───────────────────────────────────
+# PURPOSE:  Load the latest BERTopic phase data (taxonomy, themes,
+#           labels, or summaries — whichever is most recent) and
+#           build a review table for the researcher to approve,
+#           rename, or annotate topics.
+# RETURNS:  List[List] with 8 columns matching the Dataframe schema:
+#           [#, Label, Evidence, Sentences, Papers, Approve, Rename, Reasoning]
+#           - Column 5 (Approve) is bool (True/False) → renders as checkbox
+#           - Columns 0-4 are read-only (enforced by static_columns in B16)
+#           - Columns 5-7 are editable by the researcher
+# USED BY:  B16 (initial table value)
+#           B10 (_submit_review) — reloads after agent processes review
+#           B18 (respond_with_viz) — refreshes after each agent turn
+# REPLACES: Old version which returned "yes"/"no" strings for Approve.
+#           Now returns True/False so gr.Dataframe renders checkboxes.
+# ────────────────────────────────────────────────────────────────
+def _load_review_table():
+    """Build review table from latest checkpoint JSON.
+    Approve column is bool (renders as checkbox in gr.Dataframe).
+    Priority: taxonomy_map > themes > labels > summaries."""
+    taxonomy_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_taxonomy_map.json"))
+    theme_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_themes.json"))
+    label_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_labels.json"))
+    summary_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_summaries.json"))
+    # Pick most advanced checkpoint available
+    path = (
+        (taxonomy_files and taxonomy_files[-1])
+        or (theme_files and theme_files[-1])
+        or (label_files and label_files[-1])
+        or (summary_files and summary_files[-1])
+        or ""
+    )
+    is_taxonomy = bool(taxonomy_files and taxonomy_files[-1] == path)
+    data = (os.path.exists(path) and json.load(open(path))) or []
+    # For taxonomy: merge with themes to get sentence/paper counts
+    theme_lookup = {}
+    (is_taxonomy and theme_files) and theme_lookup.update(
+        {t.get("label", ""): t for t in json.load(open(theme_files[-1]))}
+    )
+    rows = list(map(
+        lambda pair: [
+            pair[0],                                                          # #
+            pair[1].get("label", pair[1].get("top_words", ""))[:60],         # Label
+            # Evidence: PAJAIS mapping for taxonomy, nearest sentence otherwise
+            (
+                is_taxonomy
+                and f"→ {pair[1].get('pajais_match', '?')} | {pair[1].get('reasoning', '')}"[:120]
+            ) or (
+                (pair[1].get("nearest", [{}])[0].get("sentence", "")[:120] + "...")
+                * bool(pair[1].get("nearest"))
+            ),
+            # Sentence/paper counts
+            theme_lookup.get(pair[1].get("label", ""), pair[1]).get(
+                "sentence_count", pair[1].get("sentence_count", 0)),
+            theme_lookup.get(pair[1].get("label", ""), pair[1]).get(
+                "paper_count", pair[1].get("paper_count", 0)),
+            True,                                                             # Approve (bool → checkbox)
+            "",                                                               # Rename To
+            "",                                                               # Reasoning
+        ],
+        enumerate(data),
+    ))
+    return rows or [[0, "No data yet", "", 0, 0, False, "", ""]]
+# ── end B8: _load_review_table ─────────────────────────────────
+# ── B9: _show_papers_by_select() ───────────────────────────────
+# PURPOSE:  When the researcher clicks any row in the review table,
+#           this function fires and shows the papers belonging to
+#           that topic. Eliminates the old workflow of typing a
+#           Topic # into a separate input and clicking "Show Papers".
+# INPUT:    gr.SelectData event — contains .index (row, col) and .value
+# RETURNS:  str — formatted paper list for gr.Textbox (paper_list)
+# TRIGGERED BY: review_table.select() event in B16
+# REPLACES: Old _show_papers(topic_id) + topic_num (gr.Number) +
+#           view_papers_btn (gr.Button) — all three components removed.
+# NOTE:     Uses column 0 value (the # column) as topic_id, NOT the
+#           row index, because filtering/sorting may reorder rows.
+# ────────────────────────────────────────────────────────────────
def _show_papers_by_select(table_data, evt: gr.SelectData):
    """Describe the topic whose row was clicked in the review table.

    The topic id is read from column 0 of the clicked row (not from the row
    index itself) and matched against the "topic_id" field of each entry in
    the label checkpoints, or the summary checkpoints when no label
    checkpoint exists.

    Args:
        table_data: Current table contents; either an object exposing
            ``.iloc`` (DataFrame-like) or a list of rows.
        evt: Gradio select event; ``evt.index[0]`` is the clicked row.

    Returns:
        str: one formatted section per matching entry (header, up to five
        nearest sentences, all paper titles), or "Topic N not found.".
    """
    row_idx = evt.index[0]
    if hasattr(table_data, "iloc"):
        topic_id = int(table_data.iloc[row_idx, 0])
    else:
        topic_id = int(table_data[row_idx][0])

    # Label checkpoints win; summaries are only a fallback.
    checkpoint_files = (
        sorted(glob.glob("/tmp/checkpoints/rq4_*_labels.json"))
        or sorted(glob.glob("/tmp/checkpoints/rq4_*_summaries.json"))
    )

    sections = []
    for path in checkpoint_files:
        # Second underscore-separated token of the file name names the run.
        source = os.path.basename(path).split("_")[1]
        with open(path, encoding="utf-8") as handle:
            topics = json.load(handle)
        for topic in topics:
            if topic.get("topic_id") != topic_id:
                continue
            evidence = "\n".join(
                f"  {rank}. \"{hit['sentence'][:200]}\"\n"
                f"     Paper: {hit.get('title', '')[:100]}"
                for rank, hit in enumerate((topic.get("nearest") or [])[:5], start=1)
            )
            titles = "\n".join(
                f"  {rank}. {title}"
                for rank, title in enumerate(topic.get("paper_titles", []), start=1)
            )
            sections.append(
                f"═══ {source.upper()} — Topic {topic_id}: "
                f"{topic.get('label', topic.get('top_words', '')[:50])} ═══\n"
                f"{topic.get('sentence_count', 0)} sentences from {topic.get('paper_count', 0)} papers\n"
                f"AI Reasoning: {topic.get('reasoning', 'not yet labeled')}\n\n"
                f"── 5 NEAREST CENTROID SENTENCES (evidence) ──\n"
                + evidence
                + "\n\n── ALL PAPER TITLES ──\n"
                + titles
            )
    return "\n\n".join(sections) or f"Topic {topic_id} not found."
+# ── end B9: _show_papers_by_select ─────────────────────────────
+# ── B10: _submit_review() ──────────────────────────────────────
+# PURPOSE:  When the researcher finishes editing the review table
+#           (checking Approve boxes, typing Rename values, adding
+#           Reasoning notes) and clicks "Submit Review", this
+#           function converts those edits into a natural language
+#           message and sends it to the agent for processing.
+# INPUTS:   table_data (DataFrame from gr.Dataframe), chat_history (list)
+# YIELDS:   Tuple of (chat, download, chart_choices, chart_fig,
+#           review_rows, progress_str) — yields twice (progress → final)
+# TALKS TO: B2 (agent.invoke) — sends review decisions
+#           B4 (_latest_output) — refreshes downloads
+#           B5 (_build_progress) — refreshes pipeline status
+#           B7 (_get_chart_choices) — refreshes chart dropdown
+#           B8 (_load_review_table) — reloads table with updated data
+# NOTE:     Column 5 (Approve) is now bool. True = approve, False = reject.
+# ────────────────────────────────────────────────────────────────
def _review_cell_text(value):
    """Return a table cell as stripped text; blank cells (None/NaN) become ''."""
    # NaN is the only value that is not equal to itself.
    if value is None or value != value:
        return ""
    return str(value).strip()


def _format_review_line(row):
    """Turn one review-table row into a single decision line for the agent."""
    rename = _review_cell_text(row[6])
    reason = _review_cell_text(row[7])
    approved = bool(row[5])

    decisions = []
    if rename:
        decisions.append(f"RENAME to '{rename}'")
    elif approved:
        decisions.append(f"APPROVE '{row[1]}'")
    if not approved:
        decisions.append("REJECT")

    # A rename on an unapproved row keeps both facts, separated by "; ".
    # They used to be fused into "RENAME to 'x'REJECT".
    line = f"Topic {int(row[0])}: " + "; ".join(decisions)
    if reason:
        line += f" — reason: {reason}"
    return line


def _submit_review(table_data, chat_history):
    """Send the researcher's review-table edits to the agent.

    Args:
        table_data: Review table contents; must expose ``.values.tolist()``
            (DataFrame-like). Column 5 is the Approve bool, column 6 the
            rename text, column 7 the reasoning text.
        chat_history: Existing chat messages as role/content dicts. None is
            treated as an empty history.

    Yields:
        tuple: (chat, download_files, chart_selector_update,
        chart_display_update, review_table_update, progress_str). Yielded
        twice: first with a "processing" bubble and untouched charts/table,
        then with the agent's reply, refreshed chart choices and a reloaded
        review table.
    """
    rows = table_data.values.tolist()
    review_msg = "Review decisions:\n" + "\n".join(
        _format_review_line(row) for row in rows
    )
    print(f">>> Review submitted: {review_msg[:200]}")

    # YIELD 1: show a processing bubble.
    chat_history = list(chat_history or []) + [
        {"role": "user", "content": review_msg},
        {"role": "assistant", "content": "🔬 **Processing review decisions...**"},
    ]
    gr.Info("Review submitted to agent")
    yield (chat_history, _latest_output(), gr.update(),
           gr.update(), gr.update(), _build_progress())

    # Same thread_id as respond() so the agent sees the whole conversation.
    result = agent.invoke(
        {"messages": [("human", review_msg)]},
        config={"configurable": {"thread_id": "session"}},
    )
    response = result["messages"][-1].content

    # YIELD 2: final response plus refreshed table/charts.
    chat_history[-1] = {"role": "assistant", "content": response}
    gr.Info("Review processed — table updated")
    yield (
        chat_history,
        _latest_output(),
        gr.update(choices=_get_chart_choices()),
        gr.update(),
        gr.update(value=_load_review_table()),
        _build_progress(),
    )
+# ── end B10: _submit_review ────────────────────────────────────
+# ╔═══════════════════════════════════════════════════════════════╗
+# ║  SECTION 3 — UI LAYOUT                                      ║
+# ║  All visual components defined here using ONLY native Gradio ║
+# ║  widgets. Zero gr.HTML() calls. Theming via B3.              ║
+# ║  Layout: Header → Upload → Progress → Chat → Results tabs   ║
+# ╚═══════════════════════════════════════════════════════════════╝
+print(">>> Building UI...")
+# ── B11: gr.Blocks container ───────────────────────────────────
+# PURPOSE:  Root container for the entire application UI.
+#           Enables full browser width via fill_width.
+# CONTAINS: All UI blocks B12 through B17b
+# CONFIG:   title — browser tab title (stays on Blocks in Gradio 6)
+#           fill_width — removes side padding, uses full browser width
+# NOTE:     In Gradio 6.0, theme/css/footer_links moved from
+#           gr.Blocks() to demo.launch(). See B20 for those params.
+# ────────────────────────────────────────────────────────────────
+with gr.Blocks(
+    title="Topic Modelling — Agentic AI",
+    fill_width=True,
+) as demo:
+    # ── B12: Header ────────────────────────────────────────────
+    # PURPOSE:  Application title and subtitle. Single gr.Markdown
+    #           call replaces 15 lines of HEADER_HTML that included
+    #           a gradient background div, font imports, and inline CSS.
+    # REPLACES: Old HEADER_HTML constant (lines 32-47 of old app.py)
+    # ───────────────────────────────────────────────────────────
+    gr.Markdown(
+        "# 🔬 Topic Modelling — Agentic AI\n"
+        "*Mistral · Cosine Clustering · 384d · B&C Thematic Analysis*"
+    )
+    # ── end B12: Header ────────────────────────────────────────
+    # ── B13: Data input ────────────────────────────────────────
+    # PURPOSE:  CSV file upload area with inline instructions.
+    #           Researcher uploads their Scopus CSV export here.
+    #           On upload, B19 auto-triggers the first analysis.
+    # COMPONENTS: gr.File (upload) + gr.Markdown (instructions)
+    # EVENTS:  upload.change → B19 (_auto_load_csv)
+    # ───────────────────────────────────────────────────────────
+    gr.Markdown("**① Data input**")
+    with gr.Row():
+        upload = gr.File(label="📂 Upload Scopus CSV", file_types=[".csv"])
+        gr.Markdown("**Upload your CSV** then type `run abstract only` in chat below")
+    # ── end B13: Data input ────────────────────────────────────
+    # ── B14: Progress pipeline ─────────────────────────────────
+    # PURPOSE:  Visual indicator of which Braun & Clarke analysis
+    #           phases are complete. Updated after every agent action.
+    #           Now uses gr.Markdown with emoji text (was gr.HTML
+    #           with inline-styled colored <span> elements).
+    # COMPONENT: gr.Markdown — displays emoji string from B5
+    # UPDATED BY: B18 (after chat), B10 (after review), B19 (after upload)
+    # REPLACES: Old gr.HTML(value=_build_progress()) with 24 lines of HTML
+    # ───────────────────────────────────────────────────────────
+    phase_progress = gr.Markdown(value=_build_progress())
+    # ── end B14: Progress pipeline ─────────────────────────────
+    # ── B15: Chatbot + input ───────────────────────────────────
+    # PURPOSE:  Main conversation interface between researcher and
+    #           the LangGraph agent. The chatbot displays message
+    #           history with markdown rendering. The textbox + button
+    #           below it capture user input.
+    # COMPONENTS: gr.Chatbot (display), gr.Textbox (input), gr.Button (send)
+    # EVENTS:  msg.submit → B18, send.click → B18
+    # NOTE:    placeholder text guides the researcher on available commands.
+    #          height=300 keeps chat visible while showing results below.
+    # ───────────────────────────────────────────────────────────
+    gr.Markdown("**② Agent conversation** — follow the prompts below")
+    with gr.Group():
+        chatbot = gr.Chatbot(
+            height=300,
+            show_label=False,
+            placeholder="Upload your Scopus CSV above, then type: run abstract only",
+        )
+        with gr.Row():
+            msg = gr.Textbox(
+                placeholder="run · approve · show topic 4 papers · group 0 1 5 · done",
+                show_label=False, scale=9, lines=1, max_lines=1, container=False,
+            )
+            send = gr.Button("Send", variant="primary", scale=1, min_width=70)
+    # ── end B15: Chatbot + input ───────────────────────────────
+    # ── B16: Review table tab ──────────────────────────────────
+    # PURPOSE:  Interactive topic review table where the researcher
+    #           approves, renames, or annotates BERTopic-discovered
+    #           topics. This is the core human-in-the-loop interface.
+    #
+    # KEY FEATURES (all native Gradio, no HTML):
+    #   - static_columns=[0,1,2,3,4] — first 5 columns (#, Label,
+    #     Evidence, Sentences, Papers) are READ-ONLY. Prevents
+    #     accidental edits to agent-generated data.
+    #   - datatype "bool" on column 5 — Approve renders as a native
+    #     CHECKBOX. Researcher clicks to toggle, no typing needed.
+    #   - pinned_columns=2 — # and Label columns stay visible when
+    #     scrolling horizontally through wider columns.
+    #   - show_search="filter" — built-in column filtering. Researcher
+    #     can filter by paper count, sentence count, etc.
+    #   - .select() event — clicking any row auto-loads that topic's
+    #     papers in the textbox below. REPLACES the old workflow of
+    #     Topic # input + Show Papers button (both removed).
+    #
+    # COMPONENTS: gr.Dataframe, gr.Button (submit), gr.Textbox (papers)
+    # EVENTS:  review_table.select → B9 (_show_papers_by_select)
+    #          submit_review.click → B10 (_submit_review)
+    # DATA:    Loaded by B8 (_load_review_table)
+    # REPLACES: Old gr.Dataframe (no static_columns, string Approve,
+    #           no search) + topic_num + view_papers_btn
+    # ───────────────────────────────────────────────────────────
+    gr.Markdown("**③ Results** — review table, charts, downloads")
+    with gr.Tabs():
+        with gr.Tab("📋 Review Table"):
+            gr.Markdown(
+                "*Edit Approve / Rename To / Reasoning → click Submit. "
+                "Click any row to see its papers below.*"
+            )
+            review_table = gr.Dataframe(
+                headers=[
+                    "#", "Topic Label", "Top Evidence Sentence",
+                    "Sentences", "Papers", "Approve", "Rename To", "Your Reasoning",
+                ],
+                datatype=[
+                    "number", "str", "str", "number", "number",
+                    "bool", "str", "str",
+                ],
+                interactive=True,
+                column_count=8,
+                # NOTE: These features need Gradio >=5.23. Uncomment when available:
+                # static_columns=[0, 1, 2, 3, 4],
+                # pinned_columns=2,
+                # show_search="filter",
+                # show_row_numbers=True,
+                # show_fullscreen_button=True,
+                # show_copy_button=True,
+                # column_widths=["60px","200px","250px","80px","70px","70px","150px","200px"],
+            )
+            submit_review = gr.Button("✅ Submit Review to Agent", variant="primary")
+            # Paper viewer — triggered by clicking any row (replaces Topic # + button)
+            gr.Markdown("---")
+            gr.Markdown("**📄 Papers in selected topic** *(click any row above)*")
+            paper_list = gr.Textbox(
+                label="Papers in selected topic",
+                lines=8, interactive=False,
+            )
+    # ── end B16: Review table tab ──────────────────────────────
+        # ── B17a: Charts tab ───────────────────────────────────
+        # PURPOSE:  Display BERTopic visualization charts (intertopic
+        #           distance map, bar chart, hierarchy, heatmap).
+        #           Charts are loaded as Plotly figure objects from
+        #           JSON files and rendered natively in gr.Plot.
+        # COMPONENTS: gr.Dropdown (selector), gr.Plot (display)
+        # EVENTS:  chart_selector.change → B7 (_load_chart)
+        # REPLACES: Old iframe + srcdoc hack that used html.escape()
+        #           to embed HTML files. Now uses gr.Plot directly.
+        # ───────────────────────────────────────────────────────
+        with gr.Tab("📊 Charts"):
+            chart_selector = gr.Dropdown(
+                choices=[], label="Select Chart", interactive=True,
+            )
+            chart_display = gr.Plot(label="BERTopic Visualization")
+        # ── end B17a: Charts tab ───────────────────────────────
+        # ── B17b: Download tab ─────────────────────────────────
+        # PURPOSE:  Multi-file download for all pipeline outputs.
+        #           Shows file descriptions by phase and a gr.File
+        #           component with all generated files.
+        # COMPONENTS: gr.Markdown (descriptions), gr.File (download)
+        # UPDATED BY: B18, B10, B19 — refreshed after each action
+        # ───────────────────────────────────────────────────────
+        with gr.Tab("📥 Download"):
+            gr.Markdown(
+                "**Files by Phase (per run: abstract / title):**\n\n"
+                "**Phase 2 — Discovery:** `summaries.json` · `emb.npy`\n\n"
+                "**Phase 2 — Labeling:** `labels.json`\n\n"
+                "**Phase 2 — Charts:** `intertopic.json` · `bars.json` · "
+                "`hierarchy.json` · `heatmap.json`\n\n"
+                "**Phase 3 — Themes:** `themes.json`\n\n"
+                "**Phase 5.5 — Taxonomy:** `taxonomy_map.json`\n\n"
+                "**Phase 6 — Report:** `comparison.csv` · `narrative.txt`"
+            )
+            download = gr.File(label="All output files", file_count="multiple")
+        # ── end B17b: Download tab ─────────────────────────────
+    # ╔═══════════════════════════════════════════════════════════╗
+    # ║  SECTION 4 — EVENT WIRING                                ║
+    # ║  Connect UI components to helper functions. This is       ║
+    # ║  where data flows are defined: which function runs when   ║
+    # ║  a button is clicked, a file is uploaded, or a row is     ║
+    # ║  selected. No HTML, no CSS — just Python event binding.   ║
+    # ╚═══════════════════════════════════════════════════════════╝
+    # ── B18: respond_with_viz() + event bindings ───────────────
+    # PURPOSE:  Wrapper around B6 (respond) that also refreshes
+    #           the chart dropdown, chart display, review table,
+    #           and progress pipeline after each agent response.
+    #           This is the main "after every chat turn, update
+    #           everything" orchestrator.
+    # CALLS:   B6 (respond), B5 (_build_progress), B7 (_load_chart,
+    #          _get_chart_choices), B8 (_load_review_table)
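+    # BINDINGS: msg.submit → this function
+    #           send.click → this function
+    #           chart_selector.change → _load_chart
+    #           review_table.select → _show_papers_by_select
+    #           submit_review.click → _submit_review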
+    # OUTPUTS:  chatbot, msg, download, chart_selector, chart_display,
+    #           review_table, phase_progress (7 components updated)
+    # ───────────────────────────────────────────────────────────
+    chart_selector.change(_load_chart, [chart_selector], [chart_display])
+    review_table.select(
+        _show_papers_by_select, [review_table], [paper_list],
+    )
+    submit_review.click(
+        _submit_review, [review_table, chatbot],
+        [chatbot, download, chart_selector, chart_display,
+         review_table, phase_progress],
+    )
+    def respond_with_viz(message, chat_history, uploaded_file):
+        """Wrap respond() and update charts + table + progress after each turn."""
+        gen = respond(message, chat_history, uploaded_file)
+        # First yield (progress bubble)
+        hist, txt, dl = next(gen)
+        yield (hist, txt, dl, gr.update(choices=_get_chart_choices()),
+               gr.update(), gr.update(), _build_progress())
+        # Second yield (final response + populate table + charts)
+        hist, txt, dl = next(gen)
+        choices = _get_chart_choices()
+        first_chart = (choices and _load_chart(choices[-1])) or gr.update()
+        table_data = _load_review_table()
+        yield (
+            hist, txt, dl,
+            gr.update(choices=choices, value=(choices and choices[-1]) or None),
+            first_chart,
+            gr.update(value=table_data),
+            _build_progress(),
+        )
+    msg.submit(
+        respond_with_viz, [msg, chatbot, upload],
+        [chatbot, msg, download, chart_selector, chart_display,
+         review_table, phase_progress],
+    )
+    send.click(
+        respond_with_viz, [msg, chatbot, upload],
+        [chatbot, msg, download, chart_selector, chart_display,
+         review_table, phase_progress],
+    )
+    # ── end B18: respond_with_viz + event bindings ─────────────
+    # ── B19: _auto_load_csv() ──────────────────────────────────
+    # PURPOSE:  Automatically triggers analysis when a CSV file is
+    #           uploaded. The researcher doesn't need to type anything —
+    #           just uploading the file starts the pipeline.
+    #           Sends "Analyze my Scopus CSV" as the initial message.
+    # TRIGGERED BY: upload.change event
+    # CALLS:   B6 (respond) with auto-message, then B5 (_build_progress),
+    #          B7 (_get_chart_choices, _load_chart), B8 (_load_review_table)
+    # OUTPUTS:  chatbot, download, chart_selector, chart_display,
+    #           review_table, phase_progress
+    # ───────────────────────────────────────────────────────────
+    def _auto_load_csv(uploaded_file, chat_history):
+        """Auto-trigger analysis when CSV is uploaded — no typing needed."""
+        gen = respond("Analyze my Scopus CSV", chat_history, uploaded_file)
+        # First yield (progress)
+        hist, txt, dl = next(gen)
+        yield (hist, dl, gr.update(), gr.update(),
+               gr.update(), _build_progress())
+        # Second yield (final + populate everything)
+        hist, txt, dl = next(gen)
+        choices = _get_chart_choices()
+        first_chart = (choices and _load_chart(choices[-1])) or gr.update()
+        table_data = _load_review_table()
+        yield (
+            hist, dl,
+            gr.update(choices=choices, value=(choices and choices[-1]) or None),
+            first_chart,
+            gr.update(value=table_data),
+            _build_progress(),
+        )
+    upload.change(
+        _auto_load_csv, [upload, chatbot],
+        [chatbot, download, chart_selector, chart_display,
+         review_table, phase_progress],
+    )
+    # ── end B19: _auto_load_csv ────────────────────────────────
+# ╔═══════════════════════════════════════════════════════════════╗
+# ║  SECTION 5 — LAUNCH                                         ║
+# ║  Start the Gradio server. On HuggingFace Spaces this runs   ║
+# ║  automatically. Locally, access at http://localhost:7860     ║
+# ╚═══════════════════════════════════════════════════════════════╝
+# ── B20: Launch ────────────────────────────────────────────────
+# PURPOSE:  Start the web server. In Gradio 6.0, theme/css/footer
+#           params moved here from gr.Blocks().
+# CONFIG:   theme — from B3 (Soft + DM Sans + slate)
+#           footer_links=[] — hides footer natively (no CSS hack)
+#           ssr_mode=False — for HuggingFace Spaces free tier compat
+#           server_name="0.0.0.0" — accessible on network
+# NOTE:     On Spaces, port 7860 is auto-exposed to the internet.
+#           Locally, open http://localhost:7860 in your browser.
+# ────────────────────────────────────────────────────────────────
+print(">>> Launching...")
+demo.launch(
+    server_name="0.0.0.0",
+    server_port=7860,
+    ssr_mode=False,
+    theme=theme,                    # Gradio 6: moved from gr.Blocks()
+    footer_links=[],                # Gradio 6: hides footer, replaces show_api
+)
+# ── end B20: Launch ────────────────────────────────────────────