topic_modelling

Sleeping

App Files Files Community

Dash10107 committed on Apr 21

Commit

b6a5e1c

verified ·

1 Parent(s): bf8d4f9

Update app.py

Browse files

Files changed (1) hide show

app.py +549 -420

app.py CHANGED Viewed

@@ -1,487 +1,616 @@
-# Replace ONLY the UI/layout section beginning from:
-# print(">>> Building UI...")
-# down to demo.launch(...)
-#
-# Keep all helper functions and logic exactly as they are.
-print(">>> Building UI...")
-with gr.Blocks(
-    title="Topic Modelling — Agentic AI",
-    fill_width=True,
-    theme=theme,
-    css="""
-    :root {
-        --accent: #0f766e;
-        --accent-soft: rgba(15,118,110,0.10);
-        --panel: #ffffff;
-        --panel-border: #e5e7eb;
-        --muted: #6b7280;
-        --bg-soft: #f8fafc;
-    }
-    .gradio-container {
-        max-width: 1650px !important;
-        margin: 0 auto !important;
-        padding: 18px 22px 22px 22px !important;
-        background: linear-gradient(to bottom, #fafafa, #f4f7fb);
-    }
-    .gradio-container::before {
-        content: "";
-        display: block;
-        height: 4px;
-        margin: -18px -22px 18px -22px;
-        background: linear-gradient(90deg, #0f766e, #4f46e5);
-    }
-    .app-header {
-        padding: 4px 0 14px 0;
-        border-bottom: 1px solid var(--panel-border);
-        margin-bottom: 10px;
-    }
-    .app-header h1 {
-        font-size: 28px !important;
-        font-weight: 700 !important;
-        margin-bottom: 4px !important;
-        color: #111827;
-    }
-    .app-subtitle {
-        color: var(--muted);
-        font-size: 13px;
-        letter-spacing: 0.02em;
-    }
-    .section-card {
-        border: 1px solid var(--panel-border);
-        border-radius: 18px;
-        background: white;
-        padding: 16px;
-        box-shadow: 0 1px 3px rgba(0,0,0,0.04);
-    }
-    .compact-label {
-        font-size: 12px !important;
-        font-weight: 700 !important;
-        text-transform: uppercase;
-        letter-spacing: 0.08em;
-        color: var(--muted);
-        margin-bottom: 10px !important;
-    }
-    .phase-bar {
-        background: white;
-        border: 1px solid var(--panel-border);
-        border-radius: 14px;
-        padding: 10px 14px;
-        margin: 10px 0 14px 0;
-    }
-    .phase-bar p {
-        margin: 0 !important;
-        font-family: "Fira Code", monospace !important;
-        font-size: 12px !important;
-        color: #374151;
-        line-height: 1.4;
-    }
-    .upload-panel {
-        border: 1px dashed #cbd5e1 !important;
-        border-radius: 16px !important;
-        background: #fbfdff !important;
-        padding: 8px !important;
-    }
-    .chat-shell {
-        border: 1px solid var(--panel-border);
-        border-radius: 18px;
-        background: white;
-        overflow: hidden;
-    }
-    .message.bot {
-        border-left: 3px solid var(--accent) !important;
-        background: rgba(15,118,110,0.03) !important;
-    }
-    .message.user {
-        background: #f3f4f6 !important;
-    }
-    .tab-nav {
-        gap: 6px !important;
-        margin-bottom: 12px !important;
     }
-    .tab-nav button {
-        border-radius: 10px !important;
-        padding: 8px 14px !important;
-        font-size: 13px !important;
-        font-weight: 600 !important;
-        color: #4b5563 !important;
-        background: #f3f4f6 !important;
-        border: 1px solid transparent !important;
-        transition: all 0.15s ease !important;
-    }
-    .tab-nav button.selected {
-        background: white !important;
-        color: #111827 !important;
-        border: 1px solid #d1d5db !important;
-        box-shadow: 0 1px 2px rgba(0,0,0,0.05);
-    }
-    .table-wrap {
-        border-radius: 14px !important;
-        overflow: hidden !important;
-        border: 1px solid var(--panel-border) !important;
-    }
-    .table-wrap tr:nth-child(even) td {
-        background: #fafafa !important;
-    }
-    .table-wrap th {
-        background: #f8fafc !important;
-        font-weight: 700 !important;
-        font-size: 12px !important;
-        color: #374151 !important;
-    }
-    .table-wrap td {
-        font-size: 13px !important;
-    }
-    .panel-title {
-        font-size: 14px !important;
-        font-weight: 700 !important;
-        color: #111827 !important;
-        margin-bottom: 10px !important;
-    }
-    .small-note {
-        font-size: 12px !important;
-        color: #6b7280 !important;
-        margin-top: 4px !important;
-    }
-    button.primary {
-        border-radius: 12px !important;
-        font-weight: 600 !important;
-    }
     """,
 ) as demo:
-    with gr.Column(elem_classes=["app-header"]):
-        gr.Markdown(
-            """
-# Topic Modelling Agentic AI
-<div class="app-subtitle">
-Mistral · BERTopic · 384d Embeddings · Braun & Clarke Thematic Analysis
-</div>
-"""
-        )
-    phase_progress = gr.Markdown(
-        value=_build_progress(),
-        elem_classes=["phase-bar"],
     )
-    with gr.Row(equal_height=True):
-        # LEFT SIDEBAR
-        with gr.Column(scale=3, min_width=340):
-            with gr.Group(elem_classes=["section-card"]):
-                gr.Markdown(
-                    "<div class='compact-label'>Data Source</div>"
-                )
-                upload = gr.File(
-                    label="Scopus CSV",
-                    file_types=[".csv"],
-                    elem_classes=["upload-panel"],
-                )
-                gr.Markdown(
-                    """
-<div class='small-note'>
-Upload your Scopus CSV export. The analysis starts automatically after upload.
-You can then continue using the chat to refine, review, approve, or rename topics.
-</div>
-"""
-                )
-            with gr.Group(elem_classes=["section-card"]):
-                gr.Markdown(
-                    "<div class='compact-label'>Available Commands</div>"
-                )
-                gr.Markdown(
-                    """
-- `run abstract only`
-- `approve all`
-- `show topic 4 papers`
-- `group 0 1 5`
-- `done`
-"""
-                )
-            with gr.Group(elem_classes=["section-card"]):
-                gr.Markdown(
-                    "<div class='compact-label'>Export Files</div>"
-                )
-                download = gr.File(
-                    label="Generated Outputs",
-                    file_count="multiple",
-                )
-        # MAIN CONTENT
-        with gr.Column(scale=9):
-            with gr.Row(equal_height=True):
-                # CHAT PANEL
-                with gr.Column(scale=5):
-                    with gr.Group(elem_classes=["chat-shell"]):
-                        gr.Markdown(
-                            "<div class='panel-title'>Conversation</div>"
-                        )
-                        chatbot = gr.Chatbot(
-                            height=520,
-                            show_label=False,
-                            bubble_full_width=False,
-                            avatar_images=(
-                                None,
-                                "https://api.dicebear.com/7.x/bottts-neutral/svg?seed=bertopic",
-                            ),
-                            placeholder=(
-                                "Ask the agent to analyse, review, merge, rename, "
-                                "or explain discovered topics."
-                            ),
-                        )
-                        with gr.Row():
-                            msg = gr.Textbox(
-                                placeholder="Type a command or question...",
-                                show_label=False,
-                                lines=1,
-                                max_lines=4,
-                                scale=8,
-                            )
-                            send = gr.Button(
-                                "Send",
-                                variant="primary",
-                                scale=1,
-                                min_width=90,
-                            )
-                # RIGHT PANEL
-                with gr.Column(scale=7):
-                    with gr.Tabs():
-                        with gr.Tab("Topics Review"):
-                            gr.Markdown(
-                                """
-<div class='small-note'>
-Approve, reject, rename, or annotate discovered topics. Click a row to inspect supporting papers.
-</div>
-"""
-                            )
-                            review_table = gr.Dataframe(
-                                headers=[
-                                    "#",
-                                    "Topic Label",
-                                    "Top Evidence Sentence",
-                                    "Sentences",
-                                    "Papers",
-                                    "Approve",
-                                    "Rename To",
-                                    "Your Reasoning",
-                                ],
-                                datatype=[
-                                    "number",
-                                    "str",
-                                    "str",
-                                    "number",
-                                    "number",
-                                    "bool",
-                                    "str",
-                                    "str",
-                                ],
-                                interactive=True,
-                                column_count=8,
-                                wrap=True,
-                                height=340,
-                            )
-                            submit_review = gr.Button(
-                                "Submit Review Decisions",
-                                variant="primary",
-                            )
-                            gr.Markdown(
-                                "<div class='panel-title' style='margin-top:18px;'>Source Papers</div>"
-                            )
-                            paper_list = gr.Textbox(
-                                show_label=False,
-                                lines=10,
-                                interactive=False,
-                                placeholder="Select a topic row to inspect its papers and evidence.",
-                            )
-                        with gr.Tab("Visualisations"):
-                            chart_selector = gr.Dropdown(
-                                choices=[],
-                                label="Chart",
-                                interactive=True,
-                            )
-                            chart_display = gr.Plot(
-                                label="BERTopic Visualisation",
-                                height=650,
-                            )
     chart_selector.change(_load_chart, [chart_selector], [chart_display])
     review_table.select(
-        _show_papers_by_select,
-        [review_table],
-        [paper_list],
     )
     submit_review.click(
-        _submit_review,
-        [review_table, chatbot],
-        [
-            chatbot,
-            download,
-            chart_selector,
-            chart_display,
-            review_table,
-            phase_progress,
-        ],
     )
     def respond_with_viz(message, chat_history, uploaded_file):
         gen = respond(message, chat_history, uploaded_file)
         hist, txt, dl = next(gen)
-        yield (
-            hist,
-            txt,
-            dl,
-            gr.update(choices=_get_chart_choices()),
-            gr.update(),
-            gr.update(),
-            _build_progress(),
-        )
         hist, txt, dl = next(gen)
         choices = _get_chart_choices()
         first_chart = (choices and _load_chart(choices[-1])) or gr.update()
         table_data = _load_review_table()
         yield (
-            hist,
-            txt,
-            dl,
-            gr.update(
-                choices=choices,
-                value=(choices and choices[-1]) or None,
-            ),
             first_chart,
             gr.update(value=table_data),
             _build_progress(),
         )
     msg.submit(
-        respond_with_viz,
-        [msg, chatbot, upload],
-        [
-            chatbot,
-            msg,
-            download,
-            chart_selector,
-            chart_display,
-            review_table,
-            phase_progress,
-        ],
     )
     send.click(
-        respond_with_viz,
-        [msg, chatbot, upload],
-        [
-            chatbot,
-            msg,
-            download,
-            chart_selector,
-            chart_display,
-            review_table,
-            phase_progress,
-        ],
     )
     def _auto_load_csv(uploaded_file, chat_history):
         gen = respond("Analyze my Scopus CSV", chat_history, uploaded_file)
         hist, txt, dl = next(gen)
-        yield (
-            hist,
-            dl,
-            gr.update(),
-            gr.update(),
-            gr.update(),
-            _build_progress(),
-        )
         hist, txt, dl = next(gen)
         choices = _get_chart_choices()
         first_chart = (choices and _load_chart(choices[-1])) or gr.update()
         table_data = _load_review_table()
         yield (
-            hist,
-            dl,
-            gr.update(
-                choices=choices,
-                value=(choices and choices[-1]) or None,
-            ),
             first_chart,
             gr.update(value=table_data),
             _build_progress(),
         )
     upload.change(
-        _auto_load_csv,
-        [upload, chatbot],
-        [
-            chatbot,
-            download,
-            chart_selector,
-            chart_display,
-            review_table,
-            phase_progress,
-        ],
     )
-print(">>> Launching...")
 demo.launch(
     server_name="0.0.0.0",
     server_port=7860,
     ssr_mode=False,
-    footer_links=[],
-)

+"""
+app.py — Topic Modelling Agentic AI | Gradio UI
+═══════════════════════════════════════════════════
+Version:  3.1.0 | April 2026
+Stack:    Gradio 5.x + LangGraph + Mistral + BERTopic
+Deploy:   HuggingFace Spaces (sdk: gradio)
+Rules:    Zero gr.HTML(). All UI via native Gradio components.
+          See GRADIO_UI_GUIDELINES_v2.docx for full standards.
+ARCHITECTURE — 20 Blocks in 5 Sections
+─────────────────────────────────────────
+  Section 1: Setup        (B1–B3)   Imports, agent, theme
+  Section 2: Helpers      (B4–B10)  Pure Python functions, no UI
+  Section 3: UI Layout    (B11–B17) gr.Blocks with native components
+  Section 4: Event Wiring (B18–B19) Connect UI to functions
+  Section 5: Launch       (B20)     Start server
+BLOCK COMMUNICATION MAP
+─────────────────────────
+  B6 (respond)  ←→ B2 (agent)   : invokes agent for chat
+  B6 (respond)  → B4 (output)   : scans for download files
+  B7 (chart)    → B17a (display) : loads Plotly JSON → gr.Plot
+  B8 (table)    → B16 (review)  : builds rows → gr.Dataframe
+  B9 (papers)   ← B16 (review)  : triggered by row click
+  B10 (submit)  → B2 (agent)    : sends review edits to agent
+  B18 (wiring)  → B5,B7,B8      : refreshes progress, charts, table
+"""
+import os
+import glob
+import json
+import plotly.io as pio
+import gradio as gr
+from langchain_mistralai import ChatMistralAI
+from langgraph.prebuilt import create_react_agent
+from langgraph.checkpoint.memory import MemorySaver
+from agent import SYSTEM_PROMPT, get_local_tools
+print(">>> app.py: imports complete")
+# ── B2: Agent setup ─────────────────────────────────────────────
+# PURPOSE:  Build the one module-level agent that respond() and
+#           _submit_review() invoke; both pass thread_id "session".
+# NOTE:     Everything in this block runs at import time.
+# ────────────────────────────────────────────────────────────────
+llm = ChatMistralAI(model="mistral-small-latest", temperature=0, timeout=300)
+tools = get_local_tools()
+# MemorySaver is the checkpointer handed to the agent — presumably it keeps
+# conversation state in process memory only; confirm against LangGraph docs.
+agent = create_react_agent(
+    model=llm, tools=tools, prompt=SYSTEM_PROMPT, checkpointer=MemorySaver()
+)
+print(f">>> app.py: agent ready ({len(tools)} tools)")
+_msg_count = 0                    # Global message counter (shared across users)
+_uploaded = {"path": ""}          # Last uploaded CSV path (shared session)
+# ── end B2: Agent setup ────────────────────────────────────────
+# ── B3: Theme ───────────────────────────────────────────────────
+# PURPOSE:  Define the visual identity of the entire application.
+#           Uses teal/indigo on zinc — purposeful scientific feel.
+#           Plus Jakarta Sans: geometric-humanist, modern but not generic.
+#           Fira Code for monospace elements (phase progress, etc).
+# USED BY:  B20 (demo.launch) — theme applied at launch time.
+# ────────────────────────────────────────────────────────────────
+theme = gr.themes.Default(
+    # Colour roles: teal primary, indigo secondary, zinc neutral.
+    primary_hue="teal",
+    secondary_hue="indigo",
+    neutral_hue="zinc",
+    # Body font and monospace font are both pulled in as Google Fonts.
+    font=gr.themes.GoogleFont("Plus Jakarta Sans"),
+    font_mono=gr.themes.GoogleFont("Fira Code"),
+    radius_size="sm",
+    spacing_size="md",
+).set(
+    # Per-variable overrides applied on top of the base theme.
+    # NOTE(review): "*primary_600"-style values look like references to other
+    # theme variables (Gradio theming syntax) — confirm against the Gradio docs.
+    button_primary_background_fill="*primary_600",
+    button_primary_background_fill_hover="*primary_500",
+    button_primary_text_color="white",
+    block_label_text_size="sm",
+    block_title_text_weight="600",
+)
+# ── end B3: Theme ──────────────────────────────────────────────
+def _latest_output():
+    """Collect the generated rq4_* output files for the download component.
+
+    Globs ``/tmp`` for ``rq4_*.csv``, ``rq4_*.json`` and ``rq4_*.txt`` and
+    ``/tmp/checkpoints`` for ``rq4_*.json``, then orders the paths by
+    pipeline phase.
+
+    The ``.txt`` pattern is included so that ``/tmp/rq4_narrative.txt`` — the
+    file ``_build_progress`` treats as a finished report and that
+    ``phase_order`` ranks under "narrative" — is actually offered for download.
+
+    Returns:
+        A list of file paths sorted by ascending phase score, or ``None`` when
+        no file matched.
+    """
+    # Keyword -> sort weight. A path's score is the sum of the weights of
+    # every keyword that occurs anywhere in the path; no match scores 0.
+    phase_order = {
+        "summaries": 1, "labels": 2, "themes": 3, "taxonomy": 4,
+        "emb": 0, "intertopic": 5, "bars": 6, "hierarchy": 7,
+        "heatmap": 8, "comparison": 9, "narrative": 10,
     }
+    files = (
+        glob.glob("/tmp/rq4_*.csv")
+        + glob.glob("/tmp/rq4_*.json")
+        + glob.glob("/tmp/rq4_*.txt")
+        + glob.glob("/tmp/checkpoints/rq4_*.json")
+    )
+
+    def _phase_score(path):
+        """Sum the weights of all phase keywords contained in *path*."""
+        return sum(weight for key, weight in phase_order.items() if key in path)
+
+    # sorted() is stable, so files with equal scores keep their glob order.
+    return sorted(files, key=_phase_score) or None
+# ── end B4: _latest_output ─────────────────────────────────────
+def _build_progress():
+    """Render the analysis pipeline as a one-line emoji checklist.
+
+    Each stage is marked done (✅) when at least one of its glob patterns
+    matches a file on disk, otherwise pending (⬜). Plain text only — the
+    string is shown by a gr.Markdown component.
+    """
+    themes_glob = "/tmp/checkpoints/rq4_*_themes.json"
+    # (stage name, patterns); a stage is complete if ANY pattern matches.
+    stages = (
+        ("Load", ("/tmp/checkpoints/rq4_*_summaries.json",
+                  "/tmp/checkpoints/rq4_*_emb.npy")),
+        ("Codes", ("/tmp/checkpoints/rq4_*_labels.json",)),
+        ("Themes", (themes_glob,)),
+        ("Review", (themes_glob,)),
+        ("Names", (themes_glob,)),
+        ("PAJAIS", ("/tmp/checkpoints/rq4_*_taxonomy_map.json",)),
+        ("Report", ("/tmp/rq4_comparison.csv", "/tmp/rq4_narrative.txt")),
+    )
+    cells = []
+    for stage_name, patterns in stages:
+        finished = any(glob.glob(pattern) for pattern in patterns)
+        marker = "✅" if finished else "⬜"
+        cells.append(f"{marker} {stage_name}")
+    return " → ".join(cells)
+# ── end B5: _build_progress ────────────────────────────────────
+def respond(message, chat_history, uploaded_file):
+    """Handle one chat turn with the LangGraph agent.
+
+    Generator that yields exactly twice, each time a
+    ``(chat_history, textbox_value, download_files)`` tuple:
+      1. immediately, with a "working" placeholder as the assistant message;
+      2. after the agent returns, with the placeholder replaced by its reply.
+
+    Args:
+        message: Text typed by the user. Blank or ``None`` falls back to the
+            default prompt "Analyze my Scopus CSV".
+        chat_history: List of ``{"role", "content"}`` dicts; ``None`` is
+            treated as an empty history.
+        uploaded_file: Path of the uploaded CSV, or a falsy value to keep
+            using the previously stored path.
+    """
+    global _msg_count
+    _msg_count += 1
+    # Remember the newest upload; keep the previous path when none is given.
+    _uploaded["path"] = uploaded_file or _uploaded.get("path", "")
+    csv_path = _uploaded["path"]
+    # Tell agent where the CSV is (prevents hallucinated filepaths)
+    if csv_path:
+        file_note = f"\n[CSV file at: {csv_path}]"
+    else:
+        file_note = "\n[No CSV uploaded yet — ask user to upload a file first]"
+    # Tell agent what phase we're in based on existing checkpoint files
+    if glob.glob("/tmp/checkpoints/rq4_*_labels.json"):
+        phase_context = "\n[Phase context: labels exist]"
+    elif glob.glob("/tmp/checkpoints/rq4_*_emb.npy"):
+        phase_context = "\n[Phase context: embeddings exist]"
+    else:
+        phase_context = "\n[Phase context: fresh start]"
+    # The prompt the agent actually receives; also shown as the user bubble so
+    # a blank submission does not render an empty message.
+    user_text = (message or "").strip() or "Analyze my Scopus CSV"
+    text = user_text + file_note + phase_context
+    print(f"\n{'='*60}\n>>> MSG #{_msg_count}: '{text[:120]}'\n{'='*60}")
+    # YIELD 1: Show "thinking" bubble immediately
+    chat_history = list(chat_history or []) + [
+        {"role": "user", "content": user_text},
+        {"role": "assistant", "content": "🔬 **Working...**  _Agent is thinking..._"},
+    ]
+    yield chat_history, "", _latest_output()
+    # Invoke agent — Mistral brain decides which tools to call
+    result = agent.invoke(
+        {"messages": [("human", text)]},
+        config={"configurable": {"thread_id": "session"}},
+    )
+    response = result["messages"][-1].content
+    print(f">>> Response ({len(response)} chars)")
+    # YIELD 2: Replace thinking bubble with actual response
+    chat_history[-1] = {"role": "assistant", "content": response}
+    gr.Info(f"Agent responded ({len(response)} chars)")
+    yield chat_history, "", _latest_output()
+# ── end B6: respond ────────────────────────────────────────────
+def _load_chart(chart_name):
+    """Load a Plotly figure from a JSON file in /tmp for gr.Plot.
+
+    Args:
+        chart_name: File name of the chart, e.g. an entry produced by
+            ``_get_chart_choices``. Only the final path component is used, so
+            the lookup cannot leave ``/tmp``.
+
+    Returns:
+        The figure built by ``plotly.io.from_json``, or ``None`` when no name
+        was given or the file does not exist.
+    """
+    if not chart_name:
+        # Nothing selected (e.g. the dropdown was cleared).
+        return None
+    path = os.path.join("/tmp", os.path.basename(str(chart_name)))
+    # Check before opening: the previous version opened the file first and then
+    # multiplied the figure by a bool, so it raised on every call.
+    if not os.path.isfile(path):
+        return None
+    with open(path, encoding="utf-8") as fh:
+        return pio.from_json(fh.read())
+def _get_chart_choices():
+    """List the file names of every /tmp/rq4_*.json file, sorted by path."""
+    chart_paths = glob.glob("/tmp/rq4_*.json")
+    chart_paths.sort()
+    return [os.path.basename(chart_path) for chart_path in chart_paths]
+# ── end B7: _load_chart ───────────────────────────────────────
+def _load_review_table():
+    """Build the rows of the topic review table from the newest checkpoint.
+
+    Checkpoint priority: taxonomy_map > themes > labels > summaries; within a
+    kind, the alphabetically last file wins. When a taxonomy file is used,
+    sentence/paper counts are taken from the themes file entry with the same
+    label, if there is one.
+
+    Returns:
+        A list of 8-item rows
+        ``[#, label, evidence, sentences, papers, approve, rename_to, reasoning]``.
+        ``approve`` is a bool (rendered as a checkbox by gr.Dataframe). When no
+        checkpoint data exists a single "No data yet" placeholder row is returned.
+    """
+
+    def _newest(pattern):
+        """Return the alphabetically last path matching *pattern*, or ""."""
+        matches = sorted(glob.glob(pattern))
+        return matches[-1] if matches else ""
+
+    def _read_json(json_path):
+        """Parse a JSON file, closing the handle afterwards."""
+        with open(json_path, encoding="utf-8") as fh:
+            return json.load(fh)
+
+    taxonomy_path = _newest("/tmp/checkpoints/rq4_*_taxonomy_map.json")
+    themes_path = _newest("/tmp/checkpoints/rq4_*_themes.json")
+    # Pick most advanced checkpoint available
+    path = (
+        taxonomy_path
+        or themes_path
+        or _newest("/tmp/checkpoints/rq4_*_labels.json")
+        or _newest("/tmp/checkpoints/rq4_*_summaries.json")
+    )
+    is_taxonomy = bool(taxonomy_path)
+    data = (_read_json(path) if path and os.path.exists(path) else None) or []
+    # For taxonomy: merge with themes to get sentence/paper counts
+    theme_lookup = {}
+    if is_taxonomy and themes_path:
+        theme_lookup = {t.get("label", ""): t for t in _read_json(themes_path)}
+
+    rows = []
+    # NOTE(review): column 0 is the position in the checkpoint list, while
+    # _show_papers_by_select matches it against each topic's "topic_id" —
+    # confirm the two always coincide.
+    for idx, topic in enumerate(data):
+        if is_taxonomy:
+            # Evidence for taxonomy rows is the PAJAIS mapping plus reasoning.
+            evidence = (
+                f"→ {topic.get('pajais_match', '?')} | {topic.get('reasoning', '')}"[:120]
+            )
+        else:
+            # Otherwise the first nearest sentence; a missing OR empty
+            # "nearest" list yields an empty cell instead of an IndexError.
+            nearest = topic.get("nearest") or []
+            evidence = (nearest[0].get("sentence", "")[:120] + "...") if nearest else ""
+        counts_source = theme_lookup.get(topic.get("label", ""), topic)
+        rows.append([
+            idx,                                                       # #
+            topic.get("label", topic.get("top_words", ""))[:60],       # Label
+            evidence,                                                  # Evidence
+            counts_source.get("sentence_count", topic.get("sentence_count", 0)),
+            counts_source.get("paper_count", topic.get("paper_count", 0)),
+            True,                                                      # Approve (bool → checkbox)
+            "",                                                        # Rename To
+            "",                                                        # Reasoning
+        ])
+    return rows or [[0, "No data yet", "", 0, 0, False, "", ""]]
+# ── end B8: _load_review_table ─────────────────────────────────
+def _show_papers_by_select(table_data, evt: gr.SelectData):
+    """Describe the topic of the clicked review-table row.
+
+    Triggered by ``review_table.select()``. The topic id is read from column 0
+    of the clicked row (not from the row position) and looked up in the labels
+    checkpoints, or in the summaries checkpoints when no labels file exists.
+
+    Args:
+        table_data: Current table contents — a pandas DataFrame or a list of rows.
+        evt: Selection event; ``evt.index[0]`` is used as the clicked row.
+
+    Returns:
+        One text section per matching topic entry (header, counts, AI
+        reasoning, up to five nearest sentences, all paper titles) joined by
+        blank lines, or ``"Topic <id> not found."`` when nothing matches.
+    """
+    row_idx = evt.index[0]
+    # Get topic_id from column 0 of the clicked row (not row index)
+    if hasattr(table_data, "iloc"):
+        topic_id = int(table_data.iloc[row_idx, 0])
+    else:
+        topic_id = int(table_data[row_idx][0])
+    # Load paper data from checkpoint files: labels first, summaries as fallback.
+    checkpoint_files = (
+        sorted(glob.glob("/tmp/checkpoints/rq4_*_labels.json"))
+        or sorted(glob.glob("/tmp/checkpoints/rq4_*_summaries.json"))
+    )
+    sections = []
+    for checkpoint in checkpoint_files:
+        # Second "_"-separated part of the file name identifies the source.
+        source = os.path.basename(checkpoint).split("_")[1]
+        with open(checkpoint, encoding="utf-8") as fh:
+            topics = json.load(fh)
+        for topic in topics:
+            if topic.get("topic_id") != topic_id:
+                continue
+            nearest = (topic.get("nearest") or [])[:5]
+            # .get() so an entry without "sentence"/"title" renders as blank
+            # instead of aborting the whole lookup with a KeyError.
+            evidence = "\n".join(
+                f"  {n}. \"{item.get('sentence', '')[:200]}\"\n"
+                f"     Paper: {item.get('title', '')[:100]}"
+                for n, item in enumerate(nearest, start=1)
+            )
+            titles = "\n".join(
+                f"  {n}. {title}"
+                for n, title in enumerate(topic.get("paper_titles", []), start=1)
+            )
+            sections.append(
+                f"═══ {source.upper()} — Topic {topic_id}: "
+                f"{topic.get('label', topic.get('top_words', '')[:50])} ═══\n"
+                f"{topic.get('sentence_count', 0)} sentences from {topic.get('paper_count', 0)} papers\n"
+                f"AI Reasoning: {topic.get('reasoning', 'not yet labeled')}\n\n"
+                f"── 5 NEAREST CENTROID SENTENCES (evidence) ──\n"
+                + evidence
+                + "\n\n── ALL PAPER TITLES ──\n"
+                + titles
+            )
+    return "\n\n".join(sections) or f"Topic {topic_id} not found."
+# ── end B9: _show_papers_by_select ─────────────────────────────
+def _submit_review(table_data, chat_history):
+    """Turn the edited review table into an agent message and run the agent.
+
+    Generator that yields twice, each time a 6-tuple
+    ``(chat_history, download_files, chart_selector_update, chart_display_update,
+    review_table_update, progress_text)``:
+      1. immediately, with a "processing" placeholder as the assistant message;
+      2. after the agent returns, with its reply and refreshed chart choices,
+         review table and progress text.
+
+    Per row the message contains ``RENAME to '<text>'`` when "Rename To" is
+    filled, otherwise ``APPROVE '<label>'`` when the Approve box is ticked, plus
+    ``REJECT`` whenever it is not ticked, and an optional ``— reason: <text>``.
+
+    Args:
+        table_data: Review table contents — a pandas DataFrame or a list of rows.
+            Approve (column 5) is a bool, not a string.
+        chat_history: List of ``{"role", "content"}`` dicts; ``None`` is
+            treated as an empty history.
+    """
+
+    def _cell_text(value):
+        """Return a cell as stripped text; None and NaN count as empty."""
+        if value is None or (isinstance(value, float) and value != value):
+            return ""
+        return str(value).strip()
+
+    if hasattr(table_data, "iloc"):
+        rows = table_data.values.tolist()
+    else:
+        rows = [list(row) for row in (table_data or [])]
+
+    lines = []
+    for row in rows:
+        rename_to = _cell_text(row[6])
+        reason = _cell_text(row[7])
+        approved = bool(row[5])
+        actions = []
+        if rename_to:
+            actions.append(f"RENAME to '{rename_to}'")
+        elif approved:
+            actions.append(f"APPROVE '{row[1]}'")
+        if not approved:
+            actions.append("REJECT")
+        # "; " keeps a rename on an unapproved row readable instead of the
+        # run-together "RENAME to 'x'REJECT".
+        line = f"Topic {int(row[0])}: " + "; ".join(actions)
+        if reason:
+            line += f" — reason: {reason}"
+        lines.append(line)
+
+    review_msg = "Review decisions:\n" + "\n".join(lines)
+    print(f">>> Review submitted: {review_msg[:200]}")
+    # YIELD 1: Show processing bubble
+    chat_history = list(chat_history or []) + [
+        {"role": "user", "content": review_msg},
+        {"role": "assistant", "content": "🔬 **Processing review decisions...**"},
+    ]
+    gr.Info("Review submitted to agent")
+    yield (chat_history, _latest_output(), gr.update(),
+           gr.update(), gr.update(), _build_progress())
+    # Invoke agent with review decisions
+    result = agent.invoke(
+        {"messages": [("human", review_msg)]},
+        config={"configurable": {"thread_id": "session"}},
+    )
+    response = result["messages"][-1].content
+    # YIELD 2: Final response + refreshed table/charts
+    chat_history[-1] = {"role": "assistant", "content": response}
+    gr.Info("Review processed — table updated")
+    yield (
+        chat_history,
+        _latest_output(),
+        gr.update(choices=_get_chart_choices()),
+        gr.update(),
+        gr.update(value=_load_review_table()),
+        _build_progress(),
+    )
+print(">>> Building UI...")
+with gr.Blocks(
+    title="Topic Modelling — Agentic AI",
+    fill_width=True,
+    css="""
+        /* Accent bar at very top of page */
+        .gradio-container::before {
+            content: "";
+            display: block;
+            height: 3px;
+            background: linear-gradient(90deg, #0d9488, #6366f1);
+            margin-bottom: 4px;
+        }
+        /* Tabs: tighter padding, bolder active state */
+        .tab-nav button {
+            font-size: 13px !important;
+            font-weight: 500 !important;
+            letter-spacing: 0.01em;
+            padding: 6px 16px !important;
+        }
+        .tab-nav button.selected {
+            font-weight: 700 !important;
+            border-bottom: 2px solid #0d9488 !important;
+        }
+        /* Dataframe: subtle zebra rows */
+        .table-wrap tr:nth-child(even) td {
+            background-color: rgba(13, 148, 136, 0.04);
+        }
+        /* Chat: teal left-border on assistant bubbles */
+        .message.bot {
+            border-left: 3px solid #0d9488 !important;
+        }
+        /* Phase progress: monospace, slightly muted */
+        .phase-bar p {
+            font-family: "Fira Code", monospace;
+            font-size: 12px;
+            letter-spacing: 0.03em;
+            opacity: 0.80;
+        }
+        /* Upload area: cleaner dashed border */
+        .upload-container {
+            border-style: dashed !important;
+            border-width: 1px !important;
+        }
     """,
 ) as demo:
+    # ── B12: Header ────────────────────────────────────────────
+    # PURPOSE:  Application title and subtitle.
+    # ───────────────────────────────────────────────────────────
+    gr.Markdown(
+        "# 🔬 Topic Modelling · Agentic AI\n"
+        "<sub>Mistral · Cosine Clustering · 384d Embeddings · Braun & Clarke Thematic Analysis</sub>"
     )
+    # ── end B12: Header ────────────────────────────────────────
+    # ── B13: Data input ────────────────────────────────────────
+    # PURPOSE:  CSV file upload area with inline instructions.
+    #           Researcher uploads their Scopus CSV export here.
+    #           On upload, B19 auto-triggers the first analysis.
+    # COMPONENTS: gr.File (upload) + gr.Markdown (instructions)
+    # EVENTS:  upload.change → B19 (_auto_load_csv)
+    # ───────────────────────────────────────────────────────────
+    gr.Markdown("**① Upload**")
+    with gr.Row():
+        upload = gr.File(label="📂 Scopus CSV", file_types=[".csv"])
+        gr.Markdown(
+            "Upload your Scopus CSV export, then type `run abstract only` in the chat below "
+            "to begin the analysis pipeline."
+        )
+    # ── end B13: Data input ────────────────────────────────────
+    # ── B14: Progress pipeline ─────────────────────────────────
+    # PURPOSE:  Visual indicator of which Braun & Clarke analysis
+    #           phases are complete. Updated after every agent action.
+    #           elem_classes="phase-bar" targets the monospace CSS rule in B11.
+    # COMPONENT: gr.Markdown — displays emoji string from B5
+    # UPDATED BY: B18 (after chat), B10 (after review), B19 (after upload)
+    # ───────────────────────────────────────────────────────────
+    phase_progress = gr.Markdown(value=_build_progress(), elem_classes=["phase-bar"])
+    # ── end B14: Progress pipeline ─────────────────────────────
+    # ── B15: Chatbot + input ───────────────────────────────────
+    # PURPOSE:  Main conversation interface between researcher and
+    #           the LangGraph agent.
+    # COMPONENTS: gr.Chatbot (display), gr.Textbox (input), gr.Button (send)
+    # EVENTS:  msg.submit → B18, send.click → B18
+    # ───────────────────────────────────────────────────────────
+    gr.Markdown("**② Conversation** — follow the guided workflow")
+    with gr.Group():
+        chatbot = gr.Chatbot(
+            height=320,
+            show_label=False,
+            avatar_images=(
+                None,
+                "https://api.dicebear.com/7.x/bottts-neutral/svg?seed=bertopic",
+            ),
+            placeholder=(
+                "**Ready.** Upload a Scopus CSV above, then type:\n\n"
+                "`run abstract only` · `approve all` · `show topic 4 papers` · `done`"
+            ),
+        )
+        with gr.Row():
+            msg = gr.Textbox(
+                placeholder="run · approve · show topic 4 papers · group 0 1 5 · done",
+                show_label=False, scale=9, lines=1, max_lines=1, container=False,
+            )
+            send = gr.Button("⏎ Send", variant="primary", scale=1, min_width=80)
+    # ── end B15: Chatbot + input ───────────────────────────────
+    # ── B16: Review table tab ──────────────────────────────────
+    # PURPOSE:  Interactive topic review table where the researcher
+    #           approves, renames, or annotates BERTopic-discovered
+    #           topics. This is the core human-in-the-loop interface.
+    #
+    # KEY FEATURES (all native Gradio, no HTML). Items marked [disabled] are
+    # commented out in the gr.Dataframe call below and are not active yet:
+    #   - [disabled] static_columns=[0,1,2,3,4] — first 5 columns read-only
+    #   - datatype "bool" on column 5 — Approve renders as checkbox
+    #   - [disabled] pinned_columns=2 — # and Label stay visible when scrolling
+    #   - [disabled] show_search="filter" — built-in column filtering
+    #   - .select() event — clicking any row auto-loads that topic's papers
+    #
+    # COMPONENTS: gr.Dataframe, gr.Button (submit), gr.Textbox (papers)
+    # EVENTS:  review_table.select → B9, submit_review.click → B10
+    # ───────────────────────────────────────────────────────────
+    gr.Markdown("**③ Review & Export**")
+    with gr.Tabs():
+        with gr.Tab("📋 Topics"):
+            gr.Markdown(
+                "*Toggle **Approve**, fill in **Rename To** or **Reasoning**, "
+                "then click Submit. Click any row to inspect its source papers below.*"
+            )
+            review_table = gr.Dataframe(
+                headers=[
+                    "#", "Topic Label", "Top Evidence Sentence",
+                    "Sentences", "Papers", "Approve", "Rename To", "Your Reasoning",
+                ],
+                datatype=[
+                    "number", "str", "str", "number", "number",
+                    "bool", "str", "str",
+                ],
+                interactive=True,
+                column_count=8,
+                # NOTE: These features need Gradio >=5.23. Uncomment when available:
+                # static_columns=[0, 1, 2, 3, 4],
+                # pinned_columns=2,
+                # show_search="filter",
+                # show_row_numbers=True,
+                # show_fullscreen_button=True,
+                # show_copy_button=True,
+                # column_widths=["60px","200px","250px","80px","70px","70px","150px","200px"],
+            )
+            submit_review = gr.Button("✅ Submit Review to Agent", variant="primary")
+            gr.Markdown("---")
+            gr.Markdown("**📄 Papers in selected topic** *(click any row above)*")
+            paper_list = gr.Textbox(
+                label="Papers in selected topic",
+                lines=8, interactive=False,
+            )
+    # ── end B16: Review table tab ──────────────────────────────
+        # ── B17a: Charts tab ───────────────────────────────────
+        # PURPOSE:  Display BERTopic visualization charts rendered
+        #           natively in gr.Plot from Plotly JSON files.
+        # COMPONENTS: gr.Dropdown (selector), gr.Plot (display)
+        # EVENTS:  chart_selector.change → B7 (_load_chart)
+        # ───────────────────────────────────────────────────────
+        with gr.Tab("📊 Visualise"):
+            chart_selector = gr.Dropdown(
+                choices=[], label="Select chart", interactive=True,
+            )
+            chart_display = gr.Plot(label="BERTopic Visualization")
+        # ── end B17a: Charts tab ───────────────────────────────
+        # ── B17b: Download tab ─────────────────────────────────
+        # PURPOSE:  Multi-file download for all pipeline outputs.
+        # COMPONENTS: gr.Markdown (descriptions), gr.File (download)
+        # UPDATED BY: B18, B10, B19 — refreshed after each action
+        # ───────────────────────────────────────────────────────
+        with gr.Tab("⬇ Export"):
+            gr.Markdown(
+                "**Files by Phase (per run: abstract / title):**\n\n"
+                "**Phase 2 — Discovery:** `summaries.json` · `emb.npy`\n\n"
+                "**Phase 2 — Labeling:** `labels.json`\n\n"
+                "**Phase 2 — Charts:** `intertopic.json` · `bars.json` · "
+                "`hierarchy.json` · `heatmap.json`\n\n"
+                "**Phase 3 — Themes:** `themes.json`\n\n"
+                "**Phase 5.5 — Taxonomy:** `taxonomy_map.json`\n\n"
+                "**Phase 6 — Report:** `comparison.csv` · `narrative.txt`"
+            )
+            download = gr.File(label="All output files", file_count="multiple")
+        # ── end B17b: Download tab ─────────────────────────────
     # Picking a chart name renders that chart.
     chart_selector.change(
         fn=_load_chart,
         inputs=[chart_selector],
         outputs=[chart_display],
     )
     # Clicking a table row lists the papers behind that topic.
     review_table.select(
         fn=_show_papers_by_select,
         inputs=[review_table],
         outputs=[paper_list],
     )
     # Submitting the edited table hands it to the agent and refreshes
     # every dependent component.
     submit_review.click(
         fn=_submit_review,
         inputs=[review_table, chatbot],
         outputs=[
             chatbot,
             download,
             chart_selector,
             chart_display,
             review_table,
             phase_progress,
         ],
     )
     def respond_with_viz(message, chat_history, uploaded_file):
         """Wrap respond() and refresh charts, review table and progress.

         Streams up to two UI updates: an interim one as soon as respond()
         produces its first item (only the chart choices and the progress
         bar are refreshed), then a final one that also selects the last
         chart, renders it and reloads the review table.

         Args:
             message: Text taken from the chat input box.
             chat_history: Current value of the chatbot component.
             uploaded_file: Current value of the upload component.

         Yields:
             7-tuples in the order the event listeners bind them:
             (chatbot, msg, download, chart_selector, chart_display,
             review_table, phase_progress).
         """
         gen = respond(message, chat_history, uploaded_file)
         # A bare next() on an exhausted generator raises StopIteration, which
         # Python turns into RuntimeError inside a generator body (PEP 479).
         # Using a sentinel default keeps a short respond() from crashing here.
         exhausted = object()

         first = next(gen, exhausted)
         if first is exhausted:
             # respond() produced nothing: leave every component untouched.
             yield (gr.update(), gr.update(), gr.update(), gr.update(),
                    gr.update(), gr.update(), _build_progress())
             return

         # Interim update (progress bubble).
         hist, txt, dl = first
         yield (hist, txt, dl, gr.update(choices=_get_chart_choices()),
                gr.update(), gr.update(), _build_progress())

         # Final update; if respond() stopped after one item, reuse its values.
         final = next(gen, exhausted)
         if final is not exhausted:
             hist, txt, dl = final

         choices = _get_chart_choices()
         latest = choices[-1] if choices else None
         chart = _load_chart(latest) if choices else None
         yield (
             hist, txt, dl,
             gr.update(choices=choices, value=latest),
             chart or gr.update(),  # no chart available -> keep current plot
             gr.update(value=_load_review_table()),
             _build_progress(),
         )
     # Pressing Enter in the textbox and clicking Send run the same handler
     # with identical wiring, so both listeners are registered from one place.
     for _register in (msg.submit, send.click):
         _register(
             fn=respond_with_viz,
             inputs=[msg, chatbot, upload],
             outputs=[
                 chatbot,
                 msg,
                 download,
                 chart_selector,
                 chart_display,
                 review_table,
                 phase_progress,
             ],
         )
+    # ── end B18: respond_with_viz + event bindings ─────────────
+    # ── B19: _auto_load_csv() ──────────────────────────────────
+    # PURPOSE:  Automatically triggers analysis when a CSV file is
+    #           uploaded. Sends "Analyze my Scopus CSV" as the
+    #           initial message so no manual typing is needed.
+    # TRIGGERED BY: upload.change event
+    # CALLS:   B6 (respond) with auto-message
+    # OUTPUTS:  chatbot, download, chart_selector, chart_display,
+    #           review_table, phase_progress
+    # ───────────────────────────────────────────────────────────
     def _auto_load_csv(uploaded_file, chat_history):
         """Auto-trigger analysis when a CSV is uploaded — no typing needed.

         Sends the fixed message "Analyze my Scopus CSV" through respond()
         and streams up to two UI updates: an interim one that only touches
         the chat, downloads and progress bar, then a final one that also
         populates the chart selector, chart and review table.

         Args:
             uploaded_file: Current value of the upload component; None
                 when the file has been cleared.
             chat_history: Current value of the chatbot component.

         Yields:
             6-tuples in the order upload.change binds them:
             (chatbot, download, chart_selector, chart_display,
             review_table, phase_progress).
         """
         if uploaded_file is None:
             # The change event also fires when the file is removed; there is
             # nothing to analyse then, so do not start an agent turn.
             yield (gr.update(), gr.update(), gr.update(), gr.update(),
                    gr.update(), _build_progress())
             return

         gen = respond("Analyze my Scopus CSV", chat_history, uploaded_file)
         # A bare next() on an exhausted generator raises StopIteration, which
         # Python turns into RuntimeError inside a generator body (PEP 479).
         exhausted = object()

         first = next(gen, exhausted)
         if first is exhausted:
             # respond() produced nothing: leave every component untouched.
             yield (gr.update(), gr.update(), gr.update(), gr.update(),
                    gr.update(), _build_progress())
             return

         # Interim update (progress); the textbox value from respond() is unused.
         hist, _txt, dl = first
         yield (hist, dl, gr.update(), gr.update(),
                gr.update(), _build_progress())

         # Final update; if respond() stopped after one item, reuse its values.
         final = next(gen, exhausted)
         if final is not exhausted:
             hist, _txt, dl = final

         choices = _get_chart_choices()
         latest = choices[-1] if choices else None
         chart = _load_chart(latest) if choices else None
         yield (
             hist, dl,
             gr.update(choices=choices, value=latest),
             chart or gr.update(),  # no chart available -> keep current plot
             gr.update(value=_load_review_table()),
             _build_progress(),
         )
     # Any change of the upload component runs the auto-analysis handler.
     upload.change(
         fn=_auto_load_csv,
         inputs=[upload, chatbot],
         outputs=[
             chatbot,
             download,
             chart_selector,
             chart_display,
             review_table,
             phase_progress,
         ],
     )
+    # ── end B19: _auto_load_csv ────────────────────────────────
+print(">>> Launching...")
 demo.launch(
     server_name="0.0.0.0",
     server_port=7860,
     ssr_mode=False,
+    theme=theme,                    # Gradio 6: moved from gr.Blocks()
+    footer_links=[],                # Gradio 6: hides footer, replaces show_api
+)
+# ── end B20: Launch ────────────────────────────────────────────