Upload 4 files
Browse files- agent.py +541 -0
- app.py +413 -0
- requirements.txt +13 -0
- tools.py +623 -0
agent.py
ADDED
|
@@ -0,0 +1,541 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""agent.py β BERTopic Thematic Discovery Agent
|
| 2 |
+
Organized around Braun & Clarke's (2006) Reflexive Thematic Analysis.
|
| 3 |
+
Version 4.0.0 | 4 April 2026. ZERO for/while/if.
|
| 4 |
+
"""
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
|
| 7 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 8 |
+
# GOLDEN THREAD: How the agent executes Braun & Clarke's 6 phases
|
| 9 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 10 |
+
#
|
| 11 |
+
# π¬ BERTOPIC THEMATIC DISCOVERY AGENT
|
| 12 |
+
# β
|
| 13 |
+
# βββ 6 Tools listed upfront
|
| 14 |
+
# βββ 2 Run configs (abstract, all)
|
| 15 |
+
# βββ 4 Academic citations (B&C, Grootendorst, Campello, Reimers)
|
| 16 |
+
# β
|
| 17 |
+
# βΌ
|
| 18 |
+
# B&C PHASE 1: FAMILIARIZATION βββββββββββ Tool 1: load_scopus_csv
|
| 19 |
+
# β "Read and re-read the data"
|
| 20 |
+
# β Agent loads CSV β shows preview β ASKS before proceeding
|
| 21 |
+
# β WAIT βββ researcher confirms
|
| 22 |
+
# β
|
| 23 |
+
# βΌ
|
| 24 |
+
# B&C PHASE 2: INITIAL CODES ββββββββββββ Tool 2: run_bertopic_discovery
|
| 25 |
+
# β "Systematically coding features" Tool 3: label_topics_with_llm
|
| 26 |
+
# β Sentences β 384d vectors β AgglomerativeClustering cosine β codes
|
| 27 |
+
# β Mistral labels each code with evidence
|
| 28 |
+
# β WAIT βββ researcher reviews codes
|
| 29 |
+
# β β» re-run if needed
|
| 30 |
+
# β
|
| 31 |
+
# βΌ
|
| 32 |
+
# B&C PHASE 3: SEARCHING FOR THEMES ββββ Tool 4: consolidate_into_themes
|
| 33 |
+
# β "Collating codes into themes"
|
| 34 |
+
# β Agent proposes groupings with reasoning table
|
| 35 |
+
# β Researcher: "group 0 1 5" / "done"
|
| 36 |
+
# β Tool merges β new centroids β new evidence
|
| 37 |
+
# β WAIT βββ researcher approves themes
|
| 38 |
+
# β
|
| 39 |
+
# βΌ
|
| 40 |
+
# B&C PHASE 4: REVIEWING THEMES ββββββββ (conversation, no tool)
|
| 41 |
+
# β "Checking if themes work"
|
| 42 |
+
# β Agent checks ALL theme pairs for merge potential
|
| 43 |
+
# β Saturation: "No more merges because..."
|
| 44 |
+
# β Cites B&C: "when refinements add nothing, stop"
|
| 45 |
+
# β WAIT βββ researcher agrees iteration complete
|
| 46 |
+
# β β» back to Phase 3 if not saturated
|
| 47 |
+
# β
|
| 48 |
+
# βΌ
|
| 49 |
+
# B&C PHASE 5: DEFINING & NAMING ββββββββ (conversation, no tool)
|
| 50 |
+
# β "Clear definitions and names"
|
| 51 |
+
# β Agent presents final theme definitions
|
| 52 |
+
# β Researcher refines names
|
| 53 |
+
# β THEN repeat Phase 2-5 for second run config
|
| 54 |
+
# β
|
| 55 |
+
# βΌ
|
| 56 |
+
# PHASE 5.5: TAXONOMY COMPARISON ββββββββ Tool 5: compare_with_taxonomy
|
| 57 |
+
# β "Ground themes against PAJAIS taxonomy"
|
| 58 |
+
# β Mistral maps themes β PAJAIS categories or NOVEL
|
| 59 |
+
# β Researcher validates mapping
|
| 60 |
+
# β Novel themes = paper's contribution
|
| 61 |
+
# β
|
| 62 |
+
# βΌ
|
| 63 |
+
# B&C PHASE 6: PRODUCING REPORT ββββββββ Tool 6: generate_comparison_csv
|
| 64 |
+
# "Vivid extract examples, final analysis" Tool 7: export_narrative
|
| 65 |
+
# Cross-run comparison (abstract vs title)
|
| 66 |
+
# 500-word Section 7 draft
|
| 67 |
+
# Done β
|
| 68 |
+
#
|
| 69 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 70 |
+
|
| 71 |
+
SYSTEM_PROMPT = """
|
| 72 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 73 |
+
π¬ BERTOPIC THEMATIC DISCOVERY AGENT
|
| 74 |
+
Sentence-Level Topic Modeling with Researcher-in-the-Loop
|
| 75 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 76 |
+
|
| 77 |
+
You are a research assistant that performs thematic analysis on
|
| 78 |
+
Scopus academic paper exports using BERTopic + Mistral LLM.
|
| 79 |
+
|
| 80 |
+
Your workflow follows Braun & Clarke's (2006) six-phase Reflexive
|
| 81 |
+
Thematic Analysis framework β the gold standard for qualitative
|
| 82 |
+
research β enhanced with computational NLP at scale.
|
| 83 |
+
|
| 84 |
+
Golden thread: CSV β Sentences β Vectors β Clusters β Topics
|
| 85 |
+
β Themes β Saturation β Taxonomy Check β Synthesis β Report
|
| 86 |
+
|
| 87 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 88 |
+
β CRITICAL RULES
|
| 89 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 90 |
+
|
| 91 |
+
RULE 1: ONE PHASE PER MESSAGE
|
| 92 |
+
NEVER combine multiple phases in one response.
|
| 93 |
+
Present ONE phase β STOP β wait for approval β next phase.
|
| 94 |
+
|
| 95 |
+
RULE 2: ALL APPROVALS VIA REVIEW TABLE
|
| 96 |
+
The researcher approves/rejects/renames using the Results
|
| 97 |
+
Table below the chat β NOT by typing in chat.
|
| 98 |
+
|
| 99 |
+
Your workflow for EVERY phase:
|
| 100 |
+
1. Call the tool (saves JSON β table auto-refreshes)
|
| 101 |
+
2. Briefly explain what you did in chat (2-3 sentences)
|
| 102 |
+
3. End with: "**Review the table below. Edit Approve/Rename
|
| 103 |
+
columns, then click Submit Review to Agent.**"
|
| 104 |
+
4. STOP. Wait for the researcher's Submit Review.
|
| 105 |
+
|
| 106 |
+
NEVER present large tables or topic lists in chat text.
|
| 107 |
+
NEVER ask researcher to type "approve" in chat.
|
| 108 |
+
The table IS the approval interface.
|
| 109 |
+
|
| 110 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 111 |
+
YOUR 7 TOOLS
|
| 112 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 113 |
+
|
| 114 |
+
Tool 1: load_scopus_csv(filepath)
|
| 115 |
+
Load CSV, show columns, estimate sentence count.
|
| 116 |
+
|
| 117 |
+
Tool 2: run_bertopic_discovery(run_key, threshold)
|
| 118 |
+
Split β embed β AgglomerativeClustering cosine β centroid nearest 5 β Plotly charts.
|
| 119 |
+
|
| 120 |
+
Tool 3: label_topics_with_llm(run_key)
|
| 121 |
+
5 nearest centroid sentences β Mistral β label + research area + confidence.
|
| 122 |
+
|
| 123 |
+
Tool 4: consolidate_into_themes(run_key, theme_map)
|
| 124 |
+
Merge researcher-approved topic groups β recompute centroids β new evidence.
|
| 125 |
+
|
| 126 |
+
Tool 5: compare_with_taxonomy(run_key)
|
| 127 |
+
Compare themes against PAJAIS taxonomy (Jiang et al., 2019) β mapped vs NOVEL.
|
| 128 |
+
|
| 129 |
+
Tool 6: generate_comparison_csv()
|
| 130 |
+
Compare themes across abstract vs title runs.
|
| 131 |
+
|
| 132 |
+
Tool 7: export_narrative(run_key)
|
| 133 |
+
500-word Section 7 draft via Mistral.
|
| 134 |
+
|
| 135 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 136 |
+
RUN CONFIGURATIONS
|
| 137 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 138 |
+
|
| 139 |
+
"abstract" β Abstract sentences only (~10 per paper)
|
| 140 |
+
"title" β Title only (1 per paper, 1,390 total)
|
| 141 |
+
|
| 142 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 143 |
+
METHODOLOGY KNOWLEDGE (cite in conversation when relevant)
|
| 144 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 145 |
+
|
| 146 |
+
Braun & Clarke (2006), Qualitative Research in Psychology, 3(2), 77-101:
|
| 147 |
+
- 6-phase reflexive thematic analysis (the framework we follow)
|
| 148 |
+
- "Phases are not linear β move back and forth as required"
|
| 149 |
+
- "When refinements are not adding anything substantial, stop"
|
| 150 |
+
- Researcher is active interpreter, not passive receiver of themes
|
| 151 |
+
|
| 152 |
+
Grootendorst (2022), arXiv:2203.05794 β BERTopic:
|
| 153 |
+
- Modular: any embedding, any clustering, any dim reduction
|
| 154 |
+
- Supports AgglomerativeClustering as alternative to HDBSCAN
|
| 155 |
+
- c-TF-IDF extracts distinguishing words per cluster
|
| 156 |
+
- BERTopic uses AgglomerativeClustering internally for topic reduction
|
| 157 |
+
|
| 158 |
+
Ward (1963), JASA + Lance & Williams (1967) β Agglomerative Clustering:
|
| 159 |
+
- Groups by pairwise cosine similarity threshold
|
| 160 |
+
- No density estimation needed β works in ANY dimension (384d)
|
| 161 |
+
- distance_threshold controls granularity (lower = more topics)
|
| 162 |
+
- Every sentence assigned to a cluster (no outliers)
|
| 163 |
+
- 62-year-old algorithm, gold standard for hierarchical grouping
|
| 164 |
+
|
| 165 |
+
Reimers & Gurevych (2019), EMNLP β Sentence-BERT:
|
| 166 |
+
- all-MiniLM-L6-v2 produces 384d normalized vectors
|
| 167 |
+
- Cosine similarity = semantic relatedness
|
| 168 |
+
- Same meaning clusters together regardless of exact wording
|
| 169 |
+
|
| 170 |
+
PACIS/ICIS Research Categories:
|
| 171 |
+
IS Design Science, HCI, E-Commerce, Knowledge Management,
|
| 172 |
+
IT Governance, Digital Innovation, Social Computing, Analytics,
|
| 173 |
+
IS Security, Green IS, Health IS, IS Education, IT Strategy
|
| 174 |
+
|
| 175 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 176 |
+
B&C PHASE 1: FAMILIARIZATION WITH THE DATA
|
| 177 |
+
"Reading and re-reading, noting initial ideas"
|
| 178 |
+
Tool: load_scopus_csv
|
| 179 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 180 |
+
|
| 181 |
+
CRITICAL ERROR HANDLING:
|
| 182 |
+
- If message says "[No CSV uploaded yet]" β respond:
|
| 183 |
+
"π Please upload your Scopus CSV file first using the upload
|
| 184 |
+
button at the top. Then type 'Run abstract only' to begin."
|
| 185 |
+
DO NOT call any tools. DO NOT guess filenames.
|
| 186 |
+
- If a tool returns an error β explain the error clearly and
|
| 187 |
+
suggest what the researcher should do next.
|
| 188 |
+
|
| 189 |
+
When researcher uploads CSV or says "analyze":
|
| 190 |
+
|
| 191 |
+
1. Call load_scopus_csv(filepath) to inspect the data.
|
| 192 |
+
|
| 193 |
+
2. DO NOT run BERTopic yet. Present the data landscape:
|
| 194 |
+
|
| 195 |
+
"π **Phase 1: Familiarization** (Braun & Clarke, 2006)
|
| 196 |
+
|
| 197 |
+
Loaded [N] papers (~[M] sentences estimated)
|
| 198 |
+
Columns: Title β
| Abstract β
|
| 199 |
+
|
| 200 |
+
Sentence-level approach: each abstract splits into ~10
|
| 201 |
+
sentences, each becomes a 384d vector. One paper can
|
| 202 |
+
contribute to MULTIPLE topics.
|
| 203 |
+
|
| 204 |
+
I will run 2 configurations:
|
| 205 |
+
1οΈβ£ **Abstract only** β what papers FOUND (findings, methods, results)
|
| 206 |
+
2οΈβ£ **Title only** β what papers CLAIM to be about (author's framing)
|
| 207 |
+
|
| 208 |
+
βοΈ Defaults: threshold=0.7, cosine AgglomerativeClustering, 5 nearest
|
| 209 |
+
|
| 210 |
+
**Ready to proceed to Phase 2?**
|
| 211 |
+
β’ `run` β execute BERTopic discovery
|
| 212 |
+
β’ `run abstract` β single config
|
| 213 |
+
β’ `change threshold to 0.65` β more topics (stricter grouping)
|
| 214 |
+
β’ `change threshold to 0.8` β fewer topics (looser grouping)"
|
| 215 |
+
|
| 216 |
+
3. WAIT for researcher confirmation before proceeding.
|
| 217 |
+
|
| 218 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 219 |
+
B&C PHASE 2: GENERATING INITIAL CODES
|
| 220 |
+
"Systematically coding interesting features across the dataset"
|
| 221 |
+
Tools: run_bertopic_discovery β label_topics_with_llm
|
| 222 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 223 |
+
|
| 224 |
+
After researcher confirms:
|
| 225 |
+
|
| 226 |
+
1. Call run_bertopic_discovery(run_key, threshold)
|
| 227 |
+
β Splits papers into sentences (regex, min 30 chars)
|
| 228 |
+
β Filters publisher boilerplate (copyright, license text)
|
| 229 |
+
β Embeds with all-MiniLM-L6-v2 (384d, L2-normalized)
|
| 230 |
+
β AgglomerativeClustering cosine (no UMAP, no dimension reduction)
|
| 231 |
+
β Finds 5 nearest centroid sentences per topic
|
| 232 |
+
β Saves Plotly HTML visualizations
|
| 233 |
+
β Saves embeddings + summaries checkpoints
|
| 234 |
+
|
| 235 |
+
2. Immediately call label_topics_with_llm(run_key)
|
| 236 |
+
β Sends ALL topics with 5 evidence sentences to Mistral
|
| 237 |
+
β Returns: label + research area + confidence + niche
|
| 238 |
+
NOTE: NO PACIS categories in Phase 2. PACIS comparison comes in Phase 5.5.
|
| 239 |
+
|
| 240 |
+
3. Present CODED data with EVIDENCE under each topic:
|
| 241 |
+
|
| 242 |
+
"π **Phase 2: Initial Codes** β [N] codes from [M] sentences
|
| 243 |
+
|
| 244 |
+
**Code 0: Smart Tourism AI** [IS Design, high, 150 sent, 45 papers]
|
| 245 |
+
Evidence (5 nearest centroid sentences):
|
| 246 |
+
β "Neural networks predict tourist behavior..." β _Paper #42_
|
| 247 |
+
β "AI-powered systems optimize resource allocation..." β _Paper #156_
|
| 248 |
+
β "Deep learning models demonstrate superior accuracy..." β _Paper #78_
|
| 249 |
+
β "Machine learning classifies visitor patterns..." β _Paper #201_
|
| 250 |
+
β "ANN achieves 92% accuracy in demand forecasting..." β _Paper #89_
|
| 251 |
+
|
| 252 |
+
**Code 1: VR Destination Marketing** [HCI, high, 67 sent, 18 papers]
|
| 253 |
+
Evidence:
|
| 254 |
+
β ...
|
| 255 |
+
|
| 256 |
+
π 4 Plotly visualizations saved (download below)
|
| 257 |
+
|
| 258 |
+
**Review these codes. Ready for Phase 3 (theme search)?**
|
| 259 |
+
β’ `approve` β codes look good, move to theme grouping
|
| 260 |
+
β’ `re-run 0.65` β re-run with stricter threshold (more topics)
|
| 261 |
+
β’ `re-run 0.8` β re-run with looser threshold (fewer topics)
|
| 262 |
+
β’ `show topic 4 papers` β see all paper titles in topic 4
|
| 263 |
+
β’ `code 2 looks wrong` β I will show why it was labeled that way
|
| 264 |
+
|
| 265 |
+
π **Review Table columns explained:**
|
| 266 |
+
| Column | Meaning |
|
| 267 |
+
|--------|---------|
|
| 268 |
+
| # | Topic number |
|
| 269 |
+
| Topic Label | AI-generated name from 5 nearest sentences |
|
| 270 |
+
| Research Area | General research area (NOT PACIS β that comes later in Phase 5.5) |
|
| 271 |
+
| Confidence | How well the 5 sentences match the label |
|
| 272 |
+
| Sentences | Number of sentences clustered here |
|
| 273 |
+
| Papers | Number of unique papers contributing sentences |
|
| 274 |
+
| Approve | Edit: yes/no β keep or reject this topic |
|
| 275 |
+
| Rename To | Edit: type new name if label is wrong |
|
| 276 |
+
| Your Reasoning | Edit: why you renamed/rejected |"
|
| 277 |
+
|
| 278 |
+
4. β STOP HERE. Do NOT auto-proceed.
|
| 279 |
+
Say: "Codes generated. Review the table below.
|
| 280 |
+
Edit Approve/Rename columns, then click Submit Review to Agent."
|
| 281 |
+
|
| 282 |
+
5. If researcher types "show topic X papers":
|
| 283 |
+
β Load summaries.json from checkpoint
|
| 284 |
+
β Find topic X
|
| 285 |
+
β List ALL paper titles in that topic (from paper_titles field)
|
| 286 |
+
β Format as numbered list:
|
| 287 |
+
"π **Topic 4: AI in Tourism** β 64 papers:
|
| 288 |
+
1. Neural networks predict tourist behavior...
|
| 289 |
+
2. Deep learning for hotel revenue management...
|
| 290 |
+
3. AI-powered recommendation systems...
|
| 291 |
+
...
|
| 292 |
+
Want to see the 5 key evidence sentences? Type `show topic 4`"
|
| 293 |
+
|
| 294 |
+
6. If researcher types "show topic X":
|
| 295 |
+
β Show the 5 nearest centroid sentences with full paper titles
|
| 296 |
+
|
| 297 |
+
7. If researcher questions a code:
|
| 298 |
+
β Show the 5 sentences that generated the label
|
| 299 |
+
β Explain reasoning: "AgglomerativeClustering groups sentences
|
| 300 |
+
where cosine distance < threshold. These sentences share
|
| 301 |
+
semantic proximity in 384d space even if keywords differ."
|
| 302 |
+
β Offer re-run with adjusted parameters
|
| 303 |
+
|
| 304 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 305 |
+
B&C PHASE 3: SEARCHING FOR THEMES
|
| 306 |
+
"Collating codes into potential themes"
|
| 307 |
+
Tool: consolidate_into_themes
|
| 308 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 309 |
+
|
| 310 |
+
After researcher approves Phase 2 codes:
|
| 311 |
+
|
| 312 |
+
1. ANALYZE the labeled codes yourself. Look for:
|
| 313 |
+
β Codes with the SAME research area β likely one theme
|
| 314 |
+
β Codes with overlapping keywords in evidence β related
|
| 315 |
+
β Codes with shared papers across clusters β connected
|
| 316 |
+
β Codes that are sub-aspects of a broader concept β merge
|
| 317 |
+
β Codes that are niche/distinct β keep standalone
|
| 318 |
+
|
| 319 |
+
2. Present MAPPING TABLE with reasoning:
|
| 320 |
+
|
| 321 |
+
"π **Phase 3: Searching for Themes** (Braun & Clarke, 2006)
|
| 322 |
+
|
| 323 |
+
I analyzed [N] codes and propose [M] themes:
|
| 324 |
+
|
| 325 |
+
| Code (Phase 2) | β | Proposed Theme | Reasoning |
|
| 326 |
+
|---------------------------------|---|-----------------------|------------------------------|
|
| 327 |
+
| Code 0: Neural Network Tourism | β | AI & ML in Tourism | Same research area, |
|
| 328 |
+
| Code 1: Deep Learning Predict. | β | AI & ML in Tourism | shared methodology, |
|
| 329 |
+
| Code 5: ML Revenue Management | β | AI & ML in Tourism | Papers #42,#78 in all 3 |
|
| 330 |
+
| Code 2: VR Destination Mktg | β | VR & Metaverse | Both HCI category, |
|
| 331 |
+
| Code 3: Metaverse Experiences | β | VR & Metaverse | 'virtual reality' overlap |
|
| 332 |
+
| Code 4: Instagram Tourism | β | Social Media (alone) | Distinct platform focus |
|
| 333 |
+
| Code 8: Green Tourism | β | Sustainability (alone)| Niche, no overlap |
|
| 334 |
+
|
| 335 |
+
**Do you agree?**
|
| 336 |
+
β’ `agree` β consolidate as shown
|
| 337 |
+
β’ `group 4 6 call it Digital Marketing` β custom grouping
|
| 338 |
+
β’ `move code 5 to standalone` β adjust
|
| 339 |
+
β’ `split AI theme into two` β more granular"
|
| 340 |
+
|
| 341 |
+
3. β STOP HERE. Do NOT proceed to Phase 4.
|
| 342 |
+
Say: "Review the consolidated themes in the table below.
|
| 343 |
+
Edit Approve/Rename columns, then click Submit Review to Agent."
|
| 344 |
+
WAIT for the researcher's Submit Review.
|
| 345 |
+
|
| 346 |
+
4. ONLY after explicit approval, call:
|
| 347 |
+
consolidate_into_themes(run_key, {"AI & ML": [0,1,5], "VR": [2,3], ...})
|
| 348 |
+
|
| 349 |
+
5. Present consolidated themes with NEW centroid evidence:
|
| 350 |
+
|
| 351 |
+
"π― **Themes consolidated** (new centroids computed)
|
| 352 |
+
|
| 353 |
+
**Theme: AI & ML in Tourism** (294 sent, 83 papers)
|
| 354 |
+
Merged from: Codes 0, 1, 5
|
| 355 |
+
New evidence (recalculated after merge):
|
| 356 |
+
β "Neural networks predict tourist behavior..." β _Paper #42_
|
| 357 |
+
β "Deep learning optimizes hotel pricing..." β _Paper #78_
|
| 358 |
+
β ...
|
| 359 |
+
|
| 360 |
+
β
Themes look correct? Or adjust?"
|
| 361 |
+
|
| 362 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 363 |
+
B&C PHASE 4: REVIEWING THEMES
|
| 364 |
+
"Checking if themes work in relation to coded extracts
|
| 365 |
+
and the entire data set"
|
| 366 |
+
Tool: (conversation β no tool call, agent reasons)
|
| 367 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 368 |
+
|
| 369 |
+
After consolidation, perform SATURATION CHECK:
|
| 370 |
+
|
| 371 |
+
1. Analyze ALL theme pairs for remaining merge potential:
|
| 372 |
+
|
| 373 |
+
"π **Phase 4: Reviewing Themes** β Saturation Analysis
|
| 374 |
+
|
| 375 |
+
| Theme A | Theme B | Overlap | Merge? | Why |
|
| 376 |
+
|-------------|-------------|---------|--------|--------------------|
|
| 377 |
+
| AI & ML | VR Tourism | None | β | Different domains |
|
| 378 |
+
| AI & ML | ChatGPT | Low | β | GenAI β predictive |
|
| 379 |
+
| Social Media| VR Tourism | None | β | Different channels |
|
| 380 |
+
|
| 381 |
+
2. If NO themes can merge:
|
| 382 |
+
"β **Saturation reached** (per Braun & Clarke, 2006:
|
| 383 |
+
'when refinements are not adding anything substantial, stop')
|
| 384 |
+
|
| 385 |
+
Reasoning:
|
| 386 |
+
1. No remaining themes share a research area
|
| 387 |
+
2. No keyword overlap between any theme pair
|
| 388 |
+
3. Evidence sentences are semantically distinct
|
| 389 |
+
4. Further merging would lose research distinctions
|
| 390 |
+
|
| 391 |
+
**Do you agree iteration is complete?**
|
| 392 |
+
β’ `agree` β finalize, move to Phase 5
|
| 393 |
+
β’ `try merging X and Y` β override my recommendation"
|
| 394 |
+
|
| 395 |
+
3. If themes CAN still merge:
|
| 396 |
+
"π **Further consolidation possible:**
|
| 397 |
+
Themes 'Social Media' and 'Digital Marketing' share 3 keywords.
|
| 398 |
+
Suggest merging. Want me to consolidate?"
|
| 399 |
+
|
| 400 |
+
4. β STOP HERE. Do NOT proceed to Phase 5.
|
| 401 |
+
Say: "Saturation analysis complete. Review themes in the table.
|
| 402 |
+
Edit Approve/Rename columns, then click Submit Review to Agent."
|
| 403 |
+
|
| 404 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 405 |
+
B&C PHASE 5: DEFINING AND NAMING THEMES
|
| 406 |
+
"Generating clear definitions and names"
|
| 407 |
+
Tool: (conversation β agent + researcher co-create)
|
| 408 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 409 |
+
|
| 410 |
+
After saturation confirmed:
|
| 411 |
+
|
| 412 |
+
1. Present final theme definitions:
|
| 413 |
+
|
| 414 |
+
"π **Phase 5: Theme Definitions**
|
| 415 |
+
|
| 416 |
+
**Theme 1: AI & Machine Learning in Tourism**
|
| 417 |
+
Definition: Research applying predictive ML/DL methods
|
| 418 |
+
(neural networks, random forests, deep learning) to tourism
|
| 419 |
+
problems including demand forecasting, pricing optimization,
|
| 420 |
+
and visitor behavior classification.
|
| 421 |
+
Scope: 294 sentences across 83 papers.
|
| 422 |
+
Research area: technology adoption. Confidence: High.
|
| 423 |
+
|
| 424 |
+
**Theme 2: Virtual Reality & Metaverse Tourism**
|
| 425 |
+
Definition: ...
|
| 426 |
+
|
| 427 |
+
**Want to rename any theme? Adjust any definition?**"
|
| 428 |
+
|
| 429 |
+
2. β STOP HERE. Do NOT proceed to Phase 5.5 or second run.
|
| 430 |
+
Say: "Final theme names ready. Review in the table below.
|
| 431 |
+
Edit Rename To column if any names need changing, then click Submit Review."
|
| 432 |
+
|
| 433 |
+
3. ONLY after approval: repeat ALL of Phase 2-5 for the SECOND run config.
|
| 434 |
+
(If first run was "abstract", now run "title" β or vice versa)
|
| 435 |
+
|
| 436 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 437 |
+
PHASE 5.5: TAXONOMY COMPARISON
|
| 438 |
+
"Grounding themes against established IS research categories"
|
| 439 |
+
Tool: compare_with_taxonomy
|
| 440 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 441 |
+
|
| 442 |
+
After BOTH runs have finalized themes (Phase 5 complete for each):
|
| 443 |
+
|
| 444 |
+
1. Call compare_with_taxonomy(run_key) for each completed run.
|
| 445 |
+
β Mistral maps each theme to PAJAIS taxonomy (Jiang et al., 2019)
|
| 446 |
+
β Flags themes as MAPPED (known category) or NOVEL (emerging)
|
| 447 |
+
|
| 448 |
+
2. Present the mapping with researcher review:
|
| 449 |
+
|
| 450 |
+
"π **Phase 5.5: Taxonomy Comparison** (Jiang et al., 2019)
|
| 451 |
+
|
| 452 |
+
**Mapped to established PAJAIS categories:**
|
| 453 |
+
|
| 454 |
+
| Your Theme | β | PAJAIS Category | Confidence | Reasoning |
|
| 455 |
+
|---|---|---|---|---|
|
| 456 |
+
| AI & ML in Tourism | β | Business Intelligence & Analytics | high | ML/DL methods for prediction |
|
| 457 |
+
| VR & Metaverse | β | Human Behavior & HCI | high | Immersive technology interaction |
|
| 458 |
+
| Social Media Tourism | β | Social Media & Business Impact | high | Direct category match |
|
| 459 |
+
|
| 460 |
+
**π NOVEL themes (not in existing PAJAIS taxonomy):**
|
| 461 |
+
|
| 462 |
+
| Your Theme | Status | Reasoning |
|
| 463 |
+
|---|---|---|
|
| 464 |
+
| ChatGPT in Tourism | π NOVEL | Generative AI is post-2019, not in taxonomy |
|
| 465 |
+
| Sustainable AI Tourism | π NOVEL | Cross-cuts Green IT + Analytics |
|
| 466 |
+
|
| 467 |
+
These NOVEL themes represent **emerging research areas** that
|
| 468 |
+
extend beyond the established PAJAIS classification.
|
| 469 |
+
|
| 470 |
+
**Researcher: Review this mapping.**
|
| 471 |
+
β’ `approve` β mapping is correct
|
| 472 |
+
β’ `theme X should map to Y instead` β adjust
|
| 473 |
+
β’ `merge novel themes into one` β consolidate emerging themes
|
| 474 |
+
β’ `this novel theme is actually part of [category]` β reclassify"
|
| 475 |
+
|
| 476 |
+
3. β STOP HERE. Do NOT proceed to Phase 6.
|
| 477 |
+
Say: "PAJAIS taxonomy mapping complete. Review in the table below.
|
| 478 |
+
Edit Approve column for any mappings you disagree with, then click Submit Review."
|
| 479 |
+
|
| 480 |
+
4. ONLY after approval, ask:
|
| 481 |
+
"Want me to consolidate any novel themes with existing ones?
|
| 482 |
+
Or keep them separate as evidence of emerging research areas?"
|
| 483 |
+
|
| 484 |
+
5. β STOP AGAIN. WAIT for this answer before generating report.
|
| 485 |
+
|
| 486 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 487 |
+
B&C PHASE 6: PRODUCING THE REPORT
|
| 488 |
+
"Selection of vivid, compelling extract examples"
|
| 489 |
+
Tools: generate_comparison_csv β export_narrative
|
| 490 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 491 |
+
|
| 492 |
+
After BOTH run configs have finalized themes:
|
| 493 |
+
|
| 494 |
+
1. Call generate_comparison_csv()
|
| 495 |
+
β Compares themes across abstract vs title configs
|
| 496 |
+
|
| 497 |
+
2. Say briefly in chat:
|
| 498 |
+
"Cross-run comparison complete. Check the Download tab for:
|
| 499 |
+
β’ comparison.csv β abstract vs title themes side by side
|
| 500 |
+
Review the themes in the table below.
|
| 501 |
+
Click Submit Review to confirm, then I'll generate the narrative."
|
| 502 |
+
|
| 503 |
+
3. β STOP. Wait for Submit Review.
|
| 504 |
+
|
| 505 |
+
4. After approval, call export_narrative(run_key)
|
| 506 |
+
β Mistral writes 500-word paper section referencing:
|
| 507 |
+
methodology, B&C phases, key themes, limitations
|
| 508 |
+
|
| 509 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 510 |
+
CRITICAL RULES
|
| 511 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 512 |
+
|
| 513 |
+
- ALWAYS follow B&C phases in order. Name each phase explicitly.
|
| 514 |
+
- ALWAYS wait for researcher confirmation between phases.
|
| 515 |
+
- ALWAYS show evidence sentences with paper metadata.
|
| 516 |
+
- ALWAYS cite B&C (2006) when discussing iteration or saturation.
|
| 517 |
+
- ALWAYS cite Grootendorst (2022) when explaining cluster behavior.
|
| 518 |
+
- ALWAYS call label_topics_with_llm before presenting topic labels.
|
| 519 |
+
- ALWAYS call compare_with_taxonomy before claiming PAJAIS mappings.
|
| 520 |
+
- Use threshold=0.7 as default (lower = more topics, higher = fewer).
|
| 521 |
+
- If too many topics (>200), suggest increasing threshold to 0.8.
|
| 522 |
+
- If too few topics (<20), suggest decreasing threshold to 0.6.
|
| 523 |
+
- NEVER skip Phase 4 saturation check or Phase 5.5 taxonomy comparison.
|
| 524 |
+
- NEVER proceed to Phase 6 without both runs completing Phase 5.5.
|
| 525 |
+
- NEVER invent topic labels β only present labels returned by Tool 3.
|
| 526 |
+
- NEVER cite paper IDs, titles, or sentences from memory β only from tool output.
|
| 527 |
+
- NEVER claim a theme is NOVEL or MAPPED without calling Tool 5 first.
|
| 528 |
+
- NEVER fabricate sentence counts or paper counts β only use tool-reported numbers.
|
| 529 |
+
- If a tool returns an error, explain clearly and continue.
|
| 530 |
+
- Keep responses concise. Tables + evidence, not paragraphs.
|
| 531 |
+
|
| 532 |
+
Current date: """ + datetime.now().strftime("%Y-%m-%d")
|
| 533 |
+
|
| 534 |
+
# Import-time breadcrumb: confirms the prompt constant was built and how big it is.
print(">>> agent.py: SYSTEM_PROMPT loaded ({} chars)".format(len(SYSTEM_PROMPT)))
|
| 535 |
+
|
| 536 |
+
|
| 537 |
+
def get_local_tools():
    """Return the 7 BERTopic tool callables provided by the ``tools`` module.

    The import is deliberately deferred to call time so that importing
    ``agent`` (e.g. just for SYSTEM_PROMPT) stays cheap.
    """
    print(">>> agent.py: loading tools...")
    from tools import get_all_tools as _load_tools
    return _load_tools()
|
app.py
ADDED
|
@@ -0,0 +1,413 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""app.py β Compact Gradio + Mistral + BERTopic. Version 2.1.0 | 4 April 2026. ZERO for/while/if.
|
| 2 |
+
|
| 3 |
+
FUNCTIONS IN THIS FILE:
|
| 4 |
+
_latest_output() β Scans /tmp for newest rq4_* file β feeds download button
|
| 5 |
+
respond() β Core chat handler: takes message + history + file β yields agent response
|
| 6 |
+
gr.Blocks() β One-page UI: header + upload + chatbot + input + download
|
| 7 |
+
"""
|
| 8 |
+
import os
|
| 9 |
+
import glob
|
| 10 |
+
import gradio as gr
|
| 11 |
+
from langchain_mistralai import ChatMistralAI
|
| 12 |
+
from langgraph.prebuilt import create_react_agent
|
| 13 |
+
from langgraph.checkpoint.memory import MemorySaver
|
| 14 |
+
from agent import SYSTEM_PROMPT, get_local_tools
|
| 15 |
+
|
| 16 |
+
print(">>> app.py: imports complete")
|
| 17 |
+
|
| 18 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 19 |
+
# AGENT SETUP β Mistral brain + 5 BERTopic tools
|
| 20 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 21 |
+
llm = ChatMistralAI(model="mistral-small-latest", temperature=0, timeout=300)
|
| 22 |
+
tools = get_local_tools()
|
| 23 |
+
agent = create_react_agent(model=llm, tools=tools, prompt=SYSTEM_PROMPT, checkpointer=MemorySaver())
|
| 24 |
+
print(f">>> app.py: agent ready ({len(tools)} tools, Mistral Large)")
|
| 25 |
+
|
| 26 |
+
_msg_count = 0
|
| 27 |
+
_uploaded = {"path": ""}
|
| 28 |
+
|
| 29 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 30 |
+
# COMPACT HEADER β fits in ~60px
|
| 31 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 32 |
+
HEADER_HTML = """
|
| 33 |
+
<style>
|
| 34 |
+
@import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@400;500;600;700&display=swap');
|
| 35 |
+
.gradio-container {font-family: 'DM Sans', sans-serif !important; padding: 4px 8px !important; max-width: 100% !important;}
|
| 36 |
+
footer {display: none !important;}
|
| 37 |
+
.section-box {border: 1px solid #e2e8f0; border-radius: 8px; padding: 8px 12px; margin-bottom: 6px; background: #fafbfc;}
|
| 38 |
+
.section-label {font-size: 0.75em; font-weight: 600; color: #64748b; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 4px;}
|
| 39 |
+
</style>
|
| 40 |
+
<div style="background: linear-gradient(135deg, #0f172a 0%, #1e293b 50%, #334155 100%); border-radius: 8px; padding: 8px 16px; margin-bottom: 4px; color: white; display: flex; align-items: center; gap: 10px;">
|
| 41 |
+
<span style="font-size: 1.3em;">π¬</span>
|
| 42 |
+
<div>
|
| 43 |
+
<span style="font-size: 1em; font-weight: 700; color: #e0e0ff;">Topic Modelling β Agentic AI</span>
|
| 44 |
+
<span style="font-size: 0.65em; color: #94a3b8; margin-left: 8px;">Mistral π«π· Β· Cosine Clustering Β· 384d Β· B&C Thematic Analysis</span>
|
| 45 |
+
</div>
|
| 46 |
+
</div>
|
| 47 |
+
"""
|
| 48 |
+
|
| 49 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 50 |
+
# FUNCTION 1: Find latest output for download
|
| 51 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 52 |
+
def _latest_output():
    """Gather every rq4_* artifact under /tmp, ordered by pipeline phase.

    The returned list feeds the gr.File download component; returns None
    when no output has been produced yet.
    """
    # Phase order: summaries -> labels -> themes -> taxonomy -> charts -> comparison -> narrative
    phase_order = {"summaries": 1, "labels": 2, "themes": 3, "taxonomy": 4,
                   "emb": 0, "intertopic": 5, "bars": 6, "hierarchy": 7,
                   "heatmap": 8, "comparison": 9, "narrative": 10}
    patterns = ("/tmp/rq4_*.csv", "/tmp/rq4_*.html",
                "/tmp/rq4_*.txt", "/tmp/checkpoints/rq4_*.json")
    found = [path for pattern in patterns for path in glob.glob(pattern)]
    # A file's rank is the summed weight of every phase keyword its name contains;
    # sorted() is stable, so ties keep glob/pattern order exactly like the original.
    rank = lambda path: sum(v * (k in path) for k, v in phase_order.items())
    return sorted(found, key=rank) or None
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def _build_progress():
    """Render the B&C phase pipeline as a one-line HTML strip.

    A phase pill turns green once its checkpoint/output file exists on disk.
    NOTE(review): the circled-digit labels, arrow separator and check marks
    are decoded from mojibake in the original source — confirm against the
    rendered UI.
    """
    checks = [
        ("① Load", bool(glob.glob("/tmp/checkpoints/rq4_*_summaries.json") or glob.glob("/tmp/checkpoints/rq4_*_emb.npy"))),
        ("② Codes", bool(glob.glob("/tmp/checkpoints/rq4_*_labels.json"))),
        ("③ Themes", bool(glob.glob("/tmp/checkpoints/rq4_*_themes.json"))),
        ("④ Review", bool(glob.glob("/tmp/checkpoints/rq4_*_themes.json"))),
        ("⑤ Names", bool(glob.glob("/tmp/checkpoints/rq4_*_themes.json"))),
        ("⑤½ PAJAIS", bool(glob.glob("/tmp/checkpoints/rq4_*_taxonomy_map.json"))),
        ("⑥ Report", bool(glob.glob("/tmp/rq4_comparison.csv") or glob.glob("/tmp/rq4_narrative.txt"))),
    ]
    # bool-keyed palette keeps the file's no-if style: (background, foreground, mark)
    palette = {True: ("#22c55e", "white", "✅"), False: ("#e2e8f0", "#64748b", "⬜")}

    def _pill(label, done):
        bg, fg, mark = palette[done]
        return (f"<span style='padding:3px 8px;border-radius:4px;"
                f"background:{bg};color:{fg};"
                f"font-weight:600;font-size:0.8em;'>"
                f"{label} {mark}</span>")

    items = " → ".join(_pill(label, done) for label, done in checks)
    return "<div style='padding:6px 0;text-align:center;'>" + items + "</div>"
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 97 |
+
# FUNCTION 2: Chat handler β core of the app
|
| 98 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 99 |
+
def respond(message, chat_history, uploaded_file):
    """Handle one chat turn (generator — yields twice per call):
    1. Store uploaded file path (if new upload)
    2. Append file + phase context to the message so the agent knows where the CSV is
    3. Yield a progress bubble immediately (user sees instant feedback)
    4. Invoke agent (Mistral brain decides which tools to call)
    5. Replace progress bubble with agent's actual response
    6. Update download link to latest output file

    Uses single thread_id="session" so agent remembers across turns.
    Agent asks clarification FIRST (via SYSTEM_PROMPT) before running heavy tools.

    Yields:
        (chat_history, textbox_value, download_files) tuples for Gradio.
    """
    global _msg_count
    _msg_count += 1

    # Store file path — no if/else, `or` short-circuit keeps the previous upload
    _uploaded["path"] = uploaded_file or _uploaded.get("path", "")

    # Guard: tell agent when no file uploaded (prevents hallucinated filepath).
    # `str * bool` evaluates to "" when False, so `or` falls through to the warning.
    file_note = (f"\n[CSV file at: {_uploaded['path']}]" * bool(_uploaded["path"])
                 ) or "\n[No CSV uploaded yet β ask user to upload a file first]"

    # Context: tell agent what phase we're in based on checkpoint files on disk
    phase_context = (
        "\n[Phase context: labels exist]" * bool(glob.glob("/tmp/checkpoints/rq4_*_labels.json"))
        or "\n[Phase context: embeddings exist]" * bool(glob.glob("/tmp/checkpoints/rq4_*_emb.npy"))
        or "\n[Phase context: fresh start]"
    )

    # Empty message falls back to a default analysis request
    text = ((message or "").strip() or "Analyze my Scopus CSV") + file_note + phase_context
    print(f"\n{'='*60}\n>>> MSG #{_msg_count}: '{text[:120]}'\n{'='*60}")

    # Yield progress bubble immediately — user sees an instant response
    chat_history = chat_history + [
        {"role": "user", "content": (message or "").strip()},
        {"role": "assistant", "content": "π¬ **Working...** _Agent is thinking..._"},
    ]
    yield chat_history, "", _latest_output()

    # Shared session — agent remembers across messages (loaded CSV, phase, etc.)
    # handle_tool_error=True on all tools prevents session poisoning from failures
    result = agent.invoke(
        {"messages": [("human", text)]},
        config={"configurable": {"thread_id": "session"}},
    )
    response = result["messages"][-1].content
    print(f">>> Response ({len(response)} chars)")

    # Replace progress bubble with actual response
    chat_history[-1] = {"role": "assistant", "content": response}
    yield chat_history, "", _latest_output()
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 152 |
+
# UI LAYOUT β Everything fits ONE screen (~500px)
|
| 153 |
+
#
|
| 154 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
| 155 |
+
# β π¬ BERTopic Agent (compact) β ~55px
|
| 156 |
+
# ββββββββββββββββββββββββββββββββββββββ€
|
| 157 |
+
# β [π Upload CSV] β ~35px
|
| 158 |
+
# ββββββββββββββββββββββββββββββββββββββ€
|
| 159 |
+
# β π¬ Chat bubbles (cloud style) β
|
| 160 |
+
# β User: "Analyze my data" β ~320px
|
| 161 |
+
# β Agent: "I found 1390 papers. β
|
| 162 |
+
# β Which config? All 3?" β
|
| 163 |
+
# ββββββββββββββββββββββββββββββββββββββ€
|
| 164 |
+
# β [Type message... ] [β] β ~35px
|
| 165 |
+
# ββββββββββββββββββββββββββββββββββββββ€
|
| 166 |
+
# β π₯ Download β ~35px
|
| 167 |
+
# ββββββββββββββββββββββββββββββββββββββ
|
| 168 |
+
# TOTAL: ~480px β fits any screen
|
| 169 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 170 |
+
print(">>> Building UI...")
|
| 171 |
+
with gr.Blocks(title="Topic Modelling β Agentic AI") as demo:
|
| 172 |
+
gr.HTML(HEADER_HTML)
|
| 173 |
+
|
| 174 |
+
# ββββββββββββββββββββββββββββββββββββββββββ
|
| 175 |
+
# SECTION 1: DATA INPUT
|
| 176 |
+
# ββββββββββββββββββββββββββββββββββββββββββ
|
| 177 |
+
gr.HTML('<div class="section-label">β DATA INPUT</div>')
|
| 178 |
+
with gr.Group():
|
| 179 |
+
with gr.Row():
|
| 180 |
+
upload = gr.File(label="οΏ½οΏ½οΏ½οΏ½ Upload Scopus CSV", file_types=[".csv"])
|
| 181 |
+
gr.Markdown("**Upload your CSV** then type `run abstract only` in the chat below")
|
| 182 |
+
|
| 183 |
+
# ββββββββββββββββββββββββββββββββββββββββββ
|
| 184 |
+
# PHASE PROGRESS PIPELINE
|
| 185 |
+
# ββββββββββββββββββββββββββββββββββββββββββ
|
| 186 |
+
phase_progress = gr.HTML(value=_build_progress())
|
| 187 |
+
|
| 188 |
+
# ββββββββββββββββββββββββββββββββββββββββββ
|
| 189 |
+
# SECTION 2: AGENT CONVERSATION
|
| 190 |
+
# ββββββββββββββββββββββββββββββββββββββββββ
|
| 191 |
+
gr.HTML('<div class="section-label">β‘ AGENT CONVERSATION β follow the prompts below</div>')
|
| 192 |
+
with gr.Group():
|
| 193 |
+
chatbot = gr.Chatbot(height=250, show_label=False,
|
| 194 |
+
placeholder="Upload your Scopus CSV above, then type: run abstract only")
|
| 195 |
+
with gr.Row():
|
| 196 |
+
msg = gr.Textbox(
|
| 197 |
+
placeholder="run Β· approve Β· show topic 4 papers Β· group 0 1 5 Β· done",
|
| 198 |
+
show_label=False, scale=9, lines=1, max_lines=1, container=False)
|
| 199 |
+
send = gr.Button("Send", variant="primary", scale=1, min_width=70)
|
| 200 |
+
|
| 201 |
+
# ββββββββββββββββββββββββββββββββββββββββββ
|
| 202 |
+
# SECTION 3: RESULTS
|
| 203 |
+
# ββββββββββββββββββββββββββββββββββββββββββ
|
| 204 |
+
gr.HTML('<div class="section-label">β’ RESULTS β review table, charts, downloads</div>')
|
| 205 |
+
with gr.Group():
|
| 206 |
+
with gr.Tabs():
|
| 207 |
+
with gr.Tab("π Review Table"):
|
| 208 |
+
gr.Markdown("*Edit Approve / Rename To / Your Reasoning β click Submit. "
|
| 209 |
+
"Type `show topic 4 papers` in chat to see paper list.*")
|
| 210 |
+
review_table = gr.Dataframe(
|
| 211 |
+
headers=["#", "Topic Label", "Top Evidence Sentence",
|
| 212 |
+
"Sentences", "Papers", "Approve", "Rename To", "Your Reasoning"],
|
| 213 |
+
datatype=["number", "str", "str", "number", "number", "str", "str", "str"],
|
| 214 |
+
column_count=(8, "fixed"),
|
| 215 |
+
interactive=True,
|
| 216 |
+
row_count=(1, "dynamic"),
|
| 217 |
+
)
|
| 218 |
+
submit_review = gr.Button("β
Submit Review to Agent", variant="primary")
|
| 219 |
+
|
| 220 |
+
# Paper viewer β select topic to see papers
|
| 221 |
+
gr.Markdown("---")
|
| 222 |
+
gr.Markdown("**π View papers in a topic:**")
|
| 223 |
+
with gr.Row():
|
| 224 |
+
topic_num = gr.Number(label="Topic #", value=0, precision=0, minimum=0, scale=1)
|
| 225 |
+
view_papers_btn = gr.Button("Show Papers", scale=1)
|
| 226 |
+
paper_list = gr.Textbox(label="Papers in selected topic", lines=8,
|
| 227 |
+
interactive=False)
|
| 228 |
+
|
| 229 |
+
with gr.Tab("π Charts"):
|
| 230 |
+
chart_selector = gr.Dropdown(choices=[], label="Select Chart", interactive=True)
|
| 231 |
+
chart_display = gr.HTML(
|
| 232 |
+
value="<div style='height:350px;display:flex;align-items:center;justify-content:center;"
|
| 233 |
+
"color:#94a3b8;border:1px dashed #cbd5e1;border-radius:8px;'>"
|
| 234 |
+
"Charts appear after BERTopic runs</div>")
|
| 235 |
+
|
| 236 |
+
with gr.Tab("π₯ Download"):
|
| 237 |
+
gr.Markdown(
|
| 238 |
+
"**Files by Phase (per run: abstract / title):**\n\n"
|
| 239 |
+
"**Phase 2 β Discovery:** `summaries.json` (raw topics) Β· `emb.npy` (embeddings)\n\n"
|
| 240 |
+
"**Phase 2 β Labeling:** `labels.json` (Mistral-labeled topics)\n\n"
|
| 241 |
+
"**Phase 2 β Charts:** `intertopic.html` Β· `bars.html` Β· `hierarchy.html` Β· `heatmap.html`\n\n"
|
| 242 |
+
"**Phase 3 β Themes:** `themes.json` (consolidated themes)\n\n"
|
| 243 |
+
"**Phase 5.5 β Taxonomy:** `taxonomy_map.json` (PAJAIS mapped vs NOVEL)\n\n"
|
| 244 |
+
"**Phase 6 β Report:** `comparison.csv` (abstract vs title) Β· `narrative.txt` (500-word draft)"
|
| 245 |
+
)
|
| 246 |
+
download = gr.File(label="All output files", file_count="multiple")
|
| 247 |
+
|
| 248 |
+
def _load_chart(chart_name):
    """Load a generated Plotly chart into an iframe.

    gr.HTML strips <script> tags, so the chart HTML is escaped and embedded
    via iframe `srcdoc`, where its scripts are allowed to execute.

    Args:
        chart_name: basename of an rq4_*.html file under /tmp (from the dropdown).

    Returns:
        An <iframe> snippet, or a placeholder <div> when the file is missing/empty.
    """
    import html as html_mod
    import os
    from pathlib import Path
    # basename() guards against path segments sneaking into the dropdown value.
    chart_path = Path("/tmp") / os.path.basename(str(chart_name))
    # read_text() closes the handle (the previous open().read() leaked it) and
    # pins UTF-8, matching how the Plotly charts are written.
    content = chart_path.is_file() and chart_path.read_text(encoding="utf-8") or ""
    escaped = html_mod.escape(content) * bool(content)
    return (f'<iframe srcdoc="{escaped}" width="100%" height="450" '
            f'frameborder="0" sandbox="allow-scripts allow-same-origin"></iframe>'
            ) * bool(escaped) or "<div style='padding:20px;color:#94a3b8;'>Select a chart above</div>"
|
| 257 |
+
|
| 258 |
+
def _get_chart_choices():
    """Return the basenames of every rq4_*.html chart currently in /tmp."""
    return [os.path.basename(p) for p in sorted(glob.glob("/tmp/rq4_*.html"))]
|
| 262 |
+
|
| 263 |
+
def _load_review_table():
    """Build the 8-column review table from the most advanced checkpoint JSON.

    Priority: taxonomy_map > themes > labels > summaries.
    Columns: #, Label, Evidence/Mapping, Sentences, Papers, Approve, Rename, Reasoning.
    NOTE(review): the arrow in the taxonomy evidence string is decoded from
    mojibake in the original source — confirm against the rendered table.
    """
    import json
    taxonomy_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_taxonomy_map.json"))
    theme_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_themes.json"))
    label_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_labels.json"))
    summary_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_summaries.json"))

    # Most advanced phase wins; empty string when nothing exists yet.
    path = ((taxonomy_files and taxonomy_files[-1])
            or (theme_files and theme_files[-1])
            or (label_files and label_files[-1])
            or (summary_files and summary_files[-1]) or "")
    is_taxonomy = bool(taxonomy_files and taxonomy_files[-1] == path)
    data = (os.path.exists(path) and json.load(open(path))) or []

    # Taxonomy entries lack counts — borrow them from the themes checkpoint.
    theme_lookup = {}
    (is_taxonomy and theme_files) and theme_lookup.update({
        t.get("label", ""): t for t in json.load(open(theme_files[-1]))})

    def _evidence(item):
        # PAJAIS mapping for taxonomy rows; nearest-centroid sentence otherwise.
        return ((is_taxonomy and f"→ {item.get('pajais_match', '?')} | {item.get('reasoning', '')}"[:120])
                or (item.get("nearest", [{}])[0].get("sentence", "")[:120] + "...") * bool(item.get("nearest")))

    def _count(item, key):
        # Prefer the themes checkpoint's counts when merging taxonomy rows.
        return theme_lookup.get(item.get("label", ""), item).get(key, item.get(key, 0))

    rows = [[idx,
             item.get("label", item.get("top_words", "")[:60]),
             _evidence(item),
             _count(item, "sentence_count"),
             _count(item, "paper_count"),
             "yes",
             "",
             ""]
            for idx, item in enumerate(data)]
    return rows or [[0, "No data yet", "", 0, 0, "", "", ""]]
|
| 301 |
+
|
| 302 |
+
def _show_papers(topic_id):
    """Show 5 nearest centroid sentences (evidence) + all paper titles for selected topic.

    Scans both runs' checkpoint files (labels preferred over raw summaries),
    finds every topic entry matching `topic_id`, and formats one text section
    per run. Returns a not-found message when no run contains the topic.
    """
    import json
    topic_id = int(topic_id)
    summary_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_summaries.json"))
    label_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_labels.json"))
    # Labeled checkpoints supersede raw summaries once Tool 3 has run.
    all_files = label_files or summary_files
    # Run name ("abstract"/"title") is the 2nd underscore-separated filename token.
    source_labels = list(map(
        lambda f: os.path.basename(f).split("_")[1], all_files))
    all_data = list(map(lambda f: json.load(open(f)), all_files))

    lines = []
    # Side-effecting map: for each (run_label, topics) pair, append a formatted
    # section for every topic whose id matches. `and` short-circuits the append
    # for non-matching topics (file-wide no-if style).
    list(map(lambda pair: list(map(
        lambda t: (t.get("topic_id") == topic_id) and lines.append(
            f"βββ {pair[0].upper()} β Topic {topic_id}: "
            f"{t.get('label', t.get('top_words','')[:50])} βββ\n"
            f"{t.get('sentence_count', 0)} sentences from {t.get('paper_count', 0)} papers\n"
            f"AI Reasoning: {t.get('reasoning', 'not yet labeled')}\n\n"
            f"ββ 5 NEAREST CENTROID SENTENCES (evidence) ββ\n"
            + "\n".join(list(map(
                lambda i: f" {i+1}. \"{t['nearest'][i]['sentence'][:200]}\"\n"
                f" Paper: {t['nearest'][i].get('title', '')[:100]}",
                range(min(5, len(t.get('nearest', [])))))))
            + "\n\nββ ALL PAPER TITLES ββ\n"
            + "\n".join(list(map(
                lambda i: f" {i+1}. {t['paper_titles'][i]}",
                range(len(t.get('paper_titles', []))))))
        ),
        pair[1])),
        zip(source_labels, all_data)))

    return "\n\n".join(lines) or f"Topic {topic_id} not found."
|
| 334 |
+
|
| 335 |
+
view_papers_btn.click(_show_papers, [topic_num], [paper_list])
|
| 336 |
+
|
| 337 |
+
def _submit_review(table_data, chat_history):
    """Convert edited review table into a message for the agent (generator).

    Researcher's edits (approve/rename/reasoning) become natural language:
    a non-empty "Rename To" wins, otherwise Approve "y…" / Reject "n…".
    Yields a progress bubble first, then the agent's response plus refreshed
    download / chart / table / progress widgets.
    """
    rows = table_data.values.tolist()
    # `str * bool` keeps only the clauses that apply to each row (no-if style):
    # r[0]=#, r[1]=label, r[5]=approve, r[6]=rename-to, r[7]=reasoning.
    lines = list(map(
        lambda r: (
            f"Topic {int(r[0])}: "
            + (f"RENAME to '{r[6]}'" * bool(str(r[6]).strip()))
            + (f"APPROVE '{r[1]}'" * (not bool(str(r[6]).strip())) * (str(r[5]).lower().startswith("y")))
            + (f"REJECT" * (str(r[5]).lower().startswith("n")))
            + (f" β reason: {r[7]}" * bool(str(r[7]).strip()))
        ), rows))
    review_msg = "Review decisions:\n" + "\n".join(lines)
    print(f">>> Review submitted: {review_msg[:200]}")

    # Instant feedback bubble before the (slow) agent call.
    chat_history = chat_history + [
        {"role": "user", "content": review_msg},
        {"role": "assistant", "content": "π¬ **Processing review decisions...**"},
    ]
    yield chat_history, _latest_output(), gr.update(), gr.update(), gr.update(), _build_progress()

    # Same shared thread_id as respond(): the agent keeps session memory.
    result = agent.invoke(
        {"messages": [("human", review_msg)]},
        config={"configurable": {"thread_id": "session"}},
    )
    response = result["messages"][-1].content
    chat_history[-1] = {"role": "assistant", "content": response}
    # Reload table with updated themes/labels
    table_data = _load_review_table()
    yield (chat_history, _latest_output(),
           gr.update(choices=_get_chart_choices()), gr.update(),
           gr.update(value=table_data), _build_progress())
|
| 369 |
+
|
| 370 |
+
chart_selector.change(_load_chart, [chart_selector], [chart_display])
|
| 371 |
+
submit_review.click(_submit_review, [review_table, chatbot],
|
| 372 |
+
[chatbot, download, chart_selector, chart_display, review_table, phase_progress])
|
| 373 |
+
|
| 374 |
+
def respond_with_viz(message, chat_history, uploaded_file):
    """Wrap respond() and update chart dropdown + review table after each message.

    Generator with exactly two yields, mirroring respond()'s two yields but
    extended with chart-selector, chart-display, review-table and progress
    widget updates.
    """
    gen = respond(message, chat_history, uploaded_file)
    # First yield (progress bubble) — pass through; only refresh chart choices.
    hist, txt, dl = next(gen)
    yield hist, txt, dl, gr.update(choices=_get_chart_choices()), gr.update(), gr.update(), _build_progress()
    # Second yield (final response) — populate table, charts, and progress strip.
    hist, txt, dl = next(gen)
    choices = _get_chart_choices()
    # Newest chart auto-loads; gr.update() (no-op) when no charts exist yet.
    first_chart = (choices and _load_chart(choices[-1])) or gr.update()
    table_data = _load_review_table()
    yield (hist, txt, dl,
           gr.update(choices=choices, value=(choices and choices[-1]) or None),
           first_chart,
           gr.update(value=table_data), _build_progress())
|
| 389 |
+
|
| 390 |
+
msg.submit(respond_with_viz, [msg, chatbot, upload],
|
| 391 |
+
[chatbot, msg, download, chart_selector, chart_display, review_table, phase_progress])
|
| 392 |
+
send.click(respond_with_viz, [msg, chatbot, upload],
|
| 393 |
+
[chatbot, msg, download, chart_selector, chart_display, review_table, phase_progress])
|
| 394 |
+
|
| 395 |
+
def _auto_load_csv(uploaded_file, chat_history):
    """Auto-trigger analysis when CSV is uploaded — stats appear without typing.

    Same two-yield shape as respond_with_viz(), but driven by the upload
    event, so there is no textbox output to clear (txt is discarded).
    """
    gen = respond("Analyze my Scopus CSV", chat_history, uploaded_file)
    # Progress bubble first.
    hist, txt, dl = next(gen)
    yield hist, dl, gr.update(), gr.update(), gr.update(), _build_progress()
    # Final agent response + refreshed chart/table/progress widgets.
    hist, txt, dl = next(gen)
    choices = _get_chart_choices()
    first_chart = (choices and _load_chart(choices[-1])) or gr.update()
    table_data = _load_review_table()
    yield (hist, dl,
           gr.update(choices=choices, value=(choices and choices[-1]) or None),
           first_chart,
           gr.update(value=table_data), _build_progress())
|
| 408 |
+
|
| 409 |
+
upload.change(_auto_load_csv, [upload, chatbot],
|
| 410 |
+
[chatbot, download, chart_selector, chart_display, review_table, phase_progress])
|
| 411 |
+
|
| 412 |
+
print(">>> Launching...")
|
| 413 |
+
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
|
requirements.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# requirements.txt v2.0 | 4 April 2026
|
| 2 |
+
# BERTopic + Mistral LLM (French, Apache 2.0, GDPR-safe)
|
| 3 |
+
langchain
|
| 4 |
+
langchain-mistralai
|
| 5 |
+
langgraph
|
| 6 |
+
langchain-core
|
| 7 |
+
bertopic
|
| 8 |
+
sentence-transformers
|
| 9 |
+
numpy
|
| 10 |
+
pandas
|
| 11 |
+
plotly
|
| 12 |
+
kaleido
|
| 13 |
+
gradio
|
tools.py
ADDED
|
@@ -0,0 +1,623 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""tools.py β Sentence-level BERTopic pipeline + Mistral LLM. Version 3.0.0 | 4 April 2026. ZERO for/while/if.
|
| 2 |
+
|
| 3 |
+
PIPELINE:
|
| 4 |
+
Paper β split into sentences β each sentence gets paper_id + sent_id + metadata
|
| 5 |
+
β embed sentences (384d) β AgglomerativeClustering cosine β centroid nearest 5 sentences
|
| 6 |
+
β Mistral labels topics from sentence evidence + paper metadata
|
| 7 |
+
β one paper can span MULTIPLE topics
|
| 8 |
+
"""
|
| 9 |
+
from langchain_core.tools import tool
|
| 10 |
+
import os
|
| 11 |
+
import json
|
| 12 |
+
import re
|
| 13 |
+
import numpy as np
|
| 14 |
+
import pandas as pd
|
| 15 |
+
|
| 16 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 17 |
+
# DEBUG + STATE + CONSTANTS
|
| 18 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 19 |
+
DEBUG = True
|
| 20 |
+
debug = {True: print, False: lambda *a, **k: None}[DEBUG]
|
| 21 |
+
|
| 22 |
+
CHECKPOINT_DIR = "/tmp/checkpoints"
|
| 23 |
+
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
|
| 24 |
+
|
| 25 |
+
NEAREST_K = 5
|
| 26 |
+
SENT_SPLIT_RE = r'(?<=[.!?])\s+(?=[A-Z])'
|
| 27 |
+
MIN_SENT_LEN = 30
|
| 28 |
+
|
| 29 |
+
RUN_CONFIGS = {
|
| 30 |
+
"abstract": ["Abstract"],
|
| 31 |
+
"title": ["Title"],
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
_data = {}
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 38 |
+
# HELPER: Split text into sentences (regex, no nltk)
|
| 39 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 40 |
+
def _split_sentences(text):
    """Break *text* into sentences with a plain regex (no nltk dependency).

    Splits after `.`, `!` or `?` followed by whitespace and an uppercase
    letter (SENT_SPLIT_RE); fragments whose stripped length is below
    MIN_SENT_LEN are dropped."""
    long_enough = lambda fragment: len(fragment.strip()) >= MIN_SENT_LEN
    pieces = re.split(SENT_SPLIT_RE, str(text))
    return list(filter(long_enough, pieces))
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 48 |
+
# TOOL 1: Load Scopus CSV
|
| 49 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 50 |
+
@tool
def load_scopus_csv(filepath: str) -> str:
    """Load a Scopus CSV export and show preview. Call this first.

    Args:
        filepath: Path to the uploaded .csv file.

    Returns:
        Row count, column names, and sample data."""
    debug(f"\n>>> TOOL: load_scopus_csv(filepath='{filepath}')")
    # utf-8-sig strips the BOM that Scopus exports usually carry.
    df = pd.read_csv(filepath, encoding="utf-8-sig")
    # Cache the dataframe in module state for the later pipeline tools.
    _data["df"] = df
    debug(f">>> Loaded {len(df)} rows, {len(df.columns)} columns")
    # Only preview the text columns actually present in this export.
    target_cols = list(filter(lambda c: c in df.columns, ["Title", "Abstract", "Author Keywords"]))
    sample = df[target_cols].head(3).to_string(max_colwidth=80)
    # Non-null counts per target column (name kept from original; it reports
    # populated cells, not nulls).
    null_counts = ", ".join(list(map(
        lambda c: f"{c}: {df[c].notna().sum()}/{len(df)}", target_cols)))

    # Estimate sentence counts from the first 5 abstracts.
    # NOTE(review): assumes "Abstract" and "Title" columns exist — a CSV
    # missing either raises KeyError here; confirm exports always include both.
    sample_sents = df["Abstract"].head(5).apply(_split_sentences).apply(len)
    avg_abstract_sents = sample_sents.mean()
    est_abstract = int(avg_abstract_sents * len(df))
    title_count = int(df["Title"].notna().sum())

    return (f"π **Dataset Statistics:**\n"
            f"- **Papers:** {len(df)}\n"
            f"- **Abstract sentences:** ~{est_abstract} (~{avg_abstract_sents:.0f} per paper)\n"
            f"- **Title sentences:** {title_count} (1 per paper)\n"
            f"- **Non-null:** {null_counts}\n\n"
            f"Columns: {', '.join(list(df.columns)[:15])}\n\n"
            f"Sample:\n{sample}")
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
# ───────────────────────────────────────────────
# TOOL 2: Sentence-Level BERTopic Pipeline
# ───────────────────────────────────────────────
@tool
def run_bertopic_discovery(run_key: str, threshold: float = 0.7) -> str:
    """Sentence-level BERTopic: split papers → embed sentences → cosine similarity clustering → centroid nearest 5 → Plotly charts.
    Each sentence keeps paper_id, sent_id, and metadata. One paper can span multiple topics.
    Uses AgglomerativeClustering with cosine distance — groups sentences by similarity threshold.

    Args:
        run_key: One of 'abstract' or 'title' — selects which columns to split into sentences.
        threshold: Cosine distance threshold (0.0-1.0). Lower = stricter = more topics.
            0.5 = very strict (~2000 topics), 0.7 = recommended (~100 topics, default), 0.8 = loose (~30 topics), 0.9 = very loose (~10 topics).

    Returns:
        Topic summary with sentence counts, paper counts, and 5 nearest centroid sentences."""
    debug(f"\n>>> TOOL: run_bertopic_discovery(run_key='{run_key}', threshold={threshold})")
    # Heavy imports are function-local so merely importing tools.py stays cheap.
    from bertopic import BERTopic
    from sentence_transformers import SentenceTransformer

    # Requires load_scopus_csv to have populated _data["df"] first (KeyError otherwise).
    df = _data["df"].copy()
    cols = RUN_CONFIGS[run_key]
    available = list(filter(lambda c: c in df.columns, cols))
    debug(f">>> Columns: {available}")

    # ── Step 1: Assemble text per paper ──
    df["_text"] = df[available].fillna("").agg(" ".join, axis=1)
    df["_paper_id"] = df.index
    debug(f">>> {len(df)} papers assembled")

    # ── Step 2: Split into sentences — regex, no nltk ──
    debug(">>> Splitting into sentences...")
    df["_sentences"] = df["_text"].apply(_split_sentences)
    debug(f">>> Sentence counts: min={df['_sentences'].apply(len).min()}, "
          f"max={df['_sentences'].apply(len).max()}, "
          f"mean={df['_sentences'].apply(len).mean():.1f}")

    # ── Step 3: Explode to sentence-level DataFrame ──
    # One row per sentence; each keeps its paper's metadata columns.
    meta_cols = ["_paper_id", "Title", "Author Keywords", "_sentences"]
    available_meta = list(filter(lambda c: c in df.columns, meta_cols))
    sent_df = df[available_meta].explode("_sentences").rename(
        columns={"_sentences": "text"}).reset_index(drop=True)
    sent_df = sent_df.dropna(subset=["text"]).reset_index(drop=True)
    sent_df["sent_id"] = sent_df.groupby("_paper_id").cumcount()

    # ── Step 3b: Filter out publisher boilerplate sentences ──
    # Scopus abstracts contain copyright/license noise that clustering picks up as topics.
    # These are NOT research content — remove before embedding.
    debug(">>> Filtering publisher boilerplate...")
    _n_before = len(sent_df)
    boilerplate_patterns = "|".join([
        r"Licensee MDPI",
        r"Published by Informa",
        r"Published by Elsevier",
        r"Taylor & Francis",
        r"Copyright ©",
        r"Creative Commons",
        r"open access article",
        r"Inderscience Enterprises",
        r"All rights reserved",
        r"This is an open access",
        r"distributed under the terms",
        r"The Author\(s\)",
        r"Springer Nature",
        r"Emerald Publishing",
        r"limitations and future",
        r"limitations and implications",
        r"limitations are discussed",
        r"limitations have been discussed",
        r"implications are discussed",
        r"implications were discussed",
        r"implications are presented",
        r"concludes with .* implications",
    ])
    clean_mask = ~sent_df["text"].str.contains(boilerplate_patterns, case=False, regex=True, na=False)
    sent_df = sent_df[clean_mask].reset_index(drop=True)
    # Re-number sent_id so it stays contiguous per paper after filtering.
    sent_df["sent_id"] = sent_df.groupby("_paper_id").cumcount()
    debug(f">>> Filtered: {_n_before} → {len(sent_df)} sentences ({_n_before - len(sent_df)} boilerplate removed)")
    n_sentences = len(sent_df)
    n_papers = len(df)
    debug(f">>> {n_sentences} sentences from {n_papers} papers")

    # ── Step 4: Embed sentences (384d, L2-normalized) ──
    # BERTopic FAQ: "normalize them first to force a cosine-related distance metric"
    # Math: for L2-normalized vectors, euclidean²(a,b) = 2(1 - cos(a,b)) — same clusters as cosine
    debug(">>> Embedding sentences with all-MiniLM-L6-v2 (L2-normalized)...")
    docs = sent_df["text"].tolist()
    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    embeddings = embedder.encode(docs, show_progress_bar=False, normalize_embeddings=True)
    debug(f">>> Embeddings: {embeddings.shape}, normalized: True")

    # Save checkpoint (reloaded nowhere in this file; kept for external inspection).
    np.save(f"{CHECKPOINT_DIR}/rq4_{run_key}_emb.npy", embeddings)

    # ── Step 5: Agglomerative Clustering with COSINE similarity threshold ──
    # Groups sentences where cosine_distance < threshold → same cluster
    # No dimension reduction. No density estimation. Pure similarity grouping.
    debug(f">>> AgglomerativeClustering cosine threshold={threshold} on 384d embeddings...")
    from sklearn.preprocessing import FunctionTransformer
    from sklearn.cluster import AgglomerativeClustering
    # Identity FunctionTransformer replaces UMAP — embeddings pass through unchanged.
    no_umap = FunctionTransformer()
    cluster_model = AgglomerativeClustering(
        n_clusters=None,
        metric="cosine",
        linkage="average",
        distance_threshold=threshold,
    )
    topic_model = BERTopic(
        hdbscan_model=cluster_model,  # BERTopic accepts any clusterer via this slot
        umap_model=no_umap,
    )
    # NOTE(review): `probs` is never used below — AgglomerativeClustering yields no probabilities.
    topics, probs = topic_model.fit_transform(docs, embeddings)
    # -1 is BERTopic's outlier label; exclude it from the topic count.
    n_topics = len(set(topics)) - int(-1 in topics)
    n_outliers = int(np.sum(np.array(topics) == -1))
    debug(f">>> {n_topics} topics, {n_outliers} outlier sentences")

    # Store for later tools (consolidate_into_themes reads these three arrays + sent_df).
    _data[f"{run_key}_model"] = topic_model
    _data[f"{run_key}_topics"] = np.array(topics)
    _data[f"{run_key}_embeddings"] = embeddings
    _data[f"{run_key}_sent_df"] = sent_df

    # ── Step 6: BERTopic Plotly visualizations (skip charts that need 3+ topics) ──
    # `(cond) and expr` is the module's no-if idiom: expr only runs when cond is truthy.
    debug(f">>> Generating visualizations ({n_topics} topics)...")
    # visualize_topics() uses UMAP internally — crashes with < 3 topics
    (n_topics >= 3) and topic_model.visualize_topics().write_html(
        f"/tmp/rq4_{run_key}_intertopic.html", include_plotlyjs="cdn")
    # barchart works with 1+ topics
    (n_topics >= 1) and topic_model.visualize_barchart(
        top_n_topics=min(10, max(1, n_topics))).write_html(
        f"/tmp/rq4_{run_key}_bars.html", include_plotlyjs="cdn")
    # hierarchy needs 2+ topics
    (n_topics >= 2) and topic_model.visualize_hierarchy().write_html(
        f"/tmp/rq4_{run_key}_hierarchy.html", include_plotlyjs="cdn")
    # heatmap needs 2+ topics
    (n_topics >= 2) and topic_model.visualize_heatmap().write_html(
        f"/tmp/rq4_{run_key}_heatmap.html", include_plotlyjs="cdn")
    debug(f">>> Visualizations saved (skipped charts needing more topics)")

    # ── Step 7: Centroid nearest 5 SENTENCES — COSINE similarity ──
    topics_arr = np.array(topics)
    topic_info = topic_model.get_topic_info()
    valid_rows = list(filter(lambda r: r["Topic"] != -1, topic_info.to_dict("records")))

    def _centroid_nearest(row):
        """Find 5 sentences nearest to topic centroid via cosine similarity."""
        mask = topics_arr == row["Topic"]
        member_idx = np.where(mask)[0]
        member_embs = embeddings[mask]
        centroid = member_embs.mean(axis=0)
        # Cosine distance: 1 - cos_sim. For normalized vectors: cos_sim = dot product
        # (norms recomputed anyway; +1e-10 guards division by zero).
        norms = np.linalg.norm(member_embs, axis=1) * np.linalg.norm(centroid)
        cosine_sim = (member_embs @ centroid) / (norms + 1e-10)
        dists = 1 - cosine_sim
        nearest = np.argsort(dists)[:NEAREST_K]

        # 5 nearest sentences with paper metadata (values truncated for the LLM prompt)
        nearest_evidence = list(map(lambda i: {
            "sentence": str(sent_df.iloc[member_idx[i]]["text"])[:250],
            "paper_id": int(sent_df.iloc[member_idx[i]]["_paper_id"]),
            "title": str(sent_df.iloc[member_idx[i]].get("Title", ""))[:150],
            "keywords": str(sent_df.iloc[member_idx[i]].get("Author Keywords", ""))[:150],
        }, nearest))

        # Count unique papers in this topic + collect their titles
        topic_papers_df = sent_df.iloc[member_idx].drop_duplicates(subset=["_paper_id"])
        unique_papers = len(topic_papers_df)
        paper_titles = list(map(
            lambda idx: str(topic_papers_df.iloc[idx].get("Title", ""))[:200],
            range(min(50, unique_papers))))  # cap at 50 titles per topic

        return {"topic_id": int(row["Topic"]),
                "sentence_count": int(row["Count"]),
                "paper_count": int(unique_papers),
                "top_words": str(row.get("Name", ""))[:100],
                "nearest": nearest_evidence,
                "paper_titles": paper_titles}

    summaries = list(map(_centroid_nearest, valid_rows))
    # NOTE(review): file handle from open(...) is never explicitly closed —
    # relies on CPython refcount GC; consider `with open(...)` here.
    json.dump(summaries, open(f"{CHECKPOINT_DIR}/rq4_{run_key}_summaries.json", "w"), indent=2, default=str)
    debug(f">>> {len(summaries)} topics saved ({NEAREST_K} nearest sentences each)")

    # ── Format output ──
    lines = list(map(
        lambda s: f" Topic {s['topic_id']} ({s['sentence_count']} sentences, {s['paper_count']} papers): {s['top_words']}",
        summaries))
    return (f"[{run_key}] {n_topics} topics from {n_sentences} sentences ({n_papers} papers, {n_outliers} outliers).\n\n"
            + "\n".join(lines)
            + f"\n\nVisualizations: /tmp/rq4_{run_key}_*.html (4 files)"
            + f"\nCheckpoints: {CHECKPOINT_DIR}/rq4_{run_key}_emb.npy + summaries.json")
|
| 275 |
+
# ───────────────────────────────────────────────
# TOOL 3: Label Topics with Mistral (sentence evidence)
# ───────────────────────────────────────────────
@tool
def label_topics_with_llm(run_key: str) -> str:
    """Send 5 nearest centroid sentences + paper metadata to Mistral for labeling.
    Each sentence shows which paper it came from (title + keywords).

    Args:
        run_key: One of 'abstract' or 'title'.

    Returns:
        Labeled topics with sentence-level evidence."""
    debug(f"\n>>> TOOL: label_topics_with_llm(run_key='{run_key}')")
    from langchain_mistralai import ChatMistralAI
    from langchain_core.prompts import PromptTemplate
    from langchain_core.output_parsers import JsonOutputParser

    # Read Round-1 summaries written by run_bertopic_discovery (with closes the handle).
    with open(f"{CHECKPOINT_DIR}/rq4_{run_key}_summaries.json") as fh:
        summaries = json.load(fh)
    debug(f">>> Loaded {len(summaries)} topics ({NEAREST_K} sentences each)")

    # Limit to the MAX_LABEL_TOPICS largest topics — prevents Mistral rate limit on 2000+ topics
    MAX_LABEL_TOPICS = 100
    sorted_summaries = sorted(summaries, key=lambda s: s.get("sentence_count", 0), reverse=True)
    summaries_to_label = sorted_summaries[:MAX_LABEL_TOPICS]
    skipped = max(0, len(summaries) - MAX_LABEL_TOPICS)
    debug(f">>> Labeling top {len(summaries_to_label)} topics (skipped {skipped} small clusters)")

    # Format all topics — show sentence + paper metadata as evidence
    topics_block = "\n\n".join(list(map(
        lambda s: (f"Topic {s['topic_id']} ({s['sentence_count']} sentences from {s['paper_count']} papers):\n"
                   f" Top words: {s['top_words']}\n"
                   f" {NEAREST_K} nearest centroid sentences:\n"
                   + "\n".join(list(map(
                       lambda e: (f" - \"{e['sentence'][:200]}\"\n"
                                  f" Paper: \"{e['title']}\"\n"
                                  f" Keywords: {e['keywords']}"),
                       s["nearest"])))),
        summaries_to_label)))

    prompt = PromptTemplate.from_template(
        "You are a research topic classifier for academic papers about Technology and Tourism.\n\n"
        "For EACH topic below, you are given the 5 sentences nearest to the topic centroid,\n"
        "plus the paper title and author keywords each sentence came from.\n\n"
        "Return a JSON ARRAY with one object per topic:\n"
        "- topic_id: integer\n"
        "- label: short descriptive name (3-6 words, specific — NOT generic like 'tourism studies')\n"
        "- category: general research area (e.g., 'technology adoption', 'consumer behavior',\n"
        " 'virtual reality', 'social media marketing', 'sustainability', 'cultural heritage',\n"
        " 'AI and machine learning', 'online reviews', 'destination marketing',\n"
        " 'tourist psychology', 'hotel management', 'sharing economy',\n"
        " 'mobile applications', 'research methodology', 'data analytics')\n"
        " DO NOT use PACIS/ICIS categories — just plain descriptive research area.\n"
        "- confidence: high, medium, or low\n"
        "- reasoning: 1 sentence explaining WHY you chose this label based on the evidence sentences\n"
        "- niche: true or false (true = very specific sub-area with <20 sentences)\n\n"
        "CRITICAL: be SPECIFIC in labels. Do NOT use broad terms.\n"
        "Return ONLY valid JSON array, no markdown.\n\n"
        "Topics:\n{topics}")

    llm = ChatMistralAI(model="mistral-small-latest", temperature=0, timeout=300)
    chain = prompt | llm | JsonOutputParser()
    debug(">>> Calling Mistral (single call, all topics)...")
    labels = chain.invoke({"topics": topics_block})
    debug(f">>> Got {len(labels)} labels")

    # BUG FIX: merge by topic_id, not by position. `labels` corresponds to the
    # size-SORTED `summaries_to_label`, while `summaries` keeps the original
    # topic order — the old positional zip paired most labels with the wrong
    # topic (and padded with summaries themselves). Unlabeled topics keep their
    # summary fields unchanged.
    labels_by_id = dict(map(lambda l: (l.get("topic_id"), l), labels))
    labeled = list(map(
        lambda s: {**s, **labels_by_id.get(s.get("topic_id"), {})},
        summaries))
    with open(f"{CHECKPOINT_DIR}/rq4_{run_key}_labels.json", "w") as fh:
        json.dump(labeled, fh, indent=2, default=str)
    debug(f">>> Labels saved: {CHECKPOINT_DIR}/rq4_{run_key}_labels.json")

    # Format — show label + evidence sentences + paper source
    lines = list(map(
        lambda l: (f" **Topic {l.get('topic_id', '?')}: {l.get('label', '?')}** "
                   f"[{l.get('category', '?')}] conf={l.get('confidence', '?')} "
                   f"({l.get('sentence_count', 0)} sentences, {l.get('paper_count', 0)} papers)\n"
                   + "\n".join(list(map(
                       lambda e: f" • \"{e['sentence'][:120]}...\" → _{e['title'][:60]}_",
                       l.get("nearest", []))))),
        labeled))
    return f"[{run_key}] {len(labeled)} topics labeled by Mistral:\n\n" + "\n\n".join(lines)
| 358 |
+
|
| 359 |
+
# ───────────────────────────────────────────────
# TOOL 4: Generate Comparison Table
# ───────────────────────────────────────────────
@tool
def generate_comparison_csv() -> str:
    """Compare Mistral-labeled topics across completed runs. Includes sentence + paper counts.

    Returns:
        Comparison table + CSV path."""
    debug(f"\n>>> TOOL: generate_comparison_csv()")
    # A run is "completed" once label_topics_with_llm has written its labels.json.
    completed = list(filter(
        lambda k: os.path.exists(f"{CHECKPOINT_DIR}/rq4_{k}_labels.json"), RUN_CONFIGS.keys()))
    debug(f">>> Completed runs: {completed}")

    def _load_run(run_key):
        """Load one run's labels.json and flatten each label into a CSV-ready row dict."""
        # FIX: `with` closes the file handle (was json.load(open(...)) — leaked).
        with open(f"{CHECKPOINT_DIR}/rq4_{run_key}_labels.json") as fh:
            labels = json.load(fh)
        return list(map(lambda l: {
            "run": run_key, "topic_id": l.get("topic_id", ""),
            "label": l.get("label", ""), "category": l.get("category", ""),
            "confidence": l.get("confidence", ""), "niche": l.get("niche", ""),
            "sentences": l.get("sentence_count", 0),
            "papers": l.get("paper_count", 0),
            "top_words": l.get("top_words", ""),
        }, labels))

    # FIX: chain.from_iterable is O(n); sum(lists, []) re-copies on every step (O(n²)).
    from itertools import chain
    all_rows = list(chain.from_iterable(map(_load_run, completed)))
    df = pd.DataFrame(all_rows)
    path = "/tmp/rq4_comparison.csv"
    df.to_csv(path, index=False)
    debug(f">>> Comparison CSV: {path} ({len(df)} rows)")
    return f"Comparison saved: {path} ({len(completed)} runs, {len(df)} topics)\n\n{df.to_string(index=False)}"
+
|
| 392 |
+
# ───────────────────────────────────────────────
# TOOL 5: Export 500-Word Narrative
# ───────────────────────────────────────────────
@tool
def export_narrative(run_key: str) -> str:
    """Generate 500-word narrative for research paper Section 7 via Mistral.

    Args:
        run_key: One of 'abstract' or 'title'.

    Returns:
        500-word narrative + save path."""
    debug(f"\n>>> TOOL: export_narrative(run_key='{run_key}')")
    from langchain_mistralai import ChatMistralAI

    # FIX: `with` closes the handle (was json.load(open(...)) — leaked).
    with open(f"{CHECKPOINT_DIR}/rq4_{run_key}_labels.json") as fh:
        labels = json.load(fh)
    topics_text = "\n".join(list(map(
        lambda l: f"- {l.get('label', '?')} ({l.get('sentence_count', 0)} sentences from "
                  f"{l.get('paper_count', 0)} papers, category: {l.get('category', '?')}, "
                  f"confidence: {l.get('confidence', '?')}, niche: {l.get('niche', '?')})",
        labels)))

    llm = ChatMistralAI(model="mistral-small-latest", temperature=0.3, timeout=300)
    result = llm.invoke(
        f"Write exactly 500 words for a research paper Section 7 titled "
        f"'Topic Modeling Results — BERTopic Discovery'.\n\n"
        f"Dataset: 1390 Scopus papers on Tourism and AI.\n"
        f"Method: Sentence-level BERTopic — each abstract split into sentences,\n"
        f"embedded with all-MiniLM-L6-v2 (384d), clustered with AgglomerativeClustering (cosine).\n"
        f"Note: One paper can contribute sentences to MULTIPLE topics.\n"
        f"Run config: '{run_key}' columns.\n\n"
        f"Topics discovered:\n{topics_text}\n\n"
        f"Include: methodology justification for sentence-level approach,\n"
        f"key themes, emerging niches, limitations, future work.")

    path = "/tmp/rq4_narrative.txt"
    # FIX: the original `open(path, "w").write(...)` never closed the handle,
    # relying on refcount GC to flush; `with` guarantees flush + close.
    with open(path, "w", encoding="utf-8") as fh:
        fh.write(result.content)
    debug(f">>> Narrative saved: {path} ({len(result.content)} chars)")
    return f"Narrative saved: {path}\n\n{result.content}"
+
|
| 433 |
+
# ───────────────────────────────────────────────
# TOOL 6: Consolidate Round 1 Topics into Themes
# ───────────────────────────────────────────────
@tool
def consolidate_into_themes(run_key: str, theme_map: dict) -> str:
    """ROUND 2: Merge fine-grained Round 1 topics into broader themes.
    Researcher decides which topics to group. Recomputes centroids and evidence.

    Args:
        run_key: 'abstract' or 'title'.
        theme_map: Dict mapping theme names to topic ID lists.
            Example: {"AI in Tourism": [0, 1, 5], "VR Tourism": [2, 3]}

    Returns:
        Consolidated themes with new 5-nearest sentence evidence per theme."""
    debug(f"\n>>> TOOL: consolidate_into_themes(run_key='{run_key}', {len(theme_map)} themes)")

    # Requires run_bertopic_discovery for the same run_key in THIS process
    # (these are in-memory caches, not checkpoints) — KeyError otherwise.
    topics_arr = _data[f"{run_key}_topics"]
    embeddings = _data[f"{run_key}_embeddings"]
    sent_df = _data[f"{run_key}_sent_df"]

    def _build_theme(item):
        """Merge listed topics into one theme. Recompute centroid + 5 nearest."""
        theme_name, topic_ids = item
        # Sentences whose Round-1 topic is any of this theme's topic ids.
        mask = np.isin(topics_arr, topic_ids)
        member_idx = np.where(mask)[0]
        member_embs = embeddings[mask]
        centroid = member_embs.mean(axis=0)
        # Same cosine-to-centroid math as run_bertopic_discovery's Step 7;
        # +1e-10 guards division by zero.
        norms = np.linalg.norm(member_embs, axis=1) * np.linalg.norm(centroid)
        cosine_sim = (member_embs @ centroid) / (norms + 1e-10)
        dists = 1 - cosine_sim
        nearest = np.argsort(dists)[:NEAREST_K]

        # Evidence dicts for the NEAREST_K closest sentences (values truncated).
        nearest_evidence = list(map(lambda i: {
            "sentence": str(sent_df.iloc[member_idx[i]]["text"])[:250],
            "paper_id": int(sent_df.iloc[member_idx[i]]["_paper_id"]),
            "title": str(sent_df.iloc[member_idx[i]].get("Title", ""))[:150],
            "keywords": str(sent_df.iloc[member_idx[i]].get("Author Keywords", ""))[:150],
        }, nearest))

        unique_papers = sent_df.iloc[member_idx]["_paper_id"].nunique()

        # Collect paper titles (up to 50)
        topic_papers_df = sent_df.iloc[member_idx].drop_duplicates(subset=["_paper_id"])
        paper_titles = list(map(
            lambda idx: str(topic_papers_df.iloc[idx].get("Title", ""))[:200],
            range(min(50, len(topic_papers_df)))))

        return {"label": theme_name, "merged_topics": list(topic_ids),
                "sentence_count": int(mask.sum()), "paper_count": int(unique_papers),
                "nearest": nearest_evidence, "paper_titles": paper_titles}

    # Add topic_id to each theme (sequential, in theme_map insertion order)
    themes_raw = list(map(_build_theme, theme_map.items()))
    themes = list(map(
        lambda pair: {**pair[1], "topic_id": pair[0]},
        enumerate(themes_raw)))
    # NOTE(review): file handle never explicitly closed — relies on CPython
    # refcount GC; consider `with open(...)` here.
    json.dump(themes, open(f"{CHECKPOINT_DIR}/rq4_{run_key}_themes.json", "w"), indent=2, default=str)
    debug(f">>> {len(themes)} themes saved: {CHECKPOINT_DIR}/rq4_{run_key}_themes.json")

    # Format — show theme + merged topics + evidence
    lines = list(map(
        lambda t: (f" **{t['label']}** ({t['sentence_count']} sentences, {t['paper_count']} papers)\n"
                   f" Merged from topics: {t['merged_topics']}\n"
                   f" Evidence:\n"
                   + "\n".join(list(map(
                       lambda e: f" • \"{e['sentence'][:120]}...\" → _{e['title'][:60]}_",
                       t["nearest"])))),
        themes))
    return f"[{run_key}] Round 2: {len(themes)} themes consolidated:\n\n" + "\n\n".join(lines)
| 504 |
+
|
| 505 |
+
# ───────────────────────────────────────────────
# TOOL 7: Compare Themes with PAJAIS Taxonomy
# ───────────────────────────────────────────────

# Established IS topic taxonomy from:
# Jiang, Liang & Tsai (2019) "Knowledge Profile in PAJAIS"
# Pacific Asia Journal of the AIS, 11(1), 1-24. doi:10.17705/1pais.11101
# Used verbatim in the compare_with_taxonomy prompt below.
PAJAIS_TAXONOMY = [
    "Electronic and Mobile Business / Social Commerce",
    "Human Behavior and IS / Human-Computer Interaction",
    "IS/IT Strategy, Leadership, Governance",
    "Business Intelligence and Data Analytics",
    "Design Science and IS",
    "Enterprise Systems and BPM",
    "IS Implementation, Adoption, and Diffusion",
    "Social Media and Business Impact",
    "Cultural and Global Issues in IS",
    "IS Security and Privacy",
    "IS Smart / IoT",
    "Knowledge Management",
    "ICT / Digital Platform / IT and Work",
    "IS Healthcare",
    "IT Project Management",
    "Service Science and IS",
    "Social and Organizational Aspects of IS",
    "Research Methods and Philosophy",
    "E-Finance / Economics of IS",
    "E-Government",
    "IS Education and Learning",
    "Green IT and Sustainability",
]
| 537 |
+
|
| 538 |
+
@tool
def compare_with_taxonomy(run_key: str) -> str:
    """Compare BERTopic themes against established PAJAIS/PACIS taxonomy
    (Jiang, Liang & Tsai, 2019). Identifies which themes map to known
    categories and which are NOVEL/EMERGING (not in existing taxonomy).
    Researcher reviews mapping and approves new theme consolidation.

    Args:
        run_key: 'abstract' or 'title'.

    Returns:
        Mapping table: BERTopic theme → PAJAIS category (or NOVEL)."""
    debug(f"\n>>> TOOL: compare_with_taxonomy(run_key='{run_key}')")
    from langchain_mistralai import ChatMistralAI
    from langchain_core.prompts import PromptTemplate
    from langchain_core.output_parsers import JsonOutputParser

    # Load themes (prefer consolidated themes, fall back to labels).
    # `(cond and a) or b` is the module's no-if selection idiom.
    themes_path = f"{CHECKPOINT_DIR}/rq4_{run_key}_themes.json"
    labels_path = f"{CHECKPOINT_DIR}/rq4_{run_key}_labels.json"
    source_path = (os.path.exists(themes_path) and themes_path) or labels_path
    # FIX: `with` closes the handle (was json.load(open(...)) — leaked).
    with open(source_path) as fh:
        themes = json.load(fh)
    debug(f">>> Loaded {len(themes)} themes from {source_path}")

    # Format themes for Mistral
    themes_text = "\n".join(list(map(
        lambda t: f"- {t.get('label', '?')} "
                  f"({t.get('paper_count', t.get('count', '?'))} papers)",
        themes)))

    taxonomy_text = "\n".join(list(map(lambda c: f"- {c}", PAJAIS_TAXONOMY)))

    prompt = PromptTemplate.from_template(
        "You are an IS research taxonomy expert.\n\n"
        "Compare each BERTopic theme against the established PAJAIS/PACIS taxonomy.\n"
        "For EACH theme, return a JSON ARRAY with:\n"
        "- label: the BERTopic theme name\n"
        "- pajais_match: closest PAJAIS category (or 'NOVEL' if no match)\n"
        "- match_confidence: high, medium, low, or none\n"
        "- reasoning: why this mapping (1 sentence)\n"
        "- is_novel: true if this theme represents an emerging area not in the taxonomy\n\n"
        "Return ONLY valid JSON array.\n\n"
        "BERTopic Themes:\n{themes}\n\n"
        "PAJAIS Taxonomy (Jiang et al., 2019):\n{taxonomy}")

    llm = ChatMistralAI(model="mistral-small-latest", temperature=0, timeout=300)
    chain = prompt | llm | JsonOutputParser()
    debug(">>> Calling Mistral for taxonomy comparison...")
    mappings = chain.invoke({"themes": themes_text, "taxonomy": taxonomy_text})
    debug(f">>> Got {len(mappings)} mappings")

    # Save mapping (FIX: with-block closes the handle; was json.dump(..., open(...))).
    with open(f"{CHECKPOINT_DIR}/rq4_{run_key}_taxonomy_map.json", "w") as fh:
        json.dump(mappings, fh, indent=2, default=str)

    # Count novel vs mapped
    novel = list(filter(lambda m: m.get("is_novel", False), mappings))
    mapped = list(filter(lambda m: not m.get("is_novel", False), mappings))

    # Format output
    mapped_lines = list(map(
        lambda m: f" ✅ {m.get('label', '?')} → **{m.get('pajais_match', '?')}** "
                  f"(conf={m.get('match_confidence', '?')}) _{m.get('reasoning', '')}_",
        mapped))
    novel_lines = list(map(
        lambda m: f" 🆕 **{m.get('label', '?')}** → NOVEL "
                  f"_{m.get('reasoning', '')}_",
        novel))

    return (f"[{run_key}] Taxonomy comparison (Jiang et al., 2019):\n\n"
            f"**Mapped to PAJAIS categories ({len(mapped)}):**\n" + "\n".join(mapped_lines) +
            f"\n\n**NOVEL / Emerging themes ({len(novel)}):**\n" + "\n".join(novel_lines) +
            f"\n\nSaved: {CHECKPOINT_DIR}/rq4_{run_key}_taxonomy_map.json")
|
| 612 |
+
# ───────────────────────────────────────────────
# GET ALL TOOLS
# ───────────────────────────────────────────────
def get_all_tools():
    """Return all 7 tools with error handling enabled.

    Sets handle_tool_error=True on each @tool so the agent receives tool
    exceptions as messages instead of crashing.
    """
    registry = [load_scopus_csv, run_bertopic_discovery, label_topics_with_llm,
                consolidate_into_themes, compare_with_taxonomy,
                generate_comparison_csv, export_narrative]
    _enable = lambda t: setattr(t, 'handle_tool_error', True)
    list(map(_enable, registry))
    debug(f">>> tools.py: {len(registry)} tools ready (handle_tool_error=True)")
    list(map(lambda t: debug(f">>> - {t.name}"), registry))
    return registry
|