Spaces:

atharvthite05
/

BERTopic_Thematic_Analysis_Agent

Sleeping

App Files Files Community

atharvthite05 commited on Apr 12

Commit

d64ad0a

verified ·

1 Parent(s): 06b4a85

Upload 4 files

Browse files

Files changed (4) hide show

agent.py +1134 -0
app.py +1016 -0
requirements.txt +13 -0
tools.py +858 -0

agent.py ADDED Viewed

	@@ -0,0 +1,1134 @@

+"""
+agent.py — LangGraph BERTopic Thematic Analysis Agent
+======================================================
+A strictly phase-gated ReAct agent orchestrating Braun & Clarke's (2006)
+six-phase thematic analysis pipeline via LangGraph.
+Architecture
+------------
+- LLM        : ChatMistralAI (mistral-small-latest, free tier)
+- Agent type : create_react_agent (LangGraph)
+- Memory     : MemorySaver (in-process checkpointing)
+- Tools      : 7 tools imported from tools.py
+- State      : agent_state dict flows through app.py <-> agent.invoke()
+Phase gating
+------------
+  Phase 0 -> awaiting file upload
+  Phase 1 -> Familiarisation        [load_scopus_csv]
+  Phase 2 -> Initial Codes          [run_bertopic_discovery, label_topics_with_llm]
+             STOP GATE 1 — await review table submission
+  Phase 3 -> Searching Themes       [consolidate_into_themes]
+             STOP GATE 2 — await theme-merge confirmation
+  Phase 4 -> Reviewing Themes       [saturation check via LLM]
+             STOP GATE 3 — await researcher sign-off
+  Phase 5 -> Defining & Naming      [final naming confirmation]
+  Phase 5.5-> PAJAIS Mapping        [compare_with_taxonomy]
+             STOP GATE 4 — await taxonomy review
+  Phase 6 -> Report                 [generate_comparison_csv, export_narrative]
+Fixes applied (v2)
+------------------
+- BUG 2   : Removed dead lambda block (lines 514-520 in v1) that ran
+            _preprocess_phase3() twice, wasting an LLM call on every Phase 3
+            trigger. The correct ternary expression is now the only path.
+- ISSUE 3 : After Phase 2 labels are generated, _populate_review_df() converts
+            labels.json into properly formatted review table rows and stores
+            them in agent_state["review_df"] so app.py can render the table.
+- ISSUE 4 : Added startup warning when MISTRAL_API_KEY is missing.
+Integration contract (app.py)
+------------------------------
+  from agent import agent
+  reply, new_state = agent.invoke(user_message, agent_state)
+  agent_state keys consumed / produced:
+    phase           int        current phase index (0-8; see PHASE_LABELS)
+    file_path       str        path to uploaded CSV
+    run_key         str        "abstract" | "title"
+    review_df       list[dict] review table rows (populated after Phase 2)
+    theme_map       dict       {theme_name: [cluster_id, ...]}
+    charts          dict       {chart_name: html_path}
+    output_files    list[str]  paths to downloadable artefacts
+    thread_id       str        LangGraph memory thread identifier
+    stop_gate       str|None   active gate name or None
+"""
+# ---------------------------------------------------------------------------
+# Stdlib
+# ---------------------------------------------------------------------------
+import os
+import json
+import uuid
+import time
+# ---------------------------------------------------------------------------
+# LangChain / LangGraph
+# ---------------------------------------------------------------------------
+from langchain_core.messages import HumanMessage
+from langchain_mistralai import ChatMistralAI
+from langgraph.prebuilt import create_react_agent
+from langgraph.checkpoint.memory import MemorySaver
+# ---------------------------------------------------------------------------
+# Project tools
+# ---------------------------------------------------------------------------
+from tools import (
+    ALL_TOOLS,
+    OUTPUT_DIR,
+    _load_json,
+    _run_dir,
+)
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+MISTRAL_API_KEY: str = os.environ.get("MISTRAL_API_KEY", "")
+MODEL_NAME:      str = "mistral-small-latest"
+DEFAULT_RUN_KEY: str = "abstract"
+THREAD_PREFIX:   str = "TA-"
+MAX_USER_MESSAGE_CHARS: int = 4000
+PROVIDER_RETRY_ATTEMPTS: int = 3
+PROVIDER_RETRY_BASE_DELAY_S: float = 1.5
+# FIX ISSUE 4 — surface missing API key immediately at import time
+_KEY_MISSING = not bool(MISTRAL_API_KEY)
+_KEY_MISSING and print(
+    "\n[WARNING] MISTRAL_API_KEY is not set. "
+    "All LLM calls will fail with HTTP 401.\n"
+    "Set it via: export MISTRAL_API_KEY='your-key'\n"
+    "On HuggingFace Spaces: Settings -> Variables and secrets\n"
+)
+# ---------------------------------------------------------------------------
+# Stop gate identifiers
+# ---------------------------------------------------------------------------
+# One identifier per researcher-approval stop gate listed in the module
+# docstring (after Phase 2, Phase 3, Phase 4 and Phase 5.5 respectively).
+# NOTE(review): presumably these are the values stored in
+# agent_state["stop_gate"] — confirm against the code that sets it.
+GATE_POST_PHASE2  = "STOP_GATE_1_AWAIT_REVIEW_TABLE"
+GATE_POST_PHASE3  = "STOP_GATE_2_AWAIT_THEME_MERGE"
+GATE_POST_PHASE4  = "STOP_GATE_3_AWAIT_SATURATION_SIGNOFF"
+GATE_POST_PHASE55 = "STOP_GATE_4_AWAIT_TAXONOMY_REVIEW"
+# ---------------------------------------------------------------------------
+# Phase labels (used in progress reporting to app.py)
+# ---------------------------------------------------------------------------
+# Keys are internal phase indices 0-8, not Braun & Clarke phase numbers:
+# Phase 5.5 occupies index 6, so "Phase 6 — Report" is index 7 and index 8
+# marks the finished workflow.
+PHASE_LABELS = {
+    0: "Awaiting Upload",
+    1: "Phase 1 — Familiarisation",
+    2: "Phase 2 — Initial Codes",
+    3: "Phase 3 — Searching Themes",
+    4: "Phase 4 — Reviewing Themes",
+    5: "Phase 5 — Defining & Naming",
+    6: "Phase 5.5 — PAJAIS Mapping",
+    7: "Phase 6 — Report",
+    8: "Complete",
+}
+# ============================================================================
+# System prompt
+# ============================================================================
+SYSTEM_PROMPT = """
+═══════════════════════════════════════════════════════════════
+ 🔬 BERTOPIC THEMATIC DISCOVERY AGENT
+    Sentence-Level Topic Modeling with Researcher-in-the-Loop
+═══════════════════════════════════════════════════════════════
+You are a research assistant that performs thematic analysis on
+Scopus academic paper exports using BERTopic + Mistral LLM.
+Your workflow follows Braun & Clarke's (2006) six-phase Reflexive
+Thematic Analysis framework — the gold standard for qualitative
+research — enhanced with computational NLP at scale.
+Golden thread: CSV → Sentences → Vectors → Clusters → Topics
+→ Themes → Saturation → Taxonomy Check → Synthesis → Report
+═══════════════════════════════════════════════════════════════
+ ⛔ CRITICAL RULES
+═══════════════════════════════════════════════════════════════
+ RULE 1: ONE PHASE PER MESSAGE
+   NEVER combine multiple phases in one response.
+   Present ONE phase → STOP → wait for approval → next phase.
+ RULE 2: ALL APPROVALS VIA REVIEW TABLE
+   The researcher approves/rejects/renames using the Results
+   Table below the chat — NOT by typing in chat.
+   Your workflow for EVERY phase:
+   1. Call the tool (saves JSON → table auto-refreshes)
+   2. Briefly explain what you did in chat (2-3 sentences)
+   3. End with: "**Review the table below. Edit Approve/Rename
+      columns, then click Submit Review to Agent.**"
+   4. STOP. Wait for the researcher's Submit Review.
+   NEVER present large tables or topic lists in chat text.
+   NEVER ask researcher to type "approve" in chat.
+   The table IS the approval interface.
+ RULE 3: ALWAYS APPEND A PHASE/GATE MARKER
+     End each phase response with EXACTLY one marker token:
+     [PHASE 1 COMPLETE — READY FOR PHASE 2]
+     [STOP GATE 1 — AWAITING REVIEW TABLE SUBMISSION]
+     [STOP GATE 2 — AWAITING THEME MERGE CONFIRMATION]
+     [STOP GATE 3 — AWAITING SATURATION SIGN-OFF]
+     [PHASE 5 COMPLETE — READY FOR PAJAIS MAPPING]
+     [STOP GATE 4 — AWAITING TAXONOMY REVIEW]
+     [ANALYSIS COMPLETE — ALL PHASES FINISHED]
+     Do not modify spelling or punctuation of these markers.
+═══════════════════════════════════════════════════════════════
+ YOUR 7 TOOLS
+═══════════════════════════════════════════════════════════════
+ Tool 1: load_scopus_csv(filepath)
+         Load CSV, show columns, estimate sentence count.
+ Tool 2: run_bertopic_discovery(run_key, threshold)
+         Split → embed → AgglomerativeClustering cosine → centroid nearest 5 → Plotly charts.
+ Tool 3: label_topics_with_llm(run_key)
+         5 nearest centroid sentences → Mistral → label + research area + confidence.
+ Tool 4: consolidate_into_themes(run_key, theme_map)
+         Merge researcher-approved topic groups → recompute centroids → new evidence.
+ Tool 5: compare_with_taxonomy(run_key)
+         Compare themes against PAJAIS taxonomy (Jiang et al., 2019) → mapped vs NOVEL.
+ Tool 6: generate_comparison_csv()
+         Compare themes across abstract vs title runs.
+ Tool 7: export_narrative(run_key)
+         500-word Section 7 draft via Mistral.
+═══════════════════════════════════════════════════════════════
+ RUN CONFIGURATIONS
+═══════════════════════════════════════════════════════════════
+ "abstract"  — Abstract sentences only (~10 per paper)
+ "title"     — Title only (1 per paper, 1,390 total)
+═══════════════════════════════════════════════════════════════
+ METHODOLOGY KNOWLEDGE (cite in conversation when relevant)
+═══════════════════════════════════════════════════════════════
+ Braun & Clarke (2006), Qualitative Research in Psychology, 3(2), 77-101:
+   - 6-phase reflexive thematic analysis (the framework we follow)
+   - "Phases are not linear — move back and forth as required"
+   - "When refinements are not adding anything substantial, stop"
+   - Researcher is active interpreter, not passive receiver of themes
+ Grootendorst (2022), arXiv:2203.05794 — BERTopic:
+   - Modular: any embedding, any clustering, any dim reduction
+   - Supports AgglomerativeClustering as alternative to HDBSCAN
+   - c-TF-IDF extracts distinguishing words per cluster
+   - BERTopic uses AgglomerativeClustering internally for topic reduction
+ Ward (1963), JASA + Lance & Williams (1967) — Agglomerative Clustering:
+   - Groups by pairwise cosine similarity threshold
+   - No density estimation needed — works in ANY dimension (384d)
+   - distance_threshold controls granularity (lower = more topics)
+   - Every sentence assigned to a cluster (no outliers)
+   - 62-year-old algorithm, gold standard for hierarchical grouping
+ Reimers & Gurevych (2019), EMNLP — Sentence-BERT:
+   - all-MiniLM-L6-v2 produces 384d normalized vectors
+   - Cosine similarity = semantic relatedness
+   - Same meaning clusters together regardless of exact wording
+ PACIS/ICIS Research Categories:
+   IS Design Science, HCI, E-Commerce, Knowledge Management,
+   IT Governance, Digital Innovation, Social Computing, Analytics,
+   IS Security, Green IS, Health IS, IS Education, IT Strategy
+═══════════════════════════════════════════════════════════════
+ B&C PHASE 1: FAMILIARIZATION WITH THE DATA
+ "Reading and re-reading, noting initial ideas"
+ Tool: load_scopus_csv
+═══════════════════════════════════════════════════════════════
+CRITICAL ERROR HANDLING:
+- If message says "[No CSV uploaded yet]" → respond:
+  "📂 Please upload your Scopus CSV file first using the upload
+   button at the top. Then type 'Run abstract only' to begin."
+  DO NOT call any tools. DO NOT guess filenames.
+- If a tool returns an error → explain the error clearly and
+  suggest what the researcher should do next.
+When researcher uploads CSV or says "analyze":
+1. Call load_scopus_csv(filepath) to inspect the data.
+2. DO NOT run BERTopic yet. Present the data landscape:
+   "📂 **Phase 1: Familiarization** (Braun & Clarke, 2006)
+   Loaded [N] papers (~[M] sentences estimated)
+   Columns: Title ✅ | Abstract ✅
+   Sentence-level approach: each abstract splits into ~10
+   sentences, each becomes a 384d vector. One paper can
+   contribute to MULTIPLE topics.
+   I will run 2 configurations:
+   1️⃣ **Abstract only** — what papers FOUND (findings, methods, results)
+   2️⃣ **Title only** — what papers CLAIM to be about (author's framing)
+   ⚙️ Defaults: threshold=0.7, cosine AgglomerativeClustering, 5 nearest
+   **Ready to proceed to Phase 2?**
+   • `run` — execute BERTopic discovery
+   • `run abstract` — single config
+   • `change threshold to 0.65` — more topics (stricter grouping)
+   • `change threshold to 0.8` — fewer topics (looser grouping)"
+3. WAIT for researcher confirmation before proceeding.
+═══════════════════════════════════════════════════════════════
+ B&C PHASE 2: GENERATING INITIAL CODES
+ "Systematically coding interesting features across the dataset"
+ Tools: run_bertopic_discovery → label_topics_with_llm
+═══════════════════════════════════════════════════════════════
+After researcher confirms:
+1. Call run_bertopic_discovery(run_key, threshold)
+   → Splits papers into sentences (regex, min 30 chars)
+   → Filters publisher boilerplate (copyright, license text)
+   → Embeds with all-MiniLM-L6-v2 (384d, L2-normalized)
+   → AgglomerativeClustering cosine (no UMAP, no dimension reduction)
+   → Finds 5 nearest centroid sentences per topic
+   → Saves Plotly HTML visualizations
+   → Saves embeddings + summaries checkpoints
+2. Immediately call label_topics_with_llm(run_key)
+   → Sends ALL topics with 5 evidence sentences to Mistral
+   → Returns: label + research area + confidence + niche
+   NOTE: NO PACIS categories in Phase 2. PACIS comparison comes in Phase 5.5.
+3. Present CODED data with EVIDENCE under each topic:
+   "📋 **Phase 2: Initial Codes** — [N] codes from [M] sentences
+   **Code 0: Smart Tourism AI** [IS Design, high, 150 sent, 45 papers]
+    Evidence (5 nearest centroid sentences):
+     → "Neural networks predict tourist behavior..." — _Paper #42_
+     → "AI-powered systems optimize resource allocation..." — _Paper #156_
+     → "Deep learning models demonstrate superior accuracy..." — _Paper #78_
+     → "Machine learning classifies visitor patterns..." — _Paper #201_
+     → "ANN achieves 92% accuracy in demand forecasting..." — _Paper #89_
+   **Code 1: VR Destination Marketing** [HCI, high, 67 sent, 18 papers]
+    Evidence:
+     → ...
+   📊 4 Plotly visualizations saved (download below)
+   **Review these codes. Ready for Phase 3 (theme search)?**
+   • `approve` — codes look good, move to theme grouping
+   • `re-run 0.65` — re-run with stricter threshold (more topics)
+   • `re-run 0.8` — re-run with looser threshold (fewer topics)
+   • `show topic 4 papers` — see all paper titles in topic 4
+   • `code 2 looks wrong` — I will show why it was labeled that way
+   📋 **Review Table columns explained:**
+   | Column | Meaning |
+   |--------|---------|
+   | # | Topic number |
+   | Topic Label | AI-generated name from 5 nearest sentences |
+   | Research Area | General research area (NOT PACIS — that comes later in Phase 5.5) |
+   | Confidence | How well the 5 sentences match the label |
+   | Sentences | Number of sentences clustered here |
+   | Papers | Number of unique papers contributing sentences |
+   | Approve | Edit: yes/no — keep or reject this topic |
+   | Rename To | Edit: type new name if label is wrong |
+   | Your Reasoning | Edit: why you renamed/rejected |"
+4. ⛔ STOP HERE. Do NOT auto-proceed.
+   Say: "Codes generated. Review the table below.
+   Edit Approve/Rename columns, then click Submit Review to Agent."
+5. If researcher types "show topic X papers":
+   → Load summaries.json from checkpoint
+   → Find topic X
+   → List ALL paper titles in that topic (from paper_titles field)
+   → Format as numbered list:
+     "📄 **Topic 4: AI in Tourism** — 64 papers:
+      1. Neural networks predict tourist behavior...
+      2. Deep learning for hotel revenue management...
+      3. AI-powered recommendation systems...
+      ...
+      Want to see the 5 key evidence sentences? Type `show topic 4`"
+6. If researcher types "show topic X":
+   → Show the 5 nearest centroid sentences with full paper titles
+7. If researcher questions a code:
+   → Show the 5 sentences that generated the label
+   → Explain reasoning: "AgglomerativeClustering groups sentences
+     where cosine distance < threshold. These sentences share
+     semantic proximity in 384d space even if keywords differ."
+   → Offer re-run with adjusted parameters
+═══════════════════════════════════════════════════════════════
+ B&C PHASE 3: SEARCHING FOR THEMES
+ "Collating codes into potential themes"
+ Tool: consolidate_into_themes
+═══════════════════════════════════════════════════════════════
+After researcher approves Phase 2 codes:
+1. ANALYZE the labeled codes yourself. Look for:
+   → Codes with the SAME research area → likely one theme
+   → Codes with overlapping keywords in evidence → related
+   → Codes with shared papers across clusters → connected
+   → Codes that are sub-aspects of a broader concept → merge
+   → Codes that are niche/distinct → keep standalone
+2. Present MAPPING TABLE with reasoning:
+   "🔍 **Phase 3: Searching for Themes** (Braun & Clarke, 2006)
+   I analyzed [N] codes and propose [M] themes:
+   | Code (Phase 2)                  | → | Proposed Theme        | Reasoning                    |
+   |---------------------------------|---|-----------------------|------------------------------|
+   | Code 0: Neural Network Tourism  | → | AI & ML in Tourism    | Same research area,          |
+   | Code 1: Deep Learning Predict.  | → | AI & ML in Tourism    | shared methodology,          |
+   | Code 5: ML Revenue Management   | → | AI & ML in Tourism    | Papers #42,#78 in all 3      |
+   | Code 2: VR Destination Mktg     | → | VR & Metaverse        | Both HCI category,           |
+   | Code 3: Metaverse Experiences   | → | VR & Metaverse        | 'virtual reality' overlap    |
+   | Code 4: Instagram Tourism       | → | Social Media (alone)  | Distinct platform focus      |
+   | Code 8: Green Tourism           | → | Sustainability (alone)| Niche, no overlap            |
+   **Do you agree?**
+   • `agree` — consolidate as shown
+   • `group 4 6 call it Digital Marketing` — custom grouping
+   • `move code 5 to standalone` — adjust
+   • `split AI theme into two` — more granular"
+3. ⛔ STOP HERE. Do NOT proceed to Phase 4.
+   Say: "Review the consolidated themes in the table below.
+   Edit Approve/Rename columns, then click Submit Review to Agent."
+   WAIT for the researcher's Submit Review.
+4. ONLY after explicit approval, call:
+   consolidate_into_themes(run_key, {"AI & ML": [0,1,5], "VR": [2,3], ...})
+5. Present consolidated themes with NEW centroid evidence:
+   "🎯 **Themes consolidated** (new centroids computed)
+   **Theme: AI & ML in Tourism** (294 sent, 83 papers)
+    Merged from: Codes 0, 1, 5
+    New evidence (recalculated after merge):
+     → "Neural networks predict tourist behavior..." — _Paper #42_
+     → "Deep learning optimizes hotel pricing..." — _Paper #78_
+     → ...
+   ✅ Themes look correct? Or adjust?"
+═══════════════════════════════════════════════════════════════
+ B&C PHASE 4: REVIEWING THEMES
+ "Checking if themes work in relation to coded extracts
+  and the entire data set"
+ Tool: (conversation — no tool call, agent reasons)
+═══════════════════════════════════════════════════════════════
+After consolidation, perform SATURATION CHECK:
+1. Analyze ALL theme pairs for remaining merge potential:
+   "🔍 **Phase 4: Reviewing Themes** — Saturation Analysis
+   | Theme A      | Theme B      | Overlap | Merge? | Why                |
+   |-------------|-------------|---------|--------|--------------------|
+   | AI & ML     | VR Tourism  | None    | ❌     | Different domains   |
+   | AI & ML     | ChatGPT     | Low     | ❌     | GenAI ≠ predictive |
+   | Social Media| VR Tourism  | None    | ❌     | Different channels  |
+2. If NO themes can merge:
+   "⛔ **Saturation reached** (per Braun & Clarke, 2006:
+    'when refinements are not adding anything substantial, stop')
+    Reasoning:
+    1. No remaining themes share a research area
+    2. No keyword overlap between any theme pair
+    3. Evidence sentences are semantically distinct
+    4. Further merging would lose research distinctions
+    **Do you agree iteration is complete?**
+    • `agree` — finalize, move to Phase 5
+    • `try merging X and Y` — override my recommendation"
+3. If themes CAN still merge:
+   "🔄 **Further consolidation possible:**
+    Themes 'Social Media' and 'Digital Marketing' share 3 keywords.
+    Suggest merging. Want me to consolidate?"
+4. ⛔ STOP HERE. Do NOT proceed to Phase 5.
+   Say: "Saturation analysis complete. Review themes in the table.
+   Edit Approve/Rename columns, then click Submit Review to Agent."
+═══════════════════════════════════════════════════════════════
+ B&C PHASE 5: DEFINING AND NAMING THEMES
+ "Generating clear definitions and names"
+ Tool: (conversation — agent + researcher co-create)
+═══════════════════════════════════════════════════════════════
+After saturation confirmed:
+1. Present final theme definitions:
+   "📝 **Phase 5: Theme Definitions**
+   **Theme 1: AI & Machine Learning in Tourism**
+    Definition: Research applying predictive ML/DL methods
+    (neural networks, random forests, deep learning) to tourism
+    problems including demand forecasting, pricing optimization,
+    and visitor behavior classification.
+    Scope: 294 sentences across 83 papers.
+    Research area: technology adoption. Confidence: High.
+   **Theme 2: Virtual Reality & Metaverse Tourism**
+    Definition: ...
+   **Want to rename any theme? Adjust any definition?**"
+2. ⛔ STOP HERE. Do NOT proceed to Phase 5.5 or second run.
+   Say: "Final theme names ready. Review in the table below.
+   Edit Rename To column if any names need changing, then click Submit Review."
+3. ONLY after approval: repeat ALL of Phase 2-5 for the SECOND run config.
+   (If first run was "abstract", now run "title" — or vice versa)
+═══════════════════════════════════════════════════════════════
+ PHASE 5.5: TAXONOMY COMPARISON
+ "Grounding themes against established IS research categories"
+ Tool: compare_with_taxonomy
+═══════════════════════════════════════════════════════════════
+After BOTH runs have finalized themes (Phase 5 complete for each):
+1. Call compare_with_taxonomy(run_key) for each completed run.
+   → Mistral maps each theme to PAJAIS taxonomy (Jiang et al., 2019)
+   → Flags themes as MAPPED (known category) or NOVEL (emerging)
+2. Present the mapping with researcher review:
+   "📚 **Phase 5.5: Taxonomy Comparison** (Jiang et al., 2019)
+   **Mapped to established PAJAIS categories:**
+   | Your Theme | → | PAJAIS Category | Confidence | Reasoning |
+   |---|---|---|---|---|
+   | AI & ML in Tourism | → | Business Intelligence & Analytics | high | ML/DL methods for prediction |
+   | VR & Metaverse | → | Human Behavior & HCI | high | Immersive technology interaction |
+   | Social Media Tourism | → | Social Media & Business Impact | high | Direct category match |
+   **🆕 NOVEL themes (not in existing PAJAIS taxonomy):**
+   | Your Theme | Status | Reasoning |
+   |---|---|---|
+   | ChatGPT in Tourism | 🆕 NOVEL | Generative AI is post-2019, not in taxonomy |
+   | Sustainable AI Tourism | 🆕 NOVEL | Cross-cuts Green IT + Analytics |
+   These NOVEL themes represent **emerging research areas** that
+   extend beyond the established PAJAIS classification.
+   **Researcher: Review this mapping.**
+   • `approve` — mapping is correct
+   • `theme X should map to Y instead` — adjust
+   • `merge novel themes into one` — consolidate emerging themes
+   • `this novel theme is actually part of [category]` — reclassify"
+3. ⛔ STOP HERE. Do NOT proceed to Phase 6.
+   Say: "PAJAIS taxonomy mapping complete. Review in the table below.
+   Edit Approve column for any mappings you disagree with, then click Submit Review."
+4. ONLY after approval, ask:
+   "Want me to consolidate any novel themes with existing ones?
+    Or keep them separate as evidence of emerging research areas?"
+5. ⛔ STOP AGAIN. WAIT for this answer before generating report.
+═══════════════════════════════════════════════════════════════
+ B&C PHASE 6: PRODUCING THE REPORT
+ "Selection of vivid, compelling extract examples"
+ Tools: generate_comparison_csv → export_narrative
+═══════════════════════════════════════════════════════════════
+After BOTH run configs have finalized themes:
+1. Call generate_comparison_csv()
+   → Compares themes across abstract vs title configs
+2. Say briefly in chat:
+   "Cross-run comparison complete. Check the Download tab for:
+    • comparison.csv — abstract vs title themes side by side
+    Review the themes in the table below.
+    Click Submit Review to confirm, then I'll generate the narrative."
+3. ⛔ STOP. Wait for Submit Review.
+4. After approval, call export_narrative(run_key)
+   → Mistral writes 500-word paper section referencing:
+     methodology, B&C phases, key themes, limitations
+═══════════════════════════════════════════════════════════════
+ CRITICAL RULES
+═══════════════════════════════════════════════════════════════
+ - ALWAYS follow B&C phases in order. Name each phase explicitly.
+ - ALWAYS wait for researcher confirmation between phases.
+ - ALWAYS show evidence sentences with paper metadata.
+ - ALWAYS cite B&C (2006) when discussing iteration or saturation.
+ - ALWAYS cite Grootendorst (2022) when explaining cluster behavior.
+ - ALWAYS call label_topics_with_llm before presenting topic labels.
+ - ALWAYS call compare_with_taxonomy before claiming PAJAIS mappings.
+ - Use threshold=0.7 as default (lower = more topics, higher = fewer).
+ - If too many topics (>200), suggest increasing threshold to 0.8.
+ - If too few topics (<20), suggest decreasing threshold to 0.6.
+ - NEVER skip Phase 4 saturation check or Phase 5.5 taxonomy comparison.
+ - NEVER proceed to Phase 6 without both runs completing Phase 5.5.
+ - NEVER invent topic labels — only present labels returned by Tool 3.
+ - NEVER cite paper IDs, titles, or sentences from memory — only from tool output.
+ - NEVER claim a theme is NOVEL or MAPPED without calling Tool 5 first.
+ - NEVER fabricate sentence counts or paper counts — only use tool-reported numbers.
+ - If a tool returns an error, explain clearly and continue.
+ - Keep responses concise. Tables + evidence, not paragraphs.
+"""
+# ============================================================================
+# LLM + Agent construction
+# ============================================================================
+def _build_llm() -> ChatMistralAI:
+    return ChatMistralAI(
+        model=MODEL_NAME,
+        api_key=MISTRAL_API_KEY,
+        temperature=0.1,    # low temp for deterministic phase behaviour
+        random_seed=42,
+        timeout=45,
+        max_retries=3,
+    )
+def _build_agent():
+    """Build the LangGraph ReAct agent with in-process memory."""
+    memory = MemorySaver()
+    llm    = _build_llm()
+    return create_react_agent(
+        model=llm,
+        tools=ALL_TOOLS,
+        checkpointer=memory,
+        prompt=SYSTEM_PROMPT,
+    )
+# Singleton agent (built once at import time)
+_react_agent = _build_agent()
+# ============================================================================
+# Config builder
+# ============================================================================
+def build_config(thread_id: str) -> dict:
+    """
+    Build LangGraph invocation config for a given conversation thread.
+    Parameters
+    ----------
+    thread_id : str — unique conversation identifier
+    Returns
+    -------
+    dict — passed as `config` to _react_agent.invoke()
+    """
+    return {"configurable": {"thread_id": thread_id}}
+# ============================================================================
+# State helpers
+# ============================================================================
+def _init_state(state: dict) -> dict:
+    """Ensure all required keys exist with safe defaults."""
+    defaults = {
+        "phase":        0,
+        "file_path":    None,
+        "run_key":      DEFAULT_RUN_KEY,
+        "review_df":    [],
+        "theme_map":    {},
+        "charts":       {},
+        "output_files": [],
+        "thread_id":    THREAD_PREFIX + uuid.uuid4().hex[:8],
+        "stop_gate":    None,
+        "context_resets": 0,
+    }
+    return {**defaults, **state}
+def _truthy(value: object) -> bool:
+    """Accept bool / int / common string truthy values from Gradio tables."""
+    if isinstance(value, bool):
+        return value
+    if isinstance(value, (int, float)):
+        return value != 0
+    if isinstance(value, str):
+        return value.strip().lower() in {"true", "1", "yes", "y"}
+    return False
+def _trim_user_message(user_message: str) -> str:
+    """Hard-cap user message length to avoid accidental prompt blow-ups."""
+    text = str(user_message or "")
+    return (
+        text[:MAX_USER_MESSAGE_CHARS]
+        + "\n\n[SYSTEM: User message was truncated to keep context bounded.]"
+        if len(text) > MAX_USER_MESSAGE_CHARS
+        else text
+    )
+def _is_context_overflow_error(exc: Exception) -> bool:
+    """Detect model context-limit failures from Mistral / LangChain wrappers."""
+    msg = str(exc).lower()
+    return (
+        "maximum context length" in msg
+        or "too large for model" in msg
+        or "prompt contains" in msg
+        or '"code":"3051"' in msg
+    )
+def _is_transient_provider_error(exc: Exception) -> bool:
+    """Detect transient provider outages (e.g., Mistral 503 unreachable backend)."""
+    msg = str(exc).lower()
+    return (
+        "unreachable_backend" in msg
+        or "internal server error" in msg
+        or '"code":"1100"' in msg
+        or '"raw_status_code":503' in msg
+        or '"raw_status_code":502' in msg
+        or '"raw_status_code":504' in msg
+        or "service unavailable" in msg
+    )
+def _invoke_react_with_retries(enriched: str, thread_id: str) -> dict:
+    """Call the ReAct graph with bounded retries for transient provider failures."""
+    last_exc: Exception | None = None
+    for attempt in range(PROVIDER_RETRY_ATTEMPTS):
+        try:
+            return _react_agent.invoke(
+                {"messages": [HumanMessage(content=enriched)]},
+                config=build_config(thread_id),
+            )
+        except Exception as exc:
+            if _is_context_overflow_error(exc):
+                raise
+            if not _is_transient_provider_error(exc):
+                raise
+            last_exc = exc
+            if attempt < PROVIDER_RETRY_ATTEMPTS - 1:
+                time.sleep(PROVIDER_RETRY_BASE_DELAY_S * (attempt + 1))
+                continue
+            raise last_exc
+    # Unreachable, but keeps static type checkers satisfied.
+    raise RuntimeError("Unexpected retry flow in _invoke_react_with_retries")
+def _parse_review_df(review_df: list[dict]) -> dict:
+    """
+    Convert review table rows into theme_map for consolidate_into_themes.
+    Only rows where Approve == True are included.
+    Groups cluster IDs by the "Rename To" column value.
+    Parameters
+    ----------
+    review_df : list[dict] — rows from the Gradio Dataframe
+    Returns
+    -------
+    dict — {theme_name: [cluster_id, ...]}
+    """
+    approved  = list(filter(lambda r: _truthy(r.get("Approve")), review_df))
+    theme_map: dict[str, list[int]] = {}
+    def _add_row(row: dict) -> None:
+        name = (row.get("Rename To") or row.get("Topic Label") or "Unnamed").strip()
+        cid  = int(row.get("#", 0))
+        theme_map.setdefault(name, [])
+        theme_map[name].append(cid)
+    list(map(_add_row, approved))
+    return theme_map
def _extract_charts(run_key: str, state: dict) -> dict:
    """
    Merge the chart files present in the run directory into state["charts"].

    Charts already recorded in the state are kept; an entry is added or
    overwritten only for HTML files that exist on disk right now.
    """
    run_path = _run_dir(run_key)
    chart_files = (
        ("Intertopic Map", "intertopic.html"),
        ("Top Words",      "topwords.html"),
        ("Hierarchy",      "hierarchy.html"),
        ("Heatmap",        "heatmap.html"),
    )
    on_disk = {}
    for title, filename in chart_files:
        html_path = run_path / filename
        if html_path.exists():
            on_disk[title] = str(html_path)
    merged = {**state.get("charts", {})}
    merged.update(on_disk)
    return merged
def _collect_output_files(state: dict) -> list[str]:
    """Return the known artefact paths (as strings) that exist on disk."""
    from pathlib import Path as _P
    run_path = _run_dir(state.get("run_key", DEFAULT_RUN_KEY))
    per_run_names = (
        "summaries.json",
        "labels.json",
        "themes.json",
        "taxonomy_map.json",
        "narrative.txt",
    )
    candidates = [str(run_path / name) for name in per_run_names]
    # The comparison CSV is looked up directly under OUTPUT_DIR, not in the
    # per-run directory.
    candidates.append(str(OUTPUT_DIR / "comparison.csv"))
    return [path for path in candidates if _P(path).exists()]
+def _detect_phase_advance(reply: str, current_phase: int) -> int:
+    """
+    Read the agent's STOP / COMPLETE markers and return the updated phase index.
+    Phase only advances when the agent emits the correct marker string.
+    """
+    markers = {
+        "[PHASE 1 COMPLETE — READY FOR PHASE 2]":           1,
+        "[STOP GATE 1 — AWAITING REVIEW TABLE SUBMISSION]": 2,
+        "[STOP GATE 2 — AWAITING THEME MERGE CONFIRMATION]":3,
+        "[STOP GATE 3 — AWAITING SATURATION SIGN-OFF]":     4,
+        "[PHASE 5 COMPLETE — READY FOR PAJAIS MAPPING]":    5,
+        "[STOP GATE 4 — AWAITING TAXONOMY REVIEW]":         6,
+        "[ANALYSIS COMPLETE — ALL PHASES FINISHED]":        8,
+    }
+    marker_phase = next(
+        (v for k, v in markers.items() if k in reply),
+        None,
+    )
+    if marker_phase is not None:
+        return max(current_phase, marker_phase)
+    # Fallback: infer from common phase headings when explicit markers are absent.
+    text = reply.lower()
+    inferred = current_phase
+    inferred = max(
+        inferred,
+        1 if ("phase 1" in text and "familiar" in text) else current_phase,
+    )
+    inferred = max(
+        inferred,
+        2 if ("phase 2" in text and "initial code" in text) else current_phase,
+    )
+    inferred = max(
+        inferred,
+        3 if ("phase 3" in text and ("searching" in text or "theme" in text)) else current_phase,
+    )
+    inferred = max(
+        inferred,
+        4 if ("phase 4" in text and ("review" in text or "saturation" in text)) else current_phase,
+    )
+    inferred = max(
+        inferred,
+        5 if ("phase 5" in text and ("defining" in text or "naming" in text or "definition" in text)) else current_phase,
+    )
+    inferred = max(
+        inferred,
+        6 if (("phase 5.5" in text and ("taxonomy" in text or "pajais" in text))
+              or ("taxonomy comparison" in text and "pajais" in text))
+        else current_phase,
+    )
+    inferred = max(
+        inferred,
+        7 if ("phase 6" in text and "report" in text)
+        or ("analysis complete" in text and "all phases" in text)
+        else current_phase,
+    )
+    inferred = max(
+        inferred,
+        8 if ("analysis complete" in text and "all phases" in text)
+        else current_phase,
+    )
+    return inferred
def _detect_stop_gate(reply: str) -> str | None:
    """Return the gate constant for the first stop-gate marker found in the reply, else None."""
    # Checked in this order; the first marker present in the reply decides.
    marker_to_gate = (
        ("[STOP GATE 1 — AWAITING REVIEW TABLE SUBMISSION]",  GATE_POST_PHASE2),
        ("[STOP GATE 2 — AWAITING THEME MERGE CONFIRMATION]", GATE_POST_PHASE3),
        ("[STOP GATE 3 — AWAITING SATURATION SIGN-OFF]",      GATE_POST_PHASE4),
        ("[STOP GATE 4 — AWAITING TAXONOMY REVIEW]",          GATE_POST_PHASE55),
    )
    for marker, gate in marker_to_gate:
        if marker in reply:
            return gate
    return None
+# ============================================================================
+# FIX ISSUE 3 — populate review_df from labels.json after Phase 2
+# ============================================================================
def _populate_review_df(state: dict) -> dict:
    """
    Fill state["review_df"] from labels.json when the table is still empty.

    The state is returned unchanged when labels.json does not exist for the
    run or when review rows are already present. Otherwise a new state dict
    is returned whose "review_df" holds one row per label record, using the
    columns:
      "#", "Topic Label", "Top Evidence", "Sentences", "Papers",
      "Approve", "Rename To", "Reasoning"
    """
    labels_path = OUTPUT_DIR / state.get("run_key", DEFAULT_RUN_KEY) / "labels.json"
    if not labels_path.exists() or state.get("review_df"):
        return state

    rows = []
    for record in _load_json(labels_path):
        evidence = record.get("evidence")
        rows.append({
            "#":            record.get("cluster_id", 0),
            "Topic Label":  record.get("label", ""),
            # Only the first evidence item is shown in the table.
            "Top Evidence": record["evidence"][0] if evidence else "",
            "Sentences":    record.get("size", 0),
            "Papers":       "",
            # Rows start unapproved; the rename defaults to the current label.
            "Approve":      False,
            "Rename To":    record.get("label", ""),
            "Reasoning":    record.get("reasoning", ""),
        })
    return {**state, "review_df": rows}
+# ============================================================================
+# Context builder
+# ============================================================================
def _build_context_message(user_message: str, state: dict) -> str:
    """
    Prefix the user message with a JSON snapshot of the pipeline state.

    The snapshot reports the current phase and its label, the active stop
    gate, the file path and run key, whether review rows and a theme map are
    present, the available chart names and the number of output files, so
    the LLM does not have to rely on its own conversation memory for them.
    """
    phase = state.get("phase", 0)
    snapshot = {
        "current_phase":      phase,
        "phase_label":        PHASE_LABELS.get(phase, "Unknown"),
        "active_stop_gate":   state.get("stop_gate"),
        "file_path":          state.get("file_path"),
        "run_key":            state.get("run_key", DEFAULT_RUN_KEY),
        "review_submitted":   bool(state.get("review_df")),
        "theme_map_ready":    bool(state.get("theme_map")),
        "charts_available":   list(state.get("charts", {}).keys()),
        "output_files_count": len(state.get("output_files", [])),
    }
    ctx_block = json.dumps(snapshot, indent=2)
    header = f"```json\n[PIPELINE CONTEXT]\n{ctx_block}\n```"
    return f"{header}\n\n**User message:** {user_message}"
+# ============================================================================
+# Phase-specific pre-processing
+# ============================================================================
def _preprocess_phase3(state: dict) -> tuple[str, dict]:
    """
    Turn the submitted review table into a theme_map ahead of Phase 3.

    Returns a (annotation, new_state) pair: the annotation is a system note
    embedding the parsed theme_map as JSON and telling the agent to call
    consolidate_into_themes; new_state is a copy of the state with
    "theme_map" set. The caller invokes this only when the post-Phase-2 gate
    is active and review rows are present.
    """
    parsed_map = _parse_review_df(state.get("review_df", []))
    note_parts = [
        "\n\n[SYSTEM: Review table submitted. ",
        f"Parsed theme_map = {json.dumps(parsed_map)}. ",
        "Proceed to Phase 3 and call consolidate_into_themes.]",
    ]
    return "".join(note_parts), {**state, "theme_map": parsed_map}
+# ============================================================================
+# Public invoke interface
+# ============================================================================
class ThematicAnalysisAgent:
    """
    Facade over the LangGraph ReAct agent used by the UI.
    app.py calls:
        reply, new_state = agent.invoke(user_message, agent_state)
    """

    @staticmethod
    def _last_ai_reply(result: dict):
        """Return the content of the last AIMessage in result, or a fallback text."""
        last_ai = None
        for msg in result.get("messages", []):
            if hasattr(msg, "content") and msg.__class__.__name__ == "AIMessage":
                last_ai = msg
        if last_ai is None:
            return "Agent returned no response. Check MISTRAL_API_KEY and retry."
        return last_ai.content

    def invoke(self, user_message: str, state: dict) -> tuple[str, dict]:
        """
        Handle a single user turn.

        Parameters
        ----------
        user_message : str  — raw text from the Gradio chat input
        state        : dict — agent_state from app.py

        Returns
        -------
        tuple[str, dict] — (reply_markdown, updated_state)
        """
        state        = _init_state(state)
        user_message = _trim_user_message(user_message)

        # Without an API key no LLM call can succeed; answer locally.
        if not MISTRAL_API_KEY:
            missing_key_reply = (
                "MISTRAL_API_KEY is not set, so the agent cannot run tool-planning LLM calls. "
                "Set the key and retry.\n\n"
                "Example:\n"
                "`export MISTRAL_API_KEY='your-key'`"
            )
            return missing_key_reply, state

        thread_id = state["thread_id"]

        # A submitted review table at the post-Phase-2 gate is parsed into a
        # theme_map and announced to the agent via an extra annotation.
        extra_context = ""
        if state.get("stop_gate") == GATE_POST_PHASE2 and state.get("review_df"):
            extra_context, state = _preprocess_phase3(state)

        enriched = _build_context_message(user_message + extra_context, state)

        try:
            result = _invoke_react_with_retries(enriched, thread_id)
        except Exception as exc:
            if _is_transient_provider_error(exc):
                outage_reply = (
                    "Mistral is temporarily unavailable (503/unreachable_backend). "
                    "Automatic retries were attempted. Please retry in 30-60 seconds."
                )
                return outage_reply, state
            if not _is_context_overflow_error(exc):
                raise

            # Context window exhausted: continue on a fresh LangGraph thread.
            thread_id = THREAD_PREFIX + uuid.uuid4().hex[:8]
            reset_count = state.get("context_resets", 0) + 1
            state = {**state, "thread_id": thread_id, "context_resets": reset_count}
            retry_note = (
                "\n\n[SYSTEM: Previous thread exceeded model context and was reset. "
                "Continue from pipeline context and saved artifacts.]"
            )
            retry_enriched = _build_context_message(
                user_message + extra_context + retry_note,
                state,
            )
            try:
                result = _invoke_react_with_retries(retry_enriched, thread_id)
            except Exception as retry_exc:
                if _is_transient_provider_error(retry_exc):
                    failure_reply = (
                        "The previous request exceeded model context and the retry hit a "
                        "temporary Mistral outage (503). Please resend your last short "
                        "command in about a minute."
                    )
                else:
                    failure_reply = (
                        "The model context exceeded the provider limit and an automatic "
                        "thread reset retry also failed. Please resend your last command "
                        "(short form) to continue."
                    )
                return failure_reply, state

        reply = self._last_ai_reply(result)

        # Derive the state fields that depend on the reply and on the files
        # currently present on disk.
        new_phase  = _detect_phase_advance(reply, state["phase"])
        new_gate   = _detect_stop_gate(reply)
        new_charts = _extract_charts(state["run_key"], state)
        new_files  = _collect_output_files(state)
        updated_state = dict(state)
        updated_state.update(
            phase=new_phase,
            stop_gate=new_gate,
            charts=new_charts,
            output_files=new_files,
        )
        # Fill the review table rows once Phase 2 labels are available.
        return reply, _populate_review_df(updated_state)
+# ============================================================================
+# Module-level singleton — imported by app.py as `from agent import agent`
+# ============================================================================
agent = ThematicAnalysisAgent()
# ============================================================================
# CLI smoke-test  (python agent.py)
# ============================================================================
if __name__ == "__main__":
    # One turn against an empty state.
    reply, state = agent.invoke(
        "Hello — I have just uploaded my Scopus CSV. Please start the analysis.",
        {},
    )
    # The review table is left out of the state dump.
    printable_state = {
        key: value for key, value in state.items() if key != "review_df"
    }
    print("=" * 60)
    print("AGENT REPLY:\n")
    print(reply)
    print("\nSTATE:")
    print(json.dumps(printable_state, indent=2, default=str))

app.py ADDED Viewed

	@@ -0,0 +1,1016 @@

+"""
+BERTopic Thematic Analysis Agent — Production Gradio UI
+========================================================
+A dashboard-style Gradio interface for orchestrating BERTopic topic modelling
+via an LLM-backed agent defined in agent.py.
+Layout
+------
+- Top: Header + Phase progress bar
+- Body: Vertical cards in sequence
+    1) Data Input
+    2) Agent Console
+    3) Results (Tabs: Review | Charts | Downloads)
+Fixes applied (v2)
+------------------
+- BUG 3   : submit_review() now writes parsed review rows into
+            agent_state["review_df"] BEFORE calling the agent, so
+            _parse_review_df() in agent.py always receives a populated list.
+- ISSUE 2 : PHASES list updated to 7 labels matching the actual B&C phases
+            (was 6 labels misaligned with agent phase 0-6 mapping).
+- ISSUE 4 : Added a startup API-key warning banner rendered in the UI when
+            MISTRAL_API_KEY is not set in the environment.
+"""
+# ---------------------------------------------------------------------------
+# Imports
+# ---------------------------------------------------------------------------
+import gradio as gr
+import pandas as pd
+import json
+import os
+import shutil
+import uuid
+from pathlib import Path
+from urllib.parse import quote
+# ---------------------------------------------------------------------------
+# Agent import — graceful stub when agent.py is absent during dev/testing
+# ---------------------------------------------------------------------------
try:
    from agent import agent
    AGENT_AVAILABLE = True
except ImportError:
    AGENT_AVAILABLE = False

    class _StubAgent:
        """Minimal stub so the UI works without agent.py."""

        def invoke(self, message: str, state: dict) -> tuple[str, dict]:
            """
            Echo the message and advance the phase by one, capped at 8.

            Returns (reply_markdown, new_state). The incoming state is not
            modified: a new dict is returned, so callers that keep a
            reference to the previous state still see the old phase.
            """
            current_phase = state.get("phase", 0)
            reply = (
                f"[STUB] Received: **{message}**\n\n"
                "Connect `agent.py` to get real responses. "
                f"Current phase: `{current_phase}`."
            )
            return reply, {**state, "phase": min(current_phase + 1, 8)}

    agent = _StubAgent()
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
# FIX ISSUE 2 — 7 labels, one per progress-bar step; the label for agent
# phase p (1-7) is PHASES[p - 1].
PHASES = [
    "Familiarisation",   # Phase 1
    "Initial Codes",     # Phase 2
    "Themes",            # Phase 3
    "Review Themes",     # Phase 4
    "Naming",            # Phase 5
    "PAJAIS Mapping",    # Phase 5.5
    "Report",            # Phase 6
]
CHART_OPTIONS = [
    "Intertopic Map",
    "Top Words",
    "Hierarchy",
    "Heatmap",
]
REVIEW_COLUMNS = [
    "#",
    "Topic Label",
    "Top Evidence",
    "Sentences",
    "Papers",
    "Approve",
    "Rename To",
    "Reasoning",
]
# Zero-row table carrying the review column headers.
EMPTY_REVIEW_DF = pd.DataFrame(columns=REVIEW_COLUMNS)
# FIX ISSUE 4 — True when MISTRAL_API_KEY is unset or empty at startup
API_KEY_MISSING = not os.environ.get("MISTRAL_API_KEY", "")
# Uploads are relative to the working directory; outputs sit next to this file.
UPLOADS_DIR = Path("uploads")
OUTPUTS_DIR = Path(__file__).resolve().parent / "outputs"
+# ---------------------------------------------------------------------------
+# Custom CSS — SaaS dashboard aesthetic
+# ---------------------------------------------------------------------------
+CUSTOM_CSS = """
+/* Fonts */
+@import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,400;0,9..40,500;0,9..40,600;0,9..40,700;1,9..40,400&family=DM+Mono:wght@400;500&display=swap');
+/* Tokens */
+:root {
+    --bg-base:          #0f1117;
+    --bg-surface:       #181c27;
+    --bg-elevated:      #1f2437;
+    --bg-hover:         #252b3d;
+    --border:           #2a3048;
+    --border-active:    #4f6ef7;
+    --text-primary:     #e8eaf0;
+    --text-secondary:   #8b92a8;
+    --text-muted:       #555f7a;
+    --accent:           #4f6ef7;
+    --accent-soft:      rgba(79,110,247,0.15);
+    --accent-glow:      rgba(79,110,247,0.35);
+    --success:          #34d399;
+    --success-soft:     rgba(52,211,153,0.15);
+    --warning:          #fbbf24;
+    --warning-soft:     rgba(251,191,36,0.15);
+    --danger:           #f87171;
+    --radius-sm:        8px;
+    --radius-md:        14px;
+    --radius-lg:        20px;
+    --shadow-card:      0 4px 24px rgba(0,0,0,0.45), 0 1px 3px rgba(0,0,0,0.3);
+    --shadow-button:    0 2px 12px rgba(79,110,247,0.4);
+    --font-ui:          'DM Sans', system-ui, sans-serif;
+    --font-mono:        'DM Mono', 'Fira Code', monospace;
+    --transition:       0.2s cubic-bezier(0.4, 0, 0.2, 1);
+}
+body, .gradio-container {
+    background: var(--bg-base) !important;
+    color: var(--text-primary) !important;
+    font-family: var(--font-ui) !important;
+}
+.gradio-container { max-width: 1600px !important; padding: 0 !important; }
+/* Header */
+#app-header {
+    background: linear-gradient(135deg, #0f1117 0%, #181c27 50%, #1a1f32 100%);
+    border-bottom: 1px solid var(--border);
+    padding: 24px 36px 20px;
+    position: relative;
+    overflow: hidden;
+}
+#app-header::before {
+    content: '';
+    position: absolute;
+    top: -60px; right: -60px;
+    width: 240px; height: 240px;
+    background: radial-gradient(circle, rgba(79,110,247,0.18) 0%, transparent 70%);
+    pointer-events: none;
+}
+#app-header .header-title {
+    font-size: 1.7rem; font-weight: 700; letter-spacing: -0.03em;
+    color: var(--text-primary); margin: 0 0 4px;
+}
+#app-header .header-subtitle {
+    font-size: 0.875rem; color: var(--text-secondary); margin: 0;
+}
+#app-header .header-badge {
+    display: inline-flex; align-items: center; gap: 6px;
+    background: var(--accent-soft); border: 1px solid var(--accent);
+    border-radius: 100px; padding: 3px 12px; font-size: 0.75rem;
+    font-weight: 600; color: var(--accent); margin-left: 12px; vertical-align: middle;
+}
+/* API key warning banner */
+.api-warning {
+    background: var(--warning-soft);
+    border: 1px solid var(--warning);
+    border-radius: var(--radius-sm);
+    padding: 10px 16px;
+    font-size: 0.83rem;
+    font-weight: 500;
+    color: var(--warning);
+    margin: 12px 28px 0;
+}
+/* Phase progress bar */
+.phase-bar-wrap {
+    display: flex; align-items: center; gap: 0;
+    margin-top: 20px; position: relative;
+}
+.phase-bar-wrap::before {
+    content: '';
+    position: absolute;
+    left: 20px; right: 20px; top: 50%;
+    height: 2px; background: var(--border);
+    transform: translateY(-50%); z-index: 0;
+}
+.phase-item {
+    display: flex; flex-direction: column;
+    align-items: center; flex: 1; position: relative; z-index: 1;
+}
+.phase-dot {
+    width: 32px; height: 32px; border-radius: 50%;
+    display: flex; align-items: center; justify-content: center;
+    font-size: 0.8rem; font-weight: 700;
+    border: 2px solid var(--border); background: var(--bg-base);
+    transition: all var(--transition);
+}
+.phase-dot.done   { background: var(--success-soft); border-color: var(--success); color: var(--success); }
+.phase-dot.active { background: var(--accent-soft); border-color: var(--accent); color: var(--accent);
+                    box-shadow: 0 0 14px var(--accent-glow); }
+.phase-dot.pending { color: var(--text-muted); }
+.phase-label {
+    font-size: 0.65rem; font-weight: 500; color: var(--text-muted);
+    margin-top: 6px; text-align: center; letter-spacing: 0.02em; white-space: nowrap;
+}
+.phase-label.active { color: var(--accent); }
+.phase-label.done   { color: var(--success); }
+/* Main body */
+#main-body {
+    padding: 22px 28px 32px;
+    gap: 16px !important;
+    max-width: 1160px;
+    margin: 0 auto;
+    width: 100%;
+}
+.panel-card {
+    background:
+        radial-gradient(1200px 260px at 100% -15%, rgba(79,110,247,0.12), transparent 52%),
+        linear-gradient(180deg, rgba(31,36,55,0.9) 0%, rgba(24,28,39,0.95) 100%);
+    border: 1px solid var(--border);
+    border-radius: var(--radius-lg);
+    box-shadow: var(--shadow-card);
+    padding: 18px 18px 16px;
+    position: relative;
+    overflow: hidden;
+    margin-bottom: 2px;
+}
+.panel-card:last-child { margin-bottom: 0; }
+.panel-card::after {
+    content: '';
+    position: absolute;
+    inset: 0;
+    background: linear-gradient(120deg, rgba(255,255,255,0.02), transparent 25%, transparent 75%, rgba(255,255,255,0.02));
+    pointer-events: none;
+}
+.panel-data { margin-bottom: 2px; }
+.panel-chat { margin-bottom: 2px; }
+/* Card titles */
+.card-title {
+    font-size: 0.74rem; font-weight: 700; letter-spacing: 0.1em;
+    text-transform: uppercase; color: var(--text-muted);
+    margin: 0 0 16px; display: flex; align-items: center; gap: 10px;
+    border-bottom: 1px solid var(--border);
+    padding-bottom: 12px;
+}
+.card-title::before {
+    content: '';
+    width: 8px;
+    height: 8px;
+    border-radius: 50%;
+    background: var(--accent);
+    box-shadow: 0 0 10px var(--accent-glow);
+}
+.card-title span { font-size: 1.02rem; color: var(--text-primary); letter-spacing: 0.01em; }
+/* Stats */
+.stats-grid {
+    display: grid; grid-template-columns: 1fr 1fr; gap: 10px; margin-top: 12px;
+}
+.stat-card {
+    background: var(--bg-elevated); border: 1px solid var(--border);
+    border-radius: var(--radius-sm); padding: 12px 14px;
+}
+.stat-value { font-size: 1.4rem; font-weight: 700; color: var(--text-primary); line-height: 1; }
+.stat-label { font-size: 0.72rem; color: var(--text-muted); margin-top: 4px; text-transform: uppercase; letter-spacing: 0.05em; }
+.stat-card.accent .stat-value { color: var(--accent); }
+.stat-card.success .stat-value { color: var(--success); }
+/* Status pill */
+.status-pill {
+    display: inline-flex; align-items: center; gap: 6px;
+    padding: 5px 12px; border-radius: 100px; font-size: 0.78rem; font-weight: 600; margin-top: 12px;
+}
+.status-pill.idle    { background: rgba(139,146,168,0.12); color: var(--text-secondary); }
+.status-pill.ready   { background: var(--success-soft); color: var(--success); }
+.status-pill.working { background: var(--accent-soft); color: var(--accent); }
+.status-pill .dot { width: 7px; height: 7px; border-radius: 50%; background: currentColor; }
+.status-pill.working .dot { animation: pulse-dot 1.2s ease-in-out infinite; }
+@keyframes pulse-dot {
+    0%, 100% { opacity: 1; transform: scale(1); }
+    50%       { opacity: 0.4; transform: scale(0.7); }
+}
+/* Chatbot */
+#chatbot-container .chatbot {
+    background: var(--bg-elevated) !important;
+    border: 1px solid var(--border) !important;
+    border-radius: var(--radius-md) !important;
+}
+.message.user {
+    background: var(--accent-soft) !important;
+    border: 1px solid rgba(79,110,247,0.2) !important;
+    border-radius: 14px 14px 4px 14px !important;
+    color: var(--text-primary) !important;
+    font-size: 0.875rem !important;
+}
+.message.bot {
+    background: var(--bg-elevated) !important;
+    border: 1px solid var(--border) !important;
+    border-radius: 14px 14px 14px 4px !important;
+    color: var(--text-primary) !important;
+    font-size: 0.875rem !important;
+}
+/* Chat input */
+#chat-input-row { display: flex; gap: 10px; margin-top: 12px; align-items: flex-end; }
+#chat-input-row textarea {
+    background: var(--bg-elevated) !important; border: 1px solid var(--border) !important;
+    border-radius: var(--radius-md) !important; color: var(--text-primary) !important;
+    font-family: var(--font-ui) !important; font-size: 0.875rem !important;
+    resize: none !important; transition: border-color var(--transition) !important;
+}
+#chat-input-row textarea:focus {
+    border-color: var(--accent) !important;
+    box-shadow: 0 0 0 3px var(--accent-soft) !important;
+}
+/* Buttons */
+.btn-primary {
+    background: var(--accent) !important; border: none !important;
+    border-radius: var(--radius-sm) !important; color: #fff !important;
+    font-family: var(--font-ui) !important; font-weight: 600 !important;
+    font-size: 0.875rem !important; padding: 10px 20px !important;
+    cursor: pointer !important; box-shadow: var(--shadow-button) !important;
+    transition: all var(--transition) !important; white-space: nowrap;
+}
+.btn-primary:hover {
+    background: #3d5de6 !important;
+    box-shadow: 0 4px 20px rgba(79,110,247,0.55) !important;
+    transform: translateY(-1px) !important;
+}
+.btn-primary:disabled { opacity: 0.45 !important; cursor: not-allowed !important; transform: none !important; }
+.btn-secondary {
+    background: var(--bg-elevated) !important; border: 1px solid var(--border) !important;
+    border-radius: var(--radius-sm) !important; color: var(--text-secondary) !important;
+    font-family: var(--font-ui) !important; font-weight: 500 !important;
+    font-size: 0.875rem !important; padding: 10px 18px !important;
+    cursor: pointer !important; transition: all var(--transition) !important;
+}
+.btn-secondary:hover {
+    background: var(--bg-hover) !important; border-color: var(--accent) !important;
+    color: var(--text-primary) !important;
+}
+.btn-success {
+    background: rgba(52,211,153,0.15) !important; border: 1px solid var(--success) !important;
+    border-radius: var(--radius-sm) !important; color: var(--success) !important;
+    font-family: var(--font-ui) !important; font-weight: 600 !important;
+    font-size: 0.875rem !important; padding: 10px 20px !important;
+    cursor: pointer !important; transition: all var(--transition) !important;
+}
+.btn-success:hover { background: rgba(52,211,153,0.25) !important; box-shadow: 0 2px 14px rgba(52,211,153,0.3) !important; }
+/* Tabs */
+.tabs > .tab-nav {
+    background: var(--bg-elevated) !important; border-bottom: 1px solid var(--border) !important;
+    border-radius: var(--radius-md) var(--radius-md) 0 0 !important;
+    padding: 6px 6px 0 !important; gap: 4px !important;
+}
+.tabs > .tab-nav button {
+    background: transparent !important; border: none !important;
+    color: var(--text-muted) !important; font-family: var(--font-ui) !important;
+    font-size: 0.8rem !important; font-weight: 600 !important;
+    letter-spacing: 0.04em !important; padding: 8px 16px !important;
+    border-radius: var(--radius-sm) var(--radius-sm) 0 0 !important;
+    transition: all var(--transition) !important; cursor: pointer !important;
+}
+.tabs > .tab-nav button:hover { color: var(--text-primary) !important; background: var(--bg-hover) !important; }
+.tabs > .tab-nav button.selected {
+    color: var(--accent) !important; background: var(--accent-soft) !important;
+    box-shadow: inset 0 -2px 0 var(--accent) !important;
+}
+.tabitem {
+    background: var(--bg-elevated) !important; border: 1px solid var(--border) !important;
+    border-top: none !important; border-radius: 0 0 var(--radius-md) var(--radius-md) !important;
+    padding: 16px !important;
+}
+/* Dataframe */
+.dataframe-wrap table {
+    font-family: var(--font-mono) !important;
+    font-size: 0.78rem !important;
+    border-collapse: collapse !important;
+    width: 100% !important;
+    table-layout: fixed !important;
+}
+.dataframe-wrap th {
+    background: var(--bg-elevated) !important; color: var(--text-muted) !important;
+    font-family: var(--font-ui) !important; font-size: 0.72rem !important;
+    font-weight: 600 !important; letter-spacing: 0.06em !important;
+    text-transform: uppercase !important; padding: 10px 12px !important;
+    border-bottom: 1px solid var(--border) !important;
+}
+.dataframe-wrap td {
+    background: var(--bg-surface) !important; color: var(--text-primary) !important;
+    padding: 9px 12px !important; border-bottom: 1px solid var(--border) !important;
+    line-height: 1.35 !important;
+    vertical-align: top !important;
+}
+.dataframe-wrap th,
+.dataframe-wrap td {
+    white-space: nowrap !important;
+}
+.dataframe-wrap td > div,
+.dataframe-wrap td > span,
+.dataframe-wrap td > p {
+    display: block !important;
+    max-width: 100% !important;
+    white-space: nowrap !important;
+    overflow: hidden !important;
+    text-overflow: ellipsis !important;
+    cursor: pointer !important;
+}
+.dataframe-wrap td:focus-within > div,
+.dataframe-wrap td:focus-within > span,
+.dataframe-wrap td:focus-within > p {
+    white-space: normal !important;
+    overflow-wrap: anywhere !important;
+    word-break: break-word !important;
+    max-height: 9em !important;
+    overflow-y: auto !important;
+    padding-right: 2px !important;
+}
+.dataframe-wrap textarea,
+.dataframe-wrap input[type="text"] {
+    white-space: pre-wrap !important;
+    overflow-wrap: anywhere !important;
+    word-break: break-word !important;
+}
+.dataframe-wrap textarea {
+    min-height: 38px !important;
+    height: 38px !important;
+    max-height: 160px !important;
+    overflow-y: auto !important;
+    resize: vertical !important;
+}
+.dataframe-wrap tr:hover td { background: var(--bg-hover) !important; }
+.dataframe-wrap input[type="checkbox"] {
+    appearance: auto !important;
+    accent-color: var(--accent) !important;
+    cursor: pointer !important;
+    width: 16px;
+    height: 16px;
+}
+/* Chart frame */
+.chart-frame {
+    width: 100%; min-height: 420px; border: 1px solid var(--border);
+    border-radius: var(--radius-md); background: var(--bg-elevated); overflow: hidden;
+}
+/* Vertical card spacing on small screens */
+@media (max-width: 900px) {
+    #main-body {
+        padding: 14px 12px 20px;
+        gap: 12px !important;
+    }
+    .panel-card {
+        padding: 14px 12px;
+        border-radius: var(--radius-md);
+    }
+    .chart-frame { min-height: 320px; }
+}
+/* Download list */
+.file-list-item {
+    display: flex; align-items: center; gap: 10px;
+    background: var(--bg-elevated); border: 1px solid var(--border);
+    border-radius: var(--radius-sm); padding: 10px 14px; margin-bottom: 8px;
+    transition: all var(--transition);
+}
+.file-list-item:hover { border-color: var(--accent); background: var(--bg-hover); }
+.file-icon { font-size: 1.1rem; }
+.file-name { font-size: 0.83rem; color: var(--text-primary); flex: 1; font-family: var(--font-mono); }
+.file-size { font-size: 0.72rem; color: var(--text-muted); }
+/* Misc Gradio overrides */
+label, .label-wrap { color: var(--text-secondary) !important; font-family: var(--font-ui) !important; font-size: 0.8rem !important; }
+input:not([type="checkbox"]), textarea { background: var(--bg-elevated) !important; color: var(--text-primary) !important; border-color: var(--border) !important; }
+.gr-form:not(.panel-card), .gr-box:not(.panel-card) { background: transparent !important; border: none !important; }
+footer { display: none !important; }
+select { background: var(--bg-elevated) !important; border: 1px solid var(--border) !important; border-radius: var(--radius-sm) !important; color: var(--text-primary) !important; font-family: var(--font-ui) !important; font-size: 0.875rem !important; padding: 8px 12px !important; }
+/* Animations */
+.fade-in { animation: fadeIn 0.35s ease-out both; }
+@keyframes fadeIn { from { opacity: 0; transform: translateY(8px); } to { opacity: 1; transform: none; } }
+/* Scrollbar */
+::-webkit-scrollbar       { width: 6px; height: 6px; }
+::-webkit-scrollbar-track { background: var(--bg-base); }
+::-webkit-scrollbar-thumb { background: #2d3550; border-radius: 3px; }
+::-webkit-scrollbar-thumb:hover { background: #3d4770; }
+"""
+# ---------------------------------------------------------------------------
+# Helper — build phase-progress HTML
+# FIX ISSUE 2 — phase index maps correctly to 7-item PHASES list
+# ---------------------------------------------------------------------------
+def build_phase_html(current_phase: int) -> str:
+    """
+    Render the 7-step phase progress bar.
+    current_phase is the agent's phase (1-7); phase 0 = no phase started yet.
+    Phase 8 indicates full completion and renders all 7 steps as done.
+    """
+    items = []
+    for i, label in enumerate(PHASES):
+        phase_number = i + 1    # phases are 1-indexed
+        if phase_number < current_phase:
+            dot_cls, lbl_cls, icon = "done",   "done",   "v"
+        elif phase_number == current_phase:
+            dot_cls, lbl_cls, icon = "active", "active", str(phase_number)
+        else:
+            dot_cls, lbl_cls, icon = "pending", "",       str(phase_number)
+        items.append(f"""
+        <div class="phase-item">
+            <div class="phase-dot {dot_cls}">{icon}</div>
+            <div class="phase-label {lbl_cls}">{label}</div>
+        </div>""")
+    inner = "\n".join(items)
+    return f"""
+    <div id="app-header">
+        <div style="display:flex;align-items:baseline;gap:4px;">
+            <span class="header-title">BERTopic Thematic Analysis Agent</span>
+            <span class="header-badge">AI-Powered</span>
+        </div>
+        <p class="header-subtitle">
+            End-to-end topic modelling — upload a Scopus corpus, run the agent, review topics.
+        </p>
+        <div class="phase-bar-wrap">
+            {inner}
+        </div>
+    </div>"""
+# ---------------------------------------------------------------------------
+# Helper — dataset stats HTML
+# ---------------------------------------------------------------------------
+def build_stats_html(rows: int, cols: int, filename: str) -> str:
+    return f"""
+    <div class="stats-grid fade-in">
+        <div class="stat-card accent">
+            <div class="stat-value">{rows:,}</div>
+            <div class="stat-label">Rows</div>
+        </div>
+        <div class="stat-card">
+            <div class="stat-value">{cols}</div>
+            <div class="stat-label">Columns</div>
+        </div>
+    </div>
+    <div class="status-pill ready" style="margin-top:14px;">
+        <div class="dot"></div>
+        {filename}
+    </div>"""
+# ---------------------------------------------------------------------------
+# Helper — download file-list HTML
+# ---------------------------------------------------------------------------
+def build_file_list_html(paths: list[str]) -> str:
+    if not paths:
+        return "<p style='color:var(--text-muted);font-size:0.83rem;padding:8px 0;'>No files generated yet.</p>"
+    icons = {".csv": "CSV", ".json": "JSON", ".html": "HTML", ".png": "IMG", ".xlsx": "XLS", ".txt": "TXT"}
+    items = []
+    for p in paths:
+        p    = Path(p)
+        ext  = p.suffix.lower()
+        icon = icons.get(ext, "FILE")
+        size = ""
+        if p.exists():
+            b    = p.stat().st_size
+            size = f"{b/1024:.1f} KB" if b < 1_048_576 else f"{b/1_048_576:.1f} MB"
+        items.append(f"""
+        <div class="file-list-item fade-in">
+            <span class="file-icon" style="font-size:0.7rem;background:var(--accent-soft);color:var(--accent);
+                  padding:2px 5px;border-radius:4px;font-family:var(--font-mono);font-weight:600;">{icon}</span>
+            <span class="file-name">{p.name}</span>
+            <span class="file-size">{size}</span>
+        </div>""")
+    return "\n".join(items)
+# ---------------------------------------------------------------------------
+# Helper — placeholder chart HTML
+# ---------------------------------------------------------------------------
+def build_placeholder_chart(chart_type: str) -> str:
+    colour_map = {
+        "Intertopic Map": "#4f6ef7",
+        "Top Words":      "#34d399",
+        "Hierarchy":      "#fbbf24",
+        "Heatmap":        "#f87171",
+    }
+    col = colour_map.get(chart_type, "#4f6ef7")
+    return f"""
+    <div class="chart-frame" style="display:flex;align-items:center;justify-content:center;flex-direction:column;gap:10px;">
+        <div style="font-size:2rem;color:var(--text-muted);">CHART</div>
+        <div style="color:var(--text-secondary);font-size:0.9rem;font-weight:600;">{chart_type}</div>
+        <div style="color:var(--text-muted);font-size:0.78rem;">Run the agent to generate this chart.</div>
+        <div style="width:180px;height:4px;background:var(--border);border-radius:2px;margin-top:6px;">
+            <div style="width:0%;height:4px;background:{col};border-radius:2px;animation:grow 2s ease-in-out infinite alternate;"></div>
+        </div>
+    </div>
+    <style>@keyframes grow {{ from{{width:0%}} to{{width:75%}} }}</style>"""
+# ---------------------------------------------------------------------------
+# Core interaction handlers
+# ---------------------------------------------------------------------------
+def _persist_upload(file_obj) -> Path:
+    """Copy Gradio temp upload to a stable local path and return it."""
+    src = Path(file_obj.name)
+    UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
+    dst = UPLOADS_DIR / f"{uuid.uuid4().hex[:10]}_{src.name}"
+    shutil.copy2(src, dst)
+    return dst.resolve()
+def handle_file_upload(file_obj, agent_state):
+    """Parse uploaded CSV, store file_path in state, trigger agent."""
+    if file_obj is None:
+        return (
+            "<p style='color:var(--text-muted);font-size:0.83rem;'>No file selected.</p>",
+            "<div class='status-pill idle'><div class='dot'></div>Awaiting upload</div>",
+            agent_state,
+            build_phase_html(agent_state.get("phase", 0)),
+        )
+    try:
+        persisted = _persist_upload(file_obj)
+        df       = pd.read_csv(persisted)
+        rows, cols = df.shape
+        filename = Path(file_obj.name).name
+        stats_html = build_stats_html(rows, cols, filename)
+        agent_state["file_path"] = str(persisted)
+        agent_state["file_name"] = filename
+        agent_state["rows"]      = rows
+        agent_state["cols"]      = cols
+    except Exception as exc:
+        stats_html = f"<p style='color:var(--danger);font-size:0.83rem;'>Upload error: {exc}</p>"
+    status_html = "<div class='status-pill ready'><div class='dot'></div>File ready</div>"
+    phase_html  = build_phase_html(agent_state.get("phase", 0))
+    return stats_html, status_html, agent_state, phase_html
+def handle_chat(user_message: str, chat_history: list, agent_state: dict):
+    """Stream one user turn through the agent."""
+    if not user_message.strip():
+        yield chat_history, agent_state, build_phase_html(agent_state.get("phase", 0))
+        return
+    chat_history = chat_history + [
+        {"role": "user", "content": user_message},
+        {"role": "assistant", "content": "Thinking..."},
+    ]
+    yield chat_history, agent_state, build_phase_html(agent_state.get("phase", 0))
+    file_path = agent_state.get("file_path")
+    if file_path and not Path(file_path).exists():
+        chat_history[-1]["content"] = (
+            "Uploaded CSV is no longer available on disk. "
+            "Please upload the file again and retry."
+        )
+        yield chat_history, agent_state, build_phase_html(agent_state.get("phase", 0))
+        return
+    try:
+        reply, agent_state = agent.invoke(user_message, agent_state)
+    except Exception as exc:
+        reply = f"Agent error: `{exc}`"
+    chat_history[-1]["content"] = reply
+    yield chat_history, agent_state, build_phase_html(agent_state.get("phase", 0))
+def auto_trigger_agent(agent_state: dict, chat_history: list):
+    """Fire an automatic Phase 1 trigger after file upload."""
+    filename = agent_state.get("file_name", "uploaded file")
+    rows     = agent_state.get("rows", 0)
+    auto_msg = (
+        f"A dataset has been uploaded: **{filename}** ({rows:,} rows). "
+        "Please start the thematic analysis pipeline."
+    )
+    results = []
+    for state in handle_chat(auto_msg, chat_history, agent_state):
+        results = state
+    return results   # (chat_history, agent_state, phase_html)
+def refresh_review_table(agent_state: dict):
+    """Render the review DataFrame from agent_state."""
+    raw = agent_state.get("review_df", [])
+    if raw:
+        try:
+            return gr.update(value=pd.DataFrame(raw), interactive=True)
+        except Exception:
+            pass
+    return gr.update(value=EMPTY_REVIEW_DF.copy(), interactive=True)
+def submit_review(review_df, agent_state: dict, chat_history: list):
+    """
+    FIX BUG 3 — write parsed review rows into agent_state["review_df"]
+    BEFORE calling the agent, so _parse_review_df() receives the populated list.
+    """
+    # Store the review table in state so agent.py can read it
+    agent_state["review_df"] = review_df.to_dict(orient="records")
+    # Send a short trigger message — the agent reads state, not the payload
+    msg = "Review table submitted. Please proceed to Phase 3 and consolidate themes."
+    results = []
+    for state in handle_chat(msg, chat_history, agent_state):
+        results = state
+    new_history, new_state, phase_html = results
+    return new_history, new_state, phase_html
+def refresh_downloads(agent_state: dict):
+    """Return downloadable artefact paths from agent state."""
+    files = agent_state.get("output_files", [])
+    html  = build_file_list_html(files)
+    valid = [f for f in files if os.path.exists(f)]
+    return html, valid if valid else None
+def get_chart_html(chart_choice: str, agent_state: dict) -> str:
+    """Return chart iframe or placeholder HTML."""
+    charts = agent_state.get("charts", {})
+    if chart_choice in charts:
+        src = charts[chart_choice]
+        if os.path.exists(src):
+            # Gradio 6 serves local files from /gradio_api/file=..., and
+            # paths must be URL-encoded when directories contain spaces.
+            normalised = str(Path(src).resolve()).replace("\\", "/")
+            encoded = quote(normalised, safe="/:")
+            return (
+                f'<iframe src="./gradio_api/file={encoded}" '
+                'class="chart-frame" frameborder="0"></iframe>'
+            )
+        return f'<div class="chart-frame fade-in">{src}</div>'
+    return build_placeholder_chart(chart_choice)
+# ---------------------------------------------------------------------------
+# Build UI
+# ---------------------------------------------------------------------------
+def build_app() -> gr.Blocks:
+    with gr.Blocks(
+        title="BERTopic Thematic Analysis Agent",
+    ) as app:
+        # ── Shared state ──────────────────────────────────────────────────
+        agent_state  = gr.State({})
+        chat_history = gr.State([])
+        # ���─ Header ───────────────────────────────────────────────────────
+        phase_bar = gr.HTML(value=build_phase_html(0), elem_id="phase-bar")
+        # FIX ISSUE 4 — show warning banner when API key is missing
+        if API_KEY_MISSING:
+            gr.HTML(
+                "<div class='api-warning'>"
+                "WARNING: MISTRAL_API_KEY is not set. "
+                "All LLM calls will fail. "
+                "Set it in HuggingFace Spaces: Settings -> Variables and secrets."
+                "</div>"
+            )
+        # ── Main vertical body ────────────────────────────────────────────
+        with gr.Column(elem_id="main-body"):
+            with gr.Column(elem_classes=["panel-card", "panel-data"]):
+                gr.HTML("""<div class="card-title"><span>Data Input</span></div>""")
+                file_input = gr.File(
+                    label="Upload Corpus (CSV)",
+                    file_types=[".csv"],
+                    interactive=True,
+                    elem_id="csv-upload",
+                )
+                file_status = gr.HTML(
+                    value="<div class='status-pill idle'><div class='dot'></div>Awaiting upload</div>"
+                )
+                dataset_stats = gr.HTML(
+                    value="<p style='color:var(--text-muted);font-size:0.83rem;"
+                          "padding:8px 0 0;'>Upload a CSV to see statistics.</p>"
+                )
+                gr.HTML("<hr style='border:none;border-top:1px solid var(--border);margin:16px 0;'>")
+                gr.HTML("""
+                <div style='font-size:0.72rem;color:var(--text-muted);line-height:1.7;'>
+                    <b style='color:var(--text-secondary);'>Expected columns</b><br>
+                    Title, Abstract, Authors, Year<br><br>
+                    <b style='color:var(--text-secondary);'>Quick commands</b><br>
+                    <code style='font-family:var(--font-mono);'>run abstract</code><br>
+                    <code style='font-family:var(--font-mono);'>show topics</code><br>
+                    <code style='font-family:var(--font-mono);'>export results</code>
+                </div>""")
+            with gr.Column(elem_classes=["panel-card", "panel-chat"]):
+                gr.HTML("""<div class="card-title"><span>Agent Console</span></div>""")
+                chatbot = gr.Chatbot(
+                    value=[],
+                    height=470,
+                    show_label=False,
+                    avatar_images=(None, None),
+                    elem_id="chatbot-container",
+                )
+                with gr.Row(elem_id="chat-input-row"):
+                    chat_input = gr.Textbox(
+                        placeholder='Type a command, e.g. "run abstract" ...',
+                        show_label=False,
+                        lines=1,
+                        scale=5,
+                        container=False,
+                    )
+                    send_btn = gr.Button(
+                        "Send",
+                        variant="primary",
+                        scale=1,
+                        min_width=90,
+                        elem_classes=["btn-primary"],
+                    )
+                with gr.Row():
+                    clear_btn = gr.Button(
+                        "Clear Chat",
+                        variant="secondary",
+                        scale=1,
+                        elem_classes=["btn-secondary"],
+                    )
+            with gr.Column(elem_classes=["panel-card", "panel-results"]):
+                gr.HTML("""<div class="card-title"><span>Results</span></div>""")
+                with gr.Tabs(elem_classes=["tabs"]):
+                    # ── Tab 1: Review Table ─────────────────────────────
+                    with gr.TabItem("Review", elem_classes=["tabitem"]):
+                        gr.HTML("""
+                        <p style='font-size:0.78rem;color:var(--text-muted);margin:0 0 12px;'>
+                            Edit <b>Approve</b>, <b>Rename To</b>, and <b>Reasoning</b> columns inline,
+                            then click <b>Submit Review</b>.
+                        </p>""")
+                        review_table = gr.Dataframe(
+                            value=EMPTY_REVIEW_DF.copy(),
+                            headers=REVIEW_COLUMNS,
+                            datatype=[
+                                "number", "str", "str", "number", "str",
+                                "bool",   "str", "str",
+                            ],
+                            interactive=True,
+                            wrap=True,
+                            elem_classes=["dataframe-wrap"],
+                        )
+                        with gr.Row():
+                            refresh_table_btn = gr.Button(
+                                "Refresh",
+                                variant="secondary",
+                                scale=1,
+                                elem_classes=["btn-secondary"],
+                            )
+                            submit_review_btn = gr.Button(
+                                "Submit Review",
+                                variant="primary",
+                                scale=2,
+                                elem_classes=["btn-success"],
+                            )
+                    # ── Tab 2: Charts ───────────────────────────────────
+                    with gr.TabItem("Charts", elem_classes=["tabitem"]):
+                        chart_selector = gr.Dropdown(
+                            choices=CHART_OPTIONS,
+                            value=CHART_OPTIONS[0],
+                            label="Select chart",
+                            interactive=True,
+                        )
+                        chart_display = gr.HTML(
+                            value=build_placeholder_chart(CHART_OPTIONS[0])
+                        )
+                    # ── Tab 3: Downloads ────────────────────────────────
+                    with gr.TabItem("Downloads", elem_classes=["tabitem"]):
+                        gr.HTML("""
+                        <p style='font-size:0.78rem;color:var(--text-muted);margin:0 0 12px;'>
+                            Files generated by the agent will appear here automatically.
+                        </p>""")
+                        download_file_list_html = gr.HTML(
+                            value="<p style='color:var(--text-muted);font-size:0.83rem;'>"
+                                  "No files generated yet.</p>"
+                        )
+                        download_files = gr.File(
+                            label="",
+                            file_count="multiple",
+                            interactive=False,
+                        )
+                        refresh_dl_btn = gr.Button(
+                            "Refresh Downloads",
+                            variant="secondary",
+                            elem_classes=["btn-secondary"],
+                        )
+        # ────────────────────────────────────────────────────────────────
+        # Event wiring
+        # ────────────────────────────────────────────────────────────────
+        def _on_file_upload(file_obj, a_state, c_history):
+            stats, status, a_state, phase_html = handle_file_upload(file_obj, a_state)
+            if file_obj is not None and "file_path" in a_state:
+                c_history, a_state, phase_html = auto_trigger_agent(a_state, c_history)
+            return stats, status, a_state, phase_html, c_history
+        file_input.change(
+            fn=_on_file_upload,
+            inputs=[file_input, agent_state, chat_history],
+            outputs=[dataset_stats, file_status, agent_state, phase_bar, chatbot],
+        )
+        def _on_send(msg, c_history, a_state):
+            accumulated = []
+            for result in handle_chat(msg, c_history, a_state):
+                accumulated = result
+                yield accumulated[0], accumulated[1], accumulated[2], ""
+        send_btn.click(
+            fn=_on_send,
+            inputs=[chat_input, chatbot, agent_state],
+            outputs=[chatbot, agent_state, phase_bar, chat_input],
+        )
+        chat_input.submit(
+            fn=_on_send,
+            inputs=[chat_input, chatbot, agent_state],
+            outputs=[chatbot, agent_state, phase_bar, chat_input],
+        )
+        clear_btn.click(
+            fn=lambda: ([], {}),
+            outputs=[chatbot, agent_state],
+        )
+        refresh_table_btn.click(
+            fn=refresh_review_table,
+            inputs=[agent_state],
+            outputs=[review_table],
+        )
+        # FIX BUG 3 — submit_review now writes review_df into state first
+        submit_review_btn.click(
+            fn=submit_review,
+            inputs=[review_table, agent_state, chatbot],
+            outputs=[chatbot, agent_state, phase_bar],
+        )
+        chart_selector.change(
+            fn=get_chart_html,
+            inputs=[chart_selector, agent_state],
+            outputs=[chart_display],
+        )
+        refresh_dl_btn.click(
+            fn=refresh_downloads,
+            inputs=[agent_state],
+            outputs=[download_file_list_html, download_files],
+        )
+        # Auto-refresh review table, downloads, and the active chart after every chat turn.
+        chatbot.change(
+            fn=lambda selected_chart, a: (
+                refresh_review_table(a),
+                *refresh_downloads(a),
+                get_chart_html(selected_chart, a),
+            ),
+            inputs=[chart_selector, agent_state],
+            outputs=[review_table, download_file_list_html, download_files, chart_display],
+        )
+    return app
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+if __name__ == "__main__":
+    demo = build_app()
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        show_error=True,
+        allowed_paths=[str(OUTPUTS_DIR.resolve())],
+        css=CUSTOM_CSS,
+        theme=gr.themes.Soft(
+            primary_hue=gr.themes.colors.indigo,
+            secondary_hue=gr.themes.colors.slate,
+            neutral_hue=gr.themes.colors.slate,
+            font=[gr.themes.GoogleFont("DM Sans"), "system-ui", "sans-serif"],
+        ),
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+gradio
+langchain-core
+langchain-mistralai
+langgraph
+sentence-transformers
+scikit-learn
+bertopic
+plotly
+numpy
+pandas
+hdbscan
+umap-learn
+pynndescent

tools.py ADDED Viewed

	@@ -0,0 +1,858 @@

+"""
+tools.py — BERTopic Thematic Analysis Pipeline Tools
+=====================================================
+Seven LangChain @tool functions implementing Braun & Clarke's (2006)
+six-phase thematic analysis pipeline.
+Conventions
+-----------
+- All tools accept / return plain Python dicts (JSON-serialisable).
+- Artefacts are written to  OUTPUT_DIR / run_key / <file>.
+- Functional style throughout: map, operator, numpy vectorised ops.
+- Explicit loops, try/except and branching are kept to a minimum
+  (they appear in the LLM retry helper and the chart builders).
+Fixes applied (v2)
+------------------
+- BUG 1  : run_bertopic_discovery() now saves sent_labels.npy —
+           per-sentence cluster-label array required by Tool 4.
+- BUG 1  : consolidate_into_themes() _build_theme() rewritten —
+           centroid computed from actual merged-cluster embeddings
+           via sent_labels.npy mask (no dead `if False` scaffolding).
+- ISSUE 1: generate_comparison_csv() guards against missing title run
+           with a .exists() check instead of hard-crashing.
+Dependencies
+------------
+    pip install langchain langchain-core langchain-mistralai
+                sentence-transformers scikit-learn plotly pandas numpy
+"""
+# ---------------------------------------------------------------------------
+# Stdlib
+# ---------------------------------------------------------------------------
+import json
+import os
+import re
+import time
+from functools import reduce
+from pathlib import Path
+from operator import itemgetter
+# ---------------------------------------------------------------------------
+# Third-party
+# ---------------------------------------------------------------------------
+import numpy as np
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+import plotly.figure_factory as ff
+from sklearn.cluster import AgglomerativeClustering
+from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.preprocessing import normalize
+from sentence_transformers import SentenceTransformer
+from langchain_core.tools import tool
+from langchain_core.prompts import PromptTemplate
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_mistralai import ChatMistralAI
+# ---------------------------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------------------------
+# API key is read from the environment; empty string when unset.
+MISTRAL_API_KEY: str   = os.environ.get("MISTRAL_API_KEY", "")
+MODEL_NAME:      str   = "mistral-small-latest"
+# Sentence-embedding model name handed to SentenceTransformer.
+EMBED_MODEL:     str   = "all-MiniLM-L6-v2"
+BASE_DIR:        Path  = Path(__file__).resolve().parent
+# Root folder for artefacts; per-run subfolders are created below it.
+OUTPUT_DIR:      Path  = BASE_DIR / "outputs"
+N_EVIDENCE:      int   = 5       # sentences kept per cluster centroid
+DISTANCE_THRESH: float = 0.35   # cosine-distance threshold (1 - similarity)
+RANDOM_SEED:     int   = 42
+# Timeout (seconds) and client-side retry count passed to ChatMistralAI.
+LLM_TIMEOUT_S:   int   = 45
+LLM_MAX_RETRIES: int   = 3
+# NOTE(review): the three limits below are not used in this part of the
+# file; presumably they cap labelling work and tool output size — confirm.
+MAX_LABEL_CLUSTERS: int = 60
+MIN_CLUSTER_SIZE_FOR_LABEL: int = 3
+MAX_TOOL_RETURN_PREVIEW: int = 12
+# Application-level retry on transient provider errors: number of attempts
+# and the base delay (seconds) that is multiplied by the attempt number.
+PROVIDER_RETRY_ATTEMPTS: int = 3
+PROVIDER_RETRY_BASE_DELAY_S: float = 1.5
+# Run configurations — keys map to source columns
+RUN_CONFIGS: dict[str, list[str]] = {
+    "abstract": ["Abstract"],
+    "title":    ["Title"],
+}
+# PAJAIS 25-category taxonomy
+# (Pacific Asia Journal of the Association for Information Systems)
+PAJAIS_TAXONOMY: list[str] = [
+    "Artificial Intelligence & Machine Learning",
+    "Big Data & Analytics",
+    "Blockchain & Distributed Ledger",
+    "Cloud Computing & Infrastructure",
+    "Cybersecurity & Privacy",
+    "Decision Support Systems",
+    "Digital Business & E-Commerce",
+    "Digital Health & Telemedicine",
+    "Digital Innovation & Transformation",
+    "Enterprise Systems & ERP",
+    "Fintech & Digital Finance",
+    "Green IS & Sustainability",
+    "Human-Computer Interaction",
+    "Information Systems Strategy",
+    "IT Governance & Management",
+    "Knowledge Management",
+    "Mobile Computing & IoT",
+    "Natural Language Processing & Text Mining",
+    "Organizational Behavior & IS",
+    "Platform Ecosystems & APIs",
+    "Privacy & Ethics in IS",
+    "Smart Cities & Digital Government",
+    "Social Media & Collaboration",
+    "Supply Chain & Logistics IS",
+    "Virtual Reality & Immersive Technologies",
+]
+# Boilerplate patterns to strip from abstracts: copyright lines,
+# "all rights reserved", "published by ... Ltd/Inc/LLC", and DOI tags.
+_BOILERPLATE_RE = re.compile(
+    r"(©\s*\d{4}.*?(?:rights reserved|elsevier|springer|wiley)[^.]*\.?)"
+    r"|(all rights reserved\.?)"
+    r"|(published by.*?(?:ltd|inc|llc)[^.]*\.?)"
+    r"|(doi:\s*\S+)",
+    re.IGNORECASE,
+)
+# Sentence splitter — splits on whitespace that follows '.', '!' or '?'.
+# The >= 20-character filter is applied separately in _split_sentences.
+_SENT_RE = re.compile(r"(?<=[.!?])\s+")
+# ---------------------------------------------------------------------------
+# Private helpers  (pure functions, no side-effects)
+# ---------------------------------------------------------------------------
+def _ensure_dir(path: Path) -> Path:
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+def _run_dir(run_key: str) -> Path:
+    return _ensure_dir(OUTPUT_DIR / run_key)
+def _clean_text(text: str) -> str:
+    return _BOILERPLATE_RE.sub("", str(text)).strip()
+def _split_sentences(text: str) -> list[str]:
+    return list(filter(
+        lambda s: len(s.strip()) >= 20,
+        _SENT_RE.split(_clean_text(text)),
+    ))
+def _embed(sentences: list[str]) -> np.ndarray:
+    """Encode sentences to L2-normalised 384-d vectors."""
+    model = SentenceTransformer(EMBED_MODEL)
+    raw   = model.encode(sentences, show_progress_bar=False, batch_size=64)
+    return normalize(raw, norm="l2")   # unit-norm -> cosine = dot product
+def _cluster(embeddings: np.ndarray, threshold: float) -> np.ndarray:
+    return AgglomerativeClustering(
+        metric="cosine",
+        linkage="average",
+        distance_threshold=threshold,
+        n_clusters=None,
+    ).fit_predict(embeddings)
+def _centroid(embeddings: np.ndarray) -> np.ndarray:
+    """Mean-pool rows then re-normalise to unit length."""
+    return normalize(embeddings.mean(axis=0, keepdims=True), norm="l2")[0]
+def _top_k_indices(embeddings: np.ndarray, centroid: np.ndarray, k: int) -> np.ndarray:
+    sims = cosine_similarity(embeddings, centroid.reshape(1, -1)).flatten()
+    return np.argsort(sims)[::-1][:k]
+def _llm() -> ChatMistralAI:
+    return ChatMistralAI(
+        model=MODEL_NAME,
+        api_key=MISTRAL_API_KEY,
+        temperature=0.2,
+        random_seed=RANDOM_SEED,
+        timeout=LLM_TIMEOUT_S,
+        max_retries=LLM_MAX_RETRIES,
+    )
+def _is_transient_provider_error(exc: Exception) -> bool:
+    """Detect transient Mistral outages that should be retried."""
+    msg = str(exc).lower()
+    return (
+        "unreachable_backend" in msg
+        or "internal server error" in msg
+        or '"code":"1100"' in msg
+        or '"raw_status_code":503' in msg
+        or '"raw_status_code":502' in msg
+        or '"raw_status_code":504' in msg
+        or "service unavailable" in msg
+    )
+def _invoke_with_retries(fn):
+    """Run an LLM call with bounded linear backoff on transient provider errors."""
+    last_exc: Exception | None = None
+    for attempt in range(PROVIDER_RETRY_ATTEMPTS):
+        try:
+            return fn()
+        except Exception as exc:
+            if not _is_transient_provider_error(exc):
+                raise
+            last_exc = exc
+            if attempt < PROVIDER_RETRY_ATTEMPTS - 1:
+                time.sleep(PROVIDER_RETRY_BASE_DELAY_S * (attempt + 1))
+                continue
+            raise last_exc
+    raise RuntimeError("Unexpected retry flow in _invoke_with_retries")
+def _save_json(path: Path, data: object) -> None:
+    path.write_text(json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8")
+def _load_json(path: Path) -> object:
+    return json.loads(path.read_text(encoding="utf-8"))
+# ---------------------------------------------------------------------------
+# Plotly chart builders
+# ---------------------------------------------------------------------------
+def _chart_intertopic(summaries: list[dict]) -> go.Figure:
+    df = pd.DataFrame(summaries)
+    return px.scatter(
+        df,
+        x="cx", y="cy",
+        size="size",
+        text="cluster_id",
+        color="size",
+        color_continuous_scale="Blues",
+        title="Intertopic Distance Map",
+        labels={"cx": "Dim-1", "cy": "Dim-2", "size": "Sentences"},
+        template="plotly_dark",
+    )
+def _chart_top_words(summaries: list[dict]) -> go.Figure:
+    df = (
+        pd.DataFrame(summaries)
+        .nlargest(20, "size")
+        .assign(label=lambda d: d["cluster_id"].astype(str))
+    )
+    return px.bar(
+        df,
+        x="size", y="label",
+        orientation="h",
+        title="Top Clusters by Sentence Count",
+        labels={"size": "Sentences", "label": "Cluster"},
+        color="size",
+        color_continuous_scale="Teal",
+        template="plotly_dark",
+    )
+def _chart_hierarchy(labels: list[int], embeddings: np.ndarray) -> go.Figure:
+    unique     = sorted(set(labels))
+    labels_arr = np.array(labels)
+    centroids  = np.vstack([
+        _centroid(embeddings[labels_arr == lbl])
+        for lbl in unique
+    ])
+    dist_mat = 1 - cosine_similarity(centroids)
+    fig = ff.create_dendrogram(
+        dist_mat,
+        labels=[str(l) for l in unique],
+        colorscale=px.colors.sequential.Blues,
+    )
+    fig.update_layout(title="Cluster Hierarchy", template="plotly_dark")
+    return fig
+def _chart_heatmap(labels: list[int], embeddings: np.ndarray) -> go.Figure:
+    unique     = sorted(set(labels))
+    labels_arr = np.array(labels)
+    centroids  = np.vstack([
+        _centroid(embeddings[labels_arr == lbl])
+        for lbl in unique
+    ])
+    sim_mat = cosine_similarity(centroids)
+    return px.imshow(
+        sim_mat,
+        x=[str(l) for l in unique],
+        y=[str(l) for l in unique],
+        color_continuous_scale="Blues",
+        title="Cluster Similarity Heatmap",
+        template="plotly_dark",
+    )
+def _save_chart(fig: go.Figure, path: Path) -> str:
+    fig.write_html(str(path), full_html=True, include_plotlyjs="cdn")
+    return str(path)
+# ============================================================================
+# TOOL 1 — load_scopus_csv
+# ============================================================================
+@tool
+def load_scopus_csv(filepath: str) -> dict:
+    """
+    Load a Scopus-exported CSV and extract corpus statistics.
+    Parameters
+    ----------
+    filepath : str
+        Absolute or relative path to the CSV file.
+    Returns
+    -------
+    dict with keys:
+        paper_count, abstract_sentence_count, title_sentence_count,
+        columns, sample_abstracts, filepath
+    """
+    df = pd.read_csv(filepath).rename(columns=str.strip)
+    abstract_sentences = list(reduce(
+        lambda acc, sents: acc + sents,
+        map(_split_sentences, df["Abstract"].dropna().tolist()),
+        [],
+    ))
+    title_sentences = list(reduce(
+        lambda acc, sents: acc + sents,
+        map(_split_sentences, df["Title"].dropna().tolist()),
+        [],
+    ))
+    _ensure_dir(OUTPUT_DIR / "abstract")
+    _ensure_dir(OUTPUT_DIR / "title")
+    _save_json(OUTPUT_DIR / "abstract" / "sentences.json", abstract_sentences)
+    _save_json(OUTPUT_DIR / "title"    / "sentences.json", title_sentences)
+    df.to_csv(OUTPUT_DIR / "corpus.csv", index=False)
+    return {
+        "paper_count":             int(len(df)),
+        "abstract_sentence_count": int(len(abstract_sentences)),
+        "title_sentence_count":    int(len(title_sentences)),
+        "columns":                 df.columns.tolist(),
+        "sample_abstracts":        df["Abstract"].dropna().head(3).tolist(),
+        "filepath":                str(filepath),
+    }
+# ============================================================================
+# TOOL 2 — run_bertopic_discovery
+# ============================================================================
+@tool
+def run_bertopic_discovery(run_key: str, threshold: float = DISTANCE_THRESH) -> dict:
+    """
+    Embed sentences, cluster with AgglomerativeClustering, extract evidence,
+    and generate four Plotly charts.
+    Saved artefacts
+    ---------------
+    emb.npy         : (N, 384) float32  L2-normalised embeddings
+    sent_labels.npy : (N,)     int32    per-sentence cluster label  [BUG 1 FIX]
+    summaries.json  : list of cluster dicts with evidence sentences
+    Parameters
+    ----------
+    run_key   : str   — "abstract" or "title"
+    threshold : float — cosine distance threshold for AgglomerativeClustering
+    Returns
+    -------
+    dict with keys:
+        run_key, n_clusters, n_sentences, threshold,
+        chart_paths, summaries_path, embeddings_path
+    """
+    rdir      = _run_dir(run_key)
+    sentences = _load_json(OUTPUT_DIR / run_key / "sentences.json")
+    embeddings = _embed(sentences)
+    np.save(str(rdir / "emb.npy"), embeddings)
+    labels     = _cluster(embeddings, threshold).tolist()
+    unique_ids = sorted(set(labels))
+    # FIX BUG 1 — persist per-sentence label array so Tool 4 can build
+    # correct cluster masks without any guesswork or scaffolding.
+    np.save(str(rdir / "sent_labels.npy"), np.array(labels, dtype=np.int32))
+    labels_arr = np.array(labels)
+    def _cluster_summary(cid: int) -> dict:
+        mask    = labels_arr == cid
+        c_emb   = embeddings[mask]
+        c_sent  = list(np.array(sentences)[mask])
+        ctroid  = _centroid(c_emb)
+        top_idx = _top_k_indices(c_emb, ctroid, N_EVIDENCE)
+        return {
+            "cluster_id": int(cid),
+            "size":       int(mask.sum()),
+            "cx":         float(ctroid[0]),
+            "cy":         float(ctroid[1]),
+            "evidence":   list(np.array(c_sent)[top_idx]),
+        }
+    summaries = list(map(_cluster_summary, unique_ids))
+    _save_json(rdir / "summaries.json", summaries)
+    chart_paths = {
+        "Intertopic Map": _save_chart(_chart_intertopic(summaries),        rdir / "intertopic.html"),
+        "Top Words":      _save_chart(_chart_top_words(summaries),          rdir / "topwords.html"),
+        "Hierarchy":      _save_chart(_chart_hierarchy(labels, embeddings), rdir / "hierarchy.html"),
+        "Heatmap":        _save_chart(_chart_heatmap(labels, embeddings),   rdir / "heatmap.html"),
+    }
+    return {
+        "run_key":         run_key,
+        "n_clusters":      int(len(unique_ids)),
+        "n_sentences":     int(len(sentences)),
+        "threshold":       threshold,
+        "chart_paths":     chart_paths,
+        "summaries_path":  str(rdir / "summaries.json"),
+        "embeddings_path": str(rdir / "emb.npy"),
+    }
+# ============================================================================
+# TOOL 3 — label_topics_with_llm
+# ============================================================================
+# Prompt used by label_topics_with_llm to label a single cluster.
+# Placeholders: {cluster_id}, {size}, {evidence}. The reply is piped through
+# JsonOutputParser, hence the insistence on raw JSON without markdown.
+_LABEL_PROMPT = PromptTemplate.from_template(
+    """You are an expert academic researcher specialising in Information Systems.
+Given the following cluster of research sentences, return a JSON object with EXACTLY these keys:
+  label      : short research-area name (<= 6 words)
+  category   : broader IS research category
+  confidence : float 0.0-1.0
+  reasoning  : one sentence explaining your choice
+  niche      : boolean - true if highly specialised / narrow
+Cluster ID    : {cluster_id}
+Sentence count: {size}
+Evidence sentences:
+{evidence}
+Respond with RAW JSON only. No markdown, no explanation outside the JSON.
+"""
+)
+@tool
+def label_topics_with_llm(run_key: str) -> dict:
+    """
+    Send each cluster's evidence sentences to Mistral and obtain structured labels.
+    Parameters
+    ----------
+    run_key : str — "abstract" or "title"
+    Returns
+    -------
+    dict with keys:
+        run_key, labels_path, labelled_count, labels_preview (list of dicts)
+    """
+    rdir      = _run_dir(run_key)
+    summaries_path = rdir / "summaries.json"
+    if not summaries_path.exists():
+        return {
+            "run_key":           run_key,
+            "labels_path":       str(rdir / "labels.json"),
+            "labelled_count":    0,
+            "total_clusters":    0,
+            "selected_clusters": 0,
+            "skipped_clusters":  0,
+            "labels_preview":    [],
+            "error": (
+                f"Missing discovery artifact: {summaries_path}. "
+                "Run run_bertopic_discovery first for this run_key."
+            ),
+        }
+    summaries = _load_json(summaries_path)
+    ranked = sorted(
+        filter(lambda s: s.get("size", 0) >= MIN_CLUSTER_SIZE_FOR_LABEL, summaries),
+        key=lambda s: s.get("size", 0),
+        reverse=True,
+    )
+    selected = ranked[:MAX_LABEL_CLUSTERS]
+    chain = _LABEL_PROMPT | _llm() | JsonOutputParser()
+    def _label_one(summary: dict) -> dict:
+        result = _invoke_with_retries(lambda: chain.invoke({
+            "cluster_id": summary["cluster_id"],
+            "size":       summary["size"],
+            "evidence":   "\n".join(
+                              f"  {i+1}. {s}"
+                              for i, s in enumerate(summary["evidence"])
+                          ),
+        }))
+        return {**summary, **result}
+    labelled = list(map(_label_one, selected))
+    _save_json(rdir / "labels.json", labelled)
+    # Keep tool output compact so the ReAct transcript does not overflow model context.
+    preview = list(map(
+        lambda r: {
+            "cluster_id": r.get("cluster_id"),
+            "label":      r.get("label"),
+            "category":   r.get("category"),
+            "confidence": r.get("confidence"),
+            "size":       r.get("size"),
+            "niche":      r.get("niche", False),
+        },
+        labelled[:MAX_TOOL_RETURN_PREVIEW],
+    ))
+    return {
+        "run_key":        run_key,
+        "labels_path":    str(rdir / "labels.json"),
+        "labelled_count": len(labelled),
+        "total_clusters": len(summaries),
+        "selected_clusters": len(selected),
+        "skipped_clusters": max(0, len(summaries) - len(selected)),
+        "labels_preview": preview,
+    }
+# ============================================================================
+# TOOL 4 — consolidate_into_themes
+# ============================================================================
+@tool
+def consolidate_into_themes(run_key: str, theme_map: dict) -> dict:
+    """
+    Merge approved / renamed topics into consolidated themes and recompute
+    centroids from the actual merged-cluster embeddings.
+    Parameters
+    ----------
+    run_key   : str  — "abstract" or "title"
+    theme_map : dict — {new_theme_name: [cluster_id, ...], ...}
+                       Only approved topics need appear here.
+    Returns
+    -------
+    dict with keys:
+        run_key, theme_count, themes_path, themes_preview (list of dicts)
+    """
+    rdir        = _run_dir(run_key)
+    labels_data = _load_json(rdir / "labels.json")
+    embeddings  = np.load(str(rdir / "emb.npy"))          # (N, 384)
+    sent_labels = np.load(str(rdir / "sent_labels.npy"))  # (N,) — FIX BUG 1
+    # Index label dicts by cluster_id for O(1) lookup
+    label_idx = {item["cluster_id"]: item for item in labels_data}
+    def _build_theme(theme_name: str, cids: list[int]) -> dict:
+        """
+        Build one consolidated theme from a list of cluster IDs.
+        Evidence : top-N sentences pooled across all merged clusters
+        Centroid : L2-normalised mean of all embeddings in the merged set
+        Size     : total sentence count across merged clusters
+        """
+        member_labels = list(map(label_idx.get, cids))
+        # Pool evidence sentences from all member clusters
+        all_evidence = reduce(
+            lambda acc, lbl: acc + lbl["evidence"],
+            filter(None, member_labels),
+            [],
+        )
+        # Total sentence count across merged clusters
+        total_size = reduce(
+            lambda acc, lbl: acc + lbl.get("size", 0),
+            filter(None, member_labels),
+            0,
+        )
+        # FIX BUG 1 — build correct cluster mask using persisted sent_labels
+        cluster_mask     = np.isin(sent_labels, np.array(cids, dtype=np.int32))
+        theme_embeddings = embeddings[cluster_mask]   # (M, 384)
+        # Guard: if mask is somehow empty fall back to zero vector
+        theme_centroid = (
+            _centroid(theme_embeddings)
+            if theme_embeddings.shape[0] > 0
+            else np.zeros(embeddings.shape[1], dtype=np.float32)
+        )
+        return {
+            "theme_name":  theme_name,
+            "cluster_ids": cids,
+            "size":        total_size,
+            "evidence":    all_evidence[:N_EVIDENCE],
+            "centroid":    theme_centroid.tolist(),
+            "sub_labels":  list(map(
+                               itemgetter("label"),
+                               filter(None, member_labels),
+                           )),
+        }
+    themes = list(map(
+        lambda kv: _build_theme(kv[0], kv[1]),
+        theme_map.items(),
+    ))
+    _save_json(rdir / "themes.json", themes)
+    preview = list(map(
+        lambda t: {
+            "theme_name":   t.get("theme_name"),
+            "size":         t.get("size", 0),
+            "cluster_count": len(t.get("cluster_ids", [])),
+        },
+        themes[:MAX_TOOL_RETURN_PREVIEW],
+    ))
+    return {
+        "run_key":     run_key,
+        "theme_count": len(themes),
+        "themes_path": str(rdir / "themes.json"),
+        "themes_preview": preview,
+    }
+# ============================================================================
+# TOOL 5 — compare_with_taxonomy
+# ============================================================================
+# Prompt used by compare_with_taxonomy to map one theme onto the PAJAIS list.
+# Placeholders: {taxonomy}, {theme_name}, {evidence}. The reply is piped
+# through JsonOutputParser, so the model is told to answer with raw JSON only.
+_TAXONOMY_PROMPT = PromptTemplate.from_template(
+    """You are an IS research taxonomist. Map the following research theme to the
+PAJAIS taxonomy. Return RAW JSON with EXACTLY these keys:
+  theme_name    : the input theme name (unchanged)
+  pajais_match  : best matching PAJAIS category OR the string "NOVEL"
+  confidence    : float 0.0-1.0
+  reasoning     : one sentence
+  is_novel      : boolean
+PAJAIS categories:
+{taxonomy}
+Theme to map:
+  Name     : {theme_name}
+  Evidence : {evidence}
+Respond with RAW JSON only. No markdown.
+"""
+)
+@tool
+def compare_with_taxonomy(run_key: str) -> dict:
+    """
+    Map consolidated themes to PAJAIS taxonomy via Mistral.
+    Parameters
+    ----------
+    run_key : str — "abstract" or "title"
+    Returns
+    -------
+    dict with keys:
+        run_key, taxonomy_path, mapped_count, novel_count, mapping_preview
+    """
+    rdir   = _run_dir(run_key)
+    themes = _load_json(rdir / "themes.json")
+    chain  = _TAXONOMY_PROMPT | _llm() | JsonOutputParser()
+    taxonomy_str = "\n".join(f"  - {cat}" for cat in PAJAIS_TAXONOMY)
+    def _map_theme(theme: dict) -> dict:
+        result = _invoke_with_retries(lambda: chain.invoke({
+            "taxonomy":   taxonomy_str,
+            "theme_name": theme["theme_name"],
+            "evidence":   " | ".join(theme.get("evidence", [])[:3]),
+        }))
+        return {**theme, **result}
+    taxonomy_map = list(map(_map_theme, themes))
+    _save_json(rdir / "taxonomy_map.json", taxonomy_map)
+    novel_count  = sum(1 for t in taxonomy_map if t.get("is_novel", False))
+    mapped_count = len(taxonomy_map) - novel_count
+    preview = list(map(
+        lambda t: {
+            "theme_name":   t.get("theme_name"),
+            "pajais_match": t.get("pajais_match", "NOVEL"),
+            "confidence":   t.get("confidence", 0),
+            "is_novel":     t.get("is_novel", False),
+        },
+        taxonomy_map[:MAX_TOOL_RETURN_PREVIEW],
+    ))
+    return {
+        "run_key":       run_key,
+        "taxonomy_path": str(rdir / "taxonomy_map.json"),
+        "mapped_count":  mapped_count,
+        "novel_count":   novel_count,
+        "mapping_preview": preview,
+    }
+# ============================================================================
+# TOOL 6 — generate_comparison_csv
+# ============================================================================
+@tool
+def generate_comparison_csv() -> dict:
+    """
+    Side-by-side comparison of abstract-run vs title-run themes.
+    FIX ISSUE 1: title run is optional — no longer crashes if only the
+    abstract run has been completed. title_map defaults to [] when the
+    title taxonomy_map.json file does not exist.
+    Returns
+    -------
+    dict with keys:
+        csv_path, row_count, columns, preview (list of dicts)
+    """
+    abstract_path = OUTPUT_DIR / "abstract" / "taxonomy_map.json"
+    title_path    = OUTPUT_DIR / "title"    / "taxonomy_map.json"
+    abstract_map = _load_json(abstract_path)
+    # FIX ISSUE 1: guard against missing title run
+    title_map = (
+        _load_json(title_path)
+        if title_path.exists()
+        else []
+    )
+    def _row(a_theme: dict, t_theme: dict | None) -> dict:
+        return {
+            "Abstract Theme":      a_theme.get("theme_name",   ""),
+            "Abstract PAJAIS":     a_theme.get("pajais_match",  ""),
+            "Abstract Confidence": a_theme.get("confidence",    0),
+            "Abstract Novel":      a_theme.get("is_novel",     False),
+            "Title Theme":         t_theme.get("theme_name",   "") if t_theme else "",
+            "Title PAJAIS":        t_theme.get("pajais_match",  "") if t_theme else "",
+            "Title Confidence":    t_theme.get("confidence",    0)  if t_theme else 0,
+            "Title Novel":         t_theme.get("is_novel",     False) if t_theme else False,
+        }
+    max_len  = max(len(abstract_map), len(title_map)) if title_map else len(abstract_map)
+    padded_a = abstract_map + [{}] * (max_len - len(abstract_map))
+    padded_t = title_map    + [{}] * (max_len - len(title_map))
+    rows = list(map(_row, padded_a, padded_t))
+    df   = pd.DataFrame(rows)
+    out_path = OUTPUT_DIR / "comparison.csv"
+    df.to_csv(out_path, index=False)
+    return {
+        "csv_path":  str(out_path),
+        "row_count": len(df),
+        "columns":   df.columns.tolist(),
+        "preview":   df.head(5).to_dict(orient="records"),
+    }
+# ============================================================================
+# TOOL 7 — export_narrative
+# ============================================================================
+# Prompt used by export_narrative to draft the written report.
+# Placeholders: {abstract_themes}, {title_themes}. No output parser is attached
+# to this chain, so the model is asked for plain text.
+_NARRATIVE_PROMPT = PromptTemplate.from_template(
+    """You are an academic researcher writing a methodology and findings section.
+Write a 500-word academic narrative describing the thematic analysis results below.
+Structure: (1) methodology overview, (2) major themes found, (3) PAJAIS alignment,
+(4) novel contributions, (5) limitations.
+Use formal academic English. Do NOT use bullet points.
+Abstract themes & taxonomy:
+{abstract_themes}
+Title themes & taxonomy:
+{title_themes}
+Respond with plain text only.
+"""
+)
+@tool
+def export_narrative(run_key: str) -> dict:
+    """
+    Generate a 500-word academic narrative and save to narrative.txt.
+    Parameters
+    ----------
+    run_key : str — "abstract" or "title" (primary source)
+    Returns
+    -------
+    dict with keys:
+        narrative_path, word_count, preview (first 300 chars)
+    """
+    rdir       = _run_dir(run_key)
+    title_path = OUTPUT_DIR / "title" / "taxonomy_map.json"
+    abstract_map = _load_json(OUTPUT_DIR / "abstract" / "taxonomy_map.json")
+    title_map    = _load_json(title_path) if title_path.exists() else []
+    def _theme_summary(t: dict) -> str:
+        return (
+            f"  - {t.get('theme_name','?')} -> {t.get('pajais_match','?')} "
+            f"(conf={t.get('confidence',0):.2f}, novel={t.get('is_novel',False)})"
+        )
+    abstract_str = "\n".join(map(_theme_summary, abstract_map))
+    title_str    = "\n".join(map(_theme_summary, title_map)) or "Not run."
+    chain    = _NARRATIVE_PROMPT | _llm()
+    response = _invoke_with_retries(lambda: chain.invoke({
+        "abstract_themes": abstract_str,
+        "title_themes":    title_str,
+    }))
+    narrative = response.content if hasattr(response, "content") else str(response)
+    out_path  = rdir / "narrative.txt"
+    out_path.write_text(narrative, encoding="utf-8")
+    return {
+        "narrative_path": str(out_path),
+        "word_count":     len(narrative.split()),
+        "preview":        narrative[:300],
+    }
+# ---------------------------------------------------------------------------
+# Tool registry — imported by agent.py
+# ---------------------------------------------------------------------------
+# The seven tools defined above, listed in TOOL 1 … TOOL 7 order.
+ALL_TOOLS = [
+    load_scopus_csv,          # TOOL 1
+    run_bertopic_discovery,   # TOOL 2
+    label_topics_with_llm,    # TOOL 3
+    consolidate_into_themes,  # TOOL 4
+    compare_with_taxonomy,    # TOOL 5
+    generate_comparison_csv,  # TOOL 6
+    export_narrative,         # TOOL 7
+]