File size: 22,710 Bytes
304331d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
"""
app.py — Braun & Clarke (2006) Thematic Analysis Agent UI.

Implements the 6-phase reflexive thematic analysis procedure from
Braun, V., & Clarke, V. (2006). Using thematic analysis in psychology.
Qualitative Research in Psychology, 3(2), 77-101.

Three UX features:
    1. Phase banner — large prominent display of current B&C phase
    2. Dynamic phase actions — only actions valid for current phase shown
    3. Auto-populated review table — loads from tool checkpoint files

9-column review table: #, Code/Theme Label, Data Extract, Extracts,
Data Items, Approve, Rename To, Move To, Analytic Memo.
"""

import gradio as gr
import pandas as pd
import json
import os
import re
import tempfile
from datetime import datetime
from pathlib import Path
from agent import run as agent_run

# Conversation thread identifier handed to agent_run on every call. It is
# computed once, when this module is imported, so every request served by
# this process uses the same thread id.
THREAD_ID = f"thematic-analysis-{datetime.now().strftime('%Y%m%d%H%M%S')}"

# Column headers of the 9-column review table, in display order.
REVIEW_COLS = [
    "#", "Code / Theme Label", "Data Extract", "Extracts", "Data Items",
    "Approve", "Rename To", "Move To", "Analytic Memo",
]

# Single-row placeholder shown in the review table while no checkpoint data
# is available. Callers take a .copy() so this template is never mutated.
EMPTY_TABLE = pd.DataFrame(
    {
        "#": ["-"],
        # The original text contained a mis-decoded em dash ("β€”").
        "Code / Theme Label": ["No codes yet — run analysis first"],
        "Data Extract": [""],
        "Extracts": [""],
        "Data Items": [""],
        "Approve": [""],
        "Rename To": [""],
        "Move To": [""],
        "Analytic Memo": [""],
    },
)

# Banner content per Braun & Clarke phase number:
#   phase -> (title, six-cell progress bar, Markdown "next step" instruction).
# Key 0 is the pre-analysis state and doubles as the fallback for unknown
# phase numbers. The titles originally contained mis-decoded em dashes
# ("β€”"); they are restored to a real "—" here.
PHASE_INFO = {
    0: ("Getting started", "⬜⬜⬜⬜⬜⬜",
        "Upload your Scopus CSV data set, then click **Analyse my data set**"),
    1: ("Phase 1 — Familiarisation with the Data", "🟦⬜⬜⬜⬜⬜",
        "Click **Run analysis on abstracts** or **Run analysis on titles** "
        "to begin familiarisation with the data corpus"),
    2: ("Phase 2 — Generating Initial Codes", "🟦🟦⬜⬜⬜⬜",
        "Review initial codes in the table below. Edit Approve / Rename / "
        "Move extracts, then click **Submit Review** to collate codes into themes"),
    3: ("Phase 3 — Searching for Themes", "🟦🟦🟦⬜⬜⬜",
        "Review candidate themes (collated initial codes). Edit the table "
        "and click **Submit Review** to proceed to theme review"),
    4: ("Phase 4 — Reviewing Themes", "🟦🟦🟦🟦⬜⬜",
        "Review themes against coded extracts (Level 1) and the entire "
        "data set (Level 2). Click **Submit Review** to confirm"),
    5: ("Phase 5 — Defining and Naming Themes", "🟦🟦🟦🟦🟦⬜",
        "Review theme definitions and names. Edit and click **Submit Review**"),
    6: ("Phase 6 — Producing the Report", "🟦🟦🟦🟦🟦🟦",
        "Review the scholarly report and thematic map. "
        "**Submit Review** to finalise"),
}

# Labels for the phase-action buttons, keyed by phase number. Each list holds
# at most four entries because the UI exposes four prompt buttons; the button
# label is also the message sent to the agent when the button is clicked.
PHASE_PROMPTS = {
    0: ["Analyse my data set"],
    1: ["Run analysis on abstracts", "Run analysis on titles",
        "Show data corpus statistics"],
    2: ["Proceed to searching for themes", "Show initial codes",
        "How many orphan extracts?"],
    3: ["Proceed to reviewing themes", "Show candidate themes",
        "Explain theme collation"],
    4: ["Proceed to defining themes", "Show thematic map"],
    5: ["Proceed to producing the report", "Show theme definitions",
        "Compare themes with PAJAIS taxonomy"],
    6: ["Produce final scholarly report", "Show comparison table",
        "Export all results"],
}

# Markdown rendered verbatim in the "References" tab. The text originally
# contained mis-decoded UTF-8 (en/em dashes, heading and link emoji, and the
# "ü" in "Müllner"); those characters are restored here. Wording is unchanged.
REFERENCES_MD = """
## Methodology References

Click any link to open the paper in a new tab. These are the foundational
papers you can cite in your methodology section.

---

### 📖 Thematic Analysis (the method)

**Braun, V., & Clarke, V. (2006).** Using thematic analysis in psychology.
*Qualitative Research in Psychology*, 3(2), 77–101.
🔗 [DOI: 10.1191/1478088706qp063oa](https://doi.org/10.1191/1478088706qp063oa)

> The foundational paper defining the six-phase reflexive thematic
> analysis procedure. Cite this as the primary methodology reference.
> Every phase name, terminology, and review step in this agent maps
> directly to the procedures on pp. 87–93.

**Braun, V., & Clarke, V. (2019).** Reflecting on reflexive thematic analysis.
*Qualitative Research in Sport, Exercise and Health*, 11(4), 589–597.
🔗 [DOI: 10.1080/2159676X.2019.1628806](https://doi.org/10.1080/2159676X.2019.1628806)

> A later clarification emphasising the reflexive, recursive, and
> researcher-in-the-loop nature of the method. Useful for defending
> the human-approval design of this agent.

**Braun, V., & Clarke, V. (2021).** One size fits all? What counts as
quality practice in (reflexive) thematic analysis? *Qualitative Research
in Psychology*, 18(3), 328–352.
🔗 [DOI: 10.1080/14780887.2020.1769238](https://doi.org/10.1080/14780887.2020.1769238)

> Quality criteria for thematic analysis — useful for defending the
> STOP gate design as reviewer-approval checkpoints.

---

### 🧠 Embedding Model (Sentence-BERT)

**Reimers, N., & Gurevych, I. (2019).** Sentence-BERT: Sentence Embeddings
using Siamese BERT-Networks. *Proceedings of EMNLP-IJCNLP 2019*.
🔗 [arXiv: 1908.10084](https://arxiv.org/abs/1908.10084)

> The paper behind `sentence-transformers/all-MiniLM-L6-v2`, the embedding
> model used to convert data extracts into 384-dimensional vectors.
> Establishes cosine similarity as the canonical comparison metric for
> SBERT embeddings — justifies our use of cosine distance.

---

### 🔬 Topic Modelling Framework (BERTopic)

**Grootendorst, M. (2022).** BERTopic: Neural topic modeling with a
class-based TF-IDF procedure. *arXiv preprint*.
🔗 [arXiv: 2203.05794](https://arxiv.org/abs/2203.05794)

> The BERTopic framework. Our approach follows its documented
> Agglomerative Clustering configuration with `distance_threshold=0.5`
> as a substitute for HDBSCAN when fine-grained control over code
> granularity is required.

---

### ⚙️ Clustering Algorithm (scikit-learn)

**Pedregosa, F., et al. (2011).** Scikit-learn: Machine Learning in Python.
*Journal of Machine Learning Research*, 12, 2825–2830.
🔗 [JMLR](https://jmlr.org/papers/v12/pedregosa11a.html)

> Cite this for `sklearn.cluster.AgglomerativeClustering` with
> `metric='cosine'`, `linkage='average'`, `distance_threshold=0.50`.

**Müllner, D. (2011).** Modern hierarchical, agglomerative clustering
algorithms. *arXiv preprint*.
🔗 [arXiv: 1109.2378](https://arxiv.org/abs/1109.2378)

> Comprehensive reference for agglomerative clustering algorithms and
> linkage methods — useful for justifying the choice of `average`
> linkage over `ward` for cosine-distance data.

---

### 🤖 Language Model (Mistral)

**Jiang, A. Q., et al. (2023).** Mistral 7B. *arXiv preprint*.
🔗 [arXiv: 2310.06825](https://arxiv.org/abs/2310.06825)

> The family of LLMs used for initial code labelling and narrative
> generation. Our agent uses `mistral-large-latest` for these
> LLM-dependent tool calls.

---

### 📚 LangChain / LangGraph

**Chase, H., et al. (2023).** LangChain. *GitHub repository*.
🔗 [github.com/langchain-ai/langchain](https://github.com/langchain-ai/langchain)

**Chase, H., et al. (2024).** LangGraph. *GitHub repository*.
🔗 [github.com/langchain-ai/langgraph](https://github.com/langchain-ai/langgraph)

> The agent orchestration framework. `create_agent` (LangChain v1)
> with `InMemorySaver` (LangGraph) provides the stateful multi-turn
> conversation with tool-use capability underlying this agent.

---

### 🎨 User Interface (Gradio)

**Abid, A., et al. (2019).** Gradio: Hassle-free sharing and testing of
ML models in the wild. *arXiv preprint*.
🔗 [arXiv: 1906.02569](https://arxiv.org/abs/1906.02569)

> The web UI framework. This application uses Gradio 6.x components:
> `gr.Blocks`, `gr.Chatbot`, `gr.Dataframe`, `gr.File`, etc.

---

## How to cite this agent in your report

> "Thematic analysis was conducted following Braun and Clarke's (2006)
> six-phase reflexive procedure, computationally assisted using a
> researcher-in-the-loop agent. Data extracts were embedded using
> `all-MiniLM-L6-v2` (Reimers & Gurevych, 2019), clustered with
> `sklearn.cluster.AgglomerativeClustering` (Pedregosa et al., 2011)
> using `metric='cosine'`, `linkage='average'`, and
> `distance_threshold=0.50`, following the Agglomerative Clustering
> configuration documented in the BERTopic framework (Grootendorst, 2022).
> Initial code labels and the final scholarly narrative were generated
> using `mistral-large-latest` (Jiang et al., 2023). At every phase
> boundary, the researcher reviewed and approved computational outputs
> via a structured review table before the analysis advanced, preserving
> the reflexive, recursive, and analyst-led character of thematic
> analysis (Braun & Clarke, 2019; 2021)."
"""


def _prompt_button_updates(phase: int) -> tuple:
    """Build the updates for the four phase-action buttons.

    The labels configured for ``phase`` in PHASE_PROMPTS are used (phase 0's
    labels when the phase is unknown). The list is padded with empty strings
    to exactly four entries; a button with an empty label is hidden.

    Returns:
        Tuple of 4 gr.update objects for btn1, btn2, btn3, btn4.
    """
    labels = list(PHASE_PROMPTS.get(phase, PHASE_PROMPTS[0]))
    labels.extend([""] * 4)  # pad so slicing always yields four slots

    updates = []
    for label in labels[:4]:
        updates.append(gr.update(value=label, visible=bool(label)))
    return tuple(updates)

def _path(file) -> str:
    """Return the filesystem path behind an upload value, or "" if none.

    Accepted inputs:
      * ``os.PathLike`` objects (e.g. ``pathlib.Path``): the full path is
        returned. Previously the ``.name`` attribute of a ``Path`` (its
        basename only) was returned by mistake.
      * objects exposing a non-empty ``name`` attribute (file wrappers):
        ``str(file.name)`` is returned.
      * plain strings: returned as-is.
      * anything else (including ``None``): "".
    """
    if isinstance(file, os.PathLike):
        return os.fsdecode(file)
    name = getattr(file, "name", None)
    if name:
        return str(name)
    if isinstance(file, str):
        return str(file)
    return ""


def _name(file) -> str:
    """Return the basename of the upload's path ("" when there is no path)."""
    return os.path.basename(_path(file))


def _extract_phase(text: str) -> int:
    """Return the digit following the first "Phase " in the text, else 0.

    Only a single digit is captured, so "Phase 12" yields 1. ``text`` is
    converted with ``str()`` first, so non-string values are accepted.
    """
    match = re.search(r"Phase (\d)", str(text))
    if match is None:
        return 0
    return int(match.group(1))


def _phase_banner(num: int) -> str:
    """Return the Markdown banner for a phase.

    The banner is a level-2 heading with the progress bar and phase title,
    followed by a bold "NEXT STEP" instruction. Unknown phase numbers fall
    back to the phase 0 entry of PHASE_INFO.
    """
    name, progress, instruction = PHASE_INFO.get(num, PHASE_INFO[0])
    # The arrow was previously a mis-decoded byte sequence ("β†’").
    return (
        f"## {progress}  {name}\n\n"
        f"**NEXT STEP →** {instruction}"
    )


def _load_review_table(base_dir: str) -> pd.DataFrame:
    """Build the review table from the newest checkpoint file in base_dir.

    Looks in base_dir for topic_labels.json, themes.json,
    taxonomy_alignment.json and summaries.json, picks the one with the most
    recent modification time, and formats it with _format_checkpoint.
    A copy of EMPTY_TABLE is returned when base_dir is empty, does not
    exist, or contains none of those files.
    """
    if not base_dir:
        return EMPTY_TABLE.copy()

    folder = Path(str(base_dir))
    if not folder.exists():
        return EMPTY_TABLE.copy()

    checkpoint_names = (
        "topic_labels.json",
        "themes.json",
        "taxonomy_alignment.json",
        "summaries.json",
    )
    found = []
    for filename in checkpoint_names:
        found.extend(folder.glob(filename))
    if not found:
        return EMPTY_TABLE.copy()

    # max() keeps the first of several equally-new files, matching a stable
    # descending sort followed by taking the head.
    newest = max(found, key=lambda candidate: candidate.stat().st_mtime)
    return _format_checkpoint(newest)


# Keys that may carry a record's numeric identifier, in lookup order.
_TOPIC_ID_KEYS = ("topic_id", "theme_id", "id", "cluster_id")
# Keys that may carry a record's human-readable label, in lookup order.
_LABEL_KEYS = ("label", "Label", "name", "topic_label", "theme_label", "title")


def _read_checkpoint_json(path: Path):
    """Parse one checkpoint file as UTF-8 encoded JSON."""
    return json.loads(path.read_text(encoding="utf-8"))


def _records_in(raw, keys) -> list:
    """Return the record list of a payload: the payload itself if it is a
    list, else the value under the first of ``keys`` present (if a list)."""
    if isinstance(raw, list):
        return raw
    if isinstance(raw, dict):
        for key in keys:
            if key in raw:
                value = raw[key]
                return value if isinstance(value, list) else []
    return []


def _topic_id(record: dict) -> int:
    """Return the record's identifier as an int, or -1 if none is usable."""
    for key in _TOPIC_ID_KEYS:
        value = record.get(key)
        if value is None:
            continue
        try:
            return int(value)
        except (TypeError, ValueError):
            continue  # e.g. "T3": try the next candidate key
    return -1


def _records_by_topic_id(records) -> dict:
    """Index dict records by identifier; later duplicates win."""
    return {_topic_id(rec): rec for rec in records if isinstance(rec, dict)}


def _first_label(record: dict):
    """Return the first non-empty label value in the record, or ""."""
    for key in _LABEL_KEYS:
        value = record.get(key)
        if value:
            return value
    return ""


def _format_checkpoint(path) -> pd.DataFrame:
    """Format a checkpoint JSON file into review table rows.

    The records of ``path`` are merged, by integer identifier, with two
    companion files in the same directory when they exist:
      topic_labels.json  supplies label / rationale / representative text
                         (skipped when ``path`` is that file itself)
      summaries.json     supplies size / representative text
    Several key-name variants are tried for identifiers and labels.

    Args:
        path: Checkpoint file to format; a ``str`` or ``Path``.

    Returns:
        A DataFrame with the REVIEW_COLS columns, one row per record (at
        most 200), or a copy of EMPTY_TABLE when the file has no records.

    Raises:
        OSError: if ``path`` cannot be read.
        json.JSONDecodeError: if a file that is read is not valid JSON.
    """
    target = Path(path)  # callers may pass a plain string
    folder = target.parent

    records = _records_in(
        _read_checkpoint_json(target),
        ("clusters", "per_theme", "topics", "themes"),
    )

    summaries_by_id = {}
    summaries_path = folder / "summaries.json"
    if summaries_path.exists():
        summaries_by_id = _records_by_topic_id(
            _records_in(_read_checkpoint_json(summaries_path), ("clusters",))
        )

    labels_by_id = {}
    labels_path = folder / "topic_labels.json"
    if target.name != "topic_labels.json" and labels_path.exists():
        labels_by_id = _records_by_topic_id(
            _records_in(
                _read_checkpoint_json(labels_path),
                ("clusters", "topics", "themes"),
            )
        )

    rows = []
    for item in records[:200]:
        if not isinstance(item, dict):
            continue  # tolerate stray non-object entries
        tid = _topic_id(item)
        summary = summaries_by_id.get(tid, {})
        labels = labels_by_id.get(tid, {})
        label = _first_label(item) or _first_label(labels) or f"code_{tid}"
        extract = (
            item.get("representative") or summary.get("representative")
            or labels.get("representative") or item.get("notes") or ""
        )
        size = (item.get("size") or summary.get("size")
                or item.get("total_papers") or 0)
        memo = (item.get("rationale") or labels.get("rationale")
                or item.get("notes") or "")
        rows.append({
            "#":                  tid,
            "Code / Theme Label": str(label),
            "Data Extract":       str(extract)[:150],
            # NOTE(review): both count columns are filled from the same
            # value; confirm whether the checkpoints hold a separate count.
            "Extracts":           size,
            "Data Items":         size,
            "Approve":            "Yes",
            "Rename To":          "",
            "Move To":            "",
            "Analytic Memo":      str(memo),
        })

    if not rows:
        return EMPTY_TABLE.copy()
    return pd.DataFrame(rows, columns=REVIEW_COLS)


def on_file_upload(file):
    """Handle a change of the upload widget.

    With a usable path the work is delegated to _do_file_upload. Otherwise
    (no file, or the file was cleared) the UI is reset: the initial info
    text, an empty base directory, the phase 0 banner and the phase 0
    prompt buttons.

    Returns:
        Tuple of (info markdown, base dir, banner, 4 button updates).
    """
    path = _path(file)
    if path:
        return _do_file_upload(path, file)
    return (
        "Upload a CSV to begin.",
        "",
        _phase_banner(0),
        *_prompt_button_updates(0),
    )


def _do_file_upload(path: str, file) -> tuple:
    """Read the uploaded CSV and describe it for the UI.

    Args:
        path: Filesystem path of the uploaded CSV.
        file: The original upload value, used only for its display name.

    Returns:
        Tuple of (info markdown, directory containing the CSV, phase 1
        banner, 4 button updates for phase 1).
    """
    frame = pd.read_csv(path)
    n_rows, n_cols = frame.shape
    first_columns = ", ".join(frame.columns[:6].tolist())

    # The CSV's folder becomes the base directory later scanned for
    # checkpoint files.
    csv_folder = str(Path(path).parent)

    summary_parts = [
        f"**Loaded:** `{_name(file)}`",
        f"**Shape:** {n_rows:,} rows x {n_cols} columns",
        f"**Columns:** {first_columns}",
        "*Click a prompt below and press Send to begin.*",
    ]
    summary = "\n\n".join(summary_parts)

    return (summary, csv_folder, _phase_banner(1), *_prompt_button_updates(1))


def on_send(user_msg, history, file, base_dir):
    """Pass user message to agent. Update banner, table, and prompt buttons.

    Generator event handler producing two UI updates of nine values each:
    (chat history, message box text, banner, review table, base dir,
    4 prompt-button updates).

    1. Immediately: the user message plus a "Thinking..." placeholder;
       every other output is left untouched.
    2. After the agent replies: the reply, the reloaded review table, and,
       only if the reply names a phase, the new banner and buttons.

    A reply without a "Phase N" marker used to reset the banner and buttons
    to phase 0 in the middle of an analysis; the current banner and buttons
    are now kept in that case.

    Args:
        user_msg: Text typed by the user or the label of a prompt button.
            Blank input is sent as "help".
        history: Current chat history (list of role/content dicts) or None.
        file: Current upload value; when set, its path is prefixed to the
            message as a "[CSV: ...]" tag.
        base_dir: Directory scanned for checkpoint files.
    """
    msg = (user_msg or "").strip() or "help"
    csv_tag = f"[CSV: {_path(file)}]\n" if file else ""

    history = list(history or [])
    history.append({"role": "user", "content": msg})
    history.append({"role": "assistant", "content": "Thinking..."})
    yield (
        history, "", gr.skip(), gr.skip(), gr.skip(),
        gr.skip(), gr.skip(), gr.skip(), gr.skip(),
    )

    reply = agent_run(csv_tag + msg, thread_id=THREAD_ID)
    history[-1] = {"role": "assistant", "content": reply}

    phase = _extract_phase(reply)
    if phase:
        banner = _phase_banner(phase)
        btn_updates = _prompt_button_updates(phase)
    else:
        # No phase marker in the reply: leave banner and buttons as they are.
        banner = gr.skip()
        btn_updates = tuple(gr.skip() for _ in range(4))
    table = _load_review_table(base_dir)

    yield (history, "", banner, table, base_dir, *btn_updates)


def on_submit_review(table_df, history, base_dir):
    """Serialise review table edits to agent. Return updated UI.

    The table is sent to the agent as a JSON array of row objects embedded
    in a fixed instruction prompt. The chat history gains a
    "[REVIEW SUBMITTED]" user entry followed by the agent's reply.

    A reply without a "Phase N" marker used to reset the banner and buttons
    to phase 0; the current banner and buttons are now kept in that case.

    Args:
        table_df: The review table as edited by the user (a DataFrame).
        history: Current chat history (list of role/content dicts) or None.
        base_dir: Directory scanned for checkpoint files.

    Returns:
        Tuple of (chat history, banner, reloaded review table,
        4 prompt-button updates).
    """
    history = list(history or [])
    edits = table_df.to_json(orient="records", indent=2)

    reply = agent_run(
        "Reviewer submitted table edits.\n\n"
        f"```json\n{edits}\n```\n\n"
        "Process: Approve/Reject decisions, Rename To values, "
        "Move To reassignments (call reassign_sentences if moves exist), "
        "Reasoning notes. Then check STOP gates and proceed.",
        thread_id=THREAD_ID,
    )
    history.append({"role": "user", "content": "[REVIEW SUBMITTED]"})
    history.append({"role": "assistant", "content": reply})

    phase = _extract_phase(reply)
    if phase:
        banner = _phase_banner(phase)
        btn_updates = _prompt_button_updates(phase)
    else:
        # No phase marker in the reply: leave banner and buttons as they are.
        banner = gr.skip()
        btn_updates = tuple(gr.skip() for _ in range(4))

    return (history, banner, _load_review_table(base_dir), *btn_updates)


def on_download(table_df, history):
    """Export review CSV and chat TXT.

    Writes the review table to a ``review_*.csv`` temporary file (no index
    column) and the chat to a ``chat_*.txt`` temporary file, one
    "ROLE: content" paragraph per message separated by blank lines, encoded
    as UTF-8. Neither file is deleted automatically.

    Args:
        table_df: DataFrame to export.
        history: Chat history (list of role/content dicts) or None.

    Returns:
        ``[csv_path, txt_path]``.
    """
    # Reserve the CSV name and close the handle before pandas writes to it:
    # the handle used to be left open, which leaks a descriptor and prevents
    # reopening the path on Windows.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".csv", prefix="review_"
    ) as csv_tmp:
        csv_path = csv_tmp.name
    table_df.to_csv(csv_path, index=False)

    transcript = "\n\n".join(
        f"{m.get('role', '').upper()}: {m.get('content', '')}"
        for m in history or []
    )
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".txt", prefix="chat_"
    ) as txt_tmp:
        txt_tmp.write(transcript.encode("utf-8"))
        txt_path = txt_tmp.name

    return [csv_path, txt_path]


# UI definition: header and phase banner, an "Analysis" tab (upload, chat,
# phase-action buttons, review table, downloads) and a "References" tab,
# followed by the event wiring. Mis-decoded characters in tab and section
# labels are restored, indentation is uniform, and the output list shared by
# every on_send trigger is defined once.
with gr.Blocks(title="Thematic Analysis Agent") as demo:

    # Directory scanned for checkpoint files; filled in by on_file_upload.
    base_dir_state = gr.State(value="")

    gr.Markdown("# Thematic Analysis Agent")
    gr.Markdown(
        "**Braun & Clarke (2006) 6-Phase Reflexive Thematic Analysis** "
        "| Sentence-BERT Embeddings | Agglomerative Clustering | "
        "Cosine Distance 0.50"
    )

    phase_banner = gr.Markdown(value=_phase_banner(0))

    with gr.Tabs():

        with gr.Tab("🔬 Analysis"):
            gr.Markdown("---\n### Section 1 — Data Corpus")
            with gr.Row():
                with gr.Column(scale=3):
                    file_input = gr.File(
                        label="Upload data corpus (Scopus CSV)",
                        file_types=[".csv"],
                        file_count="single",
                    )
                with gr.Column(scale=5):
                    file_info = gr.Markdown("Upload a CSV to begin.")

            gr.Markdown("---\n### Section 2 — Analyst Dialogue")
            chatbot = gr.Chatbot(label="Thematic Analysis Agent", height=200)
            with gr.Row():
                msg_box = gr.Textbox(
                    placeholder="Type a message or click a phase action below",
                    show_label=False, scale=7, lines=1,
                )
                send_btn = gr.Button("Send", variant="primary", scale=1)

            gr.Markdown("**Phase actions** (click to proceed — only actions "
                        "valid for the current B&C phase are shown)")
            with gr.Row():
                prompt_btn_1 = gr.Button("Analyse my data set",
                                         variant="secondary", scale=1, size="sm")
                prompt_btn_2 = gr.Button("", variant="secondary", scale=1,
                                         size="sm", visible=False)
                prompt_btn_3 = gr.Button("", variant="secondary", scale=1,
                                         size="sm", visible=False)
                prompt_btn_4 = gr.Button("", variant="secondary", scale=1,
                                         size="sm", visible=False)

            gr.Markdown("---\n### Section 3 — Initial Codes / Candidate Themes / Themes")
            gr.Markdown(
                "Auto-populated from tool outputs. Labels are **initial codes** "
                "in Phase 2, **candidate themes** in Phase 3, and **themes** in "
                "Phases 4–6. Edit **Approve**, **Rename To**, **Move To**, "
                "**Analytic Memo** columns, then click **Submit Review**."
            )
            review_table = gr.Dataframe(
                value=EMPTY_TABLE,
                headers=REVIEW_COLS,
                datatype=["number", "str", "str", "number", "number",
                          "str", "str", "str", "str"],
                column_count=(9, "fixed"),
                interactive=True,
                wrap=True,
                max_height=400,
            )
            with gr.Row():
                clear_btn = gr.Button("Clear table", variant="secondary", scale=2)
                sub_btn = gr.Button("Submit Review", variant="primary", scale=4)

            with gr.Accordion("Download", open=False):
                dl_btn = gr.Button("Generate downloads", variant="primary")
                dl_files = gr.File(label="Downloads", file_count="multiple",
                                   interactive=False)

        with gr.Tab("📚 References"):
            gr.Markdown(REFERENCES_MD)

    # ---- Event wiring ----------------------------------------------------
    prompt_buttons = [prompt_btn_1, prompt_btn_2, prompt_btn_3, prompt_btn_4]

    file_input.change(
        on_file_upload,
        inputs=[file_input],
        outputs=[file_info, base_dir_state, phase_banner, *prompt_buttons],
    )

    # Every on_send trigger shares the same nine outputs; only the component
    # supplying the message text differs (text box, or the clicked button,
    # whose label is the message).
    send_context = [chatbot, file_input, base_dir_state]
    send_outputs = [chatbot, msg_box, phase_banner, review_table,
                    base_dir_state, *prompt_buttons]

    send_btn.click(
        on_send,
        inputs=[msg_box, *send_context],
        outputs=list(send_outputs),
    )
    msg_box.submit(
        on_send,
        inputs=[msg_box, *send_context],
        outputs=list(send_outputs),
    )
    for prompt_btn in prompt_buttons:
        prompt_btn.click(
            on_send,
            inputs=[prompt_btn, *send_context],
            outputs=list(send_outputs),
        )

    clear_btn.click(lambda: EMPTY_TABLE.copy(), outputs=[review_table])
    sub_btn.click(
        on_submit_review,
        inputs=[review_table, chatbot, base_dir_state],
        outputs=[chatbot, phase_banner, review_table, *prompt_buttons],
    )
    dl_btn.click(on_download, inputs=[review_table, chatbot], outputs=[dl_files])

# Start the Gradio server as soon as this module is executed, with
# server-side rendering switched off and the built-in Soft theme applied.
demo.launch(ssr_mode=False, theme=gr.themes.Soft())