Dash10107 commited on
Commit
bf8d4f9
·
verified ·
1 Parent(s): 1543740

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +420 -549
app.py CHANGED
@@ -1,616 +1,487 @@
1
- """
2
- app.py Topic Modelling Agentic AI | Gradio UI
3
- ═══════════════════════════════════════════════════
4
- Version: 3.1.0 | April 2026
5
- Stack: Gradio 5.x + LangGraph + Mistral + BERTopic
6
- Deploy: HuggingFace Spaces (sdk: gradio)
7
- Rules: Zero gr.HTML(). All UI via native Gradio components.
8
- See GRADIO_UI_GUIDELINES_v2.docx for full standards.
9
-
10
- ARCHITECTURE — 20 Blocks in 5 Sections
11
- ─────────────────────────────────────────
12
- Section 1: Setup (B1–B3) Imports, agent, theme
13
- Section 2: Helpers (B4–B10) Pure Python functions, no UI
14
- Section 3: UI Layout (B11–B17) gr.Blocks with native components
15
- Section 4: Event Wiring (B18–B19) Connect UI to functions
16
- Section 5: Launch (B20) Start server
17
-
18
- BLOCK COMMUNICATION MAP
19
- ─────────────────────────
20
- B6 (respond) ←→ B2 (agent) : invokes agent for chat
21
- B6 (respond) → B4 (output) : scans for download files
22
- B7 (chart) → B17a (display) : loads Plotly JSON → gr.Plot
23
- B8 (table) → B16 (review) : builds rows → gr.Dataframe
24
- B9 (papers) ← B16 (review) : triggered by row click
25
- B10 (submit) → B2 (agent) : sends review edits to agent
26
- B18 (wiring) → B5,B7,B8 : refreshes progress, charts, table
27
- """
28
- import os
29
- import glob
30
- import json
31
-
32
- import plotly.io as pio
33
- import gradio as gr
34
- from langchain_mistralai import ChatMistralAI
35
- from langgraph.prebuilt import create_react_agent
36
- from langgraph.checkpoint.memory import MemorySaver
37
- from agent import SYSTEM_PROMPT, get_local_tools
38
-
39
- print(">>> app.py: imports complete")
40
-
41
-
42
- llm = ChatMistralAI(model="mistral-small-latest", temperature=0, timeout=300)
43
- tools = get_local_tools()
44
- agent = create_react_agent(
45
- model=llm, tools=tools, prompt=SYSTEM_PROMPT, checkpointer=MemorySaver()
46
- )
47
- print(f">>> app.py: agent ready ({len(tools)} tools)")
48
-
49
- _msg_count = 0 # Global message counter (shared across users)
50
- _uploaded = {"path": ""} # Last uploaded CSV path (shared session)
51
- # ── end B2: Agent setup ────────────────────────────────────────
52
-
53
-
54
- # ── B3: Theme ───────────────────────────────────────────────────
55
- # PURPOSE: Define the visual identity of the entire application.
56
- # Uses teal/indigo on zinc — purposeful scientific feel.
57
- # Plus Jakarta Sans: geometric-humanist, modern but not generic.
58
- # Fira Code for monospace elements (phase progress, etc).
59
- # USED BY: B20 (demo.launch) — theme applied at launch time.
60
- # ────────────────────────────────────────────────────────────────
61
- theme = gr.themes.Default(
62
- primary_hue="teal",
63
- secondary_hue="indigo",
64
- neutral_hue="zinc",
65
- font=gr.themes.GoogleFont("Plus Jakarta Sans"),
66
- font_mono=gr.themes.GoogleFont("Fira Code"),
67
- radius_size="sm",
68
- spacing_size="md",
69
- ).set(
70
- button_primary_background_fill="*primary_600",
71
- button_primary_background_fill_hover="*primary_500",
72
- button_primary_text_color="white",
73
- block_label_text_size="sm",
74
- block_title_text_weight="600",
75
- )
76
- # ── end B3: Theme ──────────────────────────────────────────────
77
-
78
def _latest_output():
    """Collect generated ``rq4_*`` output files, ordered by pipeline phase.

    Globs ``/tmp/rq4_*.csv``, ``/tmp/rq4_*.json`` and
    ``/tmp/checkpoints/rq4_*.json``. Each path is scored by summing the
    weights of every phase keyword it contains, and the paths are returned
    in ascending score order.

    Returns:
        A list of file paths, or ``None`` when no file matched.
    """
    phase_order = {
        "summaries": 1, "labels": 2, "themes": 3, "taxonomy": 4,
        "emb": 0, "intertopic": 5, "bars": 6, "hierarchy": 7,
        "heatmap": 8, "comparison": 9, "narrative": 10,
    }
    # NOTE(review): only .csv/.json files are globbed below, while
    # _build_progress looks for "emb" as .npy and "narrative" as .txt, so
    # those two keywords presumably never match here — confirm whether the
    # .npy/.txt outputs should be downloadable as well.

    def phase_score(path):
        # A path may contain several keywords; their weights add up.
        return sum(
            weight for keyword, weight in phase_order.items() if keyword in path
        )

    files = (
        glob.glob("/tmp/rq4_*.csv")
        + glob.glob("/tmp/rq4_*.json")
        + glob.glob("/tmp/checkpoints/rq4_*.json")
    )
    # sorted() is stable, so equally scored files keep their glob order.
    return sorted(files, key=phase_score) or None
97
- # ── end B4: _latest_output ─────────────────────────────────────
98
-
99
def _build_progress():
    """Return the phase-progress line as plain text with emoji markers.

    Each phase is marked done (✅) when its checkpoint/output file pattern
    matches at least one file, otherwise pending (⬜). "Themes", "Review"
    and "Names" all key off the same ``rq4_*_themes.json`` checkpoint.

    Returns:
        A single string of ``"<emoji> <name>"`` items joined by " → ".
    """
    ckpt = "/tmp/checkpoints"

    def exists(pattern):
        # True when at least one file matches the glob pattern.
        return bool(glob.glob(pattern))

    # Three phases share this checkpoint; look it up once.
    themes_done = exists(f"{ckpt}/rq4_*_themes.json")

    checks = [
        ("Load", exists(f"{ckpt}/rq4_*_summaries.json")
                 or exists(f"{ckpt}/rq4_*_emb.npy")),
        ("Codes", exists(f"{ckpt}/rq4_*_labels.json")),
        ("Themes", themes_done),
        ("Review", themes_done),
        ("Names", themes_done),
        ("PAJAIS", exists(f"{ckpt}/rq4_*_taxonomy_map.json")),
        ("Report", exists("/tmp/rq4_comparison.csv")
                   or exists("/tmp/rq4_narrative.txt")),
    ]
    return " → ".join(f"{'✅' if done else '⬜'} {name}" for name, done in checks)
114
- # ── end B5: _build_progress ────────────────────────────────────
115
-
116
-
117
def respond(message, chat_history, uploaded_file):
    """Handle one chat turn with the LangGraph agent.

    Args:
        message: Text typed by the user; may be empty or ``None``, in which
            case "Analyze my Scopus CSV" is sent to the agent instead.
        chat_history: List of ``{"role", "content"}`` message dicts; ``None``
            is treated as an empty history.
        uploaded_file: Path of the uploaded CSV, or a falsy value to reuse
            the previously stored path.

    Yields:
        ``(chat_history, "", download_files)`` twice: first with a
        "working" placeholder bubble, then with the agent's final reply.
    """
    global _msg_count
    _msg_count += 1

    # Keep the last known upload when this turn carries no new file.
    _uploaded["path"] = uploaded_file or _uploaded.get("path", "")

    # Tell the agent where the CSV is (prevents hallucinated file paths).
    if _uploaded["path"]:
        file_note = f"\n[CSV file at: {_uploaded['path']}]"
    else:
        file_note = "\n[No CSV uploaded yet — ask user to upload a file first]"

    # Tell the agent which phase we are in, based on existing checkpoints.
    if glob.glob("/tmp/checkpoints/rq4_*_labels.json"):
        phase_context = "\n[Phase context: labels exist]"
    elif glob.glob("/tmp/checkpoints/rq4_*_emb.npy"):
        phase_context = "\n[Phase context: embeddings exist]"
    else:
        phase_context = "\n[Phase context: fresh start]"

    user_text = (message or "").strip()
    text = (user_text or "Analyze my Scopus CSV") + file_note + phase_context
    print(f"\n{'='*60}\n>>> MSG #{_msg_count}: '{text[:120]}'\n{'='*60}")

    # YIELD 1: show the "thinking" bubble immediately.
    chat_history = (chat_history or []) + [
        {"role": "user", "content": user_text},
        {"role": "assistant", "content": "🔬 **Working...** _Agent is thinking..._"},
    ]
    yield chat_history, "", _latest_output()

    # NOTE(review): every caller uses the fixed thread_id "session", so the
    # checkpointer's conversation memory looks shared across all users.
    result = agent.invoke(
        {"messages": [("human", text)]},
        config={"configurable": {"thread_id": "session"}},
    )
    response = result["messages"][-1].content
    print(f">>> Response ({len(response)} chars)")

    # YIELD 2: replace the thinking bubble with the actual response.
    chat_history[-1] = {"role": "assistant", "content": response}
    gr.Info(f"Agent responded ({len(response)} chars)")
    yield chat_history, "", _latest_output()
162
- # ── end B6: respond ────────────────────────────────────────────
163
-
164
-
165
def _load_chart(chart_name):
    """Load a Plotly figure from ``/tmp/<chart_name>``.

    Args:
        chart_name: File name of a Plotly JSON file inside ``/tmp``.

    Returns:
        The figure built by ``pio.from_json``, or ``None`` when
        ``chart_name`` is empty or the file does not exist.
    """
    if not chart_name:
        return None
    path = f"/tmp/{chart_name}"
    # Check before opening: a missing file must yield None, not an exception.
    if not os.path.isfile(path):
        return None
    with open(path, encoding="utf-8") as fh:
        return pio.from_json(fh.read())


def _get_chart_choices():
    """Return the sorted base names of all ``/tmp/rq4_*.json`` files."""
    return [os.path.basename(path) for path in sorted(glob.glob("/tmp/rq4_*.json"))]
- # ── end B7: _load_chart ───────────────────────────────────────
177
-
178
-
179
def _load_review_table():
    """Build the review-table rows from the most advanced checkpoint JSON.

    Checkpoint priority: taxonomy_map > themes > labels > summaries; within
    one kind, the lexicographically last file is used. When the taxonomy
    map is the source and a themes file exists, sentence/paper counts are
    looked up in the themes file by label.

    Returns:
        A list of 8-item rows ``[#, label, evidence, sentences, papers,
        approve, rename_to, reasoning]``. ``approve`` is a bool. When no
        data is available a single "No data yet" placeholder row is
        returned.
    """
    ckpt = "/tmp/checkpoints"
    taxonomy_files = sorted(glob.glob(f"{ckpt}/rq4_*_taxonomy_map.json"))
    theme_files = sorted(glob.glob(f"{ckpt}/rq4_*_themes.json"))
    label_files = sorted(glob.glob(f"{ckpt}/rq4_*_labels.json"))
    summary_files = sorted(glob.glob(f"{ckpt}/rq4_*_summaries.json"))

    # Pick the most advanced checkpoint available.
    candidates = (taxonomy_files, theme_files, label_files, summary_files)
    path = next((files[-1] for files in candidates if files), "")
    # Taxonomy has top priority, so it is the source whenever it exists.
    is_taxonomy = bool(taxonomy_files)

    data = []
    if path and os.path.exists(path):
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh) or []

    # For taxonomy: merge with themes to get sentence/paper counts.
    theme_lookup = {}
    if is_taxonomy and theme_files:
        with open(theme_files[-1], encoding="utf-8") as fh:
            theme_lookup = {t.get("label", ""): t for t in json.load(fh)}

    def build_row(idx, topic):
        # One table row for a single checkpoint entry.
        label = topic.get("label", topic.get("top_words", "")) or ""
        if is_taxonomy:
            evidence = (
                f"→ {topic.get('pajais_match', '?')} | {topic.get('reasoning', '')}"
            )[:120]
        else:
            # An empty or missing "nearest" list yields no evidence text
            # instead of an IndexError.
            nearest = topic.get("nearest") or []
            evidence = (
                nearest[0].get("sentence", "")[:120] + "..." if nearest else ""
            )
        counts = theme_lookup.get(topic.get("label", ""), topic)
        return [
            # NOTE(review): this is the list position, while
            # _show_papers_by_select reads column 0 as a topic_id — confirm
            # the two always coincide.
            idx,                                                         # #
            label[:60],                                                  # Label
            evidence,                                                    # Evidence
            counts.get("sentence_count", topic.get("sentence_count", 0)),
            counts.get("paper_count", topic.get("paper_count", 0)),
            True,                                                        # Approve
            "",                                                          # Rename To
            "",                                                          # Reasoning
        ]

    rows = [build_row(idx, topic) for idx, topic in enumerate(data)]
    return rows or [[0, "No data yet", "", 0, 0, False, "", ""]]
229
- # ── end B8: _load_review_table ─────────────────────────────────
230
-
231
-
232
def _show_papers_by_select(table_data, evt: gr.SelectData):
    """Describe the topic of the clicked review-table row.

    Args:
        table_data: Current table value; either an object with ``.iloc``
            (DataFrame-like) or a list of rows.
        evt: Selection event; ``evt.index[0]`` is used as the row index.

    Returns:
        A text report (header, nearest-centroid sentences, paper titles)
        for every checkpoint entry whose ``topic_id`` equals column 0 of
        the clicked row, or ``"Topic <id> not found."``.
    """
    row_idx = evt.index[0]

    # Topic id comes from column 0 of the clicked row, not the row index.
    if hasattr(table_data, "iloc"):
        topic_id = int(table_data.iloc[row_idx, 0])
    else:
        topic_id = int(table_data[row_idx][0])

    # Label checkpoints take precedence; summaries are the fallback.
    label_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_labels.json"))
    summary_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_summaries.json"))

    sections = []
    for path in label_files or summary_files:
        # Second "_"-separated token of the file name, e.g. rq4_<source>_...
        source = os.path.basename(path).split("_")[1]
        with open(path, encoding="utf-8") as fh:
            topics = json.load(fh)
        for topic in topics:
            if topic.get("topic_id") != topic_id:
                continue
            evidence = "\n".join(
                f" {rank}. \"{near['sentence'][:200]}\"\n"
                f" Paper: {near.get('title', '')[:100]}"
                for rank, near in enumerate(topic.get("nearest", [])[:5], start=1)
            )
            titles = "\n".join(
                f" {rank}. {title}"
                for rank, title in enumerate(topic.get("paper_titles", []), start=1)
            )
            sections.append(
                f"═══ {source.upper()} — Topic {topic_id}: "
                f"{topic.get('label', topic.get('top_words', '')[:50])} ═══\n"
                f"{topic.get('sentence_count', 0)} sentences from "
                f"{topic.get('paper_count', 0)} papers\n"
                f"AI Reasoning: {topic.get('reasoning', 'not yet labeled')}\n\n"
                f"── 5 NEAREST CENTROID SENTENCES (evidence) ──\n"
                + evidence
                + "\n\n── ALL PAPER TITLES ──\n"
                + titles
            )
    return "\n\n".join(sections) or f"Topic {topic_id} not found."
268
- # ── end B9: _show_papers_by_select ─────────────────────────────
269
-
270
-
271
def _submit_review(table_data, chat_history):
    """Turn the review-table edits into one agent message and send it.

    Per row the decision is, in order of precedence: ``REJECT`` when the
    Approve checkbox is off, ``RENAME to '<text>'`` when "Rename To" is
    filled in, otherwise ``APPROVE '<label>'``. A non-empty reasoning cell
    is appended as " — reason: <text>".

    Args:
        table_data: Table value; DataFrame-like (``.iloc``/``.values``) or
            a list of rows with the 8 review columns.
        chat_history: List of ``{"role", "content"}`` message dicts;
            ``None`` is treated as empty.

    Yields:
        6-tuples ``(chat_history, download_files, chart_selector_update,
        chart_display_update, review_table_update, progress_text)``: first
        with a "processing" bubble, then with the agent's reply and a
        refreshed table and chart list.
    """
    if hasattr(table_data, "iloc"):
        rows = table_data.values.tolist()
    else:
        rows = list(table_data)

    def cell_text(value):
        # Empty/None/NaN cells count as "not filled in".
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return str(value).strip()

    def describe(row):
        # Exactly one decision per row, so the text is never fused into
        # something like "RENAME to 'x'REJECT".
        if not row[5]:
            decision = "REJECT"
        elif cell_text(row[6]):
            decision = f"RENAME to '{row[6]}'"
        else:
            decision = f"APPROVE '{row[1]}'"
        line = f"Topic {int(row[0])}: {decision}"
        if cell_text(row[7]):
            line += f" — reason: {row[7]}"
        return line

    # NOTE(review): the "No data yet" placeholder row (Approve=False) is
    # still submitted as "Topic 0: REJECT" — confirm whether it should be
    # filtered out.
    review_msg = "Review decisions:\n" + "\n".join(describe(row) for row in rows)
    print(f">>> Review submitted: {review_msg[:200]}")

    # YIELD 1: show the processing bubble.
    chat_history = (chat_history or []) + [
        {"role": "user", "content": review_msg},
        {"role": "assistant", "content": "🔬 **Processing review decisions...**"},
    ]
    gr.Info("Review submitted to agent")
    yield (chat_history, _latest_output(), gr.update(),
           gr.update(), gr.update(), _build_progress())

    # Invoke the agent with the review decisions.
    result = agent.invoke(
        {"messages": [("human", review_msg)]},
        config={"configurable": {"thread_id": "session"}},
    )
    response = result["messages"][-1].content

    # YIELD 2: final response plus refreshed table and chart list.
    chat_history[-1] = {"role": "assistant", "content": response}
    gr.Info("Review processed — table updated")
    yield (
        chat_history,
        _latest_output(),
        gr.update(choices=_get_chart_choices()),
        gr.update(),
        gr.update(value=_load_review_table()),
        _build_progress(),
    )
315
 
 
 
 
 
 
 
 
316
 
317
- print(">>> Building UI...")
 
 
 
 
 
 
 
318
 
 
 
 
 
 
 
 
319
 
320
- with gr.Blocks(
321
- title="Topic Modelling — Agentic AI",
322
- fill_width=True,
323
- css="""
324
- /* Accent bar at very top of page */
325
- .gradio-container::before {
326
- content: "";
327
- display: block;
328
- height: 3px;
329
- background: linear-gradient(90deg, #0d9488, #6366f1);
330
- margin-bottom: 4px;
331
- }
332
- /* Tabs: tighter padding, bolder active state */
333
- .tab-nav button {
334
- font-size: 13px !important;
335
- font-weight: 500 !important;
336
- letter-spacing: 0.01em;
337
- padding: 6px 16px !important;
338
- }
339
- .tab-nav button.selected {
340
- font-weight: 700 !important;
341
- border-bottom: 2px solid #0d9488 !important;
342
- }
343
- /* Dataframe: subtle zebra rows */
344
- .table-wrap tr:nth-child(even) td {
345
- background-color: rgba(13, 148, 136, 0.04);
346
- }
347
- /* Chat: teal left-border on assistant bubbles */
348
- .message.bot {
349
- border-left: 3px solid #0d9488 !important;
350
- }
351
- /* Phase progress: monospace, slightly muted */
352
- .phase-bar p {
353
- font-family: "Fira Code", monospace;
354
- font-size: 12px;
355
- letter-spacing: 0.03em;
356
- opacity: 0.80;
357
- }
358
- /* Upload area: cleaner dashed border */
359
- .upload-container {
360
- border-style: dashed !important;
361
- border-width: 1px !important;
362
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  """,
364
  ) as demo:
365
 
366
-
367
- # ── B12: Header ────────────────────────────────────────────
368
- # PURPOSE: Application title and subtitle.
369
- # ───────────────────────────────────────────────────────────
370
- gr.Markdown(
371
- "# 🔬 Topic Modelling · Agentic AI\n"
372
- "<sub>Mistral · Cosine Clustering · 384d Embeddings · Braun & Clarke Thematic Analysis</sub>"
373
- )
374
- # ── end B12: Header ────────────────────────────────────────
375
-
376
-
377
- # ── B13: Data input ────────────────────────────────────────
378
- # PURPOSE: CSV file upload area with inline instructions.
379
- # Researcher uploads their Scopus CSV export here.
380
- # On upload, B19 auto-triggers the first analysis.
381
- # COMPONENTS: gr.File (upload) + gr.Markdown (instructions)
382
- # EVENTS: upload.change → B19 (_auto_load_csv)
383
- # ───────────────────────────────────────────────────────────
384
- gr.Markdown("**① Upload**")
385
- with gr.Row():
386
- upload = gr.File(label="📂 Scopus CSV", file_types=[".csv"])
387
  gr.Markdown(
388
- "Upload your Scopus CSV export, then type `run abstract only` in the chat below "
389
- "to begin the analysis pipeline."
390
- )
391
- # ── end B13: Data input ────────────────────────────────────
392
-
393
-
394
- # ── B14: Progress pipeline ─────────────────────────────────
395
- # PURPOSE: Visual indicator of which Braun & Clarke analysis
396
- # phases are complete. Updated after every agent action.
397
- # elem_classes="phase-bar" targets the monospace CSS rule in B11.
398
- # COMPONENT: gr.Markdown — displays emoji string from B5
399
- # UPDATED BY: B18 (after chat), B10 (after review), B19 (after upload)
400
- # ───────────────────────────────────────────────────────────
401
- phase_progress = gr.Markdown(value=_build_progress(), elem_classes=["phase-bar"])
402
- # ── end B14: Progress pipeline ─────────────────────────────
403
-
404
-
405
- # ── B15: Chatbot + input ───────────────────────────────────
406
- # PURPOSE: Main conversation interface between researcher and
407
- # the LangGraph agent.
408
- # COMPONENTS: gr.Chatbot (display), gr.Textbox (input), gr.Button (send)
409
- # EVENTS: msg.submit → B18, send.click → B18
410
- # ───────────────────────────────────────────────────────────
411
- gr.Markdown("**② Conversation** — follow the guided workflow")
412
- with gr.Group():
413
- chatbot = gr.Chatbot(
414
- height=320,
415
- show_label=False,
416
- avatar_images=(
417
- None,
418
- "https://api.dicebear.com/7.x/bottts-neutral/svg?seed=bertopic",
419
- ),
420
- placeholder=(
421
- "**Ready.** Upload a Scopus CSV above, then type:\n\n"
422
- "`run abstract only` · `approve all` · `show topic 4 papers` · `done`"
423
- ),
424
  )
425
- with gr.Row():
426
- msg = gr.Textbox(
427
- placeholder="run · approve · show topic 4 papers · group 0 1 5 · done",
428
- show_label=False, scale=9, lines=1, max_lines=1, container=False,
429
- )
430
- send = gr.Button("⏎ Send", variant="primary", scale=1, min_width=80)
431
- # ── end B15: Chatbot + input ───────────────────────────────
432
-
433
-
434
- # ── B16: Review table tab ──────────────────────────────────
435
- # PURPOSE: Interactive topic review table where the researcher
436
- # approves, renames, or annotates BERTopic-discovered
437
- # topics. This is the core human-in-the-loop interface.
438
- #
439
- # KEY FEATURES (all native Gradio, no HTML):
440
- # - static_columns=[0,1,2,3,4] — first 5 columns read-only
441
- # - datatype "bool" on column 5 — Approve renders as checkbox
442
- # - pinned_columns=2 — # and Label stay visible when scrolling
443
- # - show_search="filter" — built-in column filtering
444
- # - .select() event — clicking any row auto-loads that topic's papers
445
- #
446
- # COMPONENTS: gr.Dataframe, gr.Button (submit), gr.Textbox (papers)
447
- # EVENTS: review_table.select → B9, submit_review.click → B10
448
- # ───────────────────────────────────────────────────────────
449
- gr.Markdown("**③ Review & Export**")
450
- with gr.Tabs():
451
- with gr.Tab("📋 Topics"):
452
- gr.Markdown(
453
- "*Toggle **Approve**, fill in **Rename To** or **Reasoning**, "
454
- "then click Submit. Click any row to inspect its source papers below.*"
455
- )
456
- review_table = gr.Dataframe(
457
- headers=[
458
- "#", "Topic Label", "Top Evidence Sentence",
459
- "Sentences", "Papers", "Approve", "Rename To", "Your Reasoning",
460
- ],
461
- datatype=[
462
- "number", "str", "str", "number", "number",
463
- "bool", "str", "str",
464
- ],
465
- interactive=True,
466
- column_count=8,
467
- # NOTE: These features need Gradio >=5.23. Uncomment when available:
468
- # static_columns=[0, 1, 2, 3, 4],
469
- # pinned_columns=2,
470
- # show_search="filter",
471
- # show_row_numbers=True,
472
- # show_fullscreen_button=True,
473
- # show_copy_button=True,
474
- # column_widths=["60px","200px","250px","80px","70px","70px","150px","200px"],
475
- )
476
- submit_review = gr.Button("✅ Submit Review to Agent", variant="primary")
477
-
478
- gr.Markdown("---")
479
- gr.Markdown("**📄 Papers in selected topic** *(click any row above)*")
480
- paper_list = gr.Textbox(
481
- label="Papers in selected topic",
482
- lines=8, interactive=False,
483
- )
484
- # ── end B16: Review table tab ──────────────────────────────
485
-
486
-
487
- # ── B17a: Charts tab ───────────────────────────────────
488
- # PURPOSE: Display BERTopic visualization charts rendered
489
- # natively in gr.Plot from Plotly JSON files.
490
- # COMPONENTS: gr.Dropdown (selector), gr.Plot (display)
491
- # EVENTS: chart_selector.change → B7 (_load_chart)
492
- # ───────────────────────────────────────────────────────
493
- with gr.Tab("📊 Visualise"):
494
- chart_selector = gr.Dropdown(
495
- choices=[], label="Select chart", interactive=True,
496
- )
497
- chart_display = gr.Plot(label="BERTopic Visualization")
498
- # ── end B17a: Charts tab ───────────────────────────────
499
-
500
-
501
- # ── B17b: Download tab ─────────────────────────────────
502
- # PURPOSE: Multi-file download for all pipeline outputs.
503
- # COMPONENTS: gr.Markdown (descriptions), gr.File (download)
504
- # UPDATED BY: B18, B10, B19 — refreshed after each action
505
- # ───────────────────────────────────────────────────────
506
- with gr.Tab("⬇ Export"):
507
- gr.Markdown(
508
- "**Files by Phase (per run: abstract / title):**\n\n"
509
- "**Phase 2 — Discovery:** `summaries.json` · `emb.npy`\n\n"
510
- "**Phase 2 — Labeling:** `labels.json`\n\n"
511
- "**Phase 2 — Charts:** `intertopic.json` · `bars.json` · "
512
- "`hierarchy.json` · `heatmap.json`\n\n"
513
- "**Phase 3 — Themes:** `themes.json`\n\n"
514
- "**Phase 5.5 — Taxonomy:** `taxonomy_map.json`\n\n"
515
- "**Phase 6 — Report:** `comparison.csv` · `narrative.txt`"
516
- )
517
- download = gr.File(label="All output files", file_count="multiple")
518
- # ── end B17b: Download tab ─────────────────────────────
519
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
520
 
521
  chart_selector.change(_load_chart, [chart_selector], [chart_display])
522
 
523
  review_table.select(
524
- _show_papers_by_select, [review_table], [paper_list],
 
 
525
  )
526
 
527
  submit_review.click(
528
- _submit_review, [review_table, chatbot],
529
- [chatbot, download, chart_selector, chart_display,
530
- review_table, phase_progress],
 
 
 
 
 
 
 
531
  )
532
 
533
  def respond_with_viz(message, chat_history, uploaded_file):
534
- """Wrap respond() and update charts + table + progress after each turn."""
535
  gen = respond(message, chat_history, uploaded_file)
536
 
537
- # First yield (progress bubble)
538
  hist, txt, dl = next(gen)
539
- yield (hist, txt, dl, gr.update(choices=_get_chart_choices()),
540
- gr.update(), gr.update(), _build_progress())
 
 
 
 
 
 
 
541
 
542
- # Second yield (final response + populate table + charts)
543
  hist, txt, dl = next(gen)
544
  choices = _get_chart_choices()
545
  first_chart = (choices and _load_chart(choices[-1])) or gr.update()
546
  table_data = _load_review_table()
 
547
  yield (
548
- hist, txt, dl,
549
- gr.update(choices=choices, value=(choices and choices[-1]) or None),
 
 
 
 
 
550
  first_chart,
551
  gr.update(value=table_data),
552
  _build_progress(),
553
  )
554
 
555
  msg.submit(
556
- respond_with_viz, [msg, chatbot, upload],
557
- [chatbot, msg, download, chart_selector, chart_display,
558
- review_table, phase_progress],
 
 
 
 
 
 
 
 
559
  )
 
560
  send.click(
561
- respond_with_viz, [msg, chatbot, upload],
562
- [chatbot, msg, download, chart_selector, chart_display,
563
- review_table, phase_progress],
 
 
 
 
 
 
 
 
564
  )
565
- # ── end B18: respond_with_viz + event bindings ─────────────
566
-
567
-
568
- # ── B19: _auto_load_csv() ──────────────────────────────────
569
- # PURPOSE: Automatically triggers analysis when a CSV file is
570
- # uploaded. Sends "Analyze my Scopus CSV" as the
571
- # initial message so no manual typing is needed.
572
- # TRIGGERED BY: upload.change event
573
- # CALLS: B6 (respond) with auto-message
574
- # OUTPUTS: chatbot, download, chart_selector, chart_display,
575
- # review_table, phase_progress
576
- # ───────────────────────────────────────────────────────────
577
  def _auto_load_csv(uploaded_file, chat_history):
578
- """Auto-trigger analysis when CSV is uploaded — no typing needed."""
579
  gen = respond("Analyze my Scopus CSV", chat_history, uploaded_file)
580
 
581
- # First yield (progress)
582
  hist, txt, dl = next(gen)
583
- yield (hist, dl, gr.update(), gr.update(),
584
- gr.update(), _build_progress())
 
 
 
 
 
 
585
 
586
- # Second yield (final + populate everything)
587
  hist, txt, dl = next(gen)
588
  choices = _get_chart_choices()
589
  first_chart = (choices and _load_chart(choices[-1])) or gr.update()
590
  table_data = _load_review_table()
 
591
  yield (
592
- hist, dl,
593
- gr.update(choices=choices, value=(choices and choices[-1]) or None),
 
 
 
 
594
  first_chart,
595
  gr.update(value=table_data),
596
  _build_progress(),
597
  )
598
 
599
  upload.change(
600
- _auto_load_csv, [upload, chatbot],
601
- [chatbot, download, chart_selector, chart_display,
602
- review_table, phase_progress],
 
 
 
 
 
 
 
603
  )
604
- # ── end B19: _auto_load_csv ────────────────────────────────
605
-
606
-
607
 
608
  print(">>> Launching...")
 
609
  demo.launch(
610
  server_name="0.0.0.0",
611
  server_port=7860,
612
  ssr_mode=False,
613
- theme=theme, # Gradio 6: moved from gr.Blocks()
614
- footer_links=[], # Gradio 6: hides footer, replaces show_api
615
- )
616
- # ── end B20: Launch ────────────────────────────────────────────
 
1
+ # UI/layout section of app.py, covering everything from:
2
+ # print(">>> Building UI...")
3
+ # down to demo.launch(...)
4
+ #
5
+ # This section uses `gr`, `theme` and helpers such as `_build_progress`, so the imports and helper functions must stay defined above it, unchanged.
6
+
7
+ print(">>> Building UI...")
8
+
9
+ with gr.Blocks(
10
+ title="Topic Modelling Agentic AI",
11
+ fill_width=True,
12
+ theme=theme,
13
+ css="""
14
+ :root {
15
+ --accent: #0f766e;
16
+ --accent-soft: rgba(15,118,110,0.10);
17
+ --panel: #ffffff;
18
+ --panel-border: #e5e7eb;
19
+ --muted: #6b7280;
20
+ --bg-soft: #f8fafc;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
+ .gradio-container {
24
+ max-width: 1650px !important;
25
+ margin: 0 auto !important;
26
+ padding: 18px 22px 22px 22px !important;
27
+ background: linear-gradient(to bottom, #fafafa, #f4f7fb);
28
+ }
29
 
30
+ .gradio-container::before {
31
+ content: "";
32
+ display: block;
33
+ height: 4px;
34
+ margin: -18px -22px 18px -22px;
35
+ background: linear-gradient(90deg, #0f766e, #4f46e5);
36
+ }
37
 
38
+ .app-header {
39
+ padding: 4px 0 14px 0;
40
+ border-bottom: 1px solid var(--panel-border);
41
+ margin-bottom: 10px;
42
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
+ .app-header h1 {
45
+ font-size: 28px !important;
46
+ font-weight: 700 !important;
47
+ margin-bottom: 4px !important;
48
+ color: #111827;
49
+ }
50
 
51
+ .app-subtitle {
52
+ color: var(--muted);
53
+ font-size: 13px;
54
+ letter-spacing: 0.02em;
55
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
+ .section-card {
58
+ border: 1px solid var(--panel-border);
59
+ border-radius: 18px;
60
+ background: white;
61
+ padding: 16px;
62
+ box-shadow: 0 1px 3px rgba(0,0,0,0.04);
63
+ }
64
 
65
+ .compact-label {
66
+ font-size: 12px !important;
67
+ font-weight: 700 !important;
68
+ text-transform: uppercase;
69
+ letter-spacing: 0.08em;
70
+ color: var(--muted);
71
+ margin-bottom: 10px !important;
72
+ }
73
 
74
+ .phase-bar {
75
+ background: white;
76
+ border: 1px solid var(--panel-border);
77
+ border-radius: 14px;
78
+ padding: 10px 14px;
79
+ margin: 10px 0 14px 0;
80
+ }
81
 
82
+ .phase-bar p {
83
+ margin: 0 !important;
84
+ font-family: "Fira Code", monospace !important;
85
+ font-size: 12px !important;
86
+ color: #374151;
87
+ line-height: 1.4;
88
+ }
89
+
90
+ .upload-panel {
91
+ border: 1px dashed #cbd5e1 !important;
92
+ border-radius: 16px !important;
93
+ background: #fbfdff !important;
94
+ padding: 8px !important;
95
+ }
96
+
97
+ .chat-shell {
98
+ border: 1px solid var(--panel-border);
99
+ border-radius: 18px;
100
+ background: white;
101
+ overflow: hidden;
102
+ }
103
+
104
+ .message.bot {
105
+ border-left: 3px solid var(--accent) !important;
106
+ background: rgba(15,118,110,0.03) !important;
107
+ }
108
+
109
+ .message.user {
110
+ background: #f3f4f6 !important;
111
+ }
112
+
113
+ .tab-nav {
114
+ gap: 6px !important;
115
+ margin-bottom: 12px !important;
116
+ }
117
+
118
+ .tab-nav button {
119
+ border-radius: 10px !important;
120
+ padding: 8px 14px !important;
121
+ font-size: 13px !important;
122
+ font-weight: 600 !important;
123
+ color: #4b5563 !important;
124
+ background: #f3f4f6 !important;
125
+ border: 1px solid transparent !important;
126
+ transition: all 0.15s ease !important;
127
+ }
128
+
129
+ .tab-nav button.selected {
130
+ background: white !important;
131
+ color: #111827 !important;
132
+ border: 1px solid #d1d5db !important;
133
+ box-shadow: 0 1px 2px rgba(0,0,0,0.05);
134
+ }
135
+
136
+ .table-wrap {
137
+ border-radius: 14px !important;
138
+ overflow: hidden !important;
139
+ border: 1px solid var(--panel-border) !important;
140
+ }
141
+
142
+ .table-wrap tr:nth-child(even) td {
143
+ background: #fafafa !important;
144
+ }
145
+
146
+ .table-wrap th {
147
+ background: #f8fafc !important;
148
+ font-weight: 700 !important;
149
+ font-size: 12px !important;
150
+ color: #374151 !important;
151
+ }
152
+
153
+ .table-wrap td {
154
+ font-size: 13px !important;
155
+ }
156
+
157
+ .panel-title {
158
+ font-size: 14px !important;
159
+ font-weight: 700 !important;
160
+ color: #111827 !important;
161
+ margin-bottom: 10px !important;
162
+ }
163
+
164
+ .small-note {
165
+ font-size: 12px !important;
166
+ color: #6b7280 !important;
167
+ margin-top: 4px !important;
168
+ }
169
+
170
+ button.primary {
171
+ border-radius: 12px !important;
172
+ font-weight: 600 !important;
173
+ }
174
  """,
175
  ) as demo:
176
 
177
+ with gr.Column(elem_classes=["app-header"]):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  gr.Markdown(
179
+ """
180
+ # Topic Modelling Agentic AI
181
+ <div class="app-subtitle">
182
+ Mistral · BERTopic · 384d Embeddings · Braun & Clarke Thematic Analysis
183
+ </div>
184
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
+ phase_progress = gr.Markdown(
188
+ value=_build_progress(),
189
+ elem_classes=["phase-bar"],
190
+ )
191
+
192
+ with gr.Row(equal_height=True):
193
+
194
+ # LEFT SIDEBAR
195
+ with gr.Column(scale=3, min_width=340):
196
+
197
+ with gr.Group(elem_classes=["section-card"]):
198
+ gr.Markdown(
199
+ "<div class='compact-label'>Data Source</div>"
200
+ )
201
+
202
+ upload = gr.File(
203
+ label="Scopus CSV",
204
+ file_types=[".csv"],
205
+ elem_classes=["upload-panel"],
206
+ )
207
+
208
+ gr.Markdown(
209
+ """
210
+ <div class='small-note'>
211
+ Upload your Scopus CSV export. The analysis starts automatically after upload.
212
+ You can then continue using the chat to refine, review, approve, or rename topics.
213
+ </div>
214
+ """
215
+ )
216
+
217
+ with gr.Group(elem_classes=["section-card"]):
218
+ gr.Markdown(
219
+ "<div class='compact-label'>Available Commands</div>"
220
+ )
221
+
222
+ gr.Markdown(
223
+ """
224
+ - `run abstract only`
225
+ - `approve all`
226
+ - `show topic 4 papers`
227
+ - `group 0 1 5`
228
+ - `done`
229
+ """
230
+ )
231
+
232
+ with gr.Group(elem_classes=["section-card"]):
233
+ gr.Markdown(
234
+ "<div class='compact-label'>Export Files</div>"
235
+ )
236
+
237
+ download = gr.File(
238
+ label="Generated Outputs",
239
+ file_count="multiple",
240
+ )
241
+
242
+ # MAIN CONTENT
243
+ with gr.Column(scale=9):
244
+
245
+ with gr.Row(equal_height=True):
246
+
247
+ # CHAT PANEL
248
+ with gr.Column(scale=5):
249
+ with gr.Group(elem_classes=["chat-shell"]):
250
+
251
+ gr.Markdown(
252
+ "<div class='panel-title'>Conversation</div>"
253
+ )
254
+
255
+ chatbot = gr.Chatbot(
256
+ height=520,
257
+ show_label=False,
258
+ bubble_full_width=False,
259
+ avatar_images=(
260
+ None,
261
+ "https://api.dicebear.com/7.x/bottts-neutral/svg?seed=bertopic",
262
+ ),
263
+ placeholder=(
264
+ "Ask the agent to analyse, review, merge, rename, "
265
+ "or explain discovered topics."
266
+ ),
267
+ )
268
+
269
+ with gr.Row():
270
+ msg = gr.Textbox(
271
+ placeholder="Type a command or question...",
272
+ show_label=False,
273
+ lines=1,
274
+ max_lines=4,
275
+ scale=8,
276
+ )
277
+
278
+ send = gr.Button(
279
+ "Send",
280
+ variant="primary",
281
+ scale=1,
282
+ min_width=90,
283
+ )
284
+
285
+ # RIGHT PANEL
286
+ with gr.Column(scale=7):
287
+
288
+ with gr.Tabs():
289
+
290
+ with gr.Tab("Topics Review"):
291
+
292
+ gr.Markdown(
293
+ """
294
+ <div class='small-note'>
295
+ Approve, reject, rename, or annotate discovered topics. Click a row to inspect supporting papers.
296
+ </div>
297
+ """
298
+ )
299
+
300
+ review_table = gr.Dataframe(
301
+ headers=[
302
+ "#",
303
+ "Topic Label",
304
+ "Top Evidence Sentence",
305
+ "Sentences",
306
+ "Papers",
307
+ "Approve",
308
+ "Rename To",
309
+ "Your Reasoning",
310
+ ],
311
+ datatype=[
312
+ "number",
313
+ "str",
314
+ "str",
315
+ "number",
316
+ "number",
317
+ "bool",
318
+ "str",
319
+ "str",
320
+ ],
321
+ interactive=True,
322
+ column_count=8,
323
+ wrap=True,
324
+ height=340,
325
+ )
326
+
327
+ submit_review = gr.Button(
328
+ "Submit Review Decisions",
329
+ variant="primary",
330
+ )
331
+
332
+ gr.Markdown(
333
+ "<div class='panel-title' style='margin-top:18px;'>Source Papers</div>"
334
+ )
335
+
336
+ paper_list = gr.Textbox(
337
+ show_label=False,
338
+ lines=10,
339
+ interactive=False,
340
+ placeholder="Select a topic row to inspect its papers and evidence.",
341
+ )
342
+
343
+ with gr.Tab("Visualisations"):
344
+
345
+ chart_selector = gr.Dropdown(
346
+ choices=[],
347
+ label="Chart",
348
+ interactive=True,
349
+ )
350
+
351
+ chart_display = gr.Plot(
352
+ label="BERTopic Visualisation",
353
+ height=650,
354
+ )
355
 
356
  chart_selector.change(_load_chart, [chart_selector], [chart_display])
357
 
358
  review_table.select(
359
+ _show_papers_by_select,
360
+ [review_table],
361
+ [paper_list],
362
  )
363
 
364
  submit_review.click(
365
+ _submit_review,
366
+ [review_table, chatbot],
367
+ [
368
+ chatbot,
369
+ download,
370
+ chart_selector,
371
+ chart_display,
372
+ review_table,
373
+ phase_progress,
374
+ ],
375
  )
376
 
377
  def respond_with_viz(message, chat_history, uploaded_file):
 
378
  gen = respond(message, chat_history, uploaded_file)
379
 
 
380
  hist, txt, dl = next(gen)
381
+ yield (
382
+ hist,
383
+ txt,
384
+ dl,
385
+ gr.update(choices=_get_chart_choices()),
386
+ gr.update(),
387
+ gr.update(),
388
+ _build_progress(),
389
+ )
390
 
 
391
  hist, txt, dl = next(gen)
392
  choices = _get_chart_choices()
393
  first_chart = (choices and _load_chart(choices[-1])) or gr.update()
394
  table_data = _load_review_table()
395
+
396
  yield (
397
+ hist,
398
+ txt,
399
+ dl,
400
+ gr.update(
401
+ choices=choices,
402
+ value=(choices and choices[-1]) or None,
403
+ ),
404
  first_chart,
405
  gr.update(value=table_data),
406
  _build_progress(),
407
  )
408
 
409
  msg.submit(
410
+ respond_with_viz,
411
+ [msg, chatbot, upload],
412
+ [
413
+ chatbot,
414
+ msg,
415
+ download,
416
+ chart_selector,
417
+ chart_display,
418
+ review_table,
419
+ phase_progress,
420
+ ],
421
  )
422
+
423
  send.click(
424
+ respond_with_viz,
425
+ [msg, chatbot, upload],
426
+ [
427
+ chatbot,
428
+ msg,
429
+ download,
430
+ chart_selector,
431
+ chart_display,
432
+ review_table,
433
+ phase_progress,
434
+ ],
435
  )
436
+
 
 
 
 
 
 
 
 
 
 
 
437
  def _auto_load_csv(uploaded_file, chat_history):
 
438
  gen = respond("Analyze my Scopus CSV", chat_history, uploaded_file)
439
 
 
440
  hist, txt, dl = next(gen)
441
+ yield (
442
+ hist,
443
+ dl,
444
+ gr.update(),
445
+ gr.update(),
446
+ gr.update(),
447
+ _build_progress(),
448
+ )
449
 
 
450
  hist, txt, dl = next(gen)
451
  choices = _get_chart_choices()
452
  first_chart = (choices and _load_chart(choices[-1])) or gr.update()
453
  table_data = _load_review_table()
454
+
455
  yield (
456
+ hist,
457
+ dl,
458
+ gr.update(
459
+ choices=choices,
460
+ value=(choices and choices[-1]) or None,
461
+ ),
462
  first_chart,
463
  gr.update(value=table_data),
464
  _build_progress(),
465
  )
466
 
467
  upload.change(
468
+ _auto_load_csv,
469
+ [upload, chatbot],
470
+ [
471
+ chatbot,
472
+ download,
473
+ chart_selector,
474
+ chart_display,
475
+ review_table,
476
+ phase_progress,
477
+ ],
478
  )
 
 
 
479
 
480
  print(">>> Launching...")
481
+
482
  demo.launch(
483
  server_name="0.0.0.0",
484
  server_port=7860,
485
  ssr_mode=False,
486
+ footer_links=[],
487
+ )