Dash10107 committed on
Commit
b6a5e1c
·
verified ·
1 Parent(s): bf8d4f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +549 -420
app.py CHANGED
@@ -1,487 +1,616 @@
1
- # Replace ONLY the UI/layout section beginning from:
2
- # print(">>> Building UI...")
3
- # down to demo.launch(...)
4
- #
5
- # Keep all helper functions and logic exactly as they are.
6
-
7
- print(">>> Building UI...")
8
-
9
- with gr.Blocks(
10
- title="Topic Modelling β€” Agentic AI",
11
- fill_width=True,
12
- theme=theme,
13
- css="""
14
- :root {
15
- --accent: #0f766e;
16
- --accent-soft: rgba(15,118,110,0.10);
17
- --panel: #ffffff;
18
- --panel-border: #e5e7eb;
19
- --muted: #6b7280;
20
- --bg-soft: #f8fafc;
21
- }
22
-
23
- .gradio-container {
24
- max-width: 1650px !important;
25
- margin: 0 auto !important;
26
- padding: 18px 22px 22px 22px !important;
27
- background: linear-gradient(to bottom, #fafafa, #f4f7fb);
28
- }
29
-
30
- .gradio-container::before {
31
- content: "";
32
- display: block;
33
- height: 4px;
34
- margin: -18px -22px 18px -22px;
35
- background: linear-gradient(90deg, #0f766e, #4f46e5);
36
- }
37
-
38
- .app-header {
39
- padding: 4px 0 14px 0;
40
- border-bottom: 1px solid var(--panel-border);
41
- margin-bottom: 10px;
42
- }
43
-
44
- .app-header h1 {
45
- font-size: 28px !important;
46
- font-weight: 700 !important;
47
- margin-bottom: 4px !important;
48
- color: #111827;
49
- }
50
-
51
- .app-subtitle {
52
- color: var(--muted);
53
- font-size: 13px;
54
- letter-spacing: 0.02em;
55
- }
56
-
57
- .section-card {
58
- border: 1px solid var(--panel-border);
59
- border-radius: 18px;
60
- background: white;
61
- padding: 16px;
62
- box-shadow: 0 1px 3px rgba(0,0,0,0.04);
63
- }
64
-
65
- .compact-label {
66
- font-size: 12px !important;
67
- font-weight: 700 !important;
68
- text-transform: uppercase;
69
- letter-spacing: 0.08em;
70
- color: var(--muted);
71
- margin-bottom: 10px !important;
72
- }
73
-
74
- .phase-bar {
75
- background: white;
76
- border: 1px solid var(--panel-border);
77
- border-radius: 14px;
78
- padding: 10px 14px;
79
- margin: 10px 0 14px 0;
80
- }
81
-
82
- .phase-bar p {
83
- margin: 0 !important;
84
- font-family: "Fira Code", monospace !important;
85
- font-size: 12px !important;
86
- color: #374151;
87
- line-height: 1.4;
88
- }
89
-
90
- .upload-panel {
91
- border: 1px dashed #cbd5e1 !important;
92
- border-radius: 16px !important;
93
- background: #fbfdff !important;
94
- padding: 8px !important;
95
- }
96
-
97
- .chat-shell {
98
- border: 1px solid var(--panel-border);
99
- border-radius: 18px;
100
- background: white;
101
- overflow: hidden;
102
- }
103
-
104
- .message.bot {
105
- border-left: 3px solid var(--accent) !important;
106
- background: rgba(15,118,110,0.03) !important;
107
- }
108
-
109
- .message.user {
110
- background: #f3f4f6 !important;
111
- }
112
-
113
- .tab-nav {
114
- gap: 6px !important;
115
- margin-bottom: 12px !important;
116
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
- .tab-nav button {
119
- border-radius: 10px !important;
120
- padding: 8px 14px !important;
121
- font-size: 13px !important;
122
- font-weight: 600 !important;
123
- color: #4b5563 !important;
124
- background: #f3f4f6 !important;
125
- border: 1px solid transparent !important;
126
- transition: all 0.15s ease !important;
127
- }
128
 
129
- .tab-nav button.selected {
130
- background: white !important;
131
- color: #111827 !important;
132
- border: 1px solid #d1d5db !important;
133
- box-shadow: 0 1px 2px rgba(0,0,0,0.05);
134
- }
135
 
136
- .table-wrap {
137
- border-radius: 14px !important;
138
- overflow: hidden !important;
139
- border: 1px solid var(--panel-border) !important;
140
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
- .table-wrap tr:nth-child(even) td {
143
- background: #fafafa !important;
144
- }
 
 
145
 
146
- .table-wrap th {
147
- background: #f8fafc !important;
148
- font-weight: 700 !important;
149
- font-size: 12px !important;
150
- color: #374151 !important;
151
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
- .table-wrap td {
154
- font-size: 13px !important;
155
- }
156
 
157
- .panel-title {
158
- font-size: 14px !important;
159
- font-weight: 700 !important;
160
- color: #111827 !important;
161
- margin-bottom: 10px !important;
162
- }
163
 
164
- .small-note {
165
- font-size: 12px !important;
166
- color: #6b7280 !important;
167
- margin-top: 4px !important;
168
- }
169
 
170
- button.primary {
171
- border-radius: 12px !important;
172
- font-weight: 600 !important;
173
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  """,
175
  ) as demo:
176
 
177
- with gr.Column(elem_classes=["app-header"]):
178
- gr.Markdown(
179
- """
180
- # Topic Modelling Agentic AI
181
- <div class="app-subtitle">
182
- Mistral Β· BERTopic Β· 384d Embeddings Β· Braun & Clarke Thematic Analysis
183
- </div>
184
- """
185
- )
186
 
187
- phase_progress = gr.Markdown(
188
- value=_build_progress(),
189
- elem_classes=["phase-bar"],
 
 
 
190
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
- with gr.Row(equal_height=True):
193
-
194
- # LEFT SIDEBAR
195
- with gr.Column(scale=3, min_width=340):
196
-
197
- with gr.Group(elem_classes=["section-card"]):
198
- gr.Markdown(
199
- "<div class='compact-label'>Data Source</div>"
200
- )
201
-
202
- upload = gr.File(
203
- label="Scopus CSV",
204
- file_types=[".csv"],
205
- elem_classes=["upload-panel"],
206
- )
207
-
208
- gr.Markdown(
209
- """
210
- <div class='small-note'>
211
- Upload your Scopus CSV export. The analysis starts automatically after upload.
212
- You can then continue using the chat to refine, review, approve, or rename topics.
213
- </div>
214
- """
215
- )
216
-
217
- with gr.Group(elem_classes=["section-card"]):
218
- gr.Markdown(
219
- "<div class='compact-label'>Available Commands</div>"
220
- )
221
-
222
- gr.Markdown(
223
- """
224
- - `run abstract only`
225
- - `approve all`
226
- - `show topic 4 papers`
227
- - `group 0 1 5`
228
- - `done`
229
- """
230
- )
231
-
232
- with gr.Group(elem_classes=["section-card"]):
233
- gr.Markdown(
234
- "<div class='compact-label'>Export Files</div>"
235
- )
236
-
237
- download = gr.File(
238
- label="Generated Outputs",
239
- file_count="multiple",
240
- )
241
-
242
- # MAIN CONTENT
243
- with gr.Column(scale=9):
244
-
245
- with gr.Row(equal_height=True):
246
-
247
- # CHAT PANEL
248
- with gr.Column(scale=5):
249
- with gr.Group(elem_classes=["chat-shell"]):
250
-
251
- gr.Markdown(
252
- "<div class='panel-title'>Conversation</div>"
253
- )
254
-
255
- chatbot = gr.Chatbot(
256
- height=520,
257
- show_label=False,
258
- bubble_full_width=False,
259
- avatar_images=(
260
- None,
261
- "https://api.dicebear.com/7.x/bottts-neutral/svg?seed=bertopic",
262
- ),
263
- placeholder=(
264
- "Ask the agent to analyse, review, merge, rename, "
265
- "or explain discovered topics."
266
- ),
267
- )
268
-
269
- with gr.Row():
270
- msg = gr.Textbox(
271
- placeholder="Type a command or question...",
272
- show_label=False,
273
- lines=1,
274
- max_lines=4,
275
- scale=8,
276
- )
277
-
278
- send = gr.Button(
279
- "Send",
280
- variant="primary",
281
- scale=1,
282
- min_width=90,
283
- )
284
-
285
- # RIGHT PANEL
286
- with gr.Column(scale=7):
287
-
288
- with gr.Tabs():
289
-
290
- with gr.Tab("Topics Review"):
291
-
292
- gr.Markdown(
293
- """
294
- <div class='small-note'>
295
- Approve, reject, rename, or annotate discovered topics. Click a row to inspect supporting papers.
296
- </div>
297
- """
298
- )
299
-
300
- review_table = gr.Dataframe(
301
- headers=[
302
- "#",
303
- "Topic Label",
304
- "Top Evidence Sentence",
305
- "Sentences",
306
- "Papers",
307
- "Approve",
308
- "Rename To",
309
- "Your Reasoning",
310
- ],
311
- datatype=[
312
- "number",
313
- "str",
314
- "str",
315
- "number",
316
- "number",
317
- "bool",
318
- "str",
319
- "str",
320
- ],
321
- interactive=True,
322
- column_count=8,
323
- wrap=True,
324
- height=340,
325
- )
326
-
327
- submit_review = gr.Button(
328
- "Submit Review Decisions",
329
- variant="primary",
330
- )
331
-
332
- gr.Markdown(
333
- "<div class='panel-title' style='margin-top:18px;'>Source Papers</div>"
334
- )
335
-
336
- paper_list = gr.Textbox(
337
- show_label=False,
338
- lines=10,
339
- interactive=False,
340
- placeholder="Select a topic row to inspect its papers and evidence.",
341
- )
342
-
343
- with gr.Tab("Visualisations"):
344
-
345
- chart_selector = gr.Dropdown(
346
- choices=[],
347
- label="Chart",
348
- interactive=True,
349
- )
350
-
351
- chart_display = gr.Plot(
352
- label="BERTopic Visualisation",
353
- height=650,
354
- )
355
 
356
  chart_selector.change(_load_chart, [chart_selector], [chart_display])
357
 
358
  review_table.select(
359
- _show_papers_by_select,
360
- [review_table],
361
- [paper_list],
362
  )
363
 
364
  submit_review.click(
365
- _submit_review,
366
- [review_table, chatbot],
367
- [
368
- chatbot,
369
- download,
370
- chart_selector,
371
- chart_display,
372
- review_table,
373
- phase_progress,
374
- ],
375
  )
376
 
377
  def respond_with_viz(message, chat_history, uploaded_file):
 
378
  gen = respond(message, chat_history, uploaded_file)
379
 
 
380
  hist, txt, dl = next(gen)
381
- yield (
382
- hist,
383
- txt,
384
- dl,
385
- gr.update(choices=_get_chart_choices()),
386
- gr.update(),
387
- gr.update(),
388
- _build_progress(),
389
- )
390
 
 
391
  hist, txt, dl = next(gen)
392
  choices = _get_chart_choices()
393
  first_chart = (choices and _load_chart(choices[-1])) or gr.update()
394
  table_data = _load_review_table()
395
-
396
  yield (
397
- hist,
398
- txt,
399
- dl,
400
- gr.update(
401
- choices=choices,
402
- value=(choices and choices[-1]) or None,
403
- ),
404
  first_chart,
405
  gr.update(value=table_data),
406
  _build_progress(),
407
  )
408
 
409
  msg.submit(
410
- respond_with_viz,
411
- [msg, chatbot, upload],
412
- [
413
- chatbot,
414
- msg,
415
- download,
416
- chart_selector,
417
- chart_display,
418
- review_table,
419
- phase_progress,
420
- ],
421
  )
422
-
423
  send.click(
424
- respond_with_viz,
425
- [msg, chatbot, upload],
426
- [
427
- chatbot,
428
- msg,
429
- download,
430
- chart_selector,
431
- chart_display,
432
- review_table,
433
- phase_progress,
434
- ],
435
  )
436
-
 
 
 
 
 
 
 
 
 
 
 
437
  def _auto_load_csv(uploaded_file, chat_history):
 
438
  gen = respond("Analyze my Scopus CSV", chat_history, uploaded_file)
439
 
 
440
  hist, txt, dl = next(gen)
441
- yield (
442
- hist,
443
- dl,
444
- gr.update(),
445
- gr.update(),
446
- gr.update(),
447
- _build_progress(),
448
- )
449
 
 
450
  hist, txt, dl = next(gen)
451
  choices = _get_chart_choices()
452
  first_chart = (choices and _load_chart(choices[-1])) or gr.update()
453
  table_data = _load_review_table()
454
-
455
  yield (
456
- hist,
457
- dl,
458
- gr.update(
459
- choices=choices,
460
- value=(choices and choices[-1]) or None,
461
- ),
462
  first_chart,
463
  gr.update(value=table_data),
464
  _build_progress(),
465
  )
466
 
467
  upload.change(
468
- _auto_load_csv,
469
- [upload, chatbot],
470
- [
471
- chatbot,
472
- download,
473
- chart_selector,
474
- chart_display,
475
- review_table,
476
- phase_progress,
477
- ],
478
  )
 
 
479
 
480
- print(">>> Launching...")
481
 
 
482
  demo.launch(
483
  server_name="0.0.0.0",
484
  server_port=7860,
485
  ssr_mode=False,
486
- footer_links=[],
487
- )
 
 
 
1
+ """
2
+ app.py — Topic Modelling Agentic AI | Gradio UI
3
+ ═══════════════════════════════════════════════════
4
+ Version: 3.1.0 | April 2026
5
+ Stack: Gradio 5.x + LangGraph + Mistral + BERTopic
6
+ Deploy: HuggingFace Spaces (sdk: gradio)
7
+ Rules: Zero gr.HTML(). All UI via native Gradio components.
8
+ See GRADIO_UI_GUIDELINES_v2.docx for full standards.
9
+
10
+ ARCHITECTURE — 20 Blocks in 5 Sections
11
+ ─────────────────────────────────────────
12
+ Section 1: Setup (B1–B3) Imports, agent, theme
13
+ Section 2: Helpers (B4–B10) Pure Python functions, no UI
14
+ Section 3: UI Layout (B11–B17) gr.Blocks with native components
15
+ Section 4: Event Wiring (B18–B19) Connect UI to functions
16
+ Section 5: Launch (B20) Start server
17
+
18
+ BLOCK COMMUNICATION MAP
19
+ ─────────────────────────
20
+ B6 (respond) ←→ B2 (agent) : invokes agent for chat
21
+ B6 (respond) → B4 (output) : scans for download files
22
+ B7 (chart) → B17a (display) : loads Plotly JSON → gr.Plot
23
+ B8 (table) → B16 (review) : builds rows → gr.Dataframe
24
+ B9 (papers) ← B16 (review) : triggered by row click
25
+ B10 (submit) → B2 (agent) : sends review edits to agent
26
+ B18 (wiring) → B5,B7,B8 : refreshes progress, charts, table
27
+ """
28
+ import os
29
+ import glob
30
+ import json
31
+
32
+ import plotly.io as pio
33
+ import gradio as gr
34
+ from langchain_mistralai import ChatMistralAI
35
+ from langgraph.prebuilt import create_react_agent
36
+ from langgraph.checkpoint.memory import MemorySaver
37
+ from agent import SYSTEM_PROMPT, get_local_tools
38
+
39
+ print(">>> app.py: imports complete")
40
+
41
+
42
# Chat model handed to the agent: deterministic sampling, 300 s request timeout.
llm = ChatMistralAI(
    model="mistral-small-latest",
    temperature=0,
    timeout=300,
)

# Tools come from the local ``agent`` module; the agent keeps its
# conversation state in an in-memory checkpointer.
tools = get_local_tools()
agent = create_react_agent(
    model=llm,
    tools=tools,
    prompt=SYSTEM_PROMPT,
    checkpointer=MemorySaver(),
)
print(f">>> app.py: agent ready ({len(tools)} tools)")

# Module-level state, shared by every caller of this process.
_msg_count = 0            # number of chat turns handled so far
_uploaded = {"path": ""}  # path of the most recently uploaded CSV
51
+ # ── end B2: Agent setup ────────────────────────────────────────
52
+
53
+
54
+ # ── B3: Theme ───────────────────────────────────────────────────
55
+ # PURPOSE: Define the visual identity of the entire application.
56
+ # Uses teal/indigo on zinc β€” purposeful scientific feel.
57
+ # Plus Jakarta Sans: geometric-humanist, modern but not generic.
58
+ # Fira Code for monospace elements (phase progress, etc).
59
+ # USED BY: B20 (demo.launch) β€” theme applied at launch time.
60
+ # ────────────────────────────────────────────────────────────────
61
# Base palette, fonts and sizing for the app theme.
_theme_base = gr.themes.Default(
    primary_hue="teal",
    secondary_hue="indigo",
    neutral_hue="zinc",
    font=gr.themes.GoogleFont("Plus Jakarta Sans"),
    font_mono=gr.themes.GoogleFont("Fira Code"),
    radius_size="sm",
    spacing_size="md",
)

# Per-component overrides layered on top of the base theme.
theme = _theme_base.set(
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_500",
    button_primary_text_color="white",
    block_label_text_size="sm",
    block_title_text_weight="600",
)
76
+ # ── end B3: Theme ──────────────────────────────────────────────
77
+
78
def _latest_output():
    """Collect the generated ``rq4_*`` output files in phase order.

    Globs ``/tmp/rq4_*.csv``, ``/tmp/rq4_*.json`` and
    ``/tmp/checkpoints/rq4_*.json``. Each path is weighted by summing the
    phase numbers of every keyword that occurs anywhere in the path, and
    the paths are returned in ascending weight (ties keep glob order).

    Returns:
        A list of file paths, or ``None`` when no file matched.
    """
    phase_weights = {
        "summaries": 1,
        "labels": 2,
        "themes": 3,
        "taxonomy": 4,
        "emb": 0,
        "intertopic": 5,
        "bars": 6,
        "hierarchy": 7,
        "heatmap": 8,
        "comparison": 9,
        "narrative": 10,
    }
    patterns = (
        "/tmp/rq4_*.csv",
        "/tmp/rq4_*.json",
        "/tmp/checkpoints/rq4_*.json",
    )

    found = []
    for pattern in patterns:
        found.extend(glob.glob(pattern))

    def weight_of(path):
        # Substring match against the whole path, not just the file name.
        return sum(
            weight for keyword, weight in phase_weights.items() if keyword in path
        )

    # sorted() is stable, so equally weighted files stay in discovery order.
    ordered = sorted(found, key=weight_of)
    return ordered or None
97
+ # ── end B4: _latest_output ─────────────────────────────────────
98
+
99
def _build_progress():
    """Build the one-line pipeline status string.

    Each stage is marked done when at least one of its glob patterns
    matches a file. "Themes", "Review" and "Names" all key off the same
    themes checkpoint pattern.

    Returns:
        The stage names, each prefixed by a done/pending marker, joined
        by an arrow separator.
    """
    themes_pattern = "/tmp/checkpoints/rq4_*_themes.json"
    stages = (
        ("Load", ("/tmp/checkpoints/rq4_*_summaries.json",
                  "/tmp/checkpoints/rq4_*_emb.npy")),
        ("Codes", ("/tmp/checkpoints/rq4_*_labels.json",)),
        ("Themes", (themes_pattern,)),
        ("Review", (themes_pattern,)),
        ("Names", (themes_pattern,)),
        ("PAJAIS", ("/tmp/checkpoints/rq4_*_taxonomy_map.json",)),
        ("Report", ("/tmp/rq4_comparison.csv", "/tmp/rq4_narrative.txt")),
    )

    segments = []
    for stage_name, patterns in stages:
        finished = any(glob.glob(pattern) for pattern in patterns)
        marker = 'βœ…' if finished else '⬜'
        segments.append(f"{marker} {stage_name}")
    return " β†’ ".join(segments)
114
+ # ── end B5: _build_progress ────────────────────────────────────
115
+
116
+
117
def respond(message, chat_history, uploaded_file):
    """Run one chat turn through the agent.

    This is a generator that yields ``(chat_history, "", files)`` twice:
    first with a placeholder assistant message, then with the agent's
    reply in its place. ``files`` is whatever ``_latest_output()`` returns
    at that moment.

    Args:
        message: Text typed by the user; may be empty or ``None``.
        chat_history: List of ``{"role", "content"}`` message dicts.
        uploaded_file: Path of the uploaded CSV, or a falsy value.
    """
    global _msg_count
    _msg_count += 1

    # Remember the newest upload; keep the previous path when this turn has none.
    if uploaded_file:
        _uploaded["path"] = uploaded_file
    else:
        _uploaded["path"] = _uploaded.get("path", "")

    # Tell the agent where the CSV is, or that there is none yet.
    csv_path = _uploaded["path"]
    if csv_path:
        file_note = f"\n[CSV file at: {csv_path}]"
    else:
        file_note = "\n[No CSV uploaded yet β€” ask user to upload a file first]"

    # Tell the agent how far the pipeline has got, based on checkpoint files.
    if glob.glob("/tmp/checkpoints/rq4_*_labels.json"):
        phase_context = "\n[Phase context: labels exist]"
    elif glob.glob("/tmp/checkpoints/rq4_*_emb.npy"):
        phase_context = "\n[Phase context: embeddings exist]"
    else:
        phase_context = "\n[Phase context: fresh start]"

    user_text = (message or "").strip()
    text = (user_text or "Analyze my Scopus CSV") + file_note + phase_context
    print(f"\n{'='*60}\n>>> MSG #{_msg_count}: '{text[:120]}'\n{'='*60}")

    # First yield: show the user's message plus a placeholder reply.
    user_turn = {"role": "user", "content": user_text}
    placeholder = {
        "role": "assistant",
        "content": "πŸ”¬ **Working...** _Agent is thinking..._",
    }
    chat_history = chat_history + [user_turn, placeholder]
    yield chat_history, "", _latest_output()

    # Every call uses the same fixed thread id, so all turns share one agent memory.
    result = agent.invoke(
        {"messages": [("human", text)]},
        config={"configurable": {"thread_id": "session"}},
    )
    response = result["messages"][-1].content
    print(f">>> Response ({len(response)} chars)")

    # Second yield: swap the placeholder for the real reply.
    chat_history[-1] = {"role": "assistant", "content": response}
    gr.Info(f"Agent responded ({len(response)} chars)")
    yield chat_history, "", _latest_output()
162
+ # ── end B6: respond ────────────────────────────────────────────
163
+
164
+
165
def _load_chart(chart_name):
    """Load a saved Plotly figure from ``/tmp/<chart_name>``.

    Args:
        chart_name: File name of the chart JSON inside ``/tmp``. May be
            empty or ``None``, for example when nothing is selected.

    Returns:
        The figure built by ``plotly.io.from_json``, or ``None`` when no
        name was given or the file does not exist.
    """
    if not chart_name:
        return None

    path = f"/tmp/{chart_name}"
    # Check before opening: a missing file must yield None, not an exception.
    if not os.path.isfile(path):
        return None

    with open(path, encoding="utf-8") as chart_file:
        return pio.from_json(chart_file.read())
171
+
172
def _get_chart_choices():
    """List the file names of every ``/tmp/rq4_*.json`` file, sorted by path."""
    return [os.path.basename(path) for path in sorted(glob.glob("/tmp/rq4_*.json"))]
176
+ # ── end B7: _load_chart ───────────────────────────────────────
177
+
178
+
179
def _load_review_table():
    """Build the review-table rows from the most advanced checkpoint file.

    Looks in ``/tmp/checkpoints`` for taxonomy-map, themes, labels and
    summaries JSON files, in that priority order, and reads the
    last-sorted file of the first kind that exists. When a taxonomy map
    is used, sentence and paper counts are taken from the matching entry
    (by label) of the latest themes file if there is one.

    Returns:
        A list of 8-item rows: index, label (max 60 chars), evidence
        text, sentence count, paper count, approve flag, rename text,
        reasoning text. When there is no data, a single placeholder row
        is returned instead.
    """

    def latest(pattern):
        # Last path in sorted order, or "" when nothing matches.
        matches = sorted(glob.glob(pattern))
        return matches[-1] if matches else ""

    def read_json(json_path):
        # Context manager so the handle is closed even if parsing fails.
        with open(json_path, encoding="utf-8") as handle:
            return json.load(handle)

    taxonomy_path = latest("/tmp/checkpoints/rq4_*_taxonomy_map.json")
    theme_path = latest("/tmp/checkpoints/rq4_*_themes.json")
    label_path = latest("/tmp/checkpoints/rq4_*_labels.json")
    summary_path = latest("/tmp/checkpoints/rq4_*_summaries.json")

    # Pick the most advanced checkpoint available.
    path = taxonomy_path or theme_path or label_path or summary_path
    is_taxonomy = bool(taxonomy_path)

    data = []
    if path and os.path.exists(path):
        data = read_json(path) or []

    # Taxonomy entries get their counts from the themes file, keyed by label.
    theme_lookup = {}
    if is_taxonomy and theme_path:
        theme_lookup = {t.get("label", ""): t for t in read_json(theme_path)}

    rows = []
    for index, topic in enumerate(data):
        label = topic.get("label", topic.get("top_words", ""))[:60]

        if is_taxonomy:
            evidence = (
                f"β†’ {topic.get('pajais_match', '?')} | {topic.get('reasoning', '')}"[:120]
            )
        else:
            # "nearest" may be missing, null or an empty list; all mean no evidence.
            nearest = topic.get("nearest") or []
            if nearest:
                evidence = nearest[0].get("sentence", "")[:120] + "..."
            else:
                evidence = ""

        counts = theme_lookup.get(topic.get("label", ""), topic)
        rows.append([
            index,
            label,
            evidence,
            counts.get("sentence_count", topic.get("sentence_count", 0)),
            counts.get("paper_count", topic.get("paper_count", 0)),
            True,  # Approve
            "",    # Rename To
            "",    # Reasoning
        ])

    return rows or [[0, "No data yet", "", 0, 0, False, "", ""]]
229
+ # ── end B8: _load_review_table ─────────────────────────────────
230
+
231
+
232
def _show_papers_by_select(table_data, evt: gr.SelectData):
    """Describe the topic in the clicked review-table row.

    The topic id is read from column 0 of the clicked row. Every labels
    checkpoint in ``/tmp/checkpoints`` is searched for entries whose
    ``topic_id`` equals it; summaries checkpoints are used only when no
    labels checkpoint exists.

    Args:
        table_data: Current table value, either an object with ``.iloc``
            or a list of rows.
        evt: Selection event; ``evt.index[0]`` is used as the row position.

    Returns:
        One text section per matching entry, joined by blank lines, or a
        "not found" message when nothing matched.
    """
    row_idx = evt.index[0]

    # Column 0 holds the topic id; it is not the row position.
    if hasattr(table_data, "iloc"):
        topic_id = int(table_data.iloc[row_idx, 0])
    else:
        topic_id = int(table_data[row_idx][0])

    label_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_labels.json"))
    summary_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_summaries.json"))
    checkpoint_files = label_files or summary_files

    sections = []
    for checkpoint in checkpoint_files:
        # Second "_"-separated part of the file name, shown upper-cased below.
        source = os.path.basename(checkpoint).split("_")[1]
        with open(checkpoint, encoding="utf-8") as handle:
            topics = json.load(handle)

        for t in topics:
            if t.get("topic_id") != topic_id:
                continue

            # "nearest" may be missing or null; show at most five entries.
            nearest = (t.get("nearest") or [])[:5]
            evidence = "\n".join(
                f" {i+1}. \"{item.get('sentence', '')[:200]}\"\n"
                f" Paper: {item.get('title', '')[:100]}"
                for i, item in enumerate(nearest)
            )
            titles = "\n".join(
                f" {i+1}. {title}"
                for i, title in enumerate(t.get("paper_titles", []))
            )
            sections.append(
                f"═══ {source.upper()} β€” Topic {topic_id}: "
                f"{t.get('label', t.get('top_words', '')[:50])} ═══\n"
                f"{t.get('sentence_count', 0)} sentences from {t.get('paper_count', 0)} papers\n"
                f"AI Reasoning: {t.get('reasoning', 'not yet labeled')}\n\n"
                f"── 5 NEAREST CENTROID SENTENCES (evidence) ──\n"
                + evidence
                + "\n\n── ALL PAPER TITLES ──\n"
                + titles
            )

    return "\n\n".join(sections) or f"Topic {topic_id} not found."
268
+ # ── end B9: _show_papers_by_select ─────────────────────────────
269
+
270
+
271
def _submit_review(table_data, chat_history):
    """Send the review table's decisions to the agent.

    This is a generator that yields a 6-tuple twice: chat history,
    download files, chart-selector update, chart update, table update and
    progress string. The first yield shows a placeholder reply; the second
    carries the agent's reply, refreshed chart choices and a reloaded table.

    Args:
        table_data: Table value exposing ``.values.tolist()``. Column 0 is
            the topic number, 1 the label, 5 the approve flag, 6 the
            rename text and 7 the reasoning text.
        chat_history: List of ``{"role", "content"}`` message dicts.
    """

    def describe(row):
        # One decision line per row; the pieces are concatenated with no separator.
        pieces = [f"Topic {int(row[0])}: "]
        if str(row[6]).strip():
            pieces.append(f"RENAME to '{row[6]}'")
        elif row[5]:
            pieces.append(f"APPROVE '{row[1]}'")
        if not row[5]:
            pieces.append("REJECT")
        if str(row[7]).strip():
            pieces.append(f" β€” reason: {row[7]}")
        return "".join(pieces)

    decisions = [describe(row) for row in table_data.values.tolist()]
    review_msg = "Review decisions:\n" + "\n".join(decisions)
    print(f">>> Review submitted: {review_msg[:200]}")

    # First yield: show the decisions plus a placeholder reply.
    user_turn = {"role": "user", "content": review_msg}
    placeholder = {
        "role": "assistant",
        "content": "πŸ”¬ **Processing review decisions...**",
    }
    chat_history = chat_history + [user_turn, placeholder]
    gr.Info("Review submitted to agent")
    yield (
        chat_history,
        _latest_output(),
        gr.update(),
        gr.update(),
        gr.update(),
        _build_progress(),
    )

    # Same fixed thread id as the chat handler, so the agent sees one conversation.
    result = agent.invoke(
        {"messages": [("human", review_msg)]},
        config={"configurable": {"thread_id": "session"}},
    )
    response = result["messages"][-1].content

    # Second yield: real reply, refreshed chart choices and reloaded table.
    chat_history[-1] = {"role": "assistant", "content": response}
    gr.Info("Review processed β€” table updated")
    yield (
        chat_history,
        _latest_output(),
        gr.update(choices=_get_chart_choices()),
        gr.update(),
        gr.update(value=_load_review_table()),
        _build_progress(),
    )
315
 
 
 
 
316
 
317
+ print(">>> Building UI...")
 
 
 
 
 
318
 
 
 
 
 
 
319
 
320
+ with gr.Blocks(
321
+ title="Topic Modelling β€” Agentic AI",
322
+ fill_width=True,
323
+ css="""
324
+ /* Accent bar at very top of page */
325
+ .gradio-container::before {
326
+ content: "";
327
+ display: block;
328
+ height: 3px;
329
+ background: linear-gradient(90deg, #0d9488, #6366f1);
330
+ margin-bottom: 4px;
331
+ }
332
+ /* Tabs: tighter padding, bolder active state */
333
+ .tab-nav button {
334
+ font-size: 13px !important;
335
+ font-weight: 500 !important;
336
+ letter-spacing: 0.01em;
337
+ padding: 6px 16px !important;
338
+ }
339
+ .tab-nav button.selected {
340
+ font-weight: 700 !important;
341
+ border-bottom: 2px solid #0d9488 !important;
342
+ }
343
+ /* Dataframe: subtle zebra rows */
344
+ .table-wrap tr:nth-child(even) td {
345
+ background-color: rgba(13, 148, 136, 0.04);
346
+ }
347
+ /* Chat: teal left-border on assistant bubbles */
348
+ .message.bot {
349
+ border-left: 3px solid #0d9488 !important;
350
+ }
351
+ /* Phase progress: monospace, slightly muted */
352
+ .phase-bar p {
353
+ font-family: "Fira Code", monospace;
354
+ font-size: 12px;
355
+ letter-spacing: 0.03em;
356
+ opacity: 0.80;
357
+ }
358
+ /* Upload area: cleaner dashed border */
359
+ .upload-container {
360
+ border-style: dashed !important;
361
+ border-width: 1px !important;
362
+ }
363
  """,
364
  ) as demo:
365
 
 
 
 
 
 
 
 
 
 
366
 
367
+ # ── B12: Header ────────────────────────────────────────────
368
+ # PURPOSE: Application title and subtitle.
369
+ # ───────────────────────────────────────────────────────────
370
+ gr.Markdown(
371
+ "# πŸ”¬ Topic Modelling Β· Agentic AI\n"
372
+ "<sub>Mistral Β· Cosine Clustering Β· 384d Embeddings Β· Braun & Clarke Thematic Analysis</sub>"
373
  )
374
+ # ── end B12: Header ────────────────────────────────────────
375
+
376
+
377
+ # ── B13: Data input ────────────────────────────────────────
378
+ # PURPOSE: CSV file upload area with inline instructions.
379
+ # Researcher uploads their Scopus CSV export here.
380
+ # On upload, B19 auto-triggers the first analysis.
381
+ # COMPONENTS: gr.File (upload) + gr.Markdown (instructions)
382
+ # EVENTS: upload.change β†’ B19 (_auto_load_csv)
383
+ # ───────────────────────────────────────────────────────────
384
+ gr.Markdown("**β‘  Upload**")
385
+ with gr.Row():
386
+ upload = gr.File(label="πŸ“‚ Scopus CSV", file_types=[".csv"])
387
+ gr.Markdown(
388
+ "Upload your Scopus CSV export, then type `run abstract only` in the chat below "
389
+ "to begin the analysis pipeline."
390
+ )
391
+ # ── end B13: Data input ────────────────────────────────────
392
+
393
+
394
+ # ── B14: Progress pipeline ─────────────────────────────────
395
+ # PURPOSE: Visual indicator of which Braun & Clarke analysis
396
+ # phases are complete. Updated after every agent action.
397
+ # elem_classes="phase-bar" targets the monospace CSS rule in B11.
398
+ # COMPONENT: gr.Markdown β€” displays emoji string from B5
399
+ # UPDATED BY: B18 (after chat), B10 (after review), B19 (after upload)
400
+ # ───────────────────────────────────────────────────────────
401
+ phase_progress = gr.Markdown(value=_build_progress(), elem_classes=["phase-bar"])
402
+ # ── end B14: Progress pipeline ─────────────────────────────
403
+
404
+
405
+ # ── B15: Chatbot + input ───────────────────────────────────
406
+ # PURPOSE: Main conversation interface between researcher and
407
+ # the LangGraph agent.
408
+ # COMPONENTS: gr.Chatbot (display), gr.Textbox (input), gr.Button (send)
409
+ # EVENTS: msg.submit β†’ B18, send.click β†’ B18
410
+ # ───────────────────────────────────────────────────────────
411
+ gr.Markdown("**β‘‘ Conversation** β€” follow the guided workflow")
412
+ with gr.Group():
413
+ chatbot = gr.Chatbot(
414
+ height=320,
415
+ show_label=False,
416
+ avatar_images=(
417
+ None,
418
+ "https://api.dicebear.com/7.x/bottts-neutral/svg?seed=bertopic",
419
+ ),
420
+ placeholder=(
421
+ "**Ready.** Upload a Scopus CSV above, then type:\n\n"
422
+ "`run abstract only` Β· `approve all` Β· `show topic 4 papers` Β· `done`"
423
+ ),
424
+ )
425
+ with gr.Row():
426
+ msg = gr.Textbox(
427
+ placeholder="run Β· approve Β· show topic 4 papers Β· group 0 1 5 Β· done",
428
+ show_label=False, scale=9, lines=1, max_lines=1, container=False,
429
+ )
430
+ send = gr.Button("⏎ Send", variant="primary", scale=1, min_width=80)
431
+ # ── end B15: Chatbot + input ───────────────────────────────
432
+
433
+
434
+ # ── B16: Review table tab ──────────────────────────────────
435
+ # PURPOSE: Interactive topic review table where the researcher
436
+ # approves, renames, or annotates BERTopic-discovered
437
+ # topics. This is the core human-in-the-loop interface.
438
+ #
439
+ # KEY FEATURES (all native Gradio, no HTML):
440
+ # - static_columns=[0,1,2,3,4] β€” first 5 columns read-only
441
+ # - datatype "bool" on column 5 β€” Approve renders as checkbox
442
+ # - pinned_columns=2 β€” # and Label stay visible when scrolling
443
+ # - show_search="filter" β€” built-in column filtering
444
+ # - .select() event β€” clicking any row auto-loads that topic's papers
445
+ #
446
+ # COMPONENTS: gr.Dataframe, gr.Button (submit), gr.Textbox (papers)
447
+ # EVENTS: review_table.select β†’ B9, submit_review.click β†’ B10
448
+ # ───────────────────────────────────────────────────────────
449
+ gr.Markdown("**β‘’ Review & Export**")
450
+ with gr.Tabs():
451
+ with gr.Tab("πŸ“‹ Topics"):
452
+ gr.Markdown(
453
+ "*Toggle **Approve**, fill in **Rename To** or **Reasoning**, "
454
+ "then click Submit. Click any row to inspect its source papers below.*"
455
+ )
456
+ review_table = gr.Dataframe(
457
+ headers=[
458
+ "#", "Topic Label", "Top Evidence Sentence",
459
+ "Sentences", "Papers", "Approve", "Rename To", "Your Reasoning",
460
+ ],
461
+ datatype=[
462
+ "number", "str", "str", "number", "number",
463
+ "bool", "str", "str",
464
+ ],
465
+ interactive=True,
466
+ column_count=8,
467
+ # NOTE: These features need Gradio >=5.23. Uncomment when available:
468
+ # static_columns=[0, 1, 2, 3, 4],
469
+ # pinned_columns=2,
470
+ # show_search="filter",
471
+ # show_row_numbers=True,
472
+ # show_fullscreen_button=True,
473
+ # show_copy_button=True,
474
+ # column_widths=["60px","200px","250px","80px","70px","70px","150px","200px"],
475
+ )
476
+ submit_review = gr.Button("βœ… Submit Review to Agent", variant="primary")
477
+
478
+ gr.Markdown("---")
479
+ gr.Markdown("**πŸ“„ Papers in selected topic** *(click any row above)*")
480
+ paper_list = gr.Textbox(
481
+ label="Papers in selected topic",
482
+ lines=8, interactive=False,
483
+ )
484
+ # ── end B16: Review table tab ──────────────────────────────
485
+
486
+
487
+ # ── B17a: Charts tab ───────────────────────────────────
488
+ # PURPOSE: Display BERTopic visualization charts rendered
489
+ # natively in gr.Plot from Plotly JSON files.
490
+ # COMPONENTS: gr.Dropdown (selector), gr.Plot (display)
491
+ # EVENTS: chart_selector.change → B7 (_load_chart)
492
+ # ───────────────────────────────────────────────────────
493
+ with gr.Tab("πŸ“Š Visualise"):
494
+ chart_selector = gr.Dropdown(
495
+ choices=[], label="Select chart", interactive=True,
496
+ )
497
+ chart_display = gr.Plot(label="BERTopic Visualization")
498
+ # ── end B17a: Charts tab ───────────────────────────────
499
+
500
+
501
+ # ── B17b: Download tab ─────────────────────────────────
502
+ # PURPOSE: Multi-file download for all pipeline outputs.
503
+ # COMPONENTS: gr.Markdown (descriptions), gr.File (download)
504
+ # UPDATED BY: B18, B10, B19 — refreshed after each action
505
+ # ───────────────────────────────────────────────────────
506
+ with gr.Tab("⬇ Export"):
507
+ gr.Markdown(
508
+ "**Files by Phase (per run: abstract / title):**\n\n"
509
+ "**Phase 2 β€” Discovery:** `summaries.json` Β· `emb.npy`\n\n"
510
+ "**Phase 2 β€” Labeling:** `labels.json`\n\n"
511
+ "**Phase 2 β€” Charts:** `intertopic.json` Β· `bars.json` Β· "
512
+ "`hierarchy.json` Β· `heatmap.json`\n\n"
513
+ "**Phase 3 β€” Themes:** `themes.json`\n\n"
514
+ "**Phase 5.5 β€” Taxonomy:** `taxonomy_map.json`\n\n"
515
+ "**Phase 6 β€” Report:** `comparison.csv` Β· `narrative.txt`"
516
+ )
517
+ download = gr.File(label="All output files", file_count="multiple")
518
+ # ── end B17b: Download tab ─────────────────────────────
519
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
520
 
521
  chart_selector.change(_load_chart, [chart_selector], [chart_display])
522
 
523
  review_table.select(
524
+ _show_papers_by_select, [review_table], [paper_list],
 
 
525
  )
526
 
527
  submit_review.click(
528
+ _submit_review, [review_table, chatbot],
529
+ [chatbot, download, chart_selector, chart_display,
530
+ review_table, phase_progress],
 
 
 
 
 
 
 
531
  )
532
 
533
  def respond_with_viz(message, chat_history, uploaded_file):
534
+ """Wrap respond() and update charts + table + progress after each turn."""
535
  gen = respond(message, chat_history, uploaded_file)
536
 
537
+ # First yield (progress bubble)
538
  hist, txt, dl = next(gen)
539
+ yield (hist, txt, dl, gr.update(choices=_get_chart_choices()),
540
+ gr.update(), gr.update(), _build_progress())
 
 
 
 
 
 
 
541
 
542
+ # Second yield (final response + populate table + charts)
543
  hist, txt, dl = next(gen)
544
  choices = _get_chart_choices()
545
  first_chart = (choices and _load_chart(choices[-1])) or gr.update()
546
  table_data = _load_review_table()
 
547
  yield (
548
+ hist, txt, dl,
549
+ gr.update(choices=choices, value=(choices and choices[-1]) or None),
 
 
 
 
 
550
  first_chart,
551
  gr.update(value=table_data),
552
  _build_progress(),
553
  )
554
 
555
  msg.submit(
556
+ respond_with_viz, [msg, chatbot, upload],
557
+ [chatbot, msg, download, chart_selector, chart_display,
558
+ review_table, phase_progress],
 
 
 
 
 
 
 
 
559
  )
 
560
  send.click(
561
+ respond_with_viz, [msg, chatbot, upload],
562
+ [chatbot, msg, download, chart_selector, chart_display,
563
+ review_table, phase_progress],
 
 
 
 
 
 
 
 
564
  )
565
+ # ── end B18: respond_with_viz + event bindings ─────────────
566
+
567
+
568
+ # ── B19: _auto_load_csv() ──────────────────────────────────
569
+ # PURPOSE: Automatically triggers analysis when a CSV file is
570
+ # uploaded. Sends "Analyze my Scopus CSV" as the
571
+ # initial message so no manual typing is needed.
572
+ # TRIGGERED BY: upload.change event
573
+ # CALLS: B6 (respond) with auto-message
574
+ # OUTPUTS: chatbot, download, chart_selector, chart_display,
575
+ # review_table, phase_progress
576
+ # ───────────────────────────────────────────────────────────
577
  def _auto_load_csv(uploaded_file, chat_history):
578
+ """Auto-trigger analysis when CSV is uploaded — no typing needed."""
579
  gen = respond("Analyze my Scopus CSV", chat_history, uploaded_file)
580
 
581
+ # First yield (progress)
582
  hist, txt, dl = next(gen)
583
+ yield (hist, dl, gr.update(), gr.update(),
584
+ gr.update(), _build_progress())
 
 
 
 
 
 
585
 
586
+ # Second yield (final + populate everything)
587
  hist, txt, dl = next(gen)
588
  choices = _get_chart_choices()
589
  first_chart = (choices and _load_chart(choices[-1])) or gr.update()
590
  table_data = _load_review_table()
 
591
  yield (
592
+ hist, dl,
593
+ gr.update(choices=choices, value=(choices and choices[-1]) or None),
 
 
 
 
594
  first_chart,
595
  gr.update(value=table_data),
596
  _build_progress(),
597
  )
598
 
599
  upload.change(
600
+ _auto_load_csv, [upload, chatbot],
601
+ [chatbot, download, chart_selector, chart_display,
602
+ review_table, phase_progress],
 
 
 
 
 
 
 
603
  )
604
+ # ── end B19: _auto_load_csv ────────────────────────────────
605
+
606
 
 
607
 
608
+ print(">>> Launching...")
609
  demo.launch(
610
  server_name="0.0.0.0",
611
  server_port=7860,
612
  ssr_mode=False,
613
+ theme=theme, # Gradio 6: moved from gr.Blocks()
614
+ footer_links=[], # Gradio 6: hides footer, replaces show_api
615
+ )
616
+ # ── end B20: Launch ────────────────────────────────────────────