Dash10107 committed on
Commit
1543740
·
verified ·
1 Parent(s): 3454e5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -259
app.py CHANGED
@@ -1,7 +1,7 @@
1
  """
2
  app.py — Topic Modelling Agentic AI | Gradio UI
3
  ═══════════════════════════════════════════════════
4
- Version: 3.0.0 | April 2026
5
  Stack: Gradio 5.x + LangGraph + Mistral + BERTopic
6
  Deploy: HuggingFace Spaces (sdk: gradio)
7
  Rules: Zero gr.HTML(). All UI via native Gradio components.
@@ -39,23 +39,6 @@ from agent import SYSTEM_PROMPT, get_local_tools
39
  print(">>> app.py: imports complete")
40
 
41
 
42
- # ╔═══════════════════════════════════════════════════════════════╗
43
- # β•‘ SECTION 1 β€” SETUP β•‘
44
- # β•‘ One-time initialization: agent creation and visual theme. β•‘
45
- # β•‘ Nothing here renders UI β€” it prepares the backend brain β•‘
46
- # β•‘ and the visual identity for the entire application. β•‘
47
- # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
48
-
49
-
50
- # ── B2: Agent setup ─────────────────────────────────────────────
51
- # PURPOSE: Create the LangGraph ReAct agent that powers all chat.
52
- # Connects Mistral LLM to BERTopic tools with memory so
53
- # the agent remembers context across conversation turns.
54
- # PRODUCES: `agent` β€” used by B6 (respond) and B10 (_submit_review)
55
- # IMPORTS: SYSTEM_PROMPT, get_local_tools from agent.py
56
- # NOTE: MemorySaver keeps conversation in RAM (resets on restart).
57
- # For persistent memory, swap to SQLite checkpointer.
58
- # ────────────────────────────────────────────────────────────────
59
  llm = ChatMistralAI(model="mistral-small-latest", temperature=0, timeout=300)
60
  tools = get_local_tools()
61
  agent = create_react_agent(
@@ -70,43 +53,28 @@ _uploaded = {"path": ""} # Last uploaded CSV path (shared session)
70
 
71
  # ── B3: Theme ───────────────────────────────────────────────────
72
  # PURPOSE: Define the visual identity of the entire application.
73
- # Replaces ALL custom CSS that was previously in HEADER_HTML:
74
- # - DM Sans font (was @import url in <style> block)
75
- # - Slate color palette (was hardcoded hex in inline styles)
76
- # - Soft rounded corners and spacing
77
- # USED BY: B20 (demo.launch) β€” Gradio 6 moved theme from gr.Blocks
78
- # to launch(). The theme object is created here but applied
79
- # in B20 via demo.launch(theme=theme).
80
- # REPLACES: Old HEADER_HTML lines 33-38 (<style> block with CSS)
81
  # ────────────────────────────────────────────────────────────────
82
- theme = gr.themes.Soft(
83
- primary_hue="slate",
84
- font=gr.themes.GoogleFont("DM Sans"),
85
- font_mono=gr.themes.GoogleFont("JetBrains Mono"),
 
 
 
 
 
 
 
 
 
 
86
  )
87
  # ── end B3: Theme ──────────────────────────────────────────────
88
 
89
-
90
- # ╔═══════════════════════════════════════════════════════════════╗
91
- # β•‘ SECTION 2 β€” HELPER FUNCTIONS β•‘
92
- # β•‘ Pure Python functions that process data and return clean β•‘
93
- # β•‘ values (strings, lists, figures). NONE of these functions β•‘
94
- # β•‘ return HTML strings. They feed data to UI components in β•‘
95
- # β•‘ Section 3 via event handlers in Section 4. β•‘
96
- # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
97
-
98
-
99
- # ── B4: _latest_output() ───────────────────────────────────────
100
- # PURPOSE: Scan /tmp for all rq4_* output files generated by the
101
- # BERTopic agent pipeline (CSVs, JSONs, chart files).
102
- # Sorts them by pipeline phase order so the download
103
- # component shows files in logical sequence.
104
- # RETURNS: List[str] of filepaths sorted by phase, or None
105
- # USED BY: B6 (respond) β€” attaches to download component after
106
- # each agent response
107
- # B10 (_submit_review) β€” refreshes downloads after review
108
- # B19 (_auto_load_csv) β€” refreshes after initial upload
109
- # ────────────────────────────────────────────────────────────────
110
  def _latest_output():
111
  """Scan /tmp for ALL rq4_* files, sorted by phase order.
112
  Returns list of filepaths for gr.File download component."""
@@ -128,20 +96,6 @@ def _latest_output():
128
  return list(map(lambda x: x[1], scored)) or None
129
  # ── end B4: _latest_output ─────────────────────────────────────
130
 
131
-
132
- # ── B5: _build_progress() ──────────────────────────────────────
133
- # PURPOSE: Check which Braun & Clarke phases are complete by
134
- # scanning for checkpoint files on disk. Returns a
135
- # human-readable emoji string showing pipeline status.
136
- # RETURNS: str like "βœ… Load β†’ βœ… Codes β†’ ⏳ Themes β†’ ⬜ Report"
137
- # USED BY: B14 (phase_progress initial value)
138
- # B18 (respond_with_viz) β€” refreshes after each agent turn
139
- # B10 (_submit_review) β€” refreshes after review submission
140
- # B19 (_auto_load_csv) β€” refreshes after CSV upload
141
- # REPLACES: Old _build_progress() which returned 24 lines of HTML
142
- # with inline-styled <span> elements and color codes.
143
- # Now returns pure text with emoji β€” gr.Markdown renders it.
144
- # ────────────────────────────────────────────────────────────────
145
  def _build_progress():
146
  """Return emoji progress pipeline. NO HTML β€” just text + emoji.
147
  Displayed in gr.Markdown component (B14)."""
@@ -160,27 +114,6 @@ def _build_progress():
160
  # ── end B5: _build_progress ────────────────────────────────────
161
 
162
 
163
- # ── B6: respond() ──────────────────────────────────────────────
164
- # PURPOSE: Core chat handler. This is the brain of the app.
165
- # 1. Stores uploaded CSV file path (if new upload)
166
- # 2. Appends file location + phase context to user message
167
- # so the agent knows what data is available
168
- # 3. Yields a "thinking..." bubble immediately (user sees
169
- # instant feedback while agent processes)
170
- # 4. Invokes the LangGraph agent (Mistral decides which
171
- # BERTopic tools to call)
172
- # 5. Replaces thinking bubble with actual agent response
173
- # 6. Attaches latest output files to download component
174
- # INPUTS: message (str), chat_history (list[dict]), uploaded_file (str|None)
175
- # YIELDS: Tuple of (chat_history, empty_string, download_files)
176
- # β€” yields TWICE: first with progress bubble, then with final response
177
- # TALKS TO: B2 (agent.invoke) β€” sends message, gets response
178
- # B4 (_latest_output) β€” gets download file list
179
- # USED BY: B18 (respond_with_viz wraps this)
180
- # B19 (_auto_load_csv wraps this)
181
- # NOTE: Uses single thread_id="session" so agent remembers
182
- # previous turns (loaded CSV path, current phase, etc.)
183
- # ────────────────────────────────────────────────────────────────
184
  def respond(message, chat_history, uploaded_file):
185
  """Handle one chat turn with the LangGraph agent.
186
  Yields twice: progress bubble β†’ final response."""
@@ -229,20 +162,6 @@ def respond(message, chat_history, uploaded_file):
229
  # ── end B6: respond ────────────────────────────────────────────
230
 
231
 
232
- # ── B7: _load_chart() ──────────────────────────────────────────
233
- # PURPOSE: Load a BERTopic visualization chart from a saved Plotly
234
- # JSON file on disk and return the figure object.
235
- # The gr.Plot component in B17a renders this directly β€”
236
- # no iframe, no HTML escaping, no srcdoc hack.
237
- # INPUT: chart_name (str) β€” filename like "rq4_intertopic.json"
238
- # RETURNS: plotly.graph_objects.Figure or None
239
- # USED BY: B17a (chart_selector.change event)
240
- # B18 (respond_with_viz) β€” auto-shows latest chart
241
- # REPLACES: Old _load_chart() which used html.escape() + iframe
242
- # srcdoc to embed HTML files. That was 8 lines of hack.
243
- # REQUIRES: BERTopic tools in tools.py must save charts as Plotly
244
- # JSON via pio.to_json(fig) instead of fig.write_html().
245
- # ────────────────────────────────────────────────────────────────
246
  def _load_chart(chart_name):
247
  """Load Plotly chart from JSON file. Returns figure for gr.Plot.
248
  No HTML, no iframe β€” just a native Plotly figure object."""
@@ -257,22 +176,6 @@ def _get_chart_choices():
257
  # ── end B7: _load_chart ───────────────────────────────────────
258
 
259
 
260
- # ── B8: _load_review_table() ───────────────────────────────────
261
- # PURPOSE: Load the latest BERTopic phase data (taxonomy, themes,
262
- # labels, or summaries β€” whichever is most recent) and
263
- # build a review table for the researcher to approve,
264
- # rename, or annotate topics.
265
- # RETURNS: List[List] with 8 columns matching the Dataframe schema:
266
- # [#, Label, Evidence, Sentences, Papers, Approve, Rename, Reasoning]
267
- # - Column 5 (Approve) is bool (True/False) β†’ renders as checkbox
268
- # - Columns 0-4 are read-only (enforced by static_columns in B16)
269
- # - Columns 5-7 are editable by the researcher
270
- # USED BY: B16 (initial table value)
271
- # B10 (_submit_review) β€” reloads after agent processes review
272
- # B18 (respond_with_viz) β€” refreshes after each agent turn
273
- # REPLACES: Old version which returned "yes"/"no" strings for Approve.
274
- # Now returns True/False so gr.Dataframe renders checkboxes.
275
- # ────────────────────────────────────────────────────────────────
276
  def _load_review_table():
277
  """Build review table from latest checkpoint JSON.
278
  Approve column is bool (renders as checkbox in gr.Dataframe).
@@ -326,19 +229,6 @@ def _load_review_table():
326
  # ── end B8: _load_review_table ─────────────────────────────────
327
 
328
 
329
- # ── B9: _show_papers_by_select() ───────────────────────────────
330
- # PURPOSE: When the researcher clicks any row in the review table,
331
- # this function fires and shows the papers belonging to
332
- # that topic. Eliminates the old workflow of typing a
333
- # Topic # into a separate input and clicking "Show Papers".
334
- # INPUT: gr.SelectData event β€” contains .index (row, col) and .value
335
- # RETURNS: str β€” formatted paper list for gr.Textbox (paper_list)
336
- # TRIGGERED BY: review_table.select() event in B16
337
- # REPLACES: Old _show_papers(topic_id) + topic_num (gr.Number) +
338
- # view_papers_btn (gr.Button) β€” all three components removed.
339
- # NOTE: Uses column 0 value (the # column) as topic_id, NOT the
340
- # row index, because filtering/sorting may reorder rows.
341
- # ────────────────────────────────────────────────────────────────
342
  def _show_papers_by_select(table_data, evt: gr.SelectData):
343
  """Show papers for clicked row. Uses column 0 as topic_id.
344
  Triggered by review_table.select() β€” no separate Topic # input needed."""
@@ -378,22 +268,6 @@ def _show_papers_by_select(table_data, evt: gr.SelectData):
378
  # ── end B9: _show_papers_by_select ─────────────────────────────
379
 
380
 
381
- # ── B10: _submit_review() ──────────────────────────────────────
382
- # PURPOSE: When the researcher finishes editing the review table
383
- # (checking Approve boxes, typing Rename values, adding
384
- # Reasoning notes) and clicks "Submit Review", this
385
- # function converts those edits into a natural language
386
- # message and sends it to the agent for processing.
387
- # INPUTS: table_data (DataFrame from gr.Dataframe), chat_history (list)
388
- # YIELDS: Tuple of (chat, download, chart_choices, chart_fig,
389
- # review_rows, progress_str) β€” yields twice (progress β†’ final)
390
- # TALKS TO: B2 (agent.invoke) β€” sends review decisions
391
- # B4 (_latest_output) β€” refreshes downloads
392
- # B5 (_build_progress) β€” refreshes pipeline status
393
- # B7 (_get_chart_choices) β€” refreshes chart dropdown
394
- # B8 (_load_review_table) β€” reloads table with updated data
395
- # NOTE: Column 5 (Approve) is now bool. True = approve, False = reject.
396
- # ────────────────────────────────────────────────────────────────
397
  def _submit_review(table_data, chat_history):
398
  """Convert review table edits into agent message.
399
  Approve column is bool (checkbox), not string."""
@@ -438,43 +312,64 @@ def _submit_review(table_data, chat_history):
438
  gr.update(value=_load_review_table()),
439
  _build_progress(),
440
  )
441
- # ── end B10: _submit_review ────────────────────────────────────
442
-
443
 
444
- # ╔═══════════════════════════════════════════════════════════════╗
445
- # β•‘ SECTION 3 β€” UI LAYOUT β•‘
446
- # β•‘ All visual components defined here using ONLY native Gradio β•‘
447
- # β•‘ widgets. Zero gr.HTML() calls. Theming via B3. β•‘
448
- # β•‘ Layout: Header β†’ Upload β†’ Progress β†’ Chat β†’ Results tabs β•‘
449
- # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
450
 
451
  print(">>> Building UI...")
452
 
453
 
454
- # ── B11: gr.Blocks container ───────────────────────────────────
455
- # PURPOSE: Root container for the entire application UI.
456
- # Enables full browser width via fill_width.
457
- # CONTAINS: All UI blocks B12 through B17b
458
- # CONFIG: title β€” browser tab title (stays on Blocks in Gradio 6)
459
- # fill_width β€” removes side padding, uses full browser width
460
- # NOTE: In Gradio 6.0, theme/css/footer_links moved from
461
- # gr.Blocks() to demo.launch(). See B20 for those params.
462
- # ────────────────────────────────────────────────────────────────
463
  with gr.Blocks(
464
  title="Topic Modelling β€” Agentic AI",
465
  fill_width=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
  ) as demo:
467
 
468
 
469
  # ── B12: Header ────────────────────────────────────────────
470
- # PURPOSE: Application title and subtitle. Single gr.Markdown
471
- # call replaces 15 lines of HEADER_HTML that included
472
- # a gradient background div, font imports, and inline CSS.
473
- # REPLACES: Old HEADER_HTML constant (lines 32-47 of old app.py)
474
  # ───────────────────────────────────────────────────────────
475
  gr.Markdown(
476
- "# πŸ”¬ Topic Modelling β€” Agentic AI\n"
477
- "*Mistral Β· Cosine Clustering Β· 384d Β· B&C Thematic Analysis*"
478
  )
479
  # ── end B12: Header ────────────────────────────────────────
480
 
@@ -486,49 +381,53 @@ with gr.Blocks(
486
  # COMPONENTS: gr.File (upload) + gr.Markdown (instructions)
487
  # EVENTS: upload.change β†’ B19 (_auto_load_csv)
488
  # ────────────────────���──────────────────────────────────────
489
- gr.Markdown("**β‘  Data input**")
490
  with gr.Row():
491
- upload = gr.File(label="πŸ“‚ Upload Scopus CSV", file_types=[".csv"])
492
- gr.Markdown("**Upload your CSV** then type `run abstract only` in chat below")
 
 
 
493
  # ── end B13: Data input ────────────────────────────────────
494
 
495
 
496
  # ── B14: Progress pipeline ─────────────────────────────────
497
  # PURPOSE: Visual indicator of which Braun & Clarke analysis
498
  # phases are complete. Updated after every agent action.
499
- # Now uses gr.Markdown with emoji text (was gr.HTML
500
- # with inline-styled colored <span> elements).
501
  # COMPONENT: gr.Markdown β€” displays emoji string from B5
502
  # UPDATED BY: B18 (after chat), B10 (after review), B19 (after upload)
503
- # REPLACES: Old gr.HTML(value=_build_progress()) with 24 lines of HTML
504
  # ───────────────────────────────────────────────────────────
505
- phase_progress = gr.Markdown(value=_build_progress())
506
  # ── end B14: Progress pipeline ─────────────────────────────
507
 
508
 
509
  # ── B15: Chatbot + input ───────────────────────────────────
510
  # PURPOSE: Main conversation interface between researcher and
511
- # the LangGraph agent. The chatbot displays message
512
- # history with markdown rendering. The textbox + button
513
- # below it capture user input.
514
  # COMPONENTS: gr.Chatbot (display), gr.Textbox (input), gr.Button (send)
515
  # EVENTS: msg.submit β†’ B18, send.click β†’ B18
516
- # NOTE: placeholder text guides the researcher on available commands.
517
- # height=300 keeps chat visible while showing results below.
518
  # ───────────────────────────────────────────────────────────
519
- gr.Markdown("**β‘‘ Agent conversation** β€” follow the prompts below")
520
  with gr.Group():
521
  chatbot = gr.Chatbot(
522
- height=300,
523
  show_label=False,
524
- placeholder="Upload your Scopus CSV above, then type: run abstract only",
 
 
 
 
 
 
 
525
  )
526
  with gr.Row():
527
  msg = gr.Textbox(
528
  placeholder="run Β· approve Β· show topic 4 papers Β· group 0 1 5 Β· done",
529
  show_label=False, scale=9, lines=1, max_lines=1, container=False,
530
  )
531
- send = gr.Button("Send", variant="primary", scale=1, min_width=70)
532
  # ── end B15: Chatbot + input ───────────────────────────────
533
 
534
 
@@ -538,32 +437,21 @@ with gr.Blocks(
538
  # topics. This is the core human-in-the-loop interface.
539
  #
540
  # KEY FEATURES (all native Gradio, no HTML):
541
- # - static_columns=[0,1,2,3,4] β€” first 5 columns (#, Label,
542
- # Evidence, Sentences, Papers) are READ-ONLY. Prevents
543
- # accidental edits to agent-generated data.
544
- # - datatype "bool" on column 5 β€” Approve renders as a native
545
- # CHECKBOX. Researcher clicks to toggle, no typing needed.
546
- # - pinned_columns=2 β€” # and Label columns stay visible when
547
- # scrolling horizontally through wider columns.
548
- # - show_search="filter" β€” built-in column filtering. Researcher
549
- # can filter by paper count, sentence count, etc.
550
- # - .select() event β€” clicking any row auto-loads that topic's
551
- # papers in the textbox below. REPLACES the old workflow of
552
- # Topic # input + Show Papers button (both removed).
553
  #
554
  # COMPONENTS: gr.Dataframe, gr.Button (submit), gr.Textbox (papers)
555
- # EVENTS: review_table.select β†’ B9 (_show_papers_by_select)
556
- # submit_review.click β†’ B10 (_submit_review)
557
- # DATA: Loaded by B8 (_load_review_table)
558
- # REPLACES: Old gr.Dataframe (no static_columns, string Approve,
559
- # no search) + topic_num + view_papers_btn
560
  # ───────────────────────────────────────────────────────────
561
- gr.Markdown("**β‘’ Results** β€” review table, charts, downloads")
562
  with gr.Tabs():
563
- with gr.Tab("πŸ“‹ Review Table"):
564
  gr.Markdown(
565
- "*Edit Approve / Rename To / Reasoning β†’ click Submit. "
566
- "Click any row to see its papers below.*"
567
  )
568
  review_table = gr.Dataframe(
569
  headers=[
@@ -587,7 +475,6 @@ with gr.Blocks(
587
  )
588
  submit_review = gr.Button("βœ… Submit Review to Agent", variant="primary")
589
 
590
- # Paper viewer β€” triggered by clicking any row (replaces Topic # + button)
591
  gr.Markdown("---")
592
  gr.Markdown("**πŸ“„ Papers in selected topic** *(click any row above)*")
593
  paper_list = gr.Textbox(
@@ -598,18 +485,14 @@ with gr.Blocks(
598
 
599
 
600
  # ── B17a: Charts tab ───────────────────────────────────
601
- # PURPOSE: Display BERTopic visualization charts (intertopic
602
- # distance map, bar chart, hierarchy, heatmap).
603
- # Charts are loaded as Plotly figure objects from
604
- # JSON files and rendered natively in gr.Plot.
605
  # COMPONENTS: gr.Dropdown (selector), gr.Plot (display)
606
  # EVENTS: chart_selector.change β†’ B7 (_load_chart)
607
- # REPLACES: Old iframe + srcdoc hack that used html.escape()
608
- # to embed HTML files. Now uses gr.Plot directly.
609
  # ───────────────────────────────────────────────────────
610
- with gr.Tab("πŸ“Š Charts"):
611
  chart_selector = gr.Dropdown(
612
- choices=[], label="Select Chart", interactive=True,
613
  )
614
  chart_display = gr.Plot(label="BERTopic Visualization")
615
  # ── end B17a: Charts tab ───────────────────────────────
@@ -617,12 +500,10 @@ with gr.Blocks(
617
 
618
  # ── B17b: Download tab ─────────────────────────────────
619
  # PURPOSE: Multi-file download for all pipeline outputs.
620
- # Shows file descriptions by phase and a gr.File
621
- # component with all generated files.
622
  # COMPONENTS: gr.Markdown (descriptions), gr.File (download)
623
  # UPDATED BY: B18, B10, B19 β€” refreshed after each action
624
  # ───────────────────────────────────────────────────────
625
- with gr.Tab("πŸ“₯ Download"):
626
  gr.Markdown(
627
  "**Files by Phase (per run: abstract / title):**\n\n"
628
  "**Phase 2 β€” Discovery:** `summaries.json` Β· `emb.npy`\n\n"
@@ -637,28 +518,6 @@ with gr.Blocks(
637
  # ── end B17b: Download tab ─────────────────────────────
638
 
639
 
640
- # ╔═══════════════════════════════════════════════════════════╗
641
- # β•‘ SECTION 4 β€” EVENT WIRING β•‘
642
- # β•‘ Connect UI components to helper functions. This is β•‘
643
- # β•‘ where data flows are defined: which function runs when β•‘
644
- # β•‘ a button is clicked, a file is uploaded, or a row is β•‘
645
- # β•‘ selected. No HTML, no CSS β€” just Python event binding. β•‘
646
- # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
647
-
648
-
649
- # ── B18: respond_with_viz() + event bindings ───────────────
650
- # PURPOSE: Wrapper around B6 (respond) that also refreshes
651
- # the chart dropdown, chart display, review table,
652
- # and progress pipeline after each agent response.
653
- # This is the main "after every chat turn, update
654
- # everything" orchestrator.
655
- # CALLS: B6 (respond), B5 (_build_progress), B7 (_load_chart,
656
- # _get_chart_choices), B8 (_load_review_table)
657
- # BINDINGS: msg.submit β†’ this function
658
- # send.click β†’ this function
659
- # OUTPUTS: chatbot, msg, download, chart_selector, chart_display,
660
- # review_table, phase_progress (7 components updated)
661
- # ───────────────────────────────────────────────────────────
662
  chart_selector.change(_load_chart, [chart_selector], [chart_display])
663
 
664
  review_table.select(
@@ -708,9 +567,8 @@ with gr.Blocks(
708
 
709
  # ── B19: _auto_load_csv() ──────────────────────────────────
710
  # PURPOSE: Automatically triggers analysis when a CSV file is
711
- # uploaded. The researcher doesn't need to type anything β€”
712
- # just uploading the file starts the pipeline.
713
- # Sends "Analyze my Scopus CSV" as the initial message.
714
  # TRIGGERED BY: upload.change event
715
  # CALLS: B6 (respond) with auto-message
716
  # OUTPUTS: chatbot, download, chart_selector, chart_display,
@@ -746,23 +604,7 @@ with gr.Blocks(
746
  # ── end B19: _auto_load_csv ────────────────────────────────
747
 
748
 
749
- # ╔═══════════════════════════════════════════════════════════════╗
750
- # β•‘ SECTION 5 β€” LAUNCH β•‘
751
- # β•‘ Start the Gradio server. On HuggingFace Spaces this runs β•‘
752
- # β•‘ automatically. Locally, access at http://localhost:7860 β•‘
753
- # β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•
754
 
755
-
756
- # ── B20: Launch ────────────────────────────────────────────────
757
- # PURPOSE: Start the web server. In Gradio 6.0, theme/css/footer
758
- # params moved here from gr.Blocks().
759
- # CONFIG: theme β€” from B3 (Soft + DM Sans + slate)
760
- # footer_links=[] β€” hides footer natively (no CSS hack)
761
- # ssr_mode=False β€” for HuggingFace Spaces free tier compat
762
- # server_name="0.0.0.0" β€” accessible on network
763
- # NOTE: On Spaces, port 7860 is auto-exposed to the internet.
764
- # Locally, open http://localhost:7860 in your browser.
765
- # ────────────────────────────────────────────────────────────────
766
  print(">>> Launching...")
767
  demo.launch(
768
  server_name="0.0.0.0",
@@ -771,4 +613,4 @@ demo.launch(
771
  theme=theme, # Gradio 6: moved from gr.Blocks()
772
  footer_links=[], # Gradio 6: hides footer, replaces show_api
773
  )
774
- # ── end B20: Launch ────────────────────────────────────────────
 
1
  """
2
  app.py — Topic Modelling Agentic AI | Gradio UI
3
  ═══════════════════════════════════════════════════
4
+ Version: 3.1.0 | April 2026
5
  Stack: Gradio 6.x + LangGraph + Mistral + BERTopic
6
  Deploy: HuggingFace Spaces (sdk: gradio)
7
  Rules: Zero gr.HTML(). All UI via native Gradio components.
 
39
  print(">>> app.py: imports complete")
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  llm = ChatMistralAI(model="mistral-small-latest", temperature=0, timeout=300)
43
  tools = get_local_tools()
44
  agent = create_react_agent(
 
53
 
54
  # ── B3: Theme ───────────────────────────────────────────────────
55
  # PURPOSE: Define the visual identity of the entire application.
56
+ # Uses teal/indigo on zinc — purposeful scientific feel.
57
+ # Plus Jakarta Sans: geometric-humanist, modern but not generic.
58
+ # Fira Code for monospace elements (phase progress, etc).
59
+ # USED BY: B20 (demo.launch) — theme applied at launch time.
 
 
 
 
60
  # ────────────────────────────────────────────────────────────────
61
+ theme = gr.themes.Default(
62
+ primary_hue="teal",
63
+ secondary_hue="indigo",
64
+ neutral_hue="zinc",
65
+ font=gr.themes.GoogleFont("Plus Jakarta Sans"),
66
+ font_mono=gr.themes.GoogleFont("Fira Code"),
67
+ radius_size="sm",
68
+ spacing_size="md",
69
+ ).set(
70
+ button_primary_background_fill="*primary_600",
71
+ button_primary_background_fill_hover="*primary_500",
72
+ button_primary_text_color="white",
73
+ block_label_text_size="sm",
74
+ block_title_text_weight="600",
75
  )
76
  # ── end B3: Theme ──────────────────────────────────────────────
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  def _latest_output():
79
  """Scan /tmp for ALL rq4_* files, sorted by phase order.
80
  Returns list of filepaths for gr.File download component."""
 
96
  return list(map(lambda x: x[1], scored)) or None
97
  # ── end B4: _latest_output ─────────────────────────────────────
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  def _build_progress():
100
  """Return emoji progress pipeline. NO HTML β€” just text + emoji.
101
  Displayed in gr.Markdown component (B14)."""
 
114
  # ── end B5: _build_progress ────────────────────────────────────
115
 
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  def respond(message, chat_history, uploaded_file):
118
  """Handle one chat turn with the LangGraph agent.
119
  Yields twice: progress bubble → final response."""
 
162
  # ── end B6: respond ────────────────────────────────────────────
163
 
164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  def _load_chart(chart_name):
166
  """Load Plotly chart from JSON file. Returns figure for gr.Plot.
167
  No HTML, no iframe — just a native Plotly figure object."""
 
176
  # ── end B7: _load_chart ───────────────────────────────────────
177
 
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  def _load_review_table():
180
  """Build review table from latest checkpoint JSON.
181
  Approve column is bool (renders as checkbox in gr.Dataframe).
 
229
  # ── end B8: _load_review_table ─────────────────────────────────
230
 
231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  def _show_papers_by_select(table_data, evt: gr.SelectData):
233
  """Show papers for clicked row. Uses column 0 as topic_id.
234
  Triggered by review_table.select() — no separate Topic # input needed."""
 
268
  # ── end B9: _show_papers_by_select ─────────────────────────────
269
 
270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  def _submit_review(table_data, chat_history):
272
  """Convert review table edits into agent message.
273
  Approve column is bool (checkbox), not string."""
 
312
  gr.update(value=_load_review_table()),
313
  _build_progress(),
314
  )
 
 
315
 
 
 
 
 
 
 
316
 
317
  print(">>> Building UI...")
318
 
319
 
 
 
 
 
 
 
 
 
 
320
  with gr.Blocks(
321
  title="Topic Modelling — Agentic AI",
322
  fill_width=True,
323
+ css="""
324
+ /* Accent bar at very top of page */
325
+ .gradio-container::before {
326
+ content: "";
327
+ display: block;
328
+ height: 3px;
329
+ background: linear-gradient(90deg, #0d9488, #6366f1);
330
+ margin-bottom: 4px;
331
+ }
332
+ /* Tabs: tighter padding, bolder active state */
333
+ .tab-nav button {
334
+ font-size: 13px !important;
335
+ font-weight: 500 !important;
336
+ letter-spacing: 0.01em;
337
+ padding: 6px 16px !important;
338
+ }
339
+ .tab-nav button.selected {
340
+ font-weight: 700 !important;
341
+ border-bottom: 2px solid #0d9488 !important;
342
+ }
343
+ /* Dataframe: subtle zebra rows */
344
+ .table-wrap tr:nth-child(even) td {
345
+ background-color: rgba(13, 148, 136, 0.04);
346
+ }
347
+ /* Chat: teal left-border on assistant bubbles */
348
+ .message.bot {
349
+ border-left: 3px solid #0d9488 !important;
350
+ }
351
+ /* Phase progress: monospace, slightly muted */
352
+ .phase-bar p {
353
+ font-family: "Fira Code", monospace;
354
+ font-size: 12px;
355
+ letter-spacing: 0.03em;
356
+ opacity: 0.80;
357
+ }
358
+ /* Upload area: cleaner dashed border */
359
+ .upload-container {
360
+ border-style: dashed !important;
361
+ border-width: 1px !important;
362
+ }
363
+ """,
364
  ) as demo:
365
 
366
 
367
  # ── B12: Header ────────────────────────────────────────────
368
+ # PURPOSE: Application title and subtitle.
 
 
 
369
  # ───────────────────────────────────────────────────────────
370
  gr.Markdown(
371
+ "# 🔬 Topic Modelling · Agentic AI\n"
372
+ "<sub>Mistral · Cosine Clustering · 384d Embeddings · Braun & Clarke Thematic Analysis</sub>"
373
  )
374
  # ── end B12: Header ────────────────────────────────────────
375
 
 
381
  # COMPONENTS: gr.File (upload) + gr.Markdown (instructions)
382
  # EVENTS: upload.change → B19 (_auto_load_csv)
383
  # ───────────────────────────────────────────────────────────
384
+ gr.Markdown("**① Upload**")
385
  with gr.Row():
386
+ upload = gr.File(label="📂 Scopus CSV", file_types=[".csv"])
387
+ gr.Markdown(
388
+ "Upload your Scopus CSV export, then type `run abstract only` in the chat below "
389
+ "to begin the analysis pipeline."
390
+ )
391
  # ── end B13: Data input ────────────────────────────────────
392
 
393
 
394
  # ── B14: Progress pipeline ─────────────────────────────────
395
  # PURPOSE: Visual indicator of which Braun & Clarke analysis
396
  # phases are complete. Updated after every agent action.
397
+ # elem_classes="phase-bar" targets the monospace CSS rule in B11.
 
398
  # COMPONENT: gr.Markdown β€” displays emoji string from B5
399
  # UPDATED BY: B18 (after chat), B10 (after review), B19 (after upload)
 
400
  # ───────────────────────────────────────────────────────────
401
+ phase_progress = gr.Markdown(value=_build_progress(), elem_classes=["phase-bar"])
402
  # ── end B14: Progress pipeline ─────────────────────────────
403
 
404
 
405
  # ── B15: Chatbot + input ───────────────────────────────────
406
  # PURPOSE: Main conversation interface between researcher and
407
+ # the LangGraph agent.
 
 
408
  # COMPONENTS: gr.Chatbot (display), gr.Textbox (input), gr.Button (send)
409
  # EVENTS: msg.submit β†’ B18, send.click β†’ B18
 
 
410
  # ───────────────────────────────────────────────────────────
411
+ gr.Markdown("**② Conversation** — follow the guided workflow")
412
  with gr.Group():
413
  chatbot = gr.Chatbot(
414
+ height=320,
415
  show_label=False,
416
+ avatar_images=(
417
+ None,
418
+ "https://api.dicebear.com/7.x/bottts-neutral/svg?seed=bertopic",
419
+ ),
420
+ placeholder=(
421
+ "**Ready.** Upload a Scopus CSV above, then type:\n\n"
422
+ "`run abstract only` · `approve all` · `show topic 4 papers` · `done`"
423
+ ),
424
  )
425
  with gr.Row():
426
  msg = gr.Textbox(
427
  placeholder="run · approve · show topic 4 papers · group 0 1 5 · done",
428
  show_label=False, scale=9, lines=1, max_lines=1, container=False,
429
  )
430
+ send = gr.Button("⏎ Send", variant="primary", scale=1, min_width=80)
431
  # ── end B15: Chatbot + input ───────────────────────────────
432
 
433
 
 
437
  # topics. This is the core human-in-the-loop interface.
438
  #
439
  # KEY FEATURES (all native Gradio, no HTML):
440
+ # - static_columns=[0,1,2,3,4] β€” first 5 columns read-only
441
+ # - datatype "bool" on column 5 β€” Approve renders as checkbox
442
+ # - pinned_columns=2 β€” # and Label stay visible when scrolling
443
+ # - show_search="filter" β€” built-in column filtering
444
+ # - .select() event β€” clicking any row auto-loads that topic's papers
 
 
 
 
 
 
 
445
  #
446
  # COMPONENTS: gr.Dataframe, gr.Button (submit), gr.Textbox (papers)
447
+ # EVENTS: review_table.select β†’ B9, submit_review.click β†’ B10
 
 
 
 
448
  # ───────────────────────────────────────────────────────────
449
+ gr.Markdown("**③ Review & Export**")
450
  with gr.Tabs():
451
+ with gr.Tab("📋 Topics"):
452
  gr.Markdown(
453
+ "*Toggle **Approve**, fill in **Rename To** or **Reasoning**, "
454
+ "then click Submit. Click any row to inspect its source papers below.*"
455
  )
456
  review_table = gr.Dataframe(
457
  headers=[
 
475
  )
476
  submit_review = gr.Button("✅ Submit Review to Agent", variant="primary")
477
 
 
478
  gr.Markdown("---")
479
  gr.Markdown("**📄 Papers in selected topic** *(click any row above)*")
480
  paper_list = gr.Textbox(
 
485
 
486
 
487
  # ── B17a: Charts tab ───────────────────────────────────
488
+ # PURPOSE: Display BERTopic visualization charts rendered
489
+ # natively in gr.Plot from Plotly JSON files.
 
 
490
  # COMPONENTS: gr.Dropdown (selector), gr.Plot (display)
491
  # EVENTS: chart_selector.change β†’ B7 (_load_chart)
 
 
492
  # ───────────────────────────────────────────────────────
493
+ with gr.Tab("📊 Visualise"):
494
  chart_selector = gr.Dropdown(
495
+ choices=[], label="Select chart", interactive=True,
496
  )
497
  chart_display = gr.Plot(label="BERTopic Visualization")
498
  # ── end B17a: Charts tab ───────────────────────────────
 
500
 
501
  # ── B17b: Download tab ─────────────────────────────────
502
  # PURPOSE: Multi-file download for all pipeline outputs.
 
 
503
  # COMPONENTS: gr.Markdown (descriptions), gr.File (download)
504
  # UPDATED BY: B18, B10, B19 β€” refreshed after each action
505
  # ───────────────────────────────────────────────────────
506
+ with gr.Tab("⬇ Export"):
507
  gr.Markdown(
508
  "**Files by Phase (per run: abstract / title):**\n\n"
509
  "**Phase 2 β€” Discovery:** `summaries.json` Β· `emb.npy`\n\n"
 
518
  # ── end B17b: Download tab ─────────────────────────────
519
 
520
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
521
  chart_selector.change(_load_chart, [chart_selector], [chart_display])
522
 
523
  review_table.select(
 
567
 
568
  # ── B19: _auto_load_csv() ──────────────────────────────────
569
  # PURPOSE: Automatically triggers analysis when a CSV file is
570
+ # uploaded. Sends "Analyze my Scopus CSV" as the
571
+ # initial message so no manual typing is needed.
 
572
  # TRIGGERED BY: upload.change event
573
  # CALLS: B6 (respond) with auto-message
574
  # OUTPUTS: chatbot, download, chart_selector, chart_display,
 
604
  # ── end B19: _auto_load_csv ────────────────────────────────
605
 
606
 
 
 
 
 
 
607
 
 
 
 
 
 
 
 
 
 
 
 
608
  print(">>> Launching...")
609
  demo.launch(
610
  server_name="0.0.0.0",
 
613
  theme=theme, # Gradio 6: moved from gr.Blocks()
614
  footer_links=[], # Gradio 6: hides footer, replaces show_api
615
  )
616
+ # ── end B20: Launch ────────────────────────────────────────────