"""
app.py — Topic Modelling Agentic AI | Gradio UI
═══════════════════════════════════════════════════
Version: 3.0.0 | April 2026
Stack:   Gradio 5.x + LangGraph + Mistral + BERTopic
Deploy:  HuggingFace Spaces (sdk: gradio)
Rules:   Zero gr.HTML(). All UI via native Gradio components.
         See GRADIO_UI_GUIDELINES_v2.docx for full standards.

ARCHITECTURE — 20 Blocks in 5 Sections
─────────────────────────────────────────
Section 1: Setup        (B1–B3)    Imports, agent, theme
Section 2: Helpers      (B4–B10)   Pure Python functions, no UI
Section 3: UI Layout    (B11–B17)  gr.Blocks with native components
Section 4: Event Wiring (B18–B19)  Connect UI to functions
Section 5: Launch       (B20)      Start server

BLOCK COMMUNICATION MAP
─────────────────────────
B6  (respond) ←→ B2 (agent)     : invokes agent for chat
B6  (respond) →  B4 (output)    : scans for download files
B7  (chart)   →  B17a (display) : loads Plotly JSON → gr.Plot
B8  (table)   →  B16 (review)   : builds rows → gr.Dataframe
B9  (papers)  →  B16 (review)   : triggered by row click
B10 (submit)  →  B2 (agent)     : sends review edits to agent
B18 (wiring)  →  B5, B7, B8     : refreshes progress, charts, table
"""
| import os | |
| import glob | |
| import json | |
| import plotly.io as pio | |
| import gradio as gr | |
| from langchain_mistralai import ChatMistralAI | |
| from langgraph.prebuilt import create_react_agent | |
| from langgraph.checkpoint.memory import MemorySaver | |
| from agent import SYSTEM_PROMPT, get_local_tools | |
| print(">>> app.py: imports complete") | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # β SECTION 1 β SETUP β | |
| # β One-time initialization: agent creation and visual theme. β | |
| # β Nothing here renders UI β it prepares the backend brain β | |
| # β and the visual identity for the entire application. β | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # ββ B2: Agent setup βββββββββββββββββββββββββββββββββββββββββββββ | |
| # PURPOSE: Create the LangGraph ReAct agent that powers all chat. | |
| # Connects Mistral LLM to BERTopic tools with memory so | |
| # the agent remembers context across conversation turns. | |
| # PRODUCES: `agent` β used by B6 (respond) and B10 (_submit_review) | |
| # IMPORTS: SYSTEM_PROMPT, get_local_tools from agent.py | |
| # NOTE: MemorySaver keeps conversation in RAM (resets on restart). | |
| # For persistent memory, swap to SQLite checkpointer. | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# Module-level mutable state. Both objects live at module scope, so every
# browser session served by this process reads and writes the same values.
_msg_count = 0            # incremented once per respond() call; used in log lines
_uploaded = {"path": ""}  # path of the most recently uploaded CSV

# Chat model handed to the agent.
llm = ChatMistralAI(
    model="mistral-small-latest",
    temperature=0,
    timeout=300,
)

# Tool list supplied by agent.py.
tools = get_local_tools()

# ReAct agent used by respond() and _submit_review().
agent = create_react_agent(
    model=llm,
    tools=tools,
    prompt=SYSTEM_PROMPT,
    # MemorySaver keeps conversation state in RAM (per the original note,
    # it resets on restart).
    checkpointer=MemorySaver(),
)
print(f">>> app.py: agent ready ({len(tools)} tools)")
| # ββ end B2: Agent setup ββββββββββββββββββββββββββββββββββββββββ | |
| # ββ B3: Theme βββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # PURPOSE: Define the visual identity of the entire application. | |
| # Replaces ALL custom CSS that was previously in HEADER_HTML: | |
| # - DM Sans font (was @import url in <style> block) | |
| # - Slate color palette (was hardcoded hex in inline styles) | |
| # - Soft rounded corners and spacing | |
| # USED BY: B20 (demo.launch) β Gradio 6 moved theme from gr.Blocks | |
| # to launch(). The theme object is created here but applied | |
| # in B20 via demo.launch(theme=theme). | |
| # REPLACES: Old HEADER_HTML lines 33-38 (<style> block with CSS) | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# Fonts are declared once and handed to the theme below.
_body_font = gr.themes.GoogleFont("DM Sans")
_mono_font = gr.themes.GoogleFont("JetBrains Mono")

# Soft base theme with a slate primary hue; passed to demo.launch() in B20.
theme = gr.themes.Soft(
    primary_hue="slate",
    font=_body_font,
    font_mono=_mono_font,
)
| # ββ end B3: Theme ββββββββββββββββββββββββββββββββββββββββββββββ | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # β SECTION 2 β HELPER FUNCTIONS β | |
| # β Pure Python functions that process data and return clean β | |
| # β values (strings, lists, figures). NONE of these functions β | |
| # β return HTML strings. They feed data to UI components in β | |
| # β Section 3 via event handlers in Section 4. β | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # ββ B4: _latest_output() βββββββββββββββββββββββββββββββββββββββ | |
| # PURPOSE: Scan /tmp for all rq4_* output files generated by the | |
| # BERTopic agent pipeline (CSVs, JSONs, chart files). | |
| # Sorts them by pipeline phase order so the download | |
| # component shows files in logical sequence. | |
| # RETURNS: List[str] of filepaths sorted by phase, or None | |
| # USED BY: B6 (respond) β attaches to download component after | |
| # each agent response | |
| # B10 (_submit_review) β refreshes downloads after review | |
| # B19 (_auto_load_csv) β refreshes after initial upload | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _latest_output(): | |
| """Scan /tmp for ALL rq4_* files, sorted by phase order. | |
| Returns list of filepaths for gr.File download component.""" | |
| phase_order = { | |
| "summaries": 1, "labels": 2, "themes": 3, "taxonomy": 4, | |
| "emb": 0, "intertopic": 5, "bars": 6, "hierarchy": 7, | |
| "heatmap": 8, "comparison": 9, "narrative": 10, | |
| } | |
| files = ( | |
| glob.glob("/tmp/rq4_*.csv") | |
| + glob.glob("/tmp/rq4_*.json") | |
| + glob.glob("/tmp/checkpoints/rq4_*.json") | |
| ) | |
| scored = list(map( | |
| lambda f: (sum(v * (k in f) for k, v in phase_order.items()), f), | |
| files, | |
| )) | |
| scored.sort(key=lambda x: x[0]) | |
| return list(map(lambda x: x[1], scored)) or None | |
| # ββ end B4: _latest_output βββββββββββββββββββββββββββββββββββββ | |
| # ββ B5: _build_progress() ββββββββββββββββββββββββββββββββββββββ | |
| # PURPOSE: Check which Braun & Clarke phases are complete by | |
| # scanning for checkpoint files on disk. Returns a | |
| # human-readable emoji string showing pipeline status. | |
# RETURNS: str like "✅ Load → ✅ Codes → ⬜ Themes → ⬜ Report" (✅ = checkpoint found, ⬜ = not yet)
| # USED BY: B14 (phase_progress initial value) | |
| # B18 (respond_with_viz) β refreshes after each agent turn | |
| # B10 (_submit_review) β refreshes after review submission | |
| # B19 (_auto_load_csv) β refreshes after CSV upload | |
| # REPLACES: Old _build_progress() which returned 24 lines of HTML | |
| # with inline-styled <span> elements and color codes. | |
| # Now returns pure text with emoji β gr.Markdown renders it. | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _build_progress(): | |
| """Return emoji progress pipeline. NO HTML β just text + emoji. | |
| Displayed in gr.Markdown component (B14).""" | |
| checks = [ | |
| ("Load", bool(glob.glob("/tmp/checkpoints/rq4_*_summaries.json") | |
| or glob.glob("/tmp/checkpoints/rq4_*_emb.npy"))), | |
| ("Codes", bool(glob.glob("/tmp/checkpoints/rq4_*_labels.json"))), | |
| ("Themes", bool(glob.glob("/tmp/checkpoints/rq4_*_themes.json"))), | |
| ("Review", bool(glob.glob("/tmp/checkpoints/rq4_*_themes.json"))), | |
| ("Names", bool(glob.glob("/tmp/checkpoints/rq4_*_themes.json"))), | |
| ("PAJAIS", bool(glob.glob("/tmp/checkpoints/rq4_*_taxonomy_map.json"))), | |
| ("Report", bool(glob.glob("/tmp/rq4_comparison.csv") | |
| or glob.glob("/tmp/rq4_narrative.txt"))), | |
| ] | |
| return " β ".join(f"{'β ' if done else 'β¬'} {name}" for name, done in checks) | |
| # ββ end B5: _build_progress ββββββββββββββββββββββββββββββββββββ | |
| # ββ B6: respond() ββββββββββββββββββββββββββββββββββββββββββββββ | |
| # PURPOSE: Core chat handler. This is the brain of the app. | |
| # 1. Stores uploaded CSV file path (if new upload) | |
| # 2. Appends file location + phase context to user message | |
| # so the agent knows what data is available | |
| # 3. Yields a "thinking..." bubble immediately (user sees | |
| # instant feedback while agent processes) | |
| # 4. Invokes the LangGraph agent (Mistral decides which | |
| # BERTopic tools to call) | |
| # 5. Replaces thinking bubble with actual agent response | |
| # 6. Attaches latest output files to download component | |
| # INPUTS: message (str), chat_history (list[dict]), uploaded_file (str|None) | |
| # YIELDS: Tuple of (chat_history, empty_string, download_files) | |
#         — yields TWICE: first with the progress bubble, then with the final response
| # TALKS TO: B2 (agent.invoke) β sends message, gets response | |
| # B4 (_latest_output) β gets download file list | |
| # USED BY: B18 (respond_with_viz wraps this) | |
| # B19 (_auto_load_csv wraps this) | |
| # NOTE: Uses single thread_id="session" so agent remembers | |
| # previous turns (loaded CSV path, current phase, etc.) | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def respond(message, chat_history, uploaded_file):
    """Handle one chat turn with the LangGraph agent.

    This is a generator that yields twice: first with a temporary
    "working" bubble so the user gets immediate feedback, then with the
    agent's actual reply in place of that bubble.

    Args:
        message: Text typed by the user. Empty or ``None`` falls back to
            ``"Analyze my Scopus CSV"``.
        chat_history: Current chatbot messages as a list of
            ``{"role": ..., "content": ...}`` dicts; ``None`` is treated as
            an empty history.
        uploaded_file: Path of a newly uploaded CSV, or a falsy value to keep
            using the previously stored path.

    Yields:
        Tuples ``(chat_history, "", download_files)`` where the empty string
        clears the input textbox and ``download_files`` comes from
        ``_latest_output()``.
    """
    global _msg_count
    _msg_count += 1

    # Remember the newest upload; keep the previous path when none is given.
    csv_path = uploaded_file or _uploaded.get("path", "")
    _uploaded["path"] = csv_path

    # Tell the agent where the CSV is so it does not invent a file path.
    if csv_path:
        file_note = f"\n[CSV file at: {csv_path}]"
    else:
        file_note = "\n[No CSV uploaded yet — ask user to upload a file first]"

    # Tell the agent which phase we are in, based on existing checkpoints.
    if glob.glob("/tmp/checkpoints/rq4_*_labels.json"):
        phase_context = "\n[Phase context: labels exist]"
    elif glob.glob("/tmp/checkpoints/rq4_*_emb.npy"):
        phase_context = "\n[Phase context: embeddings exist]"
    else:
        phase_context = "\n[Phase context: fresh start]"

    user_text = (message or "").strip() or "Analyze my Scopus CSV"
    text = user_text + file_note + phase_context
    print(f"\n{'='*60}\n>>> MSG #{_msg_count}: '{text[:120]}'\n{'='*60}")

    # YIELD 1: show the "thinking" bubble immediately. The user bubble shows
    # the prompt that is actually sent (previously an empty message produced
    # an empty bubble while the agent received the default prompt).
    chat_history = list(chat_history or []) + [
        {"role": "user", "content": user_text},
        {"role": "assistant", "content": "🔬 **Working...** _Agent is thinking..._"},
    ]
    yield chat_history, "", _latest_output()

    # A single fixed thread_id means all turns share one conversation memory.
    result = agent.invoke(
        {"messages": [("human", text)]},
        config={"configurable": {"thread_id": "session"}},
    )
    response = result["messages"][-1].content
    print(f">>> Response ({len(response)} chars)")

    # YIELD 2: replace the thinking bubble with the actual response.
    chat_history[-1] = {"role": "assistant", "content": response}
    gr.Info(f"Agent responded ({len(response)} chars)")
    yield chat_history, "", _latest_output()
| # ββ end B6: respond ββββββββββββββββββββββββββββββββββββββββββββ | |
| # ββ B7: _load_chart() ββββββββββββββββββββββββββββββββββββββββββ | |
| # PURPOSE: Load a BERTopic visualization chart from a saved Plotly | |
| # JSON file on disk and return the figure object. | |
| # The gr.Plot component in B17a renders this directly β | |
| # no iframe, no HTML escaping, no srcdoc hack. | |
| # INPUT: chart_name (str) β filename like "rq4_intertopic.json" | |
| # RETURNS: plotly.graph_objects.Figure or None | |
| # USED BY: B17a (chart_selector.change event) | |
| # B18 (respond_with_viz) β auto-shows latest chart | |
| # REPLACES: Old _load_chart() which used html.escape() + iframe | |
| # srcdoc to embed HTML files. That was 8 lines of hack. | |
| # REQUIRES: BERTopic tools in tools.py must save charts as Plotly | |
| # JSON via pio.to_json(fig) instead of fig.write_html(). | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _load_chart(chart_name): | |
| """Load Plotly chart from JSON file. Returns figure for gr.Plot. | |
| No HTML, no iframe β just a native Plotly figure object.""" | |
| path = f"/tmp/{chart_name}" | |
| (not os.path.exists(path)) and (not None) # guard | |
| return pio.from_json(open(path).read()) * bool(os.path.exists(path)) or None | |
| def _get_chart_choices(): | |
| """Find all rq4_*.json chart files in /tmp.""" | |
| files = sorted(glob.glob("/tmp/rq4_*.json")) | |
| return list(map(os.path.basename, files)) | |
| # ββ end B7: _load_chart βββββββββββββββββββββββββββββββββββββββ | |
| # ββ B8: _load_review_table() βββββββββββββββββββββββββββββββββββ | |
| # PURPOSE: Load the latest BERTopic phase data (taxonomy, themes, | |
| # labels, or summaries β whichever is most recent) and | |
| # build a review table for the researcher to approve, | |
| # rename, or annotate topics. | |
| # RETURNS: List[List] with 8 columns matching the Dataframe schema: | |
| # [#, Label, Evidence, Sentences, Papers, Approve, Rename, Reasoning] | |
| # - Column 5 (Approve) is bool (True/False) β renders as checkbox | |
| # - Columns 0-4 are read-only (enforced by static_columns in B16) | |
| # - Columns 5-7 are editable by the researcher | |
| # USED BY: B16 (initial table value) | |
| # B10 (_submit_review) β reloads after agent processes review | |
| # B18 (respond_with_viz) β refreshes after each agent turn | |
| # REPLACES: Old version which returned "yes"/"no" strings for Approve. | |
| # Now returns True/False so gr.Dataframe renders checkboxes. | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _load_review_table(): | |
| """Build review table from latest checkpoint JSON. | |
| Approve column is bool (renders as checkbox in gr.Dataframe). | |
| Priority: taxonomy_map > themes > labels > summaries.""" | |
| taxonomy_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_taxonomy_map.json")) | |
| theme_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_themes.json")) | |
| label_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_labels.json")) | |
| summary_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_summaries.json")) | |
| # Pick most advanced checkpoint available | |
| path = ( | |
| (taxonomy_files and taxonomy_files[-1]) | |
| or (theme_files and theme_files[-1]) | |
| or (label_files and label_files[-1]) | |
| or (summary_files and summary_files[-1]) | |
| or "" | |
| ) | |
| is_taxonomy = bool(taxonomy_files and taxonomy_files[-1] == path) | |
| data = (os.path.exists(path) and json.load(open(path))) or [] | |
| # For taxonomy: merge with themes to get sentence/paper counts | |
| theme_lookup = {} | |
| (is_taxonomy and theme_files) and theme_lookup.update( | |
| {t.get("label", ""): t for t in json.load(open(theme_files[-1]))} | |
| ) | |
| rows = list(map( | |
| lambda pair: [ | |
| pair[0], # # | |
| pair[1].get("label", pair[1].get("top_words", ""))[:60], # Label | |
| # Evidence: PAJAIS mapping for taxonomy, nearest sentence otherwise | |
| ( | |
| is_taxonomy | |
| and f"β {pair[1].get('pajais_match', '?')} | {pair[1].get('reasoning', '')}"[:120] | |
| ) or ( | |
| (pair[1].get("nearest", [{}])[0].get("sentence", "")[:120] + "...") | |
| * bool(pair[1].get("nearest")) | |
| ), | |
| # Sentence/paper counts | |
| theme_lookup.get(pair[1].get("label", ""), pair[1]).get( | |
| "sentence_count", pair[1].get("sentence_count", 0)), | |
| theme_lookup.get(pair[1].get("label", ""), pair[1]).get( | |
| "paper_count", pair[1].get("paper_count", 0)), | |
| True, # Approve (bool β checkbox) | |
| "", # Rename To | |
| "", # Reasoning | |
| ], | |
| enumerate(data), | |
| )) | |
| return rows or [[0, "No data yet", "", 0, 0, False, "", ""]] | |
| # ββ end B8: _load_review_table βββββββββββββββββββββββββββββββββ | |
| # ββ B9: _show_papers_by_select() βββββββββββββββββββββββββββββββ | |
| # PURPOSE: When the researcher clicks any row in the review table, | |
| # this function fires and shows the papers belonging to | |
| # that topic. Eliminates the old workflow of typing a | |
| # Topic # into a separate input and clicking "Show Papers". | |
| # INPUT: gr.SelectData event β contains .index (row, col) and .value | |
| # RETURNS: str β formatted paper list for gr.Textbox (paper_list) | |
| # TRIGGERED BY: review_table.select() event in B16 | |
| # REPLACES: Old _show_papers(topic_id) + topic_num (gr.Number) + | |
| # view_papers_btn (gr.Button) β all three components removed. | |
| # NOTE: Uses column 0 value (the # column) as topic_id, NOT the | |
| # row index, because filtering/sorting may reorder rows. | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def _show_papers_by_select(table_data, evt: gr.SelectData):
    """Return a text report of the papers behind the clicked review-table row.

    The topic id is read from column 0 of the clicked row (not from the row
    position). Label checkpoints are searched first; summary checkpoints are
    used only when no label checkpoint exists.

    Fixes over the previous version: JSON files are closed after reading,
    a nearest-sentence entry without a ``sentence`` key no longer raises,
    a non-numeric cell in column 0 yields a hint instead of an exception,
    and the section separators are real Unicode instead of mojibake.

    Args:
        table_data: Current table value; either an object with ``.iloc``
            (DataFrame) or a list of rows.
        evt: Gradio select event; ``evt.index[0]`` is the clicked row.

    Returns:
        The formatted report, or ``"Topic <id> not found."`` when no
        checkpoint entry has that ``topic_id``.
    """
    try:
        row_idx = evt.index[0]
        if hasattr(table_data, "iloc"):
            raw_id = table_data.iloc[row_idx, 0]
        else:
            raw_id = table_data[row_idx][0]
        topic_id = int(raw_id)
    except (TypeError, ValueError, IndexError):
        # Covers an empty or NaN cell and a stale row index.
        return "Select a topic row to see its papers."

    label_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_labels.json"))
    summary_files = sorted(glob.glob("/tmp/checkpoints/rq4_*_summaries.json"))

    sections = []
    for checkpoint in label_files or summary_files:
        # File names look like rq4_<source>_<kind>.json.
        source = os.path.basename(checkpoint).split("_")[1]
        with open(checkpoint, encoding="utf-8") as handle:
            topics = json.load(handle)

        for topic in topics:
            if topic.get("topic_id") != topic_id:
                continue

            title = topic.get("label", topic.get("top_words", "")[:50])
            evidence = "\n".join(
                f" {rank}. \"{hit.get('sentence', '')[:200]}\"\n"
                f" Paper: {hit.get('title', '')[:100]}"
                for rank, hit in enumerate(topic.get("nearest", [])[:5], start=1)
            )
            titles = "\n".join(
                f" {rank}. {paper_title}"
                for rank, paper_title in enumerate(
                    topic.get("paper_titles", []), start=1
                )
            )
            sections.append(
                f"═══ {source.upper()} — Topic {topic_id}: {title} ═══\n"
                f"{topic.get('sentence_count', 0)} sentences from "
                f"{topic.get('paper_count', 0)} papers\n"
                f"AI Reasoning: {topic.get('reasoning', 'not yet labeled')}\n\n"
                "── 5 NEAREST CENTROID SENTENCES (evidence) ──\n"
                + evidence
                + "\n\n── ALL PAPER TITLES ──\n"
                + titles
            )

    return "\n\n".join(sections) or f"Topic {topic_id} not found."
| # ββ end B9: _show_papers_by_select βββββββββββββββββββββββββββββ | |
| # ββ B10: _submit_review() ββββββββββββββββββββββββββββββββββββββ | |
| # PURPOSE: When the researcher finishes editing the review table | |
| # (checking Approve boxes, typing Rename values, adding | |
| # Reasoning notes) and clicks "Submit Review", this | |
| # function converts those edits into a natural language | |
| # message and sends it to the agent for processing. | |
| # INPUTS: table_data (DataFrame from gr.Dataframe), chat_history (list) | |
| # YIELDS: Tuple of (chat, download, chart_choices, chart_fig, | |
| # review_rows, progress_str) β yields twice (progress β final) | |
| # TALKS TO: B2 (agent.invoke) β sends review decisions | |
| # B4 (_latest_output) β refreshes downloads | |
| # B5 (_build_progress) β refreshes pipeline status | |
| # B7 (_get_chart_choices) β refreshes chart dropdown | |
| # B8 (_load_review_table) β reloads table with updated data | |
| # NOTE: Column 5 (Approve) is now bool. True = approve, False = reject. | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def _submit_review(table_data, chat_history):
    """Turn the edited review table into an agent message and send it.

    Every usable row becomes one line ``"Topic <id>: <decision>"``. The
    decision is REJECT when Approve is unchecked, otherwise RENAME when a new
    name was typed, otherwise APPROVE. A typed reason is appended.

    Fixes over the previous version:
      * a row with a rename AND Approve unchecked produced the run-together
        text ``RENAME to 'x'REJECT``; an unchecked box now means REJECT;
      * empty cells delivered as None/NaN are treated as empty rather than
        becoming ``RENAME to 'None'``;
      * the "No data yet" placeholder row and rows without a numeric id are
        skipped, and nothing is sent to the agent when no row remains;
      * ``chat_history=None`` is accepted; mojibake strings are repaired.

    Args:
        table_data: Review table value; an object with ``.values``
            (DataFrame) or a list of rows with 8 columns.
        chat_history: Current chatbot messages (list of role/content dicts).

    Yields:
        Tuples ``(chat, download, chart_choices, chart_fig, review_rows,
        progress)``: first with a "processing" bubble, then with the agent's
        reply and a refreshed chart list, table and progress string.
    """
    def is_missing(value):
        # NaN is the only float that is not equal to itself.
        return value is None or (isinstance(value, float) and value != value)

    def cell_text(value):
        return "" if is_missing(value) else str(value).strip()

    def is_approved(value):
        if isinstance(value, str):
            return value.strip().lower() in ("true", "yes", "1")
        return (not is_missing(value)) and bool(value)

    if table_data is None:
        rows = []
    elif hasattr(table_data, "values"):
        rows = table_data.values.tolist()
    else:
        rows = [list(row) for row in table_data]

    lines = []
    for row in rows:
        label = cell_text(row[1])
        if label == "No data yet":
            continue  # placeholder row shown while no checkpoint exists
        try:
            topic_id = int(row[0])
        except (TypeError, ValueError):
            continue  # blank or non-numeric id: nothing to report

        rename = cell_text(row[6])
        reason = cell_text(row[7])
        if not is_approved(row[5]):
            decision = "REJECT"
        elif rename:
            decision = f"RENAME to '{rename}'"
        else:
            decision = f"APPROVE '{row[1]}'"
        if reason:
            decision += f" — reason: {reason}"
        lines.append(f"Topic {topic_id}: {decision}")

    if not lines:
        # Nothing reviewable: leave every output untouched and skip the agent.
        gr.Warning("No topics to review yet")
        yield tuple(gr.update() for _ in range(6))
        return

    review_msg = "Review decisions:\n" + "\n".join(lines)
    print(f">>> Review submitted: {review_msg[:200]}")

    # YIELD 1: show the processing bubble.
    chat_history = list(chat_history or []) + [
        {"role": "user", "content": review_msg},
        {"role": "assistant", "content": "🔬 **Processing review decisions...**"},
    ]
    gr.Info("Review submitted to agent")
    yield (chat_history, _latest_output(), gr.update(),
           gr.update(), gr.update(), _build_progress())

    # Same fixed thread_id as the chat handler, so the agent sees the review
    # in the same conversation.
    result = agent.invoke(
        {"messages": [("human", review_msg)]},
        config={"configurable": {"thread_id": "session"}},
    )
    response = result["messages"][-1].content

    # YIELD 2: final response plus refreshed chart list, table and progress.
    chat_history[-1] = {"role": "assistant", "content": response}
    gr.Info("Review processed — table updated")
    yield (
        chat_history,
        _latest_output(),
        gr.update(choices=_get_chart_choices()),
        gr.update(),
        gr.update(value=_load_review_table()),
        _build_progress(),
    )
| # ββ end B10: _submit_review ββββββββββββββββββββββββββββββββββββ | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # β SECTION 3 β UI LAYOUT β | |
| # β All visual components defined here using ONLY native Gradio β | |
| # β widgets. Zero gr.HTML() calls. Theming via B3. β | |
| # β Layout: Header β Upload β Progress β Chat β Results tabs β | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
print(">>> Building UI...")

# ── B11: gr.Blocks container ──────────────────────────────────────
# Root container for the whole UI (B12 through B19). Only `title` and
# `fill_width` are set here; the theme is passed to demo.launch() in B20.
# All components use native Gradio widgets; there are no gr.HTML() calls.
# Event wiring (B18, B19) stays inside this `with` block so the listeners
# are registered on `demo`.
with gr.Blocks(
    title="Topic Modelling — Agentic AI",
    fill_width=True,
) as demo:

    # ── B12: Header ───────────────────────────────────────────────
    # Application title and subtitle in a single Markdown component.
    gr.Markdown(
        "# 🔬 Topic Modelling — Agentic AI\n"
        "*Mistral · Cosine Clustering · 384d · B&C Thematic Analysis*"
    )

    # ── B13: Data input ───────────────────────────────────────────
    # CSV upload next to a short instruction. upload.change is wired to
    # _auto_load_csv in B19.
    gr.Markdown("**① Data input**")
    with gr.Row():
        upload = gr.File(label="📁 Upload Scopus CSV", file_types=[".csv"])
        gr.Markdown("**Upload your CSV** then type `run abstract only` in chat below")

    # ── B14: Progress pipeline ────────────────────────────────────
    # Emoji status string from _build_progress(); refreshed by the chat,
    # review and upload handlers.
    phase_progress = gr.Markdown(value=_build_progress())

    # ── B15: Chatbot + input ──────────────────────────────────────
    # Conversation display plus a one-line textbox and a Send button.
    # msg.submit and send.click are both wired to respond_with_viz (B18).
    gr.Markdown("**② Agent conversation** — follow the prompts below")
    with gr.Group():
        chatbot = gr.Chatbot(
            height=300,
            show_label=False,
            placeholder="Upload your Scopus CSV above, then type: run abstract only",
        )
        with gr.Row():
            msg = gr.Textbox(
                placeholder="run · approve · show topic 4 papers · group 0 1 5 · done",
                show_label=False, scale=9, lines=1, max_lines=1, container=False,
            )
            send = gr.Button("Send", variant="primary", scale=1, min_width=70)

    # ── B16 / B17: Results tabs ───────────────────────────────────
    gr.Markdown("**③ Results** — review table, charts, downloads")
    with gr.Tabs():

        # ── B16: Review table tab ─────────────────────────────────
        # Human-in-the-loop table: the researcher approves, renames or
        # annotates topics. Column 5 ("Approve") has datatype "bool".
        # Selecting a row shows that topic's papers (B9); the Submit
        # button sends the edits to the agent (B10).
        with gr.Tab("📋 Review Table"):
            gr.Markdown(
                "*Edit Approve / Rename To / Reasoning → click Submit. "
                "Click any row to see its papers below.*"
            )
            review_table = gr.Dataframe(
                headers=[
                    "#", "Topic Label", "Top Evidence Sentence",
                    "Sentences", "Papers", "Approve", "Rename To", "Your Reasoning",
                ],
                datatype=[
                    "number", "str", "str", "number", "number",
                    "bool", "str", "str",
                ],
                interactive=True,
                column_count=8,
                # NOTE (from the original author): the options below need
                # Gradio >=5.23 and are left disabled until then:
                #   static_columns=[0, 1, 2, 3, 4], pinned_columns=2,
                #   show_search="filter", show_row_numbers=True,
                #   show_fullscreen_button=True, show_copy_button=True,
                #   column_widths=["60px","200px","250px","80px","70px",
                #                  "70px","150px","200px"]
                # Until static_columns is enabled, columns 0-4 are editable.
            )
            submit_review = gr.Button("✅ Submit Review to Agent", variant="primary")

            # Paper viewer, filled when a table row is selected.
            gr.Markdown("---")
            gr.Markdown("**📄 Papers in selected topic** *(click any row above)*")
            paper_list = gr.Textbox(
                label="Papers in selected topic",
                lines=8, interactive=False,
            )

        # ── B17a: Charts tab ──────────────────────────────────────
        # Dropdown of chart files plus a gr.Plot that shows the figure
        # returned by _load_chart (B7).
        with gr.Tab("📊 Charts"):
            chart_selector = gr.Dropdown(
                choices=[], label="Select Chart", interactive=True,
            )
            chart_display = gr.Plot(label="BERTopic Visualization")

        # ── B17b: Download tab ────────────────────────────────────
        # Static description of the files per phase and a multi-file
        # gr.File fed by _latest_output() (B4).
        with gr.Tab("📥 Download"):
            gr.Markdown(
                "**Files by Phase (per run: abstract / title):**\n\n"
                "**Phase 2 — Discovery:** `summaries.json` · `emb.npy`\n\n"
                "**Phase 2 — Labeling:** `labels.json`\n\n"
                "**Phase 2 — Charts:** `intertopic.json` · `bars.json` · "
                "`hierarchy.json` · `heatmap.json`\n\n"
                "**Phase 3 — Themes:** `themes.json`\n\n"
                "**Phase 5.5 — Taxonomy:** `taxonomy_map.json`\n\n"
                "**Phase 6 — Report:** `comparison.csv` · `narrative.txt`"
            )
            download = gr.File(label="All output files", file_count="multiple")

    # ══ SECTION 4 — EVENT WIRING ══════════════════════════════════
    # Connect components to the helper functions.

    # ── B18: respond_with_viz() + event bindings ──────────────────
    chart_selector.change(_load_chart, [chart_selector], [chart_display])
    review_table.select(
        _show_papers_by_select, [review_table], [paper_list],
    )
    submit_review.click(
        _submit_review, [review_table, chatbot],
        [chatbot, download, chart_selector, chart_display,
         review_table, phase_progress],
    )

    def _refreshed_panels():
        """Return fresh values for the four result outputs.

        Order: (chart_selector, chart_display, review_table, phase_progress).
        The last chart in the sorted list is selected and loaded. When there
        is no chart, or it cannot be loaded, the plot is left untouched.
        """
        choices = _get_chart_choices()
        figure = _load_chart(choices[-1]) if choices else None
        return (
            gr.update(choices=choices, value=choices[-1] if choices else None),
            figure if figure is not None else gr.update(),
            gr.update(value=_load_review_table()),
            _build_progress(),
        )

    def respond_with_viz(message, chat_history, uploaded_file):
        """Run one chat turn via respond() and refresh charts, table and progress.

        Yields twice, mirroring respond(): first with the progress bubble,
        then with the final response and refreshed result panels.
        """
        turn = respond(message, chat_history, uploaded_file)

        # First yield (progress bubble): only the chart list is refreshed.
        hist, txt, dl = next(turn)
        yield (hist, txt, dl, gr.update(choices=_get_chart_choices()),
               gr.update(), gr.update(), _build_progress())

        # Second yield (final response): repopulate all result panels.
        hist, txt, dl = next(turn)
        yield (hist, txt, dl, *_refreshed_panels())

    msg.submit(
        respond_with_viz, [msg, chatbot, upload],
        [chatbot, msg, download, chart_selector, chart_display,
         review_table, phase_progress],
    )
    send.click(
        respond_with_viz, [msg, chatbot, upload],
        [chatbot, msg, download, chart_selector, chart_display,
         review_table, phase_progress],
    )

    # ── B19: _auto_load_csv() ─────────────────────────────────────
    def _auto_load_csv(uploaded_file, chat_history):
        """Start the analysis automatically when a CSV is uploaded.

        Sends "Analyze my Scopus CSV" through respond() and yields twice,
        like respond_with_viz but without the textbox output.
        """
        if not uploaded_file:
            # The change event also covers the file being removed (value
            # None). Running a full agent turn on the remembered path would
            # be a surprise, so leave every output untouched.
            yield tuple(gr.update() for _ in range(6))
            return

        turn = respond("Analyze my Scopus CSV", chat_history, uploaded_file)

        # First yield (progress bubble).
        hist, _txt, dl = next(turn)
        yield (hist, dl, gr.update(), gr.update(),
               gr.update(), _build_progress())

        # Second yield (final response + refreshed panels).
        hist, _txt, dl = next(turn)
        yield (hist, dl, *_refreshed_panels())

    upload.change(
        _auto_load_csv, [upload, chatbot],
        [chatbot, download, chart_selector, chart_display,
         review_table, phase_progress],
    )
| # ββ end B19: _auto_load_csv ββββββββββββββββββββββββββββββββ | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # β SECTION 5 β LAUNCH β | |
| # β Start the Gradio server. On HuggingFace Spaces this runs β | |
| # β automatically. Locally, access at http://localhost:7860 β | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # ββ B20: Launch ββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # PURPOSE: Start the web server. In Gradio 6.0, theme/css/footer | |
| # params moved here from gr.Blocks(). | |
| # CONFIG: theme β from B3 (Soft + DM Sans + slate) | |
| # footer_links=[] β hides footer natively (no CSS hack) | |
| # ssr_mode=False β for HuggingFace Spaces free tier compat | |
| # server_name="0.0.0.0" β accessible on network | |
| # NOTE: On Spaces, port 7860 is auto-exposed to the internet. | |
| # Locally, open http://localhost:7860 in your browser. | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
print(">>> Launching...")

# Server options, collected in one place and handed to demo.launch().
_launch_options = {
    "server_name": "0.0.0.0",   # listen on all interfaces
    "server_port": 7860,
    "ssr_mode": False,
    "theme": theme,             # theme object built in B3
    "footer_links": [],         # empty list: no footer links
}
demo.launch(**_launch_options)
| # ββ end B20: Launch ββββββββββββββββββββββββββββββββββββββββββββ | |