""" app.py — Gradio UI for BERTopic Agentic AI Application (~370 lines) Sections: ① Data Input ② Agent Conversation ③ Results (Table | Charts | Download) Rules: ZERO business logic here. All decisions made by agent.py. """ import os import json import glob import gradio as gr from agent import invoke_agent CHECKPOINT_DIR = "checkpoints" os.makedirs(CHECKPOINT_DIR, exist_ok=True) CSV_PATH = os.path.join(CHECKPOINT_DIR, "uploaded.csv") # ── Checkpoint file paths ────────────────────────────────────────────────────── def ckpt(name): return os.path.join(CHECKPOINT_DIR, name) # ── Phase progress HTML ──────────────────────────────────────────────────────── def build_phase_bar(): phases = [ ("① Load", "stats.json"), ("② Codes", "abstract_labels.json"), ("③ Themes", "abstract_themes.json"), ("④ Saturation", "abstract_themes.json"), ("⑤ Names", "abstract_themes.json"), ("⑤½ PAJAIS", "abstract_taxonomy_map.json"), ("⑥ Report", "comparison.csv"), ] items = list(map( lambda p: ( f'
' f'{"✅" if os.path.exists(ckpt(p[1])) else "⬜"} {p[0]}
' ), phases, )) bar = ( '
' 'B&C PHASES:' + "".join(items) + "
" ) return bar # ── Review table loading ─────────────────────────────────────────────────────── def load_review_table(): """Priority: taxonomy_map → themes → labels → summaries""" priority = [ ("abstract_taxonomy_map.json", "taxonomy"), ("abstract_themes.json", "themes"), ("abstract_labels.json", "labels"), ("abstract_summaries.json", "summaries"), ] for filename, mode in priority: path = ckpt(filename) if os.path.exists(path): with open(path) as f: data = json.load(f) return _format_table(data, mode) return _empty_table() def _empty_table(): import pandas as pd return pd.DataFrame( [["", "", "", 0, "", "yes", "", ""]], columns=["#", "Topic Label", "Top Evidence", "Sentences", "Papers", "Approve", "Rename To", "Reasoning"], ) def _format_table(data, mode): import pandas as pd rows = list(map(lambda item: _format_row(item, mode), data)) if not rows: return _empty_table() return pd.DataFrame( rows, columns=["#", "Topic Label", "Top Evidence", "Sentences", "Papers", "Approve", "Rename To", "Reasoning"], ) def _format_row(item, mode): idx = item.get("topic_id", item.get("name", "")) label = item.get("label", item.get("name", "")) if mode == "taxonomy": evidence = ( f"→ {item.get('pajais_match', 'NOVEL')} " f"| conf: {item.get('match_confidence', 0):.2f} " f"| {item.get('reasoning', '')}" ) else: sentences = item.get("top_sentences", []) evidence = sentences[0] if sentences else "" sentences_count = item.get("sentence_count", len(item.get("top_sentences", []))) papers = item.get("paper_count", "") approve = item.get("approve", "yes") rename = item.get("rename_to", label) reasoning = item.get("reasoning", "") return [idx, label, evidence, sentences_count, papers, approve, rename, reasoning] # ── Chart list ──────────────────────────────────────────────────────────────── def get_chart_choices(): chart_files = glob.glob(ckpt("*_chart_*.html")) choices = list(map( lambda f: os.path.basename(f).replace("_", " ").replace(".html", "").title(), chart_files, )) return choices 
def load_chart_html(choice):
    """Return the HTML for the chart pane given the dropdown label *choice*.

    Returns a placeholder when nothing is selected, a "not found" notice when
    no matching file exists, otherwise an iframe embedding the chart file.
    """
    import html  # function-scope import: only this function needs it

    if not choice or choice == "No charts yet":
        return (
            '<div style="padding:24px;text-align:center;color:#64748b;">'
            "<p>Charts appear after Phase 2 analysis.</p>"
            "</div>"
        )

    # The dropdown label is a lossy transform of the file name (underscores →
    # spaces, title-cased), so lower-casing it back does not always recover
    # the real name. Resolve by re-deriving the label of each file on disk.
    path = None
    for candidate in glob.glob(ckpt("*_chart_*.html")):
        label = os.path.basename(candidate).replace("_", " ").replace(".html", "").title()
        if label == choice:
            path = candidate
            break
    if path is None:
        # Fall back to the direct guess; basename() keeps it inside the
        # checkpoint directory.
        guessed = choice.lower().replace(" ", "_") + ".html"
        path = ckpt(os.path.basename(guessed))

    if not os.path.exists(path):
        return (
            '<div style="padding:24px;text-align:center;color:#64748b;">'
            "<p>Chart file not found.</p>"
            "</div>"
        )

    with open(path, encoding="utf-8") as f:
        content = f.read()
    # NOTE(review): the iframe attributes are a minimal reconstruction. The
    # document is embedded through srcdoc, so it must be attribute-escaped.
    return (
        f'<iframe srcdoc="{html.escape(content, quote=True)}" '
        'style="width:100%;height:600px;border:none;"></iframe>'
    )


# ── Download file list ─────────────────────────────────────────────────────────

def get_download_files():
    """Return checkpoint files (csv/json/txt/npy), newest first, or None if none."""
    patterns = ("*.csv", "*.json", "*.txt", "*.npy")
    files = [path for pattern in patterns for path in glob.glob(ckpt(pattern))]
    files.sort(key=os.path.getmtime, reverse=True)
    return files if files else None


# ── Table-to-theme-map parser ──────────────────────────────────────────────────

def _cell_text(value):
    """Return a table cell as stripped text; None and NaN/NA become ''."""
    import pandas as pd

    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return ""
    return str(value).strip()


def _topic_id(value):
    """Return the topic id as an int when the cell holds a whole number.

    Accepts ints, digit strings, negative ids and whole floats such as 3.0
    (pandas turns an integer column into floats once it contains a NaN).
    Anything else is returned as stripped text.
    """
    text = _cell_text(value)
    try:
        return int(text)
    except ValueError:
        pass
    try:
        number = float(text)
    except ValueError:
        return text
    return int(number) if number.is_integer() else text


def parse_table_to_message(table_data):
    """Convert review table edits into a structured message for the agent.

    Handles both pandas DataFrame (from gr.Dataframe) and list of lists.
    Columns are read by position: 0 "#", 1 "Topic Label", 5 "Approve",
    6 "Rename To", 7 "Reasoning". Rows with neither an id nor a label are
    blank placeholders and are ignored. Approved rows are grouped by their
    "Rename To" value, falling back to the topic label when it is empty.
    """
    import pandas as pd

    # Normalise to list of lists regardless of input type
    if table_data is None:
        return "Submit Review: No table data provided."
    if isinstance(table_data, pd.DataFrame):
        if table_data.empty:
            return "Submit Review: Table is empty, nothing to review."
        rows = table_data.values.tolist()
    else:
        rows = list(table_data) if table_data else []
    if not rows:
        return "Submit Review: No table data provided."

    # Only rows wide enough to carry an Approve cell can be decided on; blank
    # placeholder rows must not be counted as approved topics.
    decided = [
        row for row in rows
        if len(row) >= 6 and (_cell_text(row[0]) or _cell_text(row[1]))
    ]
    approved = [
        row for row in decided
        if _cell_text(row[5]).lower() in ("yes", "y", "1", "true")
    ]
    rejected = [
        row for row in decided
        if _cell_text(row[5]).lower() in ("no", "n", "0", "false")
    ]

    theme_groups = {}
    reasoning_lines = []
    for row in approved:
        topic = _topic_id(row[0])
        label = _cell_text(row[1])
        rename_to = _cell_text(row[6]) if len(row) > 6 else ""
        theme_groups.setdefault(rename_to or label, []).append(topic)

        reasoning = _cell_text(row[7]) if len(row) > 7 else ""
        if reasoning:
            reasoning_lines.append(f" - Topic {topic} ({label}): {reasoning}")

    # ensure_ascii=False keeps non-ASCII theme names readable in the message.
    theme_map_str = json.dumps(theme_groups, ensure_ascii=False)

    msg = (
        "Submit Review received.\n\n"
        f"Approved topics: {len(approved)}\n"
        f"Rejected topics: {len(rejected)}\n\n"
        f"Theme groupings (RENAME TO → [topic_ids]):\n{theme_map_str}\n\n"
        "Researcher reasoning:\n"
        + ("\n".join(reasoning_lines) if reasoning_lines else " (no reasoning provided)")
        + "\n\nPlease proceed to the next phase based on these decisions."
    )
    return msg
# ── Main Gradio App ────────────────────────────────────────────────────────────

def build_app():
    """Build the Gradio Blocks UI, wire its events and return the Blocks app.

    Layout: header and phase bar, ① CSV upload, ② chat with the agent,
    ③ results tabs (review table, charts, downloads). Every handler only
    forwards text to invoke_agent and re-reads checkpoint files; no analysis
    is done here.

    NOTE(review): the inline markup in the gr.HTML blocks is a minimal
    reconstruction — the visible text is authoritative, the styling is not.
    """
    with gr.Blocks(title="BERTopic Thematic Analysis Agent") as app:

        # ── Header ──────────────────────────────────────────────────────────
        gr.HTML("""
        <div style="text-align:center;padding:16px 0;">
          <div style="font-size:12px;color:#64748b;letter-spacing:0.08em;">
            Braun &amp; Clarke (2006) · BERTopic · PAJAIS Taxonomy
          </div>
          <h1 style="margin:6px 0;">Thematic Analysis Agent</h1>
          <div style="font-size:12px;color:#64748b;">
            Agentic AI · LangGraph · Mistral LLM · AgglomerativeClustering (cosine, 384d)
          </div>
        </div>
        """)

        # Phase progress bar
        phase_bar = gr.HTML(value=build_phase_bar(), label="Phase Progress")

        # ── SECTION 1: Data Input ────────────────────────────────────────────
        # "section-header" is styled by the CSS passed to launch() below.
        gr.HTML('<div class="section-header">① DATA INPUT</div>')
        with gr.Row():
            csv_upload = gr.File(
                label="Upload Scopus CSV Export",
                file_types=[".csv"],
                scale=2,
            )
            with gr.Column(scale=1):
                gr.HTML("""
                <div style="font-size:12px;color:#94a3b8;line-height:1.6;">
                  <b>Required CSV Columns:</b><br>
                  Authors · Title · Abstract<br>
                  Author Keywords · Cited by<br>
                  Source title · Year
                </div>
                """)

        # ── SECTION 2: Agent Conversation ───────────────────────────────────
        gr.HTML('<div class="section-header">② AGENT CONVERSATION</div>')
        chatbot = gr.Chatbot(
            label="Thematic Analysis Agent",
            height=500,
            avatar_images=(None, "https://www.anthropic.com/favicon.ico"),
        )
        with gr.Row():
            user_input = gr.Textbox(
                placeholder="Type 'run abstract', 'run title', or any instruction...",
                label="",
                scale=5,
                lines=1,
                container=False,
            )
            send_btn = gr.Button("Send ▶", variant="primary", scale=1)

        # ── SECTION 3: Results ───────────────────────────────────────────────
        gr.HTML('<div class="section-header">③ RESULTS</div>')
        with gr.Tabs():

            # Tab 1: Review Table
            with gr.TabItem("📋 Review Table"):
                gr.HTML("""
                <div style="font-size:12px;color:#94a3b8;padding:4px 0 8px;">
                  <p>Edit <b>Approve</b> (yes/no), <b>Rename To</b>, and <b>Reasoning</b> columns.
                  Then click <b>Submit Review</b> to send decisions to the agent.</p>
                </div>
                """)
                review_table = gr.Dataframe(
                    headers=["#", "Topic Label", "Top Evidence", "Sentences",
                             "Papers", "Approve", "Rename To", "Reasoning"],
                    datatype=["str", "str", "str", "number",
                              "str", "str", "str", "str"],
                    row_count=10,
                    column_count=8,
                    interactive=True,
                    wrap=True,
                    label="",
                )
                submit_review_btn = gr.Button("📤 Submit Review →", variant="primary")

            # Tab 2: Charts
            with gr.TabItem("📊 Charts"):
                chart_dropdown = gr.Dropdown(
                    choices=get_chart_choices(),
                    label="Select Chart",
                    interactive=True,
                )
                refresh_charts_btn = gr.Button(
                    "🔄 Refresh Chart List", variant="secondary", size="sm"
                )
                chart_display = gr.HTML(
                    value=(
                        '<div style="padding:24px;text-align:center;color:#64748b;">'
                        "<p>Charts appear after Phase 2 BERTopic analysis.</p>"
                        "</div>"
                    )
                )

            # Tab 3: Downloads
            with gr.TabItem("📥 Download Files"):
                gr.HTML("""
                <div style="font-size:12px;color:#94a3b8;padding:4px 0 8px;">
                  <p>All checkpoint files are listed below. Download for your conference paper.</p>
                </div>
                """)
                download_files = gr.File(
                    label="Output Files",
                    file_count="multiple",
                    interactive=False,
                )
                refresh_downloads_btn = gr.Button(
                    "🔄 Refresh Files", variant="secondary", size="sm"
                )

        # ── State ─────────────────────────────────────────────────────────────
        # NOTE(review): every session starts with the same thread id
        # ("default") — confirm the agent is meant to share one thread.
        thread_state = gr.State("default")

        def _agent_turn(history, shown, prompt, thread_id):
            """Append one user/assistant exchange and return the new history.

            *shown* is the text displayed as the user's chat bubble; *prompt*
            is what is actually sent to the agent (they differ for uploads
            and table submissions).
            """
            history = list(history or [])
            history.append({"role": "user", "content": shown})
            response = invoke_agent(prompt, thread_id)
            history.append({"role": "assistant", "content": response})
            return history

        # ── Event: CSV Upload ─────────────────────────────────────────────────
        def on_csv_upload(file, history, thread_id):
            """Ask the agent to load the uploaded CSV, then refresh bar and table."""
            if file is None:
                return history, build_phase_bar(), load_review_table()
            # In Gradio 6, uploaded file is a filepath string
            filepath = file if isinstance(file, str) else file.name
            history = _agent_turn(
                history,
                f"CSV uploaded: {os.path.basename(filepath)}",
                f"load_scopus_csv filepath={filepath}",
                thread_id,
            )
            return history, build_phase_bar(), load_review_table()

        csv_upload.upload(
            on_csv_upload,
            inputs=[csv_upload, chatbot, thread_state],
            outputs=[chatbot, phase_bar, review_table],
        )

        # ── Event: Send message ───────────────────────────────────────────────
        def on_send(message, history, thread_id):
            """Forward a chat message to the agent and clear the input box."""
            # A missing or blank message is ignored rather than sent.
            if not message or not message.strip():
                return history, "", build_phase_bar(), load_review_table()
            history = _agent_turn(history, message, message, thread_id)
            return history, "", build_phase_bar(), load_review_table()

        send_btn.click(
            on_send,
            inputs=[user_input, chatbot, thread_state],
            outputs=[chatbot, user_input, phase_bar, review_table],
        )
        user_input.submit(
            on_send,
            inputs=[user_input, chatbot, thread_state],
            outputs=[chatbot, user_input, phase_bar, review_table],
        )

        # ── Event: Submit Review ──────────────────────────────────────────────
        def on_submit_review(table_data, history, thread_id):
            """Send the review-table decisions to the agent as one message."""
            history = _agent_turn(
                history,
                "📤 Submit Review (table decisions sent to agent)",
                parse_table_to_message(table_data),
                thread_id,
            )
            return history, build_phase_bar(), load_review_table()

        submit_review_btn.click(
            on_submit_review,
            inputs=[review_table, chatbot, thread_state],
            outputs=[chatbot, phase_bar, review_table],
        )

        # ── Event: Chart selection ────────────────────────────────────────────
        chart_dropdown.change(
            load_chart_html,
            inputs=[chart_dropdown],
            outputs=[chart_display],
        )

        def refresh_charts():
            """Re-scan chart files and select the first entry."""
            choices = get_chart_choices()
            return gr.update(choices=choices, value=choices[0] if choices else None)

        refresh_charts_btn.click(
            refresh_charts,
            outputs=[chart_dropdown],
        )

        # ── Event: Download refresh ───────────────────────────────────────────
        def refresh_downloads():
            """Re-scan the checkpoint directory for downloadable files."""
            files = get_download_files()
            return gr.update(value=files)

        refresh_downloads_btn.click(
            refresh_downloads,
            outputs=[download_files],
        )

        # ── Initial load ──────────────────────────────────────────────────────
        app.load(
            lambda: (build_phase_bar(), load_review_table(), get_download_files()),
            outputs=[phase_bar, review_table, download_files],
        )

    return app


# ── Launch ─────────────────────────────────────────────────────────────────────

if __name__ == "__main__":
    demo = build_app()
    # NOTE(review): theme/css are given to launch() rather than gr.Blocks();
    # that matches the Gradio 6 API this file targets — confirm the
    # installed version.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
        share=False,
        theme=gr.themes.Base(
            primary_hue="emerald",
            secondary_hue="slate",
            neutral_hue="slate",
            font=[gr.themes.GoogleFont("IBM Plex Mono"), "monospace"],
        ),
        css="""
        body { background: #0a0f1a !important; }
        .gradio-container {
            max-width: 1400px !important;
            background: #0a0f1a !important;
        }
        .section-header {
            font-size: 13px;
            font-weight: 700;
            color: #64748b;
            letter-spacing: 0.12em;
            text-transform: uppercase;
            margin-bottom: 12px;
            padding-bottom: 8px;
            border-bottom: 1px solid #1e293b;
        }
        footer { display: none !important; }
        """,
    )