Spaces:
Sleeping
Sleeping
"""
app.py — Gradio UI for BERTopic Agentic AI Application (~370 lines)
Sections: ① Data Input ② Agent Conversation ③ Results (Table | Charts | Download)
Rules: ZERO business logic here. All decisions made by agent.py.
"""
| import os | |
| import json | |
| import glob | |
| import gradio as gr | |
| from agent import invoke_agent | |
# Directory that holds every checkpoint file this UI reads.
CHECKPOINT_DIR = "checkpoints"
# Location of "uploaded.csv" inside the checkpoint directory.
CSV_PATH = os.path.join(CHECKPOINT_DIR, "uploaded.csv")
# Create the directory at import time; a no-op when it already exists.
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
# ── Checkpoint file paths ──────────────────────────────────────────────────────
def ckpt(name):
    """Return the path of *name* joined onto the checkpoint directory."""
    checkpoint_path = os.path.join(CHECKPOINT_DIR, name)
    return checkpoint_path
# ── Phase progress HTML ────────────────────────────────────────────────────────
def build_phase_bar():
    """Render the phase progress bar as an HTML string.

    Every phase is drawn as a pill. A pill is shown in its "done" colours
    when the checkpoint file associated with the phase exists, and in its
    "pending" colours otherwise.

    Returns:
        str: a single ``<div>`` containing the caption and one pill per phase.
    """
    # (pill label, checkpoint file whose existence marks the phase as done)
    phases = [
        ("β  Load", "stats.json"),
        ("β‘ Codes", "abstract_labels.json"),
        ("β’ Themes", "abstract_themes.json"),
        ("β£ Saturation", "abstract_themes.json"),
        ("β€ Names", "abstract_themes.json"),
        ("β€Β½ PAJAIS", "abstract_taxonomy_map.json"),
        ("β₯ Report", "comparison.csv"),
    ]

    pills = []
    for label, filename in phases:
        # Check the filesystem once per phase so that background, text colour
        # and icon are always derived from the same answer.
        done = os.path.exists(ckpt(filename))
        background = "#22c55e" if done else "#374151"
        text_colour = "#fff" if done else "#9ca3af"
        icon = "β " if done else "β¬"
        pills.append(
            '<div style="display:inline-flex;align-items:center;gap:6px;'
            'padding:6px 14px;border-radius:20px;font-size:13px;font-weight:600;'
            f'background:{background};'
            f'color:{text_colour};">'
            f'{icon} {label}</div>'
        )

    return (
        '<div style="background:#111827;padding:12px 16px;border-radius:12px;'
        'border:1px solid #1f2937;display:flex;flex-wrap:wrap;gap:8px;align-items:center;">'
        '<span style="color:#6b7280;font-size:12px;font-weight:700;margin-right:4px;">B&C PHASES:</span>'
        + "".join(pills)
        + "</div>"
    )
# ── Review table loading ───────────────────────────────────────────────────────
def load_review_table():
    """Build the review table from the first usable checkpoint file.

    Checkpoints are tried in priority order:
    taxonomy map -> themes -> labels -> summaries.

    A checkpoint that is missing, unreadable, or does not contain valid
    UTF-8 JSON is skipped so that the next one in the list is tried instead
    of raising inside a UI callback.

    Returns:
        The value of ``_format_table(data, mode)`` for the first checkpoint
        that loads, or ``_empty_table()`` when none does.
    """
    priority = [
        ("abstract_taxonomy_map.json", "taxonomy"),
        ("abstract_themes.json", "themes"),
        ("abstract_labels.json", "labels"),
        ("abstract_summaries.json", "summaries"),
    ]
    for filename, mode in priority:
        try:
            # Explicit encoding: the platform default is not always UTF-8.
            with open(ckpt(filename), encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # OSError covers a missing/unreadable file; ValueError covers
            # json.JSONDecodeError and UnicodeDecodeError.
            continue
        return _format_table(data, mode)
    return _empty_table()
def _empty_table():
    """Return a one-row placeholder DataFrame with the review-table columns."""
    import pandas as pd

    headers = [
        "#", "Topic Label", "Top Evidence", "Sentences",
        "Papers", "Approve", "Rename To", "Reasoning",
    ]
    # Blank cells everywhere except a zero sentence count and a default "yes".
    placeholder_row = ["", "", "", 0, "", "yes", "", ""]
    return pd.DataFrame([placeholder_row], columns=headers)
def _format_table(data, mode):
    """Turn an iterable of checkpoint items into the review-table DataFrame.

    Each item is converted with ``_format_row(item, mode)``. When no rows
    result, the placeholder from ``_empty_table()`` is returned instead.
    """
    import pandas as pd

    formatted = [_format_row(entry, mode) for entry in data]
    if formatted:
        headers = [
            "#", "Topic Label", "Top Evidence", "Sentences",
            "Papers", "Approve", "Rename To", "Reasoning",
        ]
        return pd.DataFrame(formatted, columns=headers)
    return _empty_table()
def _format_row(item, mode):
    """Convert one checkpoint item (a dict) into a review-table row.

    Args:
        item: mapping read from a checkpoint file. Keys consulted:
            ``topic_id``, ``name``, ``label``, ``top_sentences``,
            ``sentence_count``, ``paper_count``, ``approve``, ``rename_to``,
            ``reasoning`` and, in taxonomy mode, ``pajais_match`` and
            ``match_confidence``.
        mode: ``"taxonomy"`` builds the evidence cell from the taxonomy match;
            any other value uses the first entry of ``top_sentences``.

    Returns:
        list: ``[id, label, evidence, sentence count, papers, approve,
        rename-to, reasoning]``.
    """
    idx = item.get("topic_id", item.get("name", ""))
    label = item.get("label", item.get("name", ""))
    # `or []` also covers a key that is present but null in the JSON, which
    # would otherwise make len() raise below.
    top_sentences = item.get("top_sentences") or []

    if mode == "taxonomy":
        # The confidence may be null or a numeric string; the :.2f format
        # needs a real number, so coerce and fall back to 0.
        try:
            confidence = float(item.get("match_confidence") or 0)
        except (TypeError, ValueError):
            confidence = 0.0
        evidence = (
            f"β {item.get('pajais_match', 'NOVEL')} "
            f"| conf: {confidence:.2f} "
            f"| {item.get('reasoning', '')}"
        )
    else:
        evidence = top_sentences[0] if top_sentences else ""

    sentences_count = item.get("sentence_count", len(top_sentences))
    papers = item.get("paper_count", "")
    approve = item.get("approve", "yes")
    # The rename cell defaults to the current label.
    rename = item.get("rename_to", label)
    reasoning = item.get("reasoning", "")
    return [idx, label, evidence, sentences_count, papers, approve, rename, reasoning]
# ── Chart list ────────────────────────────────────────────────────────────────
def get_chart_choices():
    """List display names for the chart files in the checkpoint directory.

    A display name is the file's base name with underscores turned into
    spaces, ".html" removed, and title-casing applied. When no chart file
    matches, a single "No charts yet" entry is returned.
    """
    display_names = []
    for chart_path in glob.glob(ckpt("*_chart_*.html")):
        base = os.path.basename(chart_path)
        display_names.append(base.replace("_", " ").replace(".html", "").title())
    if display_names:
        return display_names
    return ["No charts yet"]
def load_chart_html(choice):
    """Return HTML that embeds the chart selected in the dropdown.

    Args:
        choice: display name of a chart (underscores shown as spaces,
            title-cased, without ".html"), or a falsy value / the
            "No charts yet" placeholder.

    Returns:
        str: an ``<iframe srcdoc=...>`` holding the chart file's content, a
        hint paragraph when nothing is selected, or an error paragraph when
        the chart file cannot be found or read.
    """
    import html

    if not choice or choice == "No charts yet":
        return "<p style='color:#6b7280;padding:20px;'>Charts appear after Phase 2 analysis.</p>"

    not_found = "<p style='color:#ef4444;'>Chart file not found.</p>"

    # basename() keeps the lookup inside the checkpoint directory even if the
    # submitted value contains path separators.
    filename = os.path.basename(choice.lower().replace(" ", "_") + ".html")
    path = ckpt(filename)
    if not os.path.exists(path):
        # Title-casing the display name loses the file's real letter case, so
        # lower-casing it back may miss on a case-sensitive filesystem. Fall
        # back to matching against the display name of each chart file.
        path = next(
            (
                candidate
                for candidate in glob.glob(ckpt("*_chart_*.html"))
                if os.path.basename(candidate).replace("_", " ").replace(".html", "").title() == choice
            ),
            None,
        )
        if path is None:
            return not_found

    try:
        with open(path, encoding="utf-8") as f:
            content = f.read()
    except OSError:
        return not_found

    # srcdoc is an attribute value: "&" must be escaped as well as quotes,
    # otherwise entities inside the chart document are decoded one level
    # too early.
    escaped = html.escape(content, quote=True)
    return f'<iframe srcdoc="{escaped}" width="100%" height="600px" frameborder="0"></iframe>'
# ── Download file list ─────────────────────────────────────────────────────────
def get_download_files():
    """Return checkpoint files offered for download, newest first.

    Files in the checkpoint directory matching the csv/json/txt/npy patterns
    are collected and sorted by modification time, most recent first.
    ``None`` is returned when nothing matches.
    """
    patterns = ("*.csv", "*.json", "*.txt", "*.npy")
    matches = [path for pattern in patterns for path in glob.glob(ckpt(pattern))]
    matches.sort(key=os.path.getmtime, reverse=True)
    return matches or None
# ── Table-to-theme-map parser ──────────────────────────────────────────────────
def _cell_text(value):
    """Return a table cell as stripped text; None and NaN become ""."""
    if value is None:
        return ""
    # NaN is the only float that is not equal to itself.
    if isinstance(value, float) and value != value:
        return ""
    return str(value).strip()


def _topic_id(value):
    """Return a topic id as int when it is a whole number, else as text."""
    if isinstance(value, float) and value.is_integer():
        return int(value)
    text = _cell_text(value)
    unsigned = text[1:] if text.startswith(("-", "+")) else text
    # isdecimal() guarantees int() accepts the text; the sign is handled
    # separately so negative ids such as "-1" become ints too.
    if unsigned.isdecimal():
        return int(text)
    return text


def parse_table_to_message(table_data):
    """Convert review-table edits into a structured message for the agent.

    Args:
        table_data: a pandas DataFrame or a list of row lists. Columns are
            read by position: 0 = id, 1 = label, 5 = approve, 6 = rename-to,
            7 = reasoning.

    Returns:
        str: a message with the approved/rejected counts, a JSON object
        mapping each theme name (the rename-to cell, or the label when that
        cell is blank) to the list of approved topic ids, and the reasoning
        given for approved topics. A short "Submit Review: ..." notice is
        returned instead when there is nothing to review.
    """
    import pandas as pd

    # Normalise to a list of rows regardless of input type.
    if table_data is None:
        return "Submit Review: No table data provided."
    if isinstance(table_data, pd.DataFrame):
        if table_data.empty:
            return "Submit Review: Table is empty, nothing to review."
        rows = table_data.values.tolist()
    else:
        rows = list(table_data) if table_data else []
    if not rows:
        return "Submit Review: No table data provided."

    # Drop rows with neither an id nor a label (e.g. the blank placeholder
    # row of an empty table) so they are not counted as approved topics.
    rows = [
        row for row in rows
        if len(row) >= 2 and (_cell_text(row[0]) or _cell_text(row[1]))
    ]
    if not rows:
        return "Submit Review: Table is empty, nothing to review."

    yes_tokens = ("yes", "y", "1", "true")
    no_tokens = ("no", "n", "0", "false")
    approved = [
        row for row in rows
        if len(row) >= 6 and _cell_text(row[5]).lower() in yes_tokens
    ]
    rejected = [
        row for row in rows
        if len(row) >= 6 and _cell_text(row[5]).lower() in no_tokens
    ]

    # Group approved topic ids under their target theme name.
    theme_groups = {}
    for row in approved:
        rename_to = _cell_text(row[6]) if len(row) > 6 else ""
        theme_name = rename_to or _cell_text(row[1])
        theme_groups.setdefault(theme_name, []).append(_topic_id(row[0]))
    theme_map_str = json.dumps(theme_groups)

    reasoning_lines = [
        f" - Topic {row[0]} ({row[1]}): {row[7]}"
        for row in approved
        if len(row) > 7 and _cell_text(row[7])
    ]

    msg = (
        f"Submit Review received.\n\n"
        f"Approved topics: {len(approved)}\n"
        f"Rejected topics: {len(rejected)}\n\n"
        f"Theme groupings (RENAME TO β [topic_ids]):\n{theme_map_str}\n\n"
        f"Researcher reasoning:\n"
        + ("\n".join(reasoning_lines) if reasoning_lines else " (no reasoning provided)")
        + "\n\nPlease proceed to the next phase based on these decisions."
    )
    return msg
# ── Main Gradio App ────────────────────────────────────────────────────────────
def _run_agent_turn(history, shown_text, agent_message, thread_id):
    """Record one user/assistant exchange and return the updated history.

    ``shown_text`` is what appears in the chat as the user turn;
    ``agent_message`` is what is actually passed to ``invoke_agent``.
    """
    history = history or []
    history.append({"role": "user", "content": shown_text})
    response = invoke_agent(agent_message, thread_id)
    history.append({"role": "assistant", "content": response})
    return history


def build_app():
    """Assemble the Gradio Blocks UI and wire up its event handlers.

    The layout has a header, a phase progress bar, a CSV upload section, a
    chat section and a results section with three tabs (review table,
    charts, downloads). Every handler that talks to the agent refreshes the
    phase bar and the review table afterwards.

    Returns:
        The ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks(title="BERTopic Thematic Analysis Agent") as app:
        # ── Header ──
        gr.HTML("""
        <div style="text-align:center;padding:32px 0 16px;background:linear-gradient(180deg,#0f172a 0%,#0a0f1a 100%);">
        <div style="font-family:'IBM Plex Mono',monospace;font-size:11px;letter-spacing:0.3em;
        color:#10b981;text-transform:uppercase;margin-bottom:8px;">
        Braun & Clarke (2006) Β· BERTopic Β· PAJAIS Taxonomy
        </div>
        <h1 style="font-family:'IBM Plex Mono',monospace;font-size:28px;font-weight:700;
        color:#f1f5f9;margin:0 0 8px;">
        Thematic Analysis Agent
        </h1>
        <p style="color:#475569;font-size:14px;margin:0;">
        Agentic AI Β· LangGraph Β· Mistral LLM Β· AgglomerativeClustering (cosine, 384d)
        </p>
        </div>
        """)

        # Phase progress bar
        phase_bar = gr.HTML(value=build_phase_bar(), label="Phase Progress")

        # ── SECTION 1: Data Input ──
        gr.HTML('<div class="section-header">β  DATA INPUT</div>')
        with gr.Row():
            csv_upload = gr.File(
                label="Upload Scopus CSV Export",
                file_types=[".csv"],
                scale=2,
            )
            with gr.Column(scale=1):
                gr.HTML("""
                <div style="background:#1e293b;border-radius:12px;padding:16px;font-size:13px;color:#94a3b8;">
                <b style="color:#f1f5f9;">Required CSV Columns:</b><br>
                Authors Β· Title Β· Abstract<br>
                Author Keywords Β· Cited by<br>
                Source title Β· Year
                </div>
                """)

        # ── SECTION 2: Agent Conversation ──
        gr.HTML('<div class="section-header">β‘ AGENT CONVERSATION</div>')
        chatbot = gr.Chatbot(
            label="Thematic Analysis Agent",
            height=500,
            avatar_images=(None, "https://www.anthropic.com/favicon.ico"),
        )
        with gr.Row():
            user_input = gr.Textbox(
                placeholder="Type 'run abstract', 'run title', or any instruction...",
                label="",
                scale=5,
                lines=1,
                container=False,
            )
            send_btn = gr.Button("Send βΆ", variant="primary", scale=1)

        # ── SECTION 3: Results ──
        gr.HTML('<div class="section-header">β’ RESULTS</div>')
        with gr.Tabs():
            # Tab 1: Review Table
            with gr.TabItem("π Review Table"):
                gr.HTML("""
                <p style="color:#94a3b8;font-size:13px;margin-bottom:8px;">
                Edit <b>Approve</b> (yes/no), <b>Rename To</b>, and <b>Reasoning</b> columns.
                Then click <b>Submit Review</b> to send decisions to the agent.
                </p>
                """)
                review_table = gr.Dataframe(
                    headers=["#", "Topic Label", "Top Evidence", "Sentences", "Papers", "Approve", "Rename To", "Reasoning"],
                    datatype=["str", "str", "str", "number", "str", "str", "str", "str"],
                    row_count=10,
                    column_count=8,
                    interactive=True,
                    wrap=True,
                    label="",
                )
                submit_review_btn = gr.Button("π€ Submit Review β", variant="primary")

            # Tab 2: Charts
            with gr.TabItem("π Charts"):
                chart_dropdown = gr.Dropdown(
                    choices=get_chart_choices(),
                    label="Select Chart",
                    interactive=True,
                )
                refresh_charts_btn = gr.Button("π Refresh Chart List", variant="secondary", size="sm")
                chart_display = gr.HTML(
                    value="<p style='color:#6b7280;padding:20px;'>Charts appear after Phase 2 BERTopic analysis.</p>"
                )

            # Tab 3: Downloads
            with gr.TabItem("π₯ Download Files"):
                gr.HTML("""
                <p style="color:#94a3b8;font-size:13px;margin-bottom:8px;">
                All checkpoint files are listed below. Download for your conference paper.
                </p>
                """)
                download_files = gr.File(
                    label="Output Files",
                    file_count="multiple",
                    interactive=False,
                )
                refresh_downloads_btn = gr.Button("π Refresh Files", variant="secondary", size="sm")

        # ── State ──
        # Thread id passed to invoke_agent on every call.
        thread_state = gr.State("default")

        # ── Event: CSV Upload ──
        def on_csv_upload(file, history, thread_id):
            """Tell the agent to load the uploaded CSV, then refresh the views."""
            if file is None:
                return history, build_phase_bar(), load_review_table()
            # The upload arrives either as a path string or as an object
            # exposing the path on `.name`; accept both.
            filepath = file if isinstance(file, str) else file.name
            history = _run_agent_turn(
                history,
                f"CSV uploaded: {os.path.basename(filepath)}",
                f"load_scopus_csv filepath={filepath}",
                thread_id,
            )
            return history, build_phase_bar(), load_review_table()

        csv_upload.upload(
            on_csv_upload,
            inputs=[csv_upload, chatbot, thread_state],
            outputs=[chatbot, phase_bar, review_table],
        )

        # ── Event: Send message ──
        def on_send(message, history, thread_id):
            """Forward a chat message to the agent and clear the textbox."""
            # A missing or whitespace-only message is ignored, not sent.
            if not message or not message.strip():
                return history, "", build_phase_bar(), load_review_table()
            history = _run_agent_turn(history, message, message, thread_id)
            return history, "", build_phase_bar(), load_review_table()

        # The button click and pressing Enter in the textbox share a handler.
        send_btn.click(
            on_send,
            inputs=[user_input, chatbot, thread_state],
            outputs=[chatbot, user_input, phase_bar, review_table],
        )
        user_input.submit(
            on_send,
            inputs=[user_input, chatbot, thread_state],
            outputs=[chatbot, user_input, phase_bar, review_table],
        )

        # ── Event: Submit Review ──
        def on_submit_review(table_data, history, thread_id):
            """Send the review-table decisions to the agent."""
            msg = parse_table_to_message(table_data)
            # The chat shows a short notice; the agent receives the full message.
            history = _run_agent_turn(
                history,
                "π€ Submit Review (table decisions sent to agent)",
                msg,
                thread_id,
            )
            return history, build_phase_bar(), load_review_table()

        submit_review_btn.click(
            on_submit_review,
            inputs=[review_table, chatbot, thread_state],
            outputs=[chatbot, phase_bar, review_table],
        )

        # ── Event: Chart selection ──
        chart_dropdown.change(
            load_chart_html,
            inputs=[chart_dropdown],
            outputs=[chart_display],
        )

        def refresh_charts():
            """Re-scan the chart files and select the first entry."""
            choices = get_chart_choices()
            return gr.update(choices=choices, value=choices[0] if choices else None)

        refresh_charts_btn.click(
            refresh_charts,
            outputs=[chart_dropdown],
        )

        # ── Event: Download refresh ──
        def refresh_downloads():
            """Re-scan the downloadable files."""
            files = get_download_files()
            return gr.update(value=files)

        refresh_downloads_btn.click(
            refresh_downloads,
            outputs=[download_files],
        )

        # ── Initial load ──
        app.load(
            lambda: (build_phase_bar(), load_review_table(), get_download_files()),
            outputs=[phase_bar, review_table, download_files],
        )
    return app
# ── Launch ─────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    demo = build_app()

    # Colour palette and font passed to launch() as the theme.
    app_theme = gr.themes.Base(
        primary_hue="emerald",
        secondary_hue="slate",
        neutral_hue="slate",
        font=[gr.themes.GoogleFont("IBM Plex Mono"), "monospace"],
    )

    # Page-level CSS: dark background, section-header styling, hidden footer.
    app_css = """
    body { background: #0a0f1a !important; }
    .gradio-container { max-width: 1400px !important; background: #0a0f1a !important; }
    .section-header {
    font-size: 13px;
    font-weight: 700;
    color: #64748b;
    letter-spacing: 0.12em;
    text-transform: uppercase;
    margin-bottom: 12px;
    padding-bottom: 8px;
    border-bottom: 1px solid #1e293b;
    }
    footer { display: none !important; }
    """

    # Listen on all interfaces, port 7860, no SSR and no public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "ssr_mode": False,
        "share": False,
        "theme": app_theme,
        "css": app_css,
    }
    demo.launch(**launch_options)