| """ |
| BERTopic Thematic Analysis Agent — Production Gradio UI |
| ======================================================== |
| A dashboard-style Gradio interface for orchestrating BERTopic topic modelling |
| via an LLM-backed agent defined in agent.py. |
| |
| Layout |
| ------ |
| - Top: Header + Phase progress bar |
| - Body: Vertical cards in sequence |
| 1) Data Input |
| 2) Agent Console |
| 3) Results (Tabs: Review | Charts | Downloads) |
| |
| Fixes applied (v2) |
| ------------------ |
| - BUG 3 : submit_review() now writes parsed review rows into |
| agent_state["review_df"] BEFORE calling the agent, so |
| _parse_review_df() in agent.py always receives a populated list. |
| - ISSUE 2 : PHASES list updated to 7 labels matching the actual B&C phases |
| (was 6 labels misaligned with agent phase 0-6 mapping). |
| - ISSUE 4 : Added a startup API-key warning banner rendered in the UI when |
| MISTRAL_API_KEY is not set in the environment. |
| """ |
|
|
| |
| |
| |
| import gradio as gr |
| import pandas as pd |
| import json |
| import os |
| import shutil |
| import uuid |
| from pathlib import Path |
| from urllib.parse import quote |
|
|
| |
| |
| |
| try: |
| from tools import ( |
| extract_methods_from_pdfs, |
| OUTPUT_DIR as TOOLS_OUTPUT_DIR, |
| _load_json as tools_load_json, |
| ) |
| METHOD_TOOLS_AVAILABLE = True |
| except ImportError: |
| METHOD_TOOLS_AVAILABLE = False |
|
|
| |
| |
| |
# Prefer the real agent; fall back to an in-process stub so the UI can still
# be launched and clicked through when agent.py is not importable.
try:
    from agent import agent
except ImportError:
    AGENT_AVAILABLE = False

    class _StubAgent:
        """Stand-in used when ``agent.py`` cannot be imported."""

        def invoke(self, message: str, state: dict) -> tuple[str, dict]:
            """Echo *message* and advance ``state["phase"]`` by one.

            The phase counter is capped at 8.  The reply reports the phase
            as it was *before* the increment.  ``state`` is mutated in place
            and also returned.
            """
            phase_before = state.get("phase", 0)
            reply = (
                f"[STUB] Received: **{message}**\n\n"
                "Connect `agent.py` to get real responses. "
                f"Current phase: `{phase_before}`."
            )
            state["phase"] = min(phase_before + 1, 8)
            return reply, state

    agent = _StubAgent()
else:
    AGENT_AVAILABLE = True
|
|
| |
| |
| |
|
|
| |
# Labels of the seven progress-bar steps, in display order.
PHASES = [
    "Familiarisation",
    "Initial Codes",
    "Themes",
    "Review Themes",
    "Naming",
    "PAJAIS Mapping",
    "Report",
]

# Chart names offered by the UI.
CHART_OPTIONS = ["Intertopic Map", "Top Words", "Hierarchy", "Heatmap"]

# Column headers of the topic review table.
REVIEW_COLUMNS = [
    "#",
    "Topic Label",
    "Top Evidence",
    "Sentences",
    "Papers",
    "Approve",
    "Rename To",
    "Reasoning",
]

# Zero-row table carrying only the review headers.
EMPTY_REVIEW_DF = pd.DataFrame(columns=REVIEW_COLUMNS)

# True when the corresponding API key is unset or empty in the environment.
MISTRAL_KEY_MISSING = not os.environ.get("MISTRAL_API_KEY")
GROQ_KEY_MISSING = not os.environ.get("GROQ_API_KEY")

# Upload folders are relative to the working directory; outputs live next to
# this source file.
UPLOADS_DIR = Path("uploads")
PDF_UPLOADS_DIR = UPLOADS_DIR / "pdfs"
OUTPUTS_DIR = Path(__file__).resolve().parent / "outputs"
|
|
| |
| |
| |
| CUSTOM_CSS = """ |
| /* Fonts */ |
| @import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,400;0,9..40,500;0,9..40,600;0,9..40,700;1,9..40,400&family=DM+Mono:wght@400;500&display=swap'); |
| |
| /* Tokens */ |
| :root { |
| --bg-base: #0f1117; |
| --bg-surface: #181c27; |
| --bg-elevated: #1f2437; |
| --bg-hover: #252b3d; |
| --border: #2a3048; |
| --border-active: #4f6ef7; |
| --text-primary: #e8eaf0; |
| --text-secondary: #8b92a8; |
| --text-muted: #555f7a; |
| --accent: #4f6ef7; |
| --accent-soft: rgba(79,110,247,0.15); |
| --accent-glow: rgba(79,110,247,0.35); |
| --success: #34d399; |
| --success-soft: rgba(52,211,153,0.15); |
| --warning: #fbbf24; |
| --warning-soft: rgba(251,191,36,0.15); |
| --danger: #f87171; |
| --radius-sm: 8px; |
| --radius-md: 14px; |
| --radius-lg: 20px; |
| --shadow-card: 0 4px 24px rgba(0,0,0,0.45), 0 1px 3px rgba(0,0,0,0.3); |
| --shadow-button: 0 2px 12px rgba(79,110,247,0.4); |
| --font-ui: 'DM Sans', system-ui, sans-serif; |
| --font-mono: 'DM Mono', 'Fira Code', monospace; |
| --transition: 0.2s cubic-bezier(0.4, 0, 0.2, 1); |
| } |
| |
| body, .gradio-container { |
| background: var(--bg-base) !important; |
| color: var(--text-primary) !important; |
| font-family: var(--font-ui) !important; |
| } |
| .gradio-container { max-width: 1600px !important; padding: 0 !important; } |
| |
| /* Header */ |
| #app-header { |
| background: linear-gradient(135deg, #0f1117 0%, #181c27 50%, #1a1f32 100%); |
| border-bottom: 1px solid var(--border); |
| padding: 24px 36px 20px; |
| position: relative; |
| overflow: hidden; |
| } |
| #app-header::before { |
| content: ''; |
| position: absolute; |
| top: -60px; right: -60px; |
| width: 240px; height: 240px; |
| background: radial-gradient(circle, rgba(79,110,247,0.18) 0%, transparent 70%); |
| pointer-events: none; |
| } |
| #app-header .header-title { |
| font-size: 1.7rem; font-weight: 700; letter-spacing: -0.03em; |
| color: var(--text-primary); margin: 0 0 4px; |
| } |
| #app-header .header-subtitle { |
| font-size: 0.875rem; color: var(--text-secondary); margin: 0; |
| } |
| #app-header .header-badge { |
| display: inline-flex; align-items: center; gap: 6px; |
| background: var(--accent-soft); border: 1px solid var(--accent); |
| border-radius: 100px; padding: 3px 12px; font-size: 0.75rem; |
| font-weight: 600; color: var(--accent); margin-left: 12px; vertical-align: middle; |
| } |
| |
| /* API key warning banner */ |
| .api-warning { |
| background: var(--warning-soft); |
| border: 1px solid var(--warning); |
| border-radius: var(--radius-sm); |
| padding: 10px 16px; |
| font-size: 0.83rem; |
| font-weight: 500; |
| color: var(--warning); |
| margin: 12px 28px 0; |
| } |
| |
| /* Phase progress bar */ |
| .phase-bar-wrap { |
| display: flex; align-items: center; gap: 0; |
| margin-top: 20px; position: relative; |
| } |
| .phase-bar-wrap::before { |
| content: ''; |
| position: absolute; |
| left: 20px; right: 20px; top: 50%; |
| height: 2px; background: var(--border); |
| transform: translateY(-50%); z-index: 0; |
| } |
| .phase-item { |
| display: flex; flex-direction: column; |
| align-items: center; flex: 1; position: relative; z-index: 1; |
| } |
| .phase-dot { |
| width: 32px; height: 32px; border-radius: 50%; |
| display: flex; align-items: center; justify-content: center; |
| font-size: 0.8rem; font-weight: 700; |
| border: 2px solid var(--border); background: var(--bg-base); |
| transition: all var(--transition); |
| } |
| .phase-dot.done { background: var(--success-soft); border-color: var(--success); color: var(--success); } |
| .phase-dot.active { background: var(--accent-soft); border-color: var(--accent); color: var(--accent); |
| box-shadow: 0 0 14px var(--accent-glow); } |
| .phase-dot.pending { color: var(--text-muted); } |
| .phase-label { |
| font-size: 0.65rem; font-weight: 500; color: var(--text-muted); |
| margin-top: 6px; text-align: center; letter-spacing: 0.02em; white-space: nowrap; |
| } |
| .phase-label.active { color: var(--accent); } |
| .phase-label.done { color: var(--success); } |
| |
| /* Main body */ |
| #main-body { |
| padding: 22px 28px 32px; |
| gap: 16px !important; |
| max-width: 1160px; |
| margin: 0 auto; |
| width: 100%; |
| } |
| |
| .panel-card { |
| background: |
| radial-gradient(1200px 260px at 100% -15%, rgba(79,110,247,0.12), transparent 52%), |
| linear-gradient(180deg, rgba(31,36,55,0.9) 0%, rgba(24,28,39,0.95) 100%); |
| border: 1px solid var(--border); |
| border-radius: var(--radius-lg); |
| box-shadow: var(--shadow-card); |
| padding: 18px 18px 16px; |
| position: relative; |
| overflow: hidden; |
| margin-bottom: 2px; |
| } |
| |
| .panel-card:last-child { margin-bottom: 0; } |
| |
| .panel-card::after { |
| content: ''; |
| position: absolute; |
| inset: 0; |
| background: linear-gradient(120deg, rgba(255,255,255,0.02), transparent 25%, transparent 75%, rgba(255,255,255,0.02)); |
| pointer-events: none; |
| } |
| |
| .panel-data { margin-bottom: 2px; } |
| .panel-chat { margin-bottom: 2px; } |
| |
| /* Card titles */ |
| .card-title { |
| font-size: 0.74rem; font-weight: 700; letter-spacing: 0.1em; |
| text-transform: uppercase; color: var(--text-muted); |
| margin: 0 0 16px; display: flex; align-items: center; gap: 10px; |
| border-bottom: 1px solid var(--border); |
| padding-bottom: 12px; |
| } |
| .card-title::before { |
| content: ''; |
| width: 8px; |
| height: 8px; |
| border-radius: 50%; |
| background: var(--accent); |
| box-shadow: 0 0 10px var(--accent-glow); |
| } |
| .card-title span { font-size: 1.02rem; color: var(--text-primary); letter-spacing: 0.01em; } |
| |
| /* Stats */ |
| .stats-grid { |
| display: grid; grid-template-columns: 1fr 1fr; gap: 10px; margin-top: 12px; |
| } |
| .stat-card { |
| background: var(--bg-elevated); border: 1px solid var(--border); |
| border-radius: var(--radius-sm); padding: 12px 14px; |
| } |
| .stat-value { font-size: 1.4rem; font-weight: 700; color: var(--text-primary); line-height: 1; } |
| .stat-label { font-size: 0.72rem; color: var(--text-muted); margin-top: 4px; text-transform: uppercase; letter-spacing: 0.05em; } |
| .stat-card.accent .stat-value { color: var(--accent); } |
| .stat-card.success .stat-value { color: var(--success); } |
| |
| /* Status pill */ |
| .status-pill { |
| display: inline-flex; align-items: center; gap: 6px; |
| padding: 5px 12px; border-radius: 100px; font-size: 0.78rem; font-weight: 600; margin-top: 12px; |
| } |
| .status-pill.idle { background: rgba(139,146,168,0.12); color: var(--text-secondary); } |
| .status-pill.ready { background: var(--success-soft); color: var(--success); } |
| .status-pill.working { background: var(--accent-soft); color: var(--accent); } |
| .status-pill .dot { width: 7px; height: 7px; border-radius: 50%; background: currentColor; } |
| .status-pill.working .dot { animation: pulse-dot 1.2s ease-in-out infinite; } |
| @keyframes pulse-dot { |
| 0%, 100% { opacity: 1; transform: scale(1); } |
| 50% { opacity: 0.4; transform: scale(0.7); } |
| } |
| |
| /* Chatbot */ |
| #chatbot-container .chatbot { |
| background: var(--bg-elevated) !important; |
| border: 1px solid var(--border) !important; |
| border-radius: var(--radius-md) !important; |
| } |
| .message.user { |
| background: var(--accent-soft) !important; |
| border: 1px solid rgba(79,110,247,0.2) !important; |
| border-radius: 14px 14px 4px 14px !important; |
| color: var(--text-primary) !important; |
| font-size: 0.875rem !important; |
| } |
| .message.bot { |
| background: var(--bg-elevated) !important; |
| border: 1px solid var(--border) !important; |
| border-radius: 14px 14px 14px 4px !important; |
| color: var(--text-primary) !important; |
| font-size: 0.875rem !important; |
| } |
| |
| /* Chat input */ |
| #chat-input-row { display: flex; gap: 10px; margin-top: 12px; align-items: flex-end; } |
| #chat-input-row textarea { |
| background: var(--bg-elevated) !important; border: 1px solid var(--border) !important; |
| border-radius: var(--radius-md) !important; color: var(--text-primary) !important; |
| font-family: var(--font-ui) !important; font-size: 0.875rem !important; |
| resize: none !important; transition: border-color var(--transition) !important; |
| } |
| #chat-input-row textarea:focus { |
| border-color: var(--accent) !important; |
| box-shadow: 0 0 0 3px var(--accent-soft) !important; |
| } |
| |
| /* Buttons */ |
| .btn-primary { |
| background: var(--accent) !important; border: none !important; |
| border-radius: var(--radius-sm) !important; color: #fff !important; |
| font-family: var(--font-ui) !important; font-weight: 600 !important; |
| font-size: 0.875rem !important; padding: 10px 20px !important; |
| cursor: pointer !important; box-shadow: var(--shadow-button) !important; |
| transition: all var(--transition) !important; white-space: nowrap; |
| } |
| .btn-primary:hover { |
| background: #3d5de6 !important; |
| box-shadow: 0 4px 20px rgba(79,110,247,0.55) !important; |
| transform: translateY(-1px) !important; |
| } |
| .btn-primary:disabled { opacity: 0.45 !important; cursor: not-allowed !important; transform: none !important; } |
| |
| .btn-secondary { |
| background: var(--bg-elevated) !important; border: 1px solid var(--border) !important; |
| border-radius: var(--radius-sm) !important; color: var(--text-secondary) !important; |
| font-family: var(--font-ui) !important; font-weight: 500 !important; |
| font-size: 0.875rem !important; padding: 10px 18px !important; |
| cursor: pointer !important; transition: all var(--transition) !important; |
| } |
| .btn-secondary:hover { |
| background: var(--bg-hover) !important; border-color: var(--accent) !important; |
| color: var(--text-primary) !important; |
| } |
| |
| .btn-success { |
| background: rgba(52,211,153,0.15) !important; border: 1px solid var(--success) !important; |
| border-radius: var(--radius-sm) !important; color: var(--success) !important; |
| font-family: var(--font-ui) !important; font-weight: 600 !important; |
| font-size: 0.875rem !important; padding: 10px 20px !important; |
| cursor: pointer !important; transition: all var(--transition) !important; |
| } |
| .btn-success:hover { background: rgba(52,211,153,0.25) !important; box-shadow: 0 2px 14px rgba(52,211,153,0.3) !important; } |
| |
| /* Tabs */ |
| .tabs > .tab-nav { |
| background: var(--bg-elevated) !important; border-bottom: 1px solid var(--border) !important; |
| border-radius: var(--radius-md) var(--radius-md) 0 0 !important; |
| padding: 6px 6px 0 !important; gap: 4px !important; |
| } |
| .tabs > .tab-nav button { |
| background: transparent !important; border: none !important; |
| color: var(--text-muted) !important; font-family: var(--font-ui) !important; |
| font-size: 0.8rem !important; font-weight: 600 !important; |
| letter-spacing: 0.04em !important; padding: 8px 16px !important; |
| border-radius: var(--radius-sm) var(--radius-sm) 0 0 !important; |
| transition: all var(--transition) !important; cursor: pointer !important; |
| } |
| .tabs > .tab-nav button:hover { color: var(--text-primary) !important; background: var(--bg-hover) !important; } |
| .tabs > .tab-nav button.selected { |
| color: var(--accent) !important; background: var(--accent-soft) !important; |
| box-shadow: inset 0 -2px 0 var(--accent) !important; |
| } |
| .tabitem { |
| background: var(--bg-elevated) !important; border: 1px solid var(--border) !important; |
| border-top: none !important; border-radius: 0 0 var(--radius-md) var(--radius-md) !important; |
| padding: 16px !important; |
| } |
| |
| /* Dataframe */ |
| .dataframe-wrap { |
| overflow-x: auto !important; |
| } |
| .dataframe-wrap table { |
| font-family: var(--font-mono) !important; |
| font-size: 0.78rem !important; |
| border-collapse: collapse !important; |
| width: max-content !important; |
| min-width: 100% !important; |
| table-layout: auto !important; |
| } |
| .dataframe-wrap th { |
| background: var(--bg-elevated) !important; color: var(--text-muted) !important; |
| font-family: var(--font-ui) !important; font-size: 0.72rem !important; |
| font-weight: 600 !important; letter-spacing: 0.06em !important; |
| text-transform: uppercase !important; padding: 10px 12px !important; |
| border-bottom: 1px solid var(--border) !important; |
| min-width: 120px !important; |
| } |
| .dataframe-wrap td { |
| background: var(--bg-surface) !important; color: var(--text-primary) !important; |
| padding: 9px 12px !important; border-bottom: 1px solid var(--border) !important; |
| line-height: 1.35 !important; |
| vertical-align: top !important; |
| min-width: 120px !important; |
| } |
| .dataframe-wrap th, |
| .dataframe-wrap td { |
| white-space: nowrap !important; |
| } |
| .dataframe-wrap td > div, |
| .dataframe-wrap td > span, |
| .dataframe-wrap td > p { |
| display: block !important; |
| max-width: none !important; |
| white-space: nowrap !important; |
| overflow: visible !important; |
| text-overflow: clip !important; |
| cursor: pointer !important; |
| } |
| .dataframe-wrap td:focus-within > div, |
| .dataframe-wrap td:focus-within > span, |
| .dataframe-wrap td:focus-within > p { |
| white-space: nowrap !important; |
| overflow: hidden !important; |
| text-overflow: ellipsis !important; |
| } |
| .dataframe-wrap textarea, |
| .dataframe-wrap input[type="text"] { |
| white-space: nowrap !important; |
| overflow-wrap: normal !important; |
| word-break: normal !important; |
| overflow-x: auto !important; |
| width: 100% !important; |
| min-width: 160px !important; |
| box-sizing: border-box !important; |
| } |
| .dataframe-wrap textarea { |
| min-height: 38px !important; |
| height: 38px !important; |
| max-height: 38px !important; |
| overflow-y: hidden !important; |
| resize: none !important; |
| } |
| .dataframe-wrap tr:hover td { background: var(--bg-hover) !important; } |
| .dataframe-wrap input[type="checkbox"] { |
| appearance: auto !important; |
| accent-color: var(--accent) !important; |
| cursor: pointer !important; |
| width: 16px; |
| height: 16px; |
| } |
| |
| /* Chart frame */ |
| .chart-frame { |
| width: 100%; min-height: 420px; border: 1px solid var(--border); |
| border-radius: var(--radius-md); background: var(--bg-elevated); overflow: hidden; |
| } |
| |
| /* Vertical card spacing on small screens */ |
| @media (max-width: 900px) { |
| #main-body { |
| padding: 14px 12px 20px; |
| gap: 12px !important; |
| } |
| .panel-card { |
| padding: 14px 12px; |
| border-radius: var(--radius-md); |
| } |
| .chart-frame { min-height: 320px; } |
| } |
| |
| /* Download list */ |
| .file-list-item { |
| display: flex; align-items: center; gap: 10px; |
| background: var(--bg-elevated); border: 1px solid var(--border); |
| border-radius: var(--radius-sm); padding: 10px 14px; margin-bottom: 8px; |
| transition: all var(--transition); |
| } |
| .file-list-item:hover { border-color: var(--accent); background: var(--bg-hover); } |
| .file-icon { font-size: 1.1rem; } |
| .file-name { font-size: 0.83rem; color: var(--text-primary); flex: 1; font-family: var(--font-mono); } |
| .file-size { font-size: 0.72rem; color: var(--text-muted); } |
| |
| /* Misc Gradio overrides */ |
| label, .label-wrap { color: var(--text-secondary) !important; font-family: var(--font-ui) !important; font-size: 0.8rem !important; } |
| input:not([type="checkbox"]), textarea { background: var(--bg-elevated) !important; color: var(--text-primary) !important; border-color: var(--border) !important; } |
| .gr-form:not(.panel-card), .gr-box:not(.panel-card) { background: transparent !important; border: none !important; } |
| footer { display: none !important; } |
| select { background: var(--bg-elevated) !important; border: 1px solid var(--border) !important; border-radius: var(--radius-sm) !important; color: var(--text-primary) !important; font-family: var(--font-ui) !important; font-size: 0.875rem !important; padding: 8px 12px !important; } |
| |
| /* Animations */ |
| .fade-in { animation: fadeIn 0.35s ease-out both; } |
| @keyframes fadeIn { from { opacity: 0; transform: translateY(8px); } to { opacity: 1; transform: none; } } |
| |
| /* Scrollbar */ |
| ::-webkit-scrollbar { width: 6px; height: 6px; } |
| ::-webkit-scrollbar-track { background: var(--bg-base); } |
| ::-webkit-scrollbar-thumb { background: #2d3550; border-radius: 3px; } |
| ::-webkit-scrollbar-thumb:hover { background: #3d4770; } |
| """ |
|
|
| |
| |
| |
| |
def build_phase_html(current_phase: int) -> str:
    """
    Render the page header together with the 7-step phase progress bar.

    ``current_phase`` is the agent's phase number:

    - 0: nothing started yet, every step is rendered as pending;
    - 1-7: that step is active, all earlier steps are done;
    - 8 (or anything above 7): every step is rendered as done.

    Returns the complete ``#app-header`` HTML fragment.
    """
    items = []
    for phase_number, label in enumerate(PHASES, start=1):
        if phase_number < current_phase:
            # Completed steps show a "v" marker instead of their number
            # (presumably a stand-in for a check mark).
            dot_cls, lbl_cls, icon = "done", "done", "v"
        elif phase_number == current_phase:
            dot_cls, lbl_cls, icon = "active", "active", str(phase_number)
        else:
            dot_cls, lbl_cls, icon = "pending", "", str(phase_number)

        items.append(f"""
        <div class="phase-item">
          <div class="phase-dot {dot_cls}">{icon}</div>
          <div class="phase-label {lbl_cls}">{label}</div>
        </div>""")

    inner = "\n".join(items)
    # The dash is written as an HTML entity so the subtitle survives any
    # non-UTF-8 handling of this source file.
    return f"""
    <div id="app-header">
      <div style="display:flex;align-items:baseline;gap:4px;">
        <span class="header-title">BERTopic Thematic Analysis Agent</span>
        <span class="header-badge">AI-Powered</span>
      </div>
      <p class="header-subtitle">
        End-to-end topic modelling &mdash; upload a Scopus corpus, run the agent, review topics.
      </p>
      <div class="phase-bar-wrap">
        {inner}
      </div>
    </div>"""
|
|
|
|
| |
| |
| |
def build_stats_html(rows: int, cols: int, filename: str) -> str:
    """Render the dataset summary shown after a file has been loaded.

    Args:
        rows: Number of rows; rendered with thousands separators.
        cols: Number of columns.
        filename: Name displayed in the "ready" status pill.

    Returns:
        An HTML fragment with two stat cards and a status pill.
    """
    from html import escape

    # The file name comes from the upload, so it must not be able to inject
    # markup into the page.
    safe_name = escape(str(filename))
    return f"""
    <div class="stats-grid fade-in">
      <div class="stat-card accent">
        <div class="stat-value">{rows:,}</div>
        <div class="stat-label">Rows</div>
      </div>
      <div class="stat-card">
        <div class="stat-value">{cols}</div>
        <div class="stat-label">Columns</div>
      </div>
    </div>
    <div class="status-pill ready" style="margin-top:14px;">
      <div class="dot"></div>
      {safe_name}
    </div>"""
|
|
|
|
| |
| |
| |
def build_file_list_html(paths: list[str]) -> str:
    """Render the list of generated files for the Downloads tab.

    Args:
        paths: File paths to list.  Paths that do not exist (or cannot be
            inspected) are still listed, just without a size.

    Returns:
        One ``file-list-item`` row per path joined by newlines, or a muted
        placeholder paragraph when ``paths`` is empty.
    """
    from html import escape

    if not paths:
        return "<p style='color:var(--text-muted);font-size:0.83rem;padding:8px 0;'>No files generated yet.</p>"

    # Short text badge shown in front of each file, keyed by lower-case suffix.
    badges = {".csv": "CSV", ".json": "JSON", ".html": "HTML", ".png": "IMG", ".xlsx": "XLS", ".txt": "TXT"}

    rows = []
    for raw_path in paths:
        path = Path(raw_path)
        badge = badges.get(path.suffix.lower(), "FILE")

        # A single stat() call avoids the exists()/stat() race when a file
        # disappears between the two checks.
        try:
            n_bytes = path.stat().st_size
        except (OSError, ValueError):
            size = ""
        else:
            size = f"{n_bytes / 1024:.1f} KB" if n_bytes < 1_048_576 else f"{n_bytes / 1_048_576:.1f} MB"

        # File names may contain markup characters; escape before embedding.
        rows.append(f"""
        <div class="file-list-item fade-in">
          <span class="file-icon" style="font-size:0.7rem;background:var(--accent-soft);color:var(--accent);
                padding:2px 5px;border-radius:4px;font-family:var(--font-mono);font-weight:600;">{badge}</span>
          <span class="file-name">{escape(path.name)}</span>
          <span class="file-size">{size}</span>
        </div>""")
    return "\n".join(rows)
|
|
|
|
| |
| |
| |
def build_cluster_stats_html(agent_state: dict) -> str:
    """Render a before/after comparison of the clustering optimisation rounds.

    Reads ``OUTPUTS_DIR / <run_key> / "optimization.json"`` where ``run_key``
    is taken from ``agent_state`` (default ``"abstract"``).  The file is
    expected to contain a list of round objects, each with ``params`` and
    ``metrics`` mappings.  The first and last rounds are compared; the one
    with more clusters is shown in the "before" column (the first round wins
    a tie).

    The function is best-effort: an unreadable file, non-object rounds, or
    missing / non-numeric metric values degrade to placeholders or zeros
    instead of raising.

    Returns:
        An HTML fragment (a muted notice when no usable stats exist).
    """
    from html import escape

    def _notice(text: str) -> str:
        # Muted one-line message used for every "nothing to show" case.
        return (
            "<p style='color:var(--text-muted);font-size:0.83rem;padding:6px 0 2px;'>"
            f"{text}"
            "</p>"
        )

    def _to_int(value: object) -> int:
        # Tolerate null / non-numeric JSON values.
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return 0

    def _to_float(value: object) -> float:
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    def _cluster_count(round_: dict) -> int:
        metrics = round_.get("metrics")
        return _to_int(metrics.get("n_clusters", 0)) if isinstance(metrics, dict) else 0

    def _metrics_block(metrics: object) -> str:
        if not isinstance(metrics, dict):
            return "<div style='color:var(--text-muted);'>No metrics</div>"
        return (
            "<div style='display:grid;gap:4px;font-size:0.78rem;'>"
            f"<div>Clusters: <b>{_to_int(metrics.get('n_clusters', 0))}</b></div>"
            f"<div>Noise ratio: <b>{_to_float(metrics.get('noise_ratio', 0.0)):.2f}</b></div>"
            f"<div>Min/Med/Mean/Max size: <b>{_to_float(metrics.get('min_size', 0)):.0f}</b> / "
            f"<b>{_to_float(metrics.get('median_size', 0)):.0f}</b> / "
            f"<b>{_to_float(metrics.get('mean_size', 0)):.0f}</b> / "
            f"<b>{_to_float(metrics.get('max_size', 0)):.0f}</b></div>"
            "</div>"
        )

    def _params_line(params: object) -> str:
        if not isinstance(params, dict):
            return ""
        return ", ".join(
            f"{key}={escape(str(params.get(key, '')))}"
            for key in ("min_cluster_size", "max_cluster_size", "min_samples")
        )

    run_key = agent_state.get("run_key", "abstract")
    opt_path = OUTPUTS_DIR / run_key / "optimization.json"
    if not opt_path.exists():
        return _notice(
            "No clustering stats yet. Run topic discovery to generate optimization stats."
        )

    try:
        rounds = json.loads(opt_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Unreadable, badly encoded or invalid JSON: treat as "no data".
        rounds = []

    # Keep only well-formed round objects so one bad entry cannot break the panel.
    if isinstance(rounds, list):
        rounds = [entry for entry in rounds if isinstance(entry, dict)]
    else:
        rounds = []
    if not rounds:
        return _notice("Optimization stats are unavailable or empty.")

    before_round, after_round = rounds[0], rounds[-1]
    # The "before" column is labelled "(more)", so show the round with more
    # clusters there even if it happens to be the last one.
    if _cluster_count(after_round) > _cluster_count(before_round):
        before_round, after_round = after_round, before_round

    before_label = "Before optimization (more)"
    after_label = "After optimization (less)" if len(rounds) > 1 else "After optimization (no change)"

    card_style = (
        "background:var(--bg-elevated);border:1px solid var(--border);"
        "border-radius:10px;padding:10px 12px;"
    )
    return f"""
    <div style='display:grid;gap:10px;'>
      <div style='font-size:0.82rem;color:var(--text-secondary);font-weight:600;'>Cluster stats</div>
      <div style='display:grid;grid-template-columns:1fr 1fr;gap:12px;'>
        <div style='{card_style}'>
          <div style='font-size:0.78rem;color:var(--text-secondary);margin-bottom:6px;'>{before_label}</div>
          <div style='font-size:0.74rem;color:var(--text-muted);margin-bottom:6px;'>
            {_params_line(before_round.get('params', {}))}
          </div>
          {_metrics_block(before_round.get('metrics', {}))}
        </div>
        <div style='{card_style}'>
          <div style='font-size:0.78rem;color:var(--text-secondary);margin-bottom:6px;'>{after_label}</div>
          <div style='font-size:0.74rem;color:var(--text-muted);margin-bottom:6px;'>
            {_params_line(after_round.get('params', {}))}
          </div>
          {_metrics_block(after_round.get('metrics', {}))}
        </div>
      </div>
    </div>"""
|
|
|
|
| |
| |
| |
def build_cluster_info_html(agent_state: dict) -> str:
    """Render one collapsible card per labelled cluster.

    Reads ``summaries.json`` and, when present, ``labels.json`` from
    ``OUTPUTS_DIR / <run_key>`` (``run_key`` comes from ``agent_state`` and
    defaults to ``"abstract"``).  A cluster's label is the first non-empty of
    ``adjudicated_label``, ``mistral_label`` and ``label`` in its labels row;
    clusters without a label are omitted.

    All text taken from the JSON files is HTML-escaped before being embedded.
    Unreadable files and malformed entries are skipped rather than raising.

    Returns:
        An HTML fragment (a muted notice when there is nothing to show).
    """
    from html import escape

    def _notice(text: str) -> str:
        return (
            "<p style='color:var(--text-muted);font-size:0.83rem;padding:6px 0 2px;'>"
            f"{text}"
            "</p>"
        )

    def _read_json(path) -> object:
        try:
            return json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Unreadable, badly encoded or invalid JSON: treat as "no data".
            return []

    def _to_int(value: object, default: int) -> int:
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return default

    def _escape_html(text: object) -> str:
        # Only &, < and > need escaping because values are used as element
        # text, never inside attributes.
        return escape(str(text or ""), quote=False)

    def _format_papers(papers: object) -> str:
        # Up to three "title (count)" entries separated by "; ".
        if not isinstance(papers, list):
            return ""
        formatted = []
        for entry in papers[:3]:
            if not isinstance(entry, dict):
                continue
            title = str(entry.get("paper_title") or entry.get("title") or "").strip()
            if not title:
                continue
            count = entry.get("count")
            formatted.append(
                f"{_escape_html(title)} ({_escape_html(count)})" if count else _escape_html(title)
            )
        return "; ".join(formatted)

    def _cluster_card(summary: object) -> str:
        if not isinstance(summary, dict):
            return ""
        cid = _to_int(summary.get("cluster_id", -1), -1)
        label = _escape_html(label_by_id.get(cid, ""))
        if not label:
            # Unlabelled clusters are intentionally left out of the list.
            return ""

        size = _to_int(summary.get("size", 0), 0)
        evidence = summary.get("evidence", [])
        if isinstance(evidence, str):
            top_evidence = _escape_html(evidence)
        elif isinstance(evidence, (list, tuple)) and evidence:
            top_evidence = _escape_html(evidence[0])
        else:
            top_evidence = ""
        paper_count = _escape_html(summary.get("paper_count", ""))
        top_papers = _format_papers(summary.get("top_papers", []))

        return (
            "<details style='background:var(--bg-elevated);border:1px solid var(--border);"
            "border-radius:10px;padding:10px 12px;'>"
            "<summary style='cursor:pointer;font-size:0.84rem;font-weight:600;color:var(--text-primary);'>"
            f"Cluster {cid} &mdash; {label} ({size} sentences)</summary>"
            "<div style='margin-top:8px;font-size:0.78rem;color:var(--text-secondary);display:grid;gap:6px;'>"
            f"<div><b>Top evidence:</b> {top_evidence}</div>"
            f"<div><b>Papers:</b> {paper_count} | {top_papers}</div>"
            "</div>"
            "</details>"
        )

    run_key = agent_state.get("run_key", "abstract")
    summaries_path = OUTPUTS_DIR / run_key / "summaries.json"
    labels_path = OUTPUTS_DIR / run_key / "labels.json"

    if not summaries_path.exists():
        return _notice("No clusters yet. Run topic discovery to generate cluster summaries.")

    summaries = _read_json(summaries_path)
    labels = _read_json(labels_path) if labels_path.exists() else []
    if not isinstance(labels, list):
        labels = []

    # Later rows win when the same cluster id appears more than once.
    label_by_id = {}
    for row in labels:
        if not isinstance(row, dict):
            continue
        label_by_id[_to_int(row.get("cluster_id", -1), -1)] = (
            row.get("adjudicated_label")
            or row.get("mistral_label")
            or row.get("label")
            or ""
        )

    if not isinstance(summaries, list) or not summaries:
        return _notice("Cluster summaries are empty.")

    cards = "\n".join(filter(None, map(_cluster_card, summaries)))
    if not cards:
        return _notice("No labeled clusters yet. Run labeling or VERIFY to populate labels.")
    return (
        "<div style='display:grid;gap:10px;'>"
        "<div style='font-size:0.82rem;color:var(--text-secondary);font-weight:600;'>"
        "Cluster details</div>"
        f"{cards}"
        "</div>"
    )
|
|
|
|
| |
| |
| |
def build_placeholder_chart(chart_type: str) -> str:
    """Return the "no chart yet" placeholder for the given chart type.

    The placeholder shows the chart name, a hint to run the agent, and a small
    animated bar whose colour depends on ``chart_type`` (unknown types use the
    default accent blue).
    """
    default_accent = "#4f6ef7"
    accent_by_chart = {
        "Intertopic Map": default_accent,
        "Top Words": "#34d399",
        "Hierarchy": "#fbbf24",
        "Heatmap": "#f87171",
    }
    bar_colour = accent_by_chart.get(chart_type, default_accent)

    # The leading empty entry reproduces the newline that opens the fragment.
    markup_lines = [
        "",
        '<div class="chart-frame" style="display:flex;align-items:center;justify-content:center;flex-direction:column;gap:10px;">',
        '<div style="font-size:2rem;color:var(--text-muted);">CHART</div>',
        f'<div style="color:var(--text-secondary);font-size:0.9rem;font-weight:600;">{chart_type}</div>',
        '<div style="color:var(--text-muted);font-size:0.78rem;">Run the agent to generate this chart.</div>',
        '<div style="width:180px;height:4px;background:var(--border);border-radius:2px;margin-top:6px;">',
        f'<div style="width:0%;height:4px;background:{bar_colour};border-radius:2px;animation:grow 2s ease-in-out infinite alternate;"></div>',
        "</div>",
        "</div>",
        "<style>@keyframes grow { from{width:0%} to{width:75%} }</style>",
    ]
    return "\n".join(markup_lines)
|
|
|
|
| |
| |
| |
|
|
def build_method_stats_html(result: dict) -> str:
    """Build the stats HTML for a method-extraction result.

    Args:
        result: Mapping returned by the extraction step.  ``n_papers`` and
            ``n_extracted`` are displayed (both default to 0).  An empty
            value, a value that is not a mapping, or a mapping with a truthy
            ``error`` entry yields the "upload PDFs" hint instead.

    Returns:
        An HTML fragment: either the hint paragraph or a two-card stats grid.
    """
    # Anything that is not a usable mapping is treated like "no result yet"
    # instead of failing on ``.get``.
    if not isinstance(result, dict) or not result or result.get("error"):
        return (
            "<p style='color:var(--text-muted);font-size:0.83rem;padding:6px 0;'>"
            "Upload PDFs and click <b>Run Method Extraction</b> to start."
            "</p>"
        )

    n_papers = result.get("n_papers", 0)
    n_extracted = result.get("n_extracted", 0)
    return f"""
    <div class="stats-grid fade-in" style="grid-template-columns:1fr 1fr;">
      <div class="stat-card accent">
        <div class="stat-value">{n_papers}</div>
        <div class="stat-label">PDFs Processed</div>
      </div>
      <div class="stat-card success">
        <div class="stat-value">{n_extracted}</div>
        <div class="stat-label">Methods Identified</div>
      </div>
    </div>
    """
|
|
|
|
def get_method_results_df() -> pd.DataFrame:
    """Load the per-paper method summary table.

    Reads ``OUTPUTS_DIR / "methods" / "method_summary.csv"``.  The result
    always has exactly the three expected columns in a fixed order; columns
    missing from the file are filled with empty strings.  A missing or
    unreadable file yields an empty frame with those columns.
    """
    expected = ["Paper ID", "Paper Title", "Computational Methods"]
    source = OUTPUTS_DIR / "methods" / "method_summary.csv"

    if not source.exists():
        return pd.DataFrame(columns=expected)

    try:
        frame = pd.read_csv(source)
    except Exception:
        # Best effort: any read/parse problem falls back to the empty table.
        return pd.DataFrame(columns=expected)

    absent = [name for name in expected if name not in frame.columns]
    for name in absent:
        frame[name] = ""
    return frame[expected]
|
|
|
|
def get_method_technique_df() -> pd.DataFrame:
    """Load the technique-to-papers summary table.

    Reads ``OUTPUTS_DIR / "methods" / "technique_to_papers.csv"``.  The result
    always has exactly the three expected columns in a fixed order; columns
    missing from the file are filled with empty strings.  A missing or
    unreadable file yields an empty frame with those columns.
    """
    expected = ["Main Computational Technique", "Algorithms", "Papers"]
    source = OUTPUTS_DIR / "methods" / "technique_to_papers.csv"

    if not source.exists():
        return pd.DataFrame(columns=expected)

    try:
        frame = pd.read_csv(source)
    except Exception:
        # Best effort: any read/parse problem falls back to the empty table.
        return pd.DataFrame(columns=expected)

    absent = [name for name in expected if name not in frame.columns]
    for name in absent:
        frame[name] = ""
    return frame[expected]
|
|
|
|
def get_method_download_file() -> "list[str] | None":
    """Return the downloadable technique CSV, if it has been generated.

    Returns:
        A one-element list holding the path of
        ``OUTPUTS_DIR / "methods" / "technique_to_papers.csv"`` when that file
        exists, otherwise ``None``.
    """
    technique_path = OUTPUTS_DIR / "methods" / "technique_to_papers.csv"
    if technique_path.exists():
        return [str(technique_path)]
    return None
|
|
|
|
| |
| |
| |
|
|
def handle_pdf_upload(file_objs):
    """Copy uploaded PDFs into ``PDF_UPLOADS_DIR``, replacing earlier uploads.

    Args:
        file_objs: Uploaded items; each is either an object with a ``name``
            attribute holding the file path, or a plain path.  Items whose
            suffix is not ``.pdf`` (case-insensitive) are ignored.

    Returns:
        A ``(status_html, stats_html)`` tuple.  With no uploads the idle pill
        and a hint are returned and the upload directory is left untouched.
    """
    if not file_objs:
        return (
            "<div class='status-pill idle'><div class='dot'></div>No PDFs uploaded</div>",
            "<p style='color:var(--text-muted);font-size:0.83rem;'>Upload PDF research papers to extract methods.</p>",
        )

    PDF_UPLOADS_DIR.mkdir(parents=True, exist_ok=True)

    # Remove PDFs left over from a previous upload.  The suffix check is
    # case-insensitive, matching the acceptance rule below, so files such as
    # "paper.Pdf" are cleaned up too.  The listing is materialised first
    # because entries are deleted while walking it.
    for stale in list(PDF_UPLOADS_DIR.iterdir()):
        if stale.is_file() and stale.suffix.lower() == ".pdf":
            stale.unlink(missing_ok=True)

    count = 0
    for item in file_objs:
        src = Path(item.name) if hasattr(item, "name") else Path(item)
        if src.suffix.lower() != ".pdf":
            continue
        # A short random prefix keeps same-named uploads from overwriting
        # each other.
        dst = PDF_UPLOADS_DIR / f"{uuid.uuid4().hex[:8]}_{src.name}"
        shutil.copy2(src, dst)
        count += 1

    status = f"<div class='status-pill ready'><div class='dot'></div>{count} PDFs ready</div>"
    stats = f"""
    <div class="stats-grid fade-in">
      <div class="stat-card accent">
        <div class="stat-value">{count}</div>
        <div class="stat-label">PDFs Uploaded</div>
      </div>
    </div>"""
    return status, stats
|
|
|
|
def run_method_extraction_pipeline():
    """Run method extraction over the uploaded PDFs and build the UI outputs.

    Returns:
        A 4-tuple ``(stats_html, status_html, technique_df, download_files)``.
        The last two items are always re-read via ``get_method_technique_df()``
        and ``get_method_download_file()``.

    Failures (tools not importable, no PDFs present, the tool raising, or the
    tool returning a dict with a truthy ``"error"``) are reported through the
    first two HTML items instead of propagating.
    """
    from html import escape  # local import keeps this block self-contained

    def _outputs(stats_html, status_html):
        # Every exit path refreshes the table and the download list the same way.
        return (
            stats_html,
            status_html,
            get_method_technique_df(),
            get_method_download_file(),
        )

    if not METHOD_TOOLS_AVAILABLE:
        return _outputs(
            build_method_stats_html({"error": True}),
            "<div class='status-pill idle'><div class='dot'></div>Tools unavailable</div>",
        )

    pdf_dir = str(PDF_UPLOADS_DIR.resolve())
    # Match the extension case-insensitively so the check agrees with the
    # upload handler, which accepts any casing of ".pdf".
    has_pdfs = PDF_UPLOADS_DIR.is_dir() and any(
        entry.suffix.lower() == ".pdf" for entry in PDF_UPLOADS_DIR.iterdir()
    )
    if not has_pdfs:
        return _outputs(
            "<p style='color:var(--danger);font-size:0.83rem;'>No PDFs found. Upload PDFs first.</p>",
            "<div class='status-pill idle'><div class='dot'></div>No PDFs</div>",
        )

    try:
        result = extract_methods_from_pdfs.invoke({"pdf_dir": pdf_dir})
    except Exception as exc:
        # UI boundary: route the failure through the same error display as a
        # tool-reported error rather than leaving the status pill stale.
        result = {"error": f"{type(exc).__name__}: {exc}"}

    if isinstance(result, dict) and result.get("error"):
        # The message may contain file names or model output; escape it
        # before embedding it in HTML.
        message = escape(str(result["error"]))
        return _outputs(
            f"<p style='color:var(--danger);font-size:0.83rem;'>{message}</p>",
            "<div class='status-pill idle'><div class='dot'></div>Extraction failed</div>",
        )

    return _outputs(
        build_method_stats_html(result),
        "<div class='status-pill ready'><div class='dot'></div>Extraction complete</div>",
    )
|
|
|
|
| |
| |
| |
|
|
def _persist_upload(file_obj) -> Path:
    """Copy an uploaded file into ``UPLOADS_DIR`` and return its resolved path.

    The source path is read from ``file_obj.name``.  The copy is stored under
    a name made of a 10-character random hex prefix, an underscore and the
    original file name.
    """
    original = Path(file_obj.name)
    UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
    unique_prefix = uuid.uuid4().hex[:10]
    stable_copy = UPLOADS_DIR / f"{unique_prefix}_{original.name}"
    shutil.copy2(original, stable_copy)
    return stable_copy.resolve()
|
|
def handle_file_upload(file_obj, agent_state):
    """Persist and parse an uploaded CSV and record it in ``agent_state``.

    Args:
        file_obj: The uploaded file (its path is read from ``file_obj.name``),
            or ``None`` when no file is selected.
        agent_state: Mutable per-session state dict.  On success the keys
            ``file_path``, ``file_name``, ``rows`` and ``cols`` are written.

    Returns:
        ``(stats_html, status_html, agent_state, phase_html)``.

    On failure the status pill now reports the failure (it previously still
    said "File ready"), and the four file keys are removed from
    ``agent_state`` so callers that test for ``"file_path"`` do not act on a
    previous upload.
    """
    from html import escape  # local import keeps this block self-contained

    if file_obj is None:
        return (
            "<p style='color:var(--text-muted);font-size:0.83rem;'>No file selected.</p>",
            "<div class='status-pill idle'><div class='dot'></div>Awaiting upload</div>",
            agent_state,
            build_phase_html(agent_state.get("phase", 0)),
        )

    persisted = None
    try:
        persisted = _persist_upload(file_obj)
        df = pd.read_csv(persisted)
        rows, cols = df.shape
        filename = Path(file_obj.name).name
        stats_html = build_stats_html(rows, cols, filename)
    except Exception as exc:
        # Do not keep a copy of a file that could not be used.
        if persisted is not None:
            try:
                persisted.unlink(missing_ok=True)
            except OSError:
                pass
        # Drop any record of an earlier upload: the selected file has changed
        # and the state must not keep pointing at the old one.
        for stale_key in ("file_path", "file_name", "rows", "cols"):
            agent_state.pop(stale_key, None)
        return (
            # The message can contain the file name or CSV content; escape it.
            f"<p style='color:var(--danger);font-size:0.83rem;'>Upload error: {escape(str(exc))}</p>",
            "<div class='status-pill idle'><div class='dot'></div>Upload failed</div>",
            agent_state,
            build_phase_html(agent_state.get("phase", 0)),
        )

    agent_state["file_path"] = str(persisted)
    agent_state["file_name"] = filename
    agent_state["rows"] = rows
    agent_state["cols"] = cols

    status_html = "<div class='status-pill ready'><div class='dot'></div>File ready</div>"
    phase_html = build_phase_html(agent_state.get("phase", 0))
    return stats_html, status_html, agent_state, phase_html
|
|
|
|
def handle_chat(user_message: str, chat_history: list, agent_state: dict):
    """Run one user turn through the agent, yielding UI snapshots as it goes.

    This is a generator.  Every item it yields is a
    ``(chat_history, agent_state, phase_html)`` tuple:

    * a blank message yields the unchanged inputs once and stops;
    * otherwise the user turn plus a "Thinking..." assistant placeholder is
      yielded first, then the same history with the placeholder replaced by
      the final text (the agent reply, an agent-error message, or a notice
      that the recorded CSV path no longer exists).
    """

    def _snapshot():
        # Reads the current bindings, so it reflects any reassignment below.
        return chat_history, agent_state, build_phase_html(agent_state.get("phase", 0))

    if not user_message.strip():
        yield _snapshot()
        return

    pending_turn = [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": "Thinking..."},
    ]
    chat_history = chat_history + pending_turn
    assistant_slot = chat_history[-1]
    yield _snapshot()

    recorded_path = agent_state.get("file_path")
    if recorded_path and not Path(recorded_path).exists():
        assistant_slot["content"] = (
            "Uploaded CSV is no longer available on disk. "
            "Please upload the file again and retry."
        )
        yield _snapshot()
        return

    try:
        reply, agent_state = agent.invoke(user_message, agent_state)
    except Exception as exc:
        # Surface the failure in the chat instead of breaking the UI event.
        reply = f"Agent error: `{exc}`"

    assistant_slot["content"] = reply
    yield _snapshot()
|
|
|
|
def auto_trigger_agent(agent_state: dict, chat_history: list):
    """Send an automatic kick-off message to the agent after a file upload.

    The message names the uploaded file and its row count, both read from
    ``agent_state``.  ``handle_chat`` is driven to completion and only its
    last yielded item is returned.
    """
    dataset_name = agent_state.get("file_name", "uploaded file")
    row_count = agent_state.get("rows", 0)
    kickoff = (
        f"A dataset has been uploaded: **{dataset_name}** ({row_count:,} rows). "
        "Please start the thematic analysis pipeline."
    )

    final_snapshot = []
    for final_snapshot in handle_chat(kickoff, chat_history, agent_state):
        pass  # intermediate snapshots are discarded; only the last one is kept
    return final_snapshot
|
|
|
|
def refresh_review_table(agent_state: dict):
    """Build the update for the review table from ``agent_state["review_df"]``.

    When the state holds review rows that convert to a DataFrame, that frame
    is shown; otherwise a copy of ``EMPTY_REVIEW_DF`` is shown.  The table is
    marked interactive in both cases.
    """
    stored_rows = agent_state.get("review_df", [])
    if not stored_rows:
        return gr.update(value=EMPTY_REVIEW_DF.copy(), interactive=True)

    try:
        return gr.update(value=pd.DataFrame(stored_rows), interactive=True)
    except Exception:
        # Rows that cannot be tabulated fall back to the empty template.
        return gr.update(value=EMPTY_REVIEW_DF.copy(), interactive=True)
|
|
|
|
def submit_review(review_df, agent_state: dict, chat_history: list):
    """Store the edited review table in the state, then advance the agent.

    The table rows are written to ``agent_state["review_df"]`` (as a list of
    record dicts) and ``agent_state["review_submitted"]`` is set to ``True``
    before the agent is called.  The message sent to the agent depends on
    ``agent_state["stop_gate"]``; an unrecognised or missing gate gets a
    generic "next phase" message.

    Returns:
        The last ``(chat_history, agent_state, phase_html)`` item yielded by
        ``handle_chat``.
    """
    # Ordered (gate, message) pairs, checked top to bottom.
    gate_messages = (
        (
            "STOP_GATE_1_AWAIT_REVIEW_TABLE",
            "Review table submitted. Please proceed to Phase 3 and consolidate themes.",
        ),
        (
            "STOP_GATE_2_AWAIT_THEME_MERGE",
            "Theme merge confirmed. Please proceed to Phase 4 for saturation check.",
        ),
        (
            "STOP_GATE_3_AWAIT_SATURATION_SIGNOFF",
            "Saturation sign-off confirmed. Please proceed to Phase 5 for naming themes.",
        ),
        (
            "STOP_GATE_4_AWAIT_TAXONOMY_REVIEW",
            "Taxonomy review confirmed. Please proceed to Phase 6 to finalize outputs.",
        ),
    )

    # The state must carry the rows before the agent runs.
    agent_state["review_df"] = review_df.to_dict(orient="records")
    agent_state["review_submitted"] = True

    active_gate = agent_state.get("stop_gate")
    msg = "Review table submitted. Please proceed to the next phase."
    for gate_name, gate_text in gate_messages:
        if active_gate == gate_name:
            msg = gate_text
            break

    final_snapshot = []
    for final_snapshot in handle_chat(msg, chat_history, agent_state):
        pass  # keep only the last snapshot
    new_history, new_state, phase_html = final_snapshot
    return new_history, new_state, phase_html
|
|
|
|
def auto_accept_review(agent_state: dict, chat_history: list, enabled: bool):
    """Approve every review row and submit it, when auto-accept applies.

    Nothing is submitted (the inputs are returned with a freshly rendered
    phase bar) unless all of the following hold: ``enabled`` is truthy, the
    current stop gate is ``STOP_GATE_1_AWAIT_REVIEW_TABLE``, the review has
    not already been submitted, this gate was not already auto-accepted, and
    the state contains review rows.

    Otherwise every row is marked approved, blank "Rename To" cells are
    filled from "Topic Label", the table is passed to ``submit_review``, and
    the gate is recorded in ``auto_accept_last_gate`` on the returned state.
    """

    def _no_op():
        return chat_history, agent_state, build_phase_html(agent_state.get("phase", 0))

    def _effective_label(row):
        # Keep a user-provided rename; otherwise fall back to the topic label.
        return row["Rename To"] or row["Topic Label"]

    if not enabled:
        return _no_op()

    gate = agent_state.get("stop_gate")
    nothing_to_do = (
        gate != "STOP_GATE_1_AWAIT_REVIEW_TABLE"
        or agent_state.get("review_submitted")
        or agent_state.get("auto_accept_last_gate") == gate
    )
    if nothing_to_do:
        return _no_op()

    rows = agent_state.get("review_df", [])
    if not rows:
        return _no_op()

    table = pd.DataFrame(rows)
    present = table.columns
    if "Approve" in present:
        table["Approve"] = True
    if "Rename To" in present and "Topic Label" in present:
        table["Rename To"] = table["Rename To"].fillna("").astype(str)
        table["Rename To"] = table.apply(_effective_label, axis=1)

    new_history, new_state, phase_html = submit_review(table, agent_state, chat_history)
    new_state["auto_accept_last_gate"] = gate
    return new_history, new_state, phase_html
|
|
|
|
def refresh_downloads(agent_state: dict):
    """Return the file-list HTML and the existing output files from the state.

    Returns:
        ``(html, files)`` where ``html`` is rendered from every path in
        ``agent_state["output_files"]`` and ``files`` is the list of those
        paths that exist on disk, or ``None`` when none of them do.
    """
    listed = agent_state.get("output_files", [])
    listing_html = build_file_list_html(listed)
    on_disk = [path for path in listed if os.path.exists(path)]
    if not on_disk:
        return listing_html, None
    return listing_html, on_disk
|
|
|
|
def get_chart_html(chart_choice: str, agent_state: dict) -> str:
    """Return the HTML to display for the selected chart.

    * Chart not present in ``agent_state["charts"]``: the placeholder HTML.
    * Chart value is an existing path: an ``<iframe>`` pointing at that file,
      using its resolved, forward-slashed, URL-quoted path.
    * Otherwise: the chart value itself, wrapped in a ``chart-frame`` div.
    """
    available = agent_state.get("charts", {})
    if chart_choice not in available:
        return build_placeholder_chart(chart_choice)

    src = available[chart_choice]
    if not os.path.exists(src):
        return f'<div class="chart-frame fade-in">{src}</div>'

    # Backslashes are converted so the path is usable inside a URL.
    absolute = str(Path(src).resolve())
    url_path = quote(absolute.replace("\\", "/"), safe="/:")
    iframe_open = f'<iframe src="./gradio_api/file={url_path}" '
    return iframe_open + 'class="chart-frame" frameborder="0"></iframe>'
|
|
|
|
| |
| |
| |
|
|
def build_app() -> gr.Blocks:
    """Assemble the Gradio Blocks UI and wire up all event handlers.

    Layout, top to bottom: phase bar, optional API-key warning banners, then
    the cards "Data Input", "Agent Console", "Results" (tabs Review / Charts /
    Downloads / Clusters) and "Computational Methodology Extraction".

    Changes relative to the previous version:

    * The upload handler now takes the conversation from the ``chatbot``
      component.  It used to read a separate ``gr.State([])`` that was never
      written, so any change of the file input replaced the visible chat with
      an empty (or one-turn) history.
    * "Clear Chat" now also resets the phase bar; it already reset the agent
      state, which left the bar showing a stale phase.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks(
        title="BERTopic Thematic Analysis Agent",
    ) as app:

        # Per-session agent state.  The chatbot component is the single
        # source of truth for the conversation history.
        agent_state = gr.State({})

        # ── Header: phase progress bar ──────────────────────────────────
        phase_bar = gr.HTML(value=build_phase_html(0), elem_id="phase-bar")

        # ── Startup warnings for missing API keys ───────────────────────
        if MISTRAL_KEY_MISSING:
            gr.HTML(
                "<div class='api-warning'>"
                "WARNING: MISTRAL_API_KEY is not set. "
                "All LLM calls will fail. "
                "Set it in HuggingFace Spaces: Settings -> Variables and secrets."
                "</div>"
            )

        if GROQ_KEY_MISSING:
            gr.HTML(
                "<div class='api-warning'>"
                "WARNING: GROQ_API_KEY is not set. "
                "VERIFY command will be unavailable for Groq side-by-side checks. "
                "Set it to enable Mistral + Groq-Ollama + Groq-GPT verification in Phase 2 "
                "and Groq verification in Phase 5.5."
                "</div>"
            )

        # ── Body ────────────────────────────────────────────────────────
        with gr.Column(elem_id="main-body"):

            # Card 1: data input
            with gr.Column(elem_classes=["panel-card", "panel-data"]):
                gr.HTML("""<div class="card-title"><span>Data Input</span></div>""")

                file_input = gr.File(
                    label="Upload Corpus (CSV)",
                    file_types=[".csv"],
                    interactive=True,
                    elem_id="csv-upload",
                )

                file_status = gr.HTML(
                    value="<div class='status-pill idle'><div class='dot'></div>Awaiting upload</div>"
                )

                dataset_stats = gr.HTML(
                    value="<p style='color:var(--text-muted);font-size:0.83rem;"
                    "padding:8px 0 0;'>Upload a CSV to see statistics.</p>"
                )

                gr.HTML("<hr style='border:none;border-top:1px solid var(--border);margin:16px 0;'>")
                gr.HTML("""
                <div style='font-size:0.72rem;color:var(--text-muted);line-height:1.7;'>
                <b style='color:var(--text-secondary);'>Expected columns</b><br>
                Title, Abstract, Author Keywords, Authors, Year<br><br>
                <b style='color:var(--text-secondary);'>Quick commands</b><br>
                <code style='font-family:var(--font-mono);'>run abstract</code><br>
                <code style='font-family:var(--font-mono);'>run title</code><br>
                <code style='font-family:var(--font-mono);'>run keywords</code><br>
                <code style='font-family:var(--font-mono);'>verify</code><br>
                <code style='font-family:var(--font-mono);'>show topics</code><br>
                <code style='font-family:var(--font-mono);'>export results</code>
                </div>""")

            # Card 2: agent console
            with gr.Column(elem_classes=["panel-card", "panel-chat"]):
                gr.HTML("""<div class="card-title"><span>Agent Console</span></div>""")

                chatbot = gr.Chatbot(
                    value=[],
                    height=470,
                    show_label=False,
                    avatar_images=(None, None),
                    elem_id="chatbot-container",
                )

                with gr.Row(elem_id="chat-input-row"):
                    chat_input = gr.Textbox(
                        placeholder='Type a command, e.g. "run abstract" or "run keywords" ...',
                        show_label=False,
                        lines=1,
                        scale=5,
                        container=False,
                    )
                    send_btn = gr.Button(
                        "Send",
                        variant="primary",
                        scale=1,
                        min_width=90,
                        elem_classes=["btn-primary"],
                    )

                with gr.Row():
                    clear_btn = gr.Button(
                        "Clear Chat",
                        variant="secondary",
                        scale=1,
                        elem_classes=["btn-secondary"],
                    )

            # Card 3: results
            with gr.Column(elem_classes=["panel-card", "panel-results"]):
                gr.HTML("""<div class="card-title"><span>Results</span></div>""")

                cluster_stats = gr.HTML(
                    value=build_cluster_stats_html({}),
                )

                with gr.Tabs(elem_classes=["tabs"]):

                    # Tab: review table
                    with gr.TabItem("Review", elem_classes=["tabitem"]):
                        gr.HTML("""
                        <p style='font-size:0.78rem;color:var(--text-muted);margin:0 0 12px;'>
                        Edit <b>Approve</b>, <b>Rename To</b>, and <b>Reasoning</b> columns inline,
                        and use the <b>Papers</b> column to see the top 3 paper titles per cluster.
                        then click <b>Submit Review</b>. Use <b>verify</b> in chat at Phase 2
                        or Phase 5.5 to see Mistral vs Groq comparisons directly in chat output.
                        Phase 2 verification also adds an adjudicated best label.
                        Enable <b>Auto-accept Phase 2 review</b> to skip manual submission.
                        </p>""")

                        review_table = gr.Dataframe(
                            value=EMPTY_REVIEW_DF.copy(),
                            headers=REVIEW_COLUMNS,
                            datatype=[
                                "number", "str", "str", "number", "str",
                                "bool", "str", "str",
                            ],
                            interactive=True,
                            wrap=False,
                            elem_classes=["dataframe-wrap"],
                        )

                        with gr.Row():
                            refresh_table_btn = gr.Button(
                                "Refresh",
                                variant="secondary",
                                scale=1,
                                elem_classes=["btn-secondary"],
                            )
                            submit_review_btn = gr.Button(
                                "Submit Review",
                                variant="primary",
                                scale=2,
                                elem_classes=["btn-success"],
                            )

                        auto_accept_toggle = gr.Checkbox(
                            label="Auto-accept Phase 2 review and continue",
                            value=False,
                        )

                    # Tab: charts
                    with gr.TabItem("Charts", elem_classes=["tabitem"]):
                        chart_selector = gr.Dropdown(
                            choices=CHART_OPTIONS,
                            value=CHART_OPTIONS[0],
                            label="Select chart",
                            interactive=True,
                        )
                        chart_display = gr.HTML(
                            value=build_placeholder_chart(CHART_OPTIONS[0])
                        )

                    # Tab: downloads
                    with gr.TabItem("Downloads", elem_classes=["tabitem"]):
                        gr.HTML("""
                        <p style='font-size:0.78rem;color:var(--text-muted);margin:0 0 12px;'>
                        Files generated by the agent will appear here automatically.
                        </p>""")

                        download_file_list_html = gr.HTML(
                            value="<p style='color:var(--text-muted);font-size:0.83rem;'>"
                            "No files generated yet.</p>"
                        )

                        download_files = gr.File(
                            label="",
                            file_count="multiple",
                            interactive=False,
                        )

                        refresh_dl_btn = gr.Button(
                            "Refresh Downloads",
                            variant="secondary",
                            elem_classes=["btn-secondary"],
                        )

                    # Tab: clusters
                    with gr.TabItem("Clusters", elem_classes=["tabitem"]):
                        cluster_info_html = gr.HTML(
                            value=build_cluster_info_html({}),
                        )

            # Card 4: computational methodology extraction
            # NOTE(review): this card is assumed to sit inside #main-body with
            # the other cards — confirm against the intended layout.
            with gr.Column(elem_classes=["panel-card"]):
                # NOTE(review): the "π" below looks like a mis-encoded icon
                # character; kept verbatim — confirm the intended glyph.
                gr.HTML("""
                <div class="card-title">
                <span>π Computational Methodology Extraction</span>
                </div>
                <p style='font-size:0.78rem;color:var(--text-muted);margin:0 0 12px;'>
                Upload research PDFs to identify the specific computational methods
                used in each paper (text-only extraction via PyMuPDF + LLM).
                </p>
                """)

                with gr.Row():
                    with gr.Column(scale=1):
                        pdf_upload = gr.File(
                            label="Upload Research PDFs",
                            file_types=[".pdf"],
                            file_count="multiple",
                            interactive=True,
                            elem_id="pdf-upload",
                        )
                    with gr.Column(scale=1):
                        method_status = gr.HTML(
                            value="<div class='status-pill idle'><div class='dot'></div>Awaiting PDF upload</div>"
                        )
                        method_stats = gr.HTML(
                            value="<p style='color:var(--text-muted);font-size:0.83rem;'>"
                            "Upload PDF research papers to extract methods.</p>"
                        )

                run_methods_btn = gr.Button(
                    "π Extract Computational Methods",
                    variant="primary",
                    elem_classes=["btn-primary"],
                )

                gr.HTML("<hr style='border:none;border-top:1px solid var(--border);margin:12px 0;'>")

                # NOTE(review): the "β" separators below look like mis-encoded
                # characters; kept verbatim — confirm the intended glyph.
                gr.HTML("""
                <div style='font-size:0.82rem;color:var(--text-secondary);font-weight:600;margin-bottom:8px;'>
                Computational Techniques β Algorithms β Papers
                </div>""")
                method_technique_df = gr.Dataframe(
                    headers=["Main Computational Technique", "Algorithms", "Papers"],
                    interactive=False,
                    wrap=True,
                )

                gr.HTML("<hr style='border:none;border-top:1px solid var(--border);margin:12px 0;'>")

                method_dl_files = gr.File(
                    label="Download CSV Report",
                    file_count="multiple",
                    interactive=False,
                )

        # ── Event wiring ────────────────────────────────────────────────

        def _on_file_upload(file_obj, a_state, c_history):
            """Handle a CSV upload and, on success, kick off the agent."""
            c_history = c_history or []
            stats, status, a_state, phase_html = handle_file_upload(file_obj, a_state)
            if file_obj is not None and "file_path" in a_state:
                c_history, a_state, phase_html = auto_trigger_agent(a_state, c_history)
            return stats, status, a_state, phase_html, c_history

        file_input.change(
            fn=_on_file_upload,
            # The live chatbot value is the history, so earlier turns survive
            # an upload or the removal of a file.
            inputs=[file_input, agent_state, chatbot],
            outputs=[dataset_stats, file_status, agent_state, phase_bar, chatbot],
        )

        def _on_send(msg, c_history, a_state):
            """Stream handle_chat snapshots and clear the input box."""
            for history, state, phase_html in handle_chat(msg, c_history, a_state):
                yield history, state, phase_html, ""

        send_btn.click(
            fn=_on_send,
            inputs=[chat_input, chatbot, agent_state],
            outputs=[chatbot, agent_state, phase_bar, chat_input],
        )
        chat_input.submit(
            fn=_on_send,
            inputs=[chat_input, chatbot, agent_state],
            outputs=[chatbot, agent_state, phase_bar, chat_input],
        )

        clear_btn.click(
            # The agent state is reset, so the phase bar is reset with it.
            fn=lambda: ([], {}, build_phase_html(0)),
            outputs=[chatbot, agent_state, phase_bar],
        )

        refresh_table_btn.click(
            fn=refresh_review_table,
            inputs=[agent_state],
            outputs=[review_table],
        )

        submit_review_btn.click(
            fn=submit_review,
            inputs=[review_table, agent_state, chatbot],
            outputs=[chatbot, agent_state, phase_bar],
        )

        chart_selector.change(
            fn=get_chart_html,
            inputs=[chart_selector, agent_state],
            outputs=[chart_display],
        )

        refresh_dl_btn.click(
            fn=refresh_downloads,
            inputs=[agent_state],
            outputs=[download_file_list_html, download_files],
        )

        # Whenever the chat changes, refresh every result view from the state.
        chatbot.change(
            fn=lambda selected_chart, a: (
                refresh_review_table(a),
                *refresh_downloads(a),
                get_chart_html(selected_chart, a),
                build_cluster_stats_html(a),
                build_cluster_info_html(a),
            ),
            inputs=[chart_selector, agent_state],
            outputs=[
                review_table,
                download_file_list_html,
                download_files,
                chart_display,
                cluster_stats,
                cluster_info_html,
            ],
        )

        # Optional auto-approval of the Phase 2 review table.
        chatbot.change(
            fn=auto_accept_review,
            inputs=[agent_state, chatbot, auto_accept_toggle],
            outputs=[chatbot, agent_state, phase_bar],
        )

        # Method extraction events
        pdf_upload.change(
            fn=handle_pdf_upload,
            inputs=[pdf_upload],
            outputs=[method_status, method_stats],
        )

        run_methods_btn.click(
            fn=run_method_extraction_pipeline,
            inputs=[],
            outputs=[
                method_stats,
                method_status,
                method_technique_df,
                method_dl_files,
            ],
        )

    return app
|
|
|
|
| |
| |
| |
if __name__ == "__main__":
    # Script entry point: build the UI and serve it on all interfaces,
    # port 7860, without a public share link.
    demo = build_app()

    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
        # Only the outputs directory is listed as an allowed path.
        "allowed_paths": [str(OUTPUTS_DIR.resolve())],
        "css": CUSTOM_CSS,
        "theme": gr.themes.Soft(
            primary_hue=gr.themes.colors.indigo,
            secondary_hue=gr.themes.colors.slate,
            neutral_hue=gr.themes.colors.slate,
            font=[gr.themes.GoogleFont("DM Sans"), "system-ui", "sans-serif"],
        ),
    }
    demo.launch(**launch_options)
|
|