anujjuna's picture
Update app.py
079d3be verified
"""
app.py — Gradio UI for BERTopic Agentic AI Application (~370 lines)
Sections: ① Data Input ② Agent Conversation ③ Results (Table | Charts | Download)
Rules: ZERO business logic here. All decisions made by agent.py.
"""
import os
import json
import glob
import gradio as gr
from agent import invoke_agent
# Folder holding the checkpoint files this UI reads; created at import time
# if it does not exist yet.
CHECKPOINT_DIR = "checkpoints"
# Path for the uploaded CSV inside the checkpoint folder (not referenced by
# the visible part of this file).
CSV_PATH = os.path.join(CHECKPOINT_DIR, "uploaded.csv")
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
# ── Checkpoint file paths ──────────────────────────────────────────────────────
def ckpt(name):
    """Return the path of ``name`` inside the checkpoint directory.

    ``name`` is joined verbatim, so callers may also pass a glob pattern.
    """
    checkpoint_path = os.path.join(CHECKPOINT_DIR, name)
    return checkpoint_path
# ── Phase progress HTML ────────────────────────────────────────────────────────
def build_phase_bar():
    """Render the phase-progress strip as an HTML string.

    Each phase is drawn as a pill that turns green once its checkpoint file
    exists. Phases ③, ④ and ⑤ are all keyed to ``abstract_themes.json``, so
    they switch state together.

    Returns:
        str: One ``<div>`` with inline styles containing a pill per phase.
    """
    phases = [
        ("① Load", "stats.json"),
        ("② Codes", "abstract_labels.json"),
        ("③ Themes", "abstract_themes.json"),
        ("④ Saturation", "abstract_themes.json"),
        ("⑤ Names", "abstract_themes.json"),
        ("⑤½ PAJAIS", "abstract_taxonomy_map.json"),
        ("⑥ Report", "comparison.csv"),
    ]

    def render_pill(label, filename):
        # Probe the file once so background, text colour and icon can never
        # disagree if the checkpoint appears while the pill is being built.
        done = os.path.exists(ckpt(filename))
        if done:
            background, colour, icon = "#22c55e", "#fff", "✅"
        else:
            background, colour, icon = "#374151", "#9ca3af", "⬜"
        return (
            '<div style="display:inline-flex;align-items:center;gap:6px;'
            'padding:6px 14px;border-radius:20px;font-size:13px;font-weight:600;'
            f'background:{background};'
            f'color:{colour};">'
            f'{icon} {label}</div>'
        )

    pills = "".join(render_pill(label, filename) for label, filename in phases)
    return (
        '<div style="background:#111827;padding:12px 16px;border-radius:12px;'
        'border:1px solid #1f2937;display:flex;flex-wrap:wrap;gap:8px;align-items:center;">'
        '<span style="color:#6b7280;font-size:12px;font-weight:700;margin-right:4px;">B&amp;C PHASES:</span>'
        + pills
        + "</div>"
    )
# ── Review table loading ───────────────────────────────────────────────────────
def load_review_table():
    """Build the review table from the most advanced checkpoint available.

    Priority: taxonomy_map → themes → labels → summaries. The first file in
    that order that can be opened is parsed as JSON and handed to
    ``_format_table``; if none exists the placeholder from ``_empty_table``
    is returned.

    Raises:
        json.JSONDecodeError: If the selected checkpoint is not valid JSON.
    """
    priority = [
        ("abstract_taxonomy_map.json", "taxonomy"),
        ("abstract_themes.json", "themes"),
        ("abstract_labels.json", "labels"),
        ("abstract_summaries.json", "summaries"),
    ]
    for filename, mode in priority:
        try:
            # Explicit UTF-8 so non-ASCII labels decode the same on every
            # platform instead of depending on the locale default.
            with open(ckpt(filename), encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            # Opening directly (instead of exists-then-open) also covers a
            # file that disappears between the two steps.
            continue
        return _format_table(data, mode)
    return _empty_table()
def _empty_table():
    """Return a one-row placeholder DataFrame with the review-table columns.

    The single row is blank except for ``Sentences`` (0) and ``Approve``
    ("yes").
    """
    import pandas as pd

    header = [
        "#", "Topic Label", "Top Evidence", "Sentences",
        "Papers", "Approve", "Rename To", "Reasoning",
    ]
    placeholder_row = ["", "", "", 0, "", "yes", "", ""]
    return pd.DataFrame([placeholder_row], columns=header)
def _format_table(data, mode):
    """Turn checkpoint entries into the review-table DataFrame.

    Every element of ``data`` is converted with ``_format_row(element, mode)``.
    When that yields no rows the placeholder from ``_empty_table`` is
    returned instead.
    """
    import pandas as pd

    header = [
        "#", "Topic Label", "Top Evidence", "Sentences",
        "Papers", "Approve", "Rename To", "Reasoning",
    ]
    formatted = [_format_row(entry, mode) for entry in data]
    if formatted:
        return pd.DataFrame(formatted, columns=header)
    return _empty_table()
def _format_row(item, mode):
    """Convert one checkpoint entry into a review-table row.

    Args:
        item: Mapping read from a checkpoint file. Keys consulted:
            ``topic_id``/``name``, ``label``, ``top_sentences``,
            ``sentence_count``, ``paper_count``, ``approve``, ``rename_to``,
            ``reasoning`` and, in taxonomy mode, ``pajais_match`` and
            ``match_confidence``.
        mode: ``"taxonomy"`` renders the match summary as evidence; any other
            value uses the first entry of ``top_sentences``.

    Returns:
        list: ``[#, label, evidence, sentences, papers, approve, rename_to,
        reasoning]``.
    """
    idx = item.get("topic_id", item.get("name", ""))
    label = item.get("label", item.get("name", ""))
    # A key that is present but null must behave like a missing one.
    top_sentences = item.get("top_sentences") or []

    if mode == "taxonomy":
        # The confidence may arrive as null or as text; coerce it so the
        # ``.2f`` format cannot raise.
        try:
            confidence = float(item.get("match_confidence") or 0)
        except (TypeError, ValueError):
            confidence = 0.0
        evidence = (
            f"→ {item.get('pajais_match', 'NOVEL')} "
            f"| conf: {confidence:.2f} "
            f"| {item.get('reasoning', '')}"
        )
    else:
        evidence = top_sentences[0] if top_sentences else ""

    return [
        idx,
        label,
        evidence,
        item.get("sentence_count", len(top_sentences)),
        item.get("paper_count", ""),
        item.get("approve", "yes"),
        item.get("rename_to", label),
        item.get("reasoning", ""),
    ]
# ── Chart list ────────────────────────────────────────────────────────────────
def get_chart_choices():
    """List display labels for the chart files in the checkpoint directory.

    A label is the file name without its ``.html`` extension, underscores
    turned into spaces, title-cased. Labels are sorted because ``glob``
    returns matches in arbitrary order, which would otherwise reshuffle the
    dropdown (and its default entry) between refreshes.

    Returns:
        list[str]: The labels, or ``["No charts yet"]`` when nothing matches.
    """
    labels = sorted(
        # splitext removes only the trailing extension, not every ".html"
        # occurrence inside the name.
        os.path.splitext(os.path.basename(path))[0].replace("_", " ").title()
        for path in glob.glob(ckpt("*_chart_*.html"))
    )
    return labels or ["No charts yet"]
def load_chart_html(choice):
    """Return HTML that embeds the chart selected in the dropdown.

    Args:
        choice: Display label of the chart (file name without ``.html``,
            underscores shown as spaces, title-cased), or a falsy value /
            ``"No charts yet"`` when nothing is selected.

    Returns:
        str: An ``<iframe srcdoc=...>`` wrapping the chart file, or a short
        ``<p>`` message when nothing is selected or the file is missing.
    """
    from html import escape

    if not choice or choice == "No charts yet":
        return "<p style='color:#6b7280;padding:20px;'>Charts appear after Phase 2 analysis.</p>"

    not_found = "<p style='color:#ef4444;'>Chart file not found.</p>"

    path = ckpt(choice.lower().replace(" ", "_") + ".html")
    if not os.path.exists(path):
        # Title-casing the label loses the original letter case, so on a
        # case-sensitive filesystem the lower-cased guess can miss. Fall back
        # to the chart file whose label matches case-insensitively.
        wanted = choice.casefold()
        path = next(
            (
                candidate
                for candidate in glob.glob(ckpt("*_chart_*.html"))
                if os.path.splitext(os.path.basename(candidate))[0]
                .replace("_", " ")
                .casefold() == wanted
            ),
            None,
        )
    if path is None:
        return not_found

    try:
        with open(path, encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        # The file vanished between lookup and open.
        return not_found

    # srcdoc is an attribute value: "&" must be escaped as well as the quote,
    # otherwise entities inside the chart HTML are decoded one level too early.
    return (
        f'<iframe srcdoc="{escape(content, quote=True)}" '
        'width="100%" height="600px" frameborder="0"></iframe>'
    )
# ── Download file list ─────────────────────────────────────────────────────────
def get_download_files():
    """Collect downloadable checkpoint files, newest first.

    Matches ``*.csv``, ``*.json``, ``*.txt`` and ``*.npy`` in the checkpoint
    directory and orders them by modification time, most recent first.

    Returns:
        list[str] | None: The matching paths, or ``None`` when there are none.
    """
    extensions = ("*.csv", "*.json", "*.txt", "*.npy")
    matches = [
        path
        for pattern in extensions
        for path in glob.glob(ckpt(pattern))
    ]
    newest_first = sorted(matches, key=os.path.getmtime, reverse=True)
    if not newest_first:
        return None
    return newest_first
# ── Table-to-theme-map parser ──────────────────────────────────────────────────
def _cell_text(value):
    """Return a table cell as stripped text; ``None`` and NaN become ``""``."""
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return str(value).strip()


def _topic_id(value):
    """Return a topic id as ``int`` when the cell holds a whole number, else as text."""
    if isinstance(value, float) and value.is_integer():
        return int(value)
    text = _cell_text(value)
    try:
        # int() also accepts negative ids such as "-1", which isdigit() rejects.
        return int(text)
    except ValueError:
        return text


def parse_table_to_message(table_data):
    """Convert review table edits into a structured message for the agent.

    Handles both pandas DataFrame (from gr.Dataframe) and list of lists.
    Columns are read by position: 0 = topic id, 1 = label, 5 = approve,
    6 = rename-to, 7 = reasoning.

    Rows whose id and label are both blank (the placeholder row and unused
    padding rows) are ignored, and empty/NaN cells are treated as blank
    rather than as the text "nan".

    Args:
        table_data: DataFrame, list of row lists, or ``None``.

    Returns:
        str: The message summarising approved/rejected counts, the theme
        grouping (rename-to name, or the label when no rename is given,
        mapped to topic ids) as JSON, and any reasoning for approved rows.
    """
    import pandas as pd

    # Normalise to list of lists regardless of input type
    if table_data is None:
        return "Submit Review: No table data provided."
    if isinstance(table_data, pd.DataFrame):
        if table_data.empty:
            return "Submit Review: Table is empty, nothing to review."
        rows = table_data.values.tolist()
    else:
        rows = list(table_data) if table_data else []
    if not rows:
        return "Submit Review: No table data provided."

    yes_values = {"yes", "y", "1", "true"}
    no_values = {"no", "n", "0", "false"}
    approved, rejected = [], []
    for row in rows:
        if len(row) < 6:
            continue
        # The placeholder row carries Approve="yes" but names no topic; it
        # must not be reported as an approved topic.
        if not _cell_text(row[0]) and not _cell_text(row[1]):
            continue
        verdict = _cell_text(row[5]).lower()
        if verdict in yes_values:
            approved.append(row)
        elif verdict in no_values:
            rejected.append(row)

    theme_groups = {}
    reasoning_lines = []
    for row in approved:
        topic = _topic_id(row[0])
        label = _cell_text(row[1])
        rename_to = _cell_text(row[6]) if len(row) > 6 else ""
        theme_groups.setdefault(rename_to or label, []).append(topic)
        reasoning = _cell_text(row[7]) if len(row) > 7 else ""
        if reasoning:
            reasoning_lines.append(f" - Topic {topic} ({label}): {reasoning}")

    reasoning_block = (
        "\n".join(reasoning_lines) if reasoning_lines else " (no reasoning provided)"
    )
    return (
        "Submit Review received.\n\n"
        f"Approved topics: {len(approved)}\n"
        f"Rejected topics: {len(rejected)}\n\n"
        f"Theme groupings (RENAME TO → [topic_ids]):\n{json.dumps(theme_groups)}\n\n"
        "Researcher reasoning:\n"
        + reasoning_block
        + "\n\nPlease proceed to the next phase based on these decisions."
    )
# ── Main Gradio App ────────────────────────────────────────────────────────────
def build_app():
    """Assemble the Gradio Blocks UI and wire its event handlers.

    Layout, top to bottom: header, phase-progress bar, ① data input (CSV
    upload), ② agent conversation (chat + text box), ③ results tabs (review
    table, charts, downloads).

    Every handler that talks to the agent forwards a message to
    ``invoke_agent`` and then re-renders the phase bar and review table from
    the checkpoint files; no analysis logic lives here.

    Returns:
        gr.Blocks: The assembled, not yet launched, application.
    """
    table_headers = [
        "#", "Topic Label", "Top Evidence", "Sentences",
        "Papers", "Approve", "Rename To", "Reasoning",
    ]

    with gr.Blocks(title="BERTopic Thematic Analysis Agent") as app:
        # ── Header ──────────────────────────────────────────────────────────
        gr.HTML("""
        <div style="text-align:center;padding:32px 0 16px;background:linear-gradient(180deg,#0f172a 0%,#0a0f1a 100%);">
            <div style="font-family:'IBM Plex Mono',monospace;font-size:11px;letter-spacing:0.3em;
                        color:#10b981;text-transform:uppercase;margin-bottom:8px;">
                Braun &amp; Clarke (2006) · BERTopic · PAJAIS Taxonomy
            </div>
            <h1 style="font-family:'IBM Plex Mono',monospace;font-size:28px;font-weight:700;
                       color:#f1f5f9;margin:0 0 8px;">
                Thematic Analysis Agent
            </h1>
            <p style="color:#475569;font-size:14px;margin:0;">
                Agentic AI · LangGraph · Mistral LLM · AgglomerativeClustering (cosine, 384d)
            </p>
        </div>
        """)

        # Phase progress bar
        phase_bar = gr.HTML(value=build_phase_bar(), label="Phase Progress")

        # ── SECTION 1: Data Input ────────────────────────────────────────────
        gr.HTML('<div class="section-header">① DATA INPUT</div>')
        with gr.Row():
            csv_upload = gr.File(
                label="Upload Scopus CSV Export",
                file_types=[".csv"],
                scale=2,
            )
            with gr.Column(scale=1):
                gr.HTML("""
                <div style="background:#1e293b;border-radius:12px;padding:16px;font-size:13px;color:#94a3b8;">
                    <b style="color:#f1f5f9;">Required CSV Columns:</b><br>
                    Authors · Title · Abstract<br>
                    Author Keywords · Cited by<br>
                    Source title · Year
                </div>
                """)

        # ── SECTION 2: Agent Conversation ───────────────────────────────────
        gr.HTML('<div class="section-header">② AGENT CONVERSATION</div>')
        chatbot = gr.Chatbot(
            label="Thematic Analysis Agent",
            height=500,
            avatar_images=(None, "https://www.anthropic.com/favicon.ico"),
        )
        with gr.Row():
            user_input = gr.Textbox(
                placeholder="Type 'run abstract', 'run title', or any instruction...",
                label="",
                scale=5,
                lines=1,
                container=False,
            )
            send_btn = gr.Button("Send ▶", variant="primary", scale=1)

        # ── SECTION 3: Results ───────────────────────────────────────────────
        gr.HTML('<div class="section-header">③ RESULTS</div>')
        with gr.Tabs():
            # Tab 1: Review Table
            with gr.TabItem("📋 Review Table"):
                gr.HTML("""
                <p style="color:#94a3b8;font-size:13px;margin-bottom:8px;">
                    Edit <b>Approve</b> (yes/no), <b>Rename To</b>, and <b>Reasoning</b> columns.
                    Then click <b>Submit Review</b> to send decisions to the agent.
                </p>
                """)
                review_table = gr.Dataframe(
                    headers=table_headers,
                    datatype=["str", "str", "str", "number", "str", "str", "str", "str"],
                    row_count=10,
                    column_count=8,
                    interactive=True,
                    wrap=True,
                    label="",
                )
                submit_review_btn = gr.Button("📤 Submit Review →", variant="primary")

            # Tab 2: Charts
            with gr.TabItem("📊 Charts"):
                chart_dropdown = gr.Dropdown(
                    choices=get_chart_choices(),
                    label="Select Chart",
                    interactive=True,
                )
                refresh_charts_btn = gr.Button("🔄 Refresh Chart List", variant="secondary", size="sm")
                chart_display = gr.HTML(
                    value="<p style='color:#6b7280;padding:20px;'>Charts appear after Phase 2 BERTopic analysis.</p>"
                )

            # Tab 3: Downloads
            with gr.TabItem("📥 Download Files"):
                gr.HTML("""
                <p style="color:#94a3b8;font-size:13px;margin-bottom:8px;">
                    All checkpoint files are listed below. Download for your conference paper.
                </p>
                """)
                download_files = gr.File(
                    label="Output Files",
                    file_count="multiple",
                    interactive=False,
                )
                refresh_downloads_btn = gr.Button("🔄 Refresh Files", variant="secondary", size="sm")

        # ── State ─────────────────────────────────────────────────────────────
        # NOTE(review): every session starts with the same thread id
        # ("default"); confirm with agent.py whether that is intended.
        thread_state = gr.State("default")

        # ── Shared helper ─────────────────────────────────────────────────────
        def agent_turn(history, shown_text, prompt, thread_id):
            """Record one exchange: show ``shown_text`` as the user turn, send
            ``prompt`` to the agent and append its reply."""
            history = history or []
            history.append({"role": "user", "content": shown_text})
            response = invoke_agent(prompt, thread_id)
            history.append({"role": "assistant", "content": response})
            return history

        # ── Event: CSV Upload ─────────────────────────────────────────────────
        def on_csv_upload(file, history, thread_id):
            if file is None:
                return history, build_phase_bar(), load_review_table()
            # In Gradio 6, uploaded file is a filepath string
            filepath = file if isinstance(file, str) else file.name
            history = agent_turn(
                history,
                f"CSV uploaded: {os.path.basename(filepath)}",
                f"load_scopus_csv filepath={filepath}",
                thread_id,
            )
            return history, build_phase_bar(), load_review_table()

        csv_upload.upload(
            on_csv_upload,
            inputs=[csv_upload, chatbot, thread_state],
            outputs=[chatbot, phase_bar, review_table],
        )

        # ── Event: Send message ───────────────────────────────────────────────
        def on_send(message, history, thread_id):
            # Guard against None as well as blank text so an empty submit
            # never reaches the agent or raises on ``.strip()``.
            if not message or not message.strip():
                return history, "", build_phase_bar(), load_review_table()
            history = agent_turn(history, message, message, thread_id)
            return history, "", build_phase_bar(), load_review_table()

        # The button click and Enter in the text box share one handler.
        for trigger in (send_btn.click, user_input.submit):
            trigger(
                on_send,
                inputs=[user_input, chatbot, thread_state],
                outputs=[chatbot, user_input, phase_bar, review_table],
            )

        # ── Event: Submit Review ──────────────────────────────────────────────
        def on_submit_review(table_data, history, thread_id):
            msg = parse_table_to_message(table_data)
            # The chat shows a short notice; the agent receives the full message.
            history = agent_turn(
                history,
                "📤 Submit Review (table decisions sent to agent)",
                msg,
                thread_id,
            )
            return history, build_phase_bar(), load_review_table()

        submit_review_btn.click(
            on_submit_review,
            inputs=[review_table, chatbot, thread_state],
            outputs=[chatbot, phase_bar, review_table],
        )

        # ── Event: Chart selection ────────────────────────────────────────────
        chart_dropdown.change(
            load_chart_html,
            inputs=[chart_dropdown],
            outputs=[chart_display],
        )

        def refresh_charts():
            choices = get_chart_choices()
            return gr.update(choices=choices, value=choices[0] if choices else None)

        refresh_charts_btn.click(
            refresh_charts,
            outputs=[chart_dropdown],
        )

        # ── Event: Download refresh ───────────────────────────────────────────
        def refresh_downloads():
            files = get_download_files()
            return gr.update(value=files)

        refresh_downloads_btn.click(
            refresh_downloads,
            outputs=[download_files],
        )

        # ── Initial load ──────────────────────────────────────────────────────
        app.load(
            lambda: (build_phase_bar(), load_review_table(), get_download_files()),
            outputs=[phase_bar, review_table, download_files],
        )
    return app
# ── Launch ─────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Build the UI first, then the presentation options passed to launch().
    demo = build_app()

    # Base theme with emerald/slate hues and a monospace font stack.
    ui_theme = gr.themes.Base(
        primary_hue="emerald",
        secondary_hue="slate",
        neutral_hue="slate",
        font=[gr.themes.GoogleFont("IBM Plex Mono"), "monospace"],
    )

    # Page-level CSS: dark background, wide container, the ``section-header``
    # class used by the section titles, and a hidden footer.
    page_css = """
body { background: #0a0f1a !important; }
.gradio-container { max-width: 1400px !important; background: #0a0f1a !important; }
.section-header {
font-size: 13px;
font-weight: 700;
color: #64748b;
letter-spacing: 0.12em;
text-transform: uppercase;
margin-bottom: 12px;
padding-bottom: 8px;
border-bottom: 1px solid #1e293b;
}
footer { display: none !important; }
"""

    # Serve on all interfaces at port 7860, without SSR or a share link.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
        share=False,
        theme=ui_theme,
        css=page_css,
    )