Spaces:

anujjuna
/

BERTopic_Agentic_AI

Sleeping

App Files Files Community

BERTopic_Agentic_AI / app.py

anujjuna

Update app.py

079d3be verified about 1 month ago

raw

history blame contribute delete

18.9 kB

	"""
	app.py — Gradio UI for BERTopic Agentic AI Application (~370 lines)
	Sections: ① Data Input ② Agent Conversation ③ Results (Table | Charts | Download)
	Rules: ZERO business logic here. All decisions made by agent.py.
	"""

	import os
	import json
	import glob
	import gradio as gr
	from agent import invoke_agent

	# Directory (relative to the working directory) that holds every checkpoint file
	# this module reads.
	CHECKPOINT_DIR = "checkpoints"
	# Path of "uploaded.csv" inside the checkpoint directory. It is not referenced in
	# the visible part of this file — presumably consumed elsewhere; verify.
	CSV_PATH = os.path.join(CHECKPOINT_DIR, "uploaded.csv")

	# Create the directory at import time; an already existing one is not an error.
	os.makedirs(CHECKPOINT_DIR, exist_ok=True)

	# ── Checkpoint file paths ──────────────────────────────────────────────────────
	def ckpt(name):
	    """Return the path of the file *name* inside ``CHECKPOINT_DIR``."""
	    checkpoint_path = os.path.join(CHECKPOINT_DIR, name)
	    return checkpoint_path


	# ── Phase progress HTML ────────────────────────────────────────────────────────
	def build_phase_bar():
	    """Render the phase progress bar as an HTML string.

	    Each phase becomes a pill: green with a check mark when its checkpoint
	    file (resolved through ``ckpt``) exists, grey with an empty box otherwise.
	    Phases ③, ④ and ⑤ are all keyed on ``abstract_themes.json``, so they
	    switch state together.

	    Returns:
	        str: one ``<div>`` holding the "B&C PHASES:" heading and the pills.
	    """
	    phases = [
	        ("① Load", "stats.json"),
	        ("② Codes", "abstract_labels.json"),
	        ("③ Themes", "abstract_themes.json"),
	        ("④ Saturation", "abstract_themes.json"),
	        ("⑤ Names", "abstract_themes.json"),
	        ("⑤½ PAJAIS", "abstract_taxonomy_map.json"),
	        ("⑥ Report", "comparison.csv"),
	    ]

	    pills = []
	    for title, filename in phases:
	        # Check the file once per phase so background, text colour and icon
	        # can never disagree if the file appears while the bar is rendered.
	        done = os.path.exists(ckpt(filename))
	        background = "#22c55e" if done else "#374151"
	        foreground = "#fff" if done else "#9ca3af"
	        icon = "✅" if done else "⬜"
	        pills.append(
	            '<div style="display:inline-flex;align-items:center;gap:6px;'
	            'padding:6px 14px;border-radius:20px;font-size:13px;font-weight:600;'
	            f'background:{background};'
	            f'color:{foreground};">'
	            f'{icon} {title}</div>'
	        )

	    return (
	        '<div style="background:#111827;padding:12px 16px;border-radius:12px;'
	        'border:1px solid #1f2937;display:flex;flex-wrap:wrap;gap:8px;align-items:center;">'
	        '<span style="color:#6b7280;font-size:12px;font-weight:700;margin-right:4px;">B&C PHASES:</span>'
	        + "".join(pills)
	        + "</div>"
	    )


	# ── Review table loading ───────────────────────────────────────────────────────
	def load_review_table():
	    """Load the first available checkpoint as a review table.

	    Priority: taxonomy_map → themes → labels → summaries. The first file that
	    can be opened is parsed as JSON and passed to ``_format_table`` together
	    with its mode; when none of the files exists, ``_empty_table()`` is
	    returned instead.

	    Raises:
	        json.JSONDecodeError: if the selected checkpoint is not valid JSON.
	    """
	    priority = [
	        ("abstract_taxonomy_map.json", "taxonomy"),
	        ("abstract_themes.json", "themes"),
	        ("abstract_labels.json", "labels"),
	        ("abstract_summaries.json", "summaries"),
	    ]
	    for filename, mode in priority:
	        try:
	            # JSON text is UTF-8 by specification; do not depend on the
	            # platform's default encoding.
	            with open(ckpt(filename), encoding="utf-8") as f:
	                data = json.load(f)
	        except FileNotFoundError:
	            # Opening directly (instead of exists-then-open) avoids a race
	            # with a file that disappears between the two steps.
	            continue
	        return _format_table(data, mode)
	    return _empty_table()


	def _empty_table():
	import pandas as pd
	return pd.DataFrame(
	[["", "", "", 0, "", "yes", "", ""]],
	columns=["#", "Topic Label", "Top Evidence", "Sentences", "Papers", "Approve", "Rename To", "Reasoning"],
	)


	def _format_table(data, mode):
	    """Turn checkpoint records into the eight-column review DataFrame.

	    Every element of *data* is converted with ``_format_row(element, mode)``.
	    When that yields no rows, the placeholder from ``_empty_table()`` is
	    returned instead.
	    """
	    import pandas as pd

	    records = [_format_row(entry, mode) for entry in data]
	    if len(records) == 0:
	        return _empty_table()

	    header = [
	        "#",
	        "Topic Label",
	        "Top Evidence",
	        "Sentences",
	        "Papers",
	        "Approve",
	        "Rename To",
	        "Reasoning",
	    ]
	    return pd.DataFrame(records, columns=header)


	def _format_row(item, mode):
	idx = item.get("topic_id", item.get("name", ""))
	label = item.get("label", item.get("name", ""))

	if mode == "taxonomy":
	evidence = (
	f"→ {item.get('pajais_match', 'NOVEL')} "
	f"\| conf: {item.get('match_confidence', 0):.2f} "
	f"\| {item.get('reasoning', '')}"
	)
	else:
	sentences = item.get("top_sentences", [])
	evidence = sentences[0] if sentences else ""

	sentences_count = item.get("sentence_count", len(item.get("top_sentences", [])))
	papers = item.get("paper_count", "")
	approve = item.get("approve", "yes")
	rename = item.get("rename_to", label)
	reasoning = item.get("reasoning", "")

	return [idx, label, evidence, sentences_count, papers, approve, rename, reasoning]


	# ── Chart list ────────────────────────────────────────────────────────────────
	def get_chart_choices():
	    """List the chart files in the checkpoint directory as dropdown labels.

	    A label is the file's base name with underscores turned into spaces, the
	    ".html" suffix removed and title-casing applied.

	    Returns:
	        list[str]: the labels in sorted file order, or ``["No charts yet"]``
	        when no chart file is found.
	    """
	    # The pattern needs wildcards: a bare "_chart_.html" only ever matches a
	    # file with exactly that name.
	    # NOTE(review): "*_chart_*.html" is the assumed naming scheme — confirm
	    # against the code that writes the charts.
	    chart_files = sorted(glob.glob(ckpt("*_chart_*.html")))
	    choices = [
	        os.path.basename(path).replace("_", " ").replace(".html", "").title()
	        for path in chart_files
	    ]
	    return choices if choices else ["No charts yet"]


	def load_chart_html(choice):
	    """Return the HTML to display for the chart selected in the dropdown.

	    Args:
	        choice: dropdown label. It is lower-cased, spaces become underscores
	            and ".html" is appended to obtain the file name, which is then
	            resolved through ``ckpt``.

	    Returns:
	        str: a placeholder paragraph when nothing (or "No charts yet") is
	        selected, an ``<iframe>`` embedding the chart via ``srcdoc`` when the
	        file can be read, or an error paragraph when the file is missing.
	    """
	    import html

	    if not choice or choice == "No charts yet":
	        return "<p style='color:#6b7280;padding:20px;'>Charts appear after Phase 2 analysis.</p>"

	    filename = choice.lower().replace(" ", "_") + ".html"
	    try:
	        with open(ckpt(filename), encoding="utf-8") as f:
	            content = f.read()
	    except FileNotFoundError:
	        return "<p style='color:#ef4444;'>Chart file not found.</p>"

	    # The document sits inside a double-quoted attribute, so both '"' and '&'
	    # must be escaped; the browser decodes them again when it reads srcdoc.
	    srcdoc = html.escape(content, quote=True)
	    return f'<iframe srcdoc="{srcdoc}" width="100%" height="600px" frameborder="0"></iframe>'


	# ── Download file list ─────────────────────────────────────────────────────────
	def get_download_files():
	    """Collect the downloadable files in the checkpoint directory.

	    Returns:
	        list[str] | None: paths of the ``.csv``, ``.json``, ``.txt`` and
	        ``.npy`` files, most recently modified first, or ``None`` when there
	        are none.
	    """
	    # Wildcards are required: a bare ".csv" only matches a file whose whole
	    # name is ".csv".
	    patterns = ["*.csv", "*.json", "*.txt", "*.npy"]
	    files = []
	    for pattern in patterns:
	        files.extend(glob.glob(ckpt(pattern)))
	    files.sort(key=os.path.getmtime, reverse=True)
	    return files if files else None


	# ── Table-to-theme-map parser ──────────────────────────────────────────────────
	def _cell_text(value):
	    """Return a table cell as stripped text; None and NaN count as empty."""
	    if value is None:
	        return ""
	    # NaN is the only float that is not equal to itself.
	    if isinstance(value, float) and value != value:
	        return ""
	    return str(value).strip()


	def _topic_id(value):
	    """Return a topic id as int when it is a (possibly negative) integer, else as text."""
	    text = _cell_text(value)
	    digits = text[1:] if text.startswith("-") else text
	    # isdecimal() only accepts characters that int() can parse, unlike isdigit().
	    if digits.isdecimal():
	        return int(text)
	    return text


	def parse_table_to_message(table_data):
	    """Convert review table edits into a structured message for the agent.

	    Handles both a pandas DataFrame (from gr.Dataframe) and a list of lists.
	    Column positions: 0 = topic id, 1 = label, 5 = approve, 6 = rename-to,
	    7 = reasoning.

	    Rows with fewer than six cells, and rows whose id and label are both
	    blank (such as the placeholder row of an empty table), are ignored.
	    Approved rows are grouped by their rename-to value, falling back to the
	    label when that cell is blank.

	    Returns:
	        str: the message text, or a short "Submit Review: ..." notice when
	        there is no table data at all.
	    """
	    import pandas as pd

	    # Normalise to list of lists regardless of input type
	    if table_data is None:
	        return "Submit Review: No table data provided."
	    if isinstance(table_data, pd.DataFrame):
	        if table_data.empty:
	            return "Submit Review: Table is empty, nothing to review."
	        rows = table_data.values.tolist()
	    else:
	        rows = list(table_data) if table_data else []

	    if not rows:
	        return "Submit Review: No table data provided."

	    yes_words = {"yes", "y", "1", "true"}
	    no_words = {"no", "n", "0", "false"}

	    approved = []
	    rejected = []
	    for row in rows:
	        if len(row) < 6:
	            continue
	        # A row without id and label is a blank placeholder, not a decision.
	        if not _cell_text(row[0]) and not _cell_text(row[1]):
	            continue
	        decision = str(row[5]).strip().lower()
	        if decision in yes_words:
	            approved.append(row)
	        elif decision in no_words:
	            rejected.append(row)

	    theme_groups = {}
	    for row in approved:
	        rename = _cell_text(row[6]) if len(row) > 6 else ""
	        theme = rename or str(row[1])
	        theme_groups.setdefault(theme, []).append(_topic_id(row[0]))

	    theme_map_str = json.dumps(theme_groups)

	    reasoning_lines = [
	        f" - Topic {row[0]} ({row[1]}): {row[7]}"
	        for row in approved
	        if len(row) > 7 and _cell_text(row[7])
	    ]

	    msg = (
	        f"Submit Review received.\n\n"
	        f"Approved topics: {len(approved)}\n"
	        f"Rejected topics: {len(rejected)}\n\n"
	        f"Theme groupings (RENAME TO → [topic_ids]):\n{theme_map_str}\n\n"
	        f"Researcher reasoning:\n"
	        + ("\n".join(reasoning_lines) if reasoning_lines else " (no reasoning provided)")
	        + "\n\nPlease proceed to the next phase based on these decisions."
	    )
	    return msg


	# ── Main Gradio App ────────────────────────────────────────────────────────────
	def build_app():
	    """Assemble the Gradio Blocks UI and wire its events.

	    Layout: header, phase progress bar, ① data input (CSV upload),
	    ② agent conversation (chatbot, text box, send button) and ③ results
	    (review table, charts, downloads) in three tabs.

	    Every chat-producing event (CSV upload, send, submit review) forwards a
	    message to ``invoke_agent`` together with the thread id held in
	    ``thread_state`` (initialised to ``"default"``), then refreshes the phase
	    bar and the review table.

	    The multi-line HTML literals are kept flush with the block's left edge so
	    that their text is unchanged.

	    Returns:
	        The ``gr.Blocks`` instance, not yet launched.
	    """
	    with gr.Blocks(title="BERTopic Thematic Analysis Agent") as app:

	        # ── Header ──────────────────────────────────────────────────────────
	        gr.HTML("""
	<div style="text-align:center;padding:32px 0 16px;background:linear-gradient(180deg,#0f172a 0%,#0a0f1a 100%);">
	<div style="font-family:'IBM Plex Mono',monospace;font-size:11px;letter-spacing:0.3em;
	color:#10b981;text-transform:uppercase;margin-bottom:8px;">
	Braun & Clarke (2006) · BERTopic · PAJAIS Taxonomy
	</div>
	<h1 style="font-family:'IBM Plex Mono',monospace;font-size:28px;font-weight:700;
	color:#f1f5f9;margin:0 0 8px;">
	Thematic Analysis Agent
	</h1>
	<p style="color:#475569;font-size:14px;margin:0;">
	Agentic AI · LangGraph · Mistral LLM · AgglomerativeClustering (cosine, 384d)
	</p>
	</div>
	""")

	        # Phase progress bar
	        phase_bar = gr.HTML(value=build_phase_bar(), label="Phase Progress")

	        # ── SECTION 1: Data Input ────────────────────────────────────────────
	        gr.HTML('<div class="section-header">① DATA INPUT</div>')
	        with gr.Row():
	            csv_upload = gr.File(
	                label="Upload Scopus CSV Export",
	                file_types=[".csv"],
	                scale=2,
	            )
	            with gr.Column(scale=1):
	                gr.HTML("""
	<div style="background:#1e293b;border-radius:12px;padding:16px;font-size:13px;color:#94a3b8;">
	<b style="color:#f1f5f9;">Required CSV Columns:</b><br>
	Authors · Title · Abstract<br>
	Author Keywords · Cited by<br>
	Source title · Year
	</div>
	""")

	        # ── SECTION 2: Agent Conversation ───────────────────────────────────
	        gr.HTML('<div class="section-header">② AGENT CONVERSATION</div>')
	        chatbot = gr.Chatbot(
	            label="Thematic Analysis Agent",
	            height=500,
	            avatar_images=(None, "https://www.anthropic.com/favicon.ico"),
	        )
	        with gr.Row():
	            user_input = gr.Textbox(
	                placeholder="Type 'run abstract', 'run title', or any instruction...",
	                label="",
	                scale=5,
	                lines=1,
	                container=False,
	            )
	            send_btn = gr.Button("Send ▶", variant="primary", scale=1)

	        # ── SECTION 3: Results ───────────────────────────────────────────────
	        gr.HTML('<div class="section-header">③ RESULTS</div>')
	        with gr.Tabs():

	            # Tab 1: Review Table
	            with gr.TabItem("📋 Review Table"):
	                gr.HTML("""
	<p style="color:#94a3b8;font-size:13px;margin-bottom:8px;">
	Edit <b>Approve</b> (yes/no), <b>Rename To</b>, and <b>Reasoning</b> columns.
	Then click <b>Submit Review</b> to send decisions to the agent.
	</p>
	""")
	                review_table = gr.Dataframe(
	                    headers=["#", "Topic Label", "Top Evidence", "Sentences", "Papers", "Approve", "Rename To", "Reasoning"],
	                    datatype=["str", "str", "str", "number", "str", "str", "str", "str"],
	                    row_count=10,
	                    column_count=8,
	                    interactive=True,
	                    wrap=True,
	                    label="",
	                )
	                submit_review_btn = gr.Button("📤 Submit Review →", variant="primary")

	            # Tab 2: Charts
	            with gr.TabItem("📊 Charts"):
	                chart_dropdown = gr.Dropdown(
	                    choices=get_chart_choices(),
	                    label="Select Chart",
	                    interactive=True,
	                )
	                refresh_charts_btn = gr.Button("🔄 Refresh Chart List", variant="secondary", size="sm")
	                chart_display = gr.HTML(
	                    value="<p style='color:#6b7280;padding:20px;'>Charts appear after Phase 2 BERTopic analysis.</p>"
	                )

	            # Tab 3: Downloads
	            with gr.TabItem("📥 Download Files"):
	                gr.HTML("""
	<p style="color:#94a3b8;font-size:13px;margin-bottom:8px;">
	All checkpoint files are listed below. Download for your conference paper.
	</p>
	""")
	                download_files = gr.File(
	                    label="Output Files",
	                    file_count="multiple",
	                    interactive=False,
	                )
	                refresh_downloads_btn = gr.Button("🔄 Refresh Files", variant="secondary", size="sm")

	        # ── State ─────────────────────────────────────────────────────────────
	        thread_state = gr.State("default")

	        # ── Shared chat step ──────────────────────────────────────────────────
	        def _agent_turn(history, shown_text, agent_text, thread_id):
	            """Record *shown_text* as the user turn, send *agent_text* to the agent, record its reply."""
	            history = history or []
	            history.append({"role": "user", "content": shown_text})
	            response = invoke_agent(agent_text, thread_id)
	            history.append({"role": "assistant", "content": response})
	            return history

	        # ── Event: CSV Upload ─────────────────────────────────────────────────
	        def on_csv_upload(file, history, thread_id):
	            if file is None:
	                return history, build_phase_bar(), load_review_table()
	            # In Gradio 6, uploaded file is a filepath string
	            filepath = file if isinstance(file, str) else file.name
	            history = _agent_turn(
	                history,
	                f"CSV uploaded: {os.path.basename(filepath)}",
	                f"load_scopus_csv filepath={filepath}",
	                thread_id,
	            )
	            return history, build_phase_bar(), load_review_table()

	        csv_upload.upload(
	            on_csv_upload,
	            inputs=[csv_upload, chatbot, thread_state],
	            outputs=[chatbot, phase_bar, review_table],
	        )

	        # ── Event: Send message ───────────────────────────────────────────────
	        def on_send(message, history, thread_id):
	            # Guard against None as well as blank text; only refresh the views.
	            if not message or not message.strip():
	                return history, "", build_phase_bar(), load_review_table()
	            history = _agent_turn(history, message, message, thread_id)
	            # The second output clears the text box.
	            return history, "", build_phase_bar(), load_review_table()

	        send_btn.click(
	            on_send,
	            inputs=[user_input, chatbot, thread_state],
	            outputs=[chatbot, user_input, phase_bar, review_table],
	        )
	        user_input.submit(
	            on_send,
	            inputs=[user_input, chatbot, thread_state],
	            outputs=[chatbot, user_input, phase_bar, review_table],
	        )

	        # ── Event: Submit Review ──────────────────────────────────────────────
	        def on_submit_review(table_data, history, thread_id):
	            # The chat shows a short notice; the agent receives the full message.
	            msg = parse_table_to_message(table_data)
	            history = _agent_turn(
	                history,
	                "📤 Submit Review (table decisions sent to agent)",
	                msg,
	                thread_id,
	            )
	            return history, build_phase_bar(), load_review_table()

	        submit_review_btn.click(
	            on_submit_review,
	            inputs=[review_table, chatbot, thread_state],
	            outputs=[chatbot, phase_bar, review_table],
	        )

	        # ── Event: Chart selection ────────────────────────────────────────────
	        chart_dropdown.change(
	            load_chart_html,
	            inputs=[chart_dropdown],
	            outputs=[chart_display],
	        )

	        def refresh_charts():
	            choices = get_chart_choices()
	            return gr.update(choices=choices, value=choices[0] if choices else None)

	        refresh_charts_btn.click(
	            refresh_charts,
	            outputs=[chart_dropdown],
	        )

	        # ── Event: Download refresh ───────────────────────────────────────────
	        def refresh_downloads():
	            files = get_download_files()
	            return gr.update(value=files)

	        refresh_downloads_btn.click(
	            refresh_downloads,
	            outputs=[download_files],
	        )

	        # ── Initial load ──────────────────────────────────────────────────────
	        app.load(
	            lambda: (build_phase_bar(), load_review_table(), get_download_files()),
	            outputs=[phase_bar, review_table, download_files],
	        )

	    return app


	# ── Launch ─────────────────────────────────────────────────────────────────────
	if __name__ == "__main__":
	    # Build the UI first, then the theme and stylesheet, then launch — the same
	    # evaluation order as passing them inline to launch().
	    demo = build_app()

	    page_theme = gr.themes.Base(
	        primary_hue="emerald",
	        secondary_hue="slate",
	        neutral_hue="slate",
	        font=[gr.themes.GoogleFont("IBM Plex Mono"), "monospace"],
	    )

	    # Stylesheet text is kept flush with the block's left edge so it is unchanged.
	    page_css = """
	body { background: #0a0f1a !important; }
	.gradio-container { max-width: 1400px !important; background: #0a0f1a !important; }
	.section-header {
	font-size: 13px;
	font-weight: 700;
	color: #64748b;
	letter-spacing: 0.12em;
	text-transform: uppercase;
	margin-bottom: 12px;
	padding-bottom: 8px;
	border-bottom: 1px solid #1e293b;
	}
	footer { display: none !important; }
	"""

	    # Listen on all interfaces, port 7860; no SSR and no public share link.
	    demo.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	        ssr_mode=False,
	        share=False,
	        theme=page_theme,
	        css=page_css,
	    )