"""
app.py — Gradio UI for BERTopic Agentic AI
Assignment: Text Analysis & Topic Modelling (Prof. Shailaja Jha)
Generated via: Anthropic Claude Sonnet 4.5
Architecture: LangGraph ReAct Agent + Gradio 5.x UI
"""
import os
import json
import pandas as pd
import gradio as gr
from agent import invoke_agent
# Directory the UI inspects for pipeline artefacts (JSON/CSV/TXT files); the
# helpers below resolve every file name relative to it.
OUTPUT_DIR = "./outputs"
# Created at import time so existence checks and downloads never hit a
# missing directory; exist_ok=True makes repeated imports harmless.
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Use a simple global for thread ID — avoids gr.State schema issues
# NOTE(review): every handler passes this same constant to invoke_agent, so
# all browser sessions presumably share one agent conversation — confirm
# against agent.py whether that is intended.
_THREAD_ID = "main-session"
# ─── HELPERS ──────────────────────────────────────────────────────────────────
def _exists(name: str) -> bool:
    """Report whether a path called *name* is present inside ``OUTPUT_DIR``."""
    candidate = os.path.join(OUTPUT_DIR, name)
    return os.path.exists(candidate)
def _load(name: str):
    """Parse the UTF-8 JSON file *name* stored in ``OUTPUT_DIR``.

    Returns whatever ``json.load`` produces for that file. Errors raised by
    ``open`` or ``json.load`` (missing file, malformed JSON) propagate to
    the caller.
    """
    target = os.path.join(OUTPUT_DIR, name)
    with open(target, mode="r", encoding="utf-8") as handle:
        payload = json.load(handle)
    return payload
def get_phase_html() -> str:
    """Build the HTML snippet shown in the pipeline progress bar.

    Every phase is rendered as a check mark (done) or an empty box (pending)
    followed by the phase name. A phase counts as done when its marker
    file(s) exist in the output directory, as reported by ``_exists``.

    Returns:
        A ``<div>`` containing one ``<span>`` per phase.
    """
    # Phases ③, ④ and ⑤ are all keyed on the same marker file, so they always
    # flip together; check the file system once instead of three times.
    themes_done = _exists("abstract_themes.json")
    phases = [
        ("① Load", _exists("corpus_config.json")),
        ("② Codes", _exists("abstract_labels.json")),
        ("③ Themes", themes_done),
        ("④ Saturation", themes_done),
        ("⑤ Names", themes_done),
        ("⑤½ PAJAIS", _exists("taxonomy_map.json")),
        # The report phase needs both deliverables before it is complete.
        ("⑥ Report", _exists("comparison.csv") and _exists("narrative.txt")),
    ]
    # Each phase gets its own element so the entries do not run together
    # when the browser collapses whitespace.
    items = "".join(
        f'<span style="white-space:nowrap;">'
        f'{"✅" if done else "⬜"} {name}'
        f'</span>'
        for name, done in phases
    )
    # Explicit "\n" escapes: a single-quoted literal must not span source
    # lines, which is what made the previous return statement unparsable.
    return (
        '<div style="display:flex;flex-wrap:wrap;gap:12px;">\n'
        f'{items}\n'
        '</div>'
    )
def load_review_table():
    """Return the review-table rows as a list of lists.

    Each row has eight cells, in this order: index, topic label, top
    evidence, sentence count, paper count, approve flag (always ``"YES"``),
    rename target, reasoning.

    Sources are tried in priority order and the first file that exists wins,
    even when it yields no rows:

    1. ``taxonomy_map.json`` — one row per entry of ``taxonomy_mapping``.
    2. ``abstract_themes.json`` — label read from ``theme_name``.
    3. ``abstract_labels.json`` — label read from ``label``.

    Returns:
        The rows, or an empty list when none of the files exist.
    """
    if _exists("taxonomy_map.json"):
        # ``or {}`` also covers an explicit null stored under the key.
        mapping = _load("taxonomy_map.json").get("taxonomy_mapping") or {}
        rows = []
        for idx, (theme, entry) in enumerate(mapping.items()):
            # A null reasoning must not break the [:80] preview slice.
            reasoning = entry.get("reasoning") or ""
            match = entry.get("pajais_match", "?")
            rows.append([
                idx, theme,
                f"→ {match} | {reasoning[:80]}",
                0, 0, "YES", theme, reasoning,
            ])
        return rows

    sources = (
        ("abstract_themes.json", "theme_name"),
        ("abstract_labels.json", "label"),
    )
    for fname, key in sources:
        if not _exists(fname):
            continue
        rows = []
        for idx, item in enumerate(_load(fname)):
            # An empty or null ``top_sentences`` list previously raised
            # IndexError/TypeError on ``[0]``; fall back to blank evidence.
            sentences = item.get("top_sentences") or [""]
            evidence = (sentences[0] or "")[:120]
            rows.append([
                idx, item.get(key, str(idx)),
                evidence,
                item.get("sentence_count", 0), item.get("paper_count", 0),
                "YES", item.get(key, ""), item.get("reasoning") or "",
            ])
        return rows
    return []
def get_download_files():
    """Collect the deliverable files that are currently available.

    Returns:
        A list of paths (under ``OUTPUT_DIR``) for every known deliverable
        that exists, in the fixed order below, or ``None`` when none exist.
    """
    deliverables = (
        "comparison.csv",
        "taxonomy_map.json",
        "narrative.txt",
        "abstract_labels.json",
        "abstract_themes.json",
        "title_labels.json",
        "title_themes.json",
    )
    found = []
    for filename in deliverables:
        if _exists(filename):
            found.append(os.path.join(OUTPUT_DIR, filename))
    # An empty list is reported as None.
    return found or None
# ─── EVENT HANDLERS ───────────────────────────────────────────────────────────
def on_csv_upload(file_obj, history):
    """Handle a CSV upload by asking the agent to analyse the file.

    Args:
        file_obj: Either a path string or an object exposing the path as
            ``.name``; ``None`` means nothing was uploaded.
        history: Chat history as a list of ``{"role", "content"}`` dicts.

    Returns:
        ``(history, phase_html, review_rows, download_files)``. The history
        gains one user turn and one assistant turn unless ``file_obj`` is
        ``None``.
    """
    if file_obj is not None:
        filepath = file_obj.name if not isinstance(file_obj, str) else file_obj
        message = f"Analyze my Scopus CSV at: {filepath}"
        try:
            response = invoke_agent(message, _THREAD_ID)
        except Exception as e:
            # Surface agent failures in the chat instead of crashing the UI.
            response = f"❌ Error: {e}"
        user_turn = {"role": "user", "content": message}
        agent_turn = {"role": "assistant", "content": response}
        history = history + [user_turn, agent_turn]
    return history, get_phase_html(), load_review_table(), get_download_files()
def on_send(message, history):
    """Forward a chat message to the agent and append the exchange.

    Args:
        message: Text typed by the user.
        history: Chat history as a list of ``{"role", "content"}`` dicts.

    Returns:
        ``(history, "")``; the empty string is the second output value,
        which the event wiring sends to the message box. Whitespace-only
        messages leave the history untouched and never reach the agent.
    """
    if message.strip():
        try:
            response = invoke_agent(message, _THREAD_ID)
        except Exception as e:
            # Surface agent failures in the chat instead of crashing the UI.
            response = f"❌ Error: {e}"
        user_turn = {"role": "user", "content": message}
        agent_turn = {"role": "assistant", "content": response}
        history = history + [user_turn, agent_turn]
    return history, ""
def on_refresh(history):
    """Recompute the status widgets without contacting the agent.

    Returns:
        ``(history, phase_html, review_rows, download_files)`` with the
        history passed through unchanged.
    """
    phase_html = get_phase_html()
    table_rows = load_review_table()
    files = get_download_files()
    return history, phase_html, table_rows, files
def _to_cluster_id(value) -> int:
    """Coerce a review-table cell to an integer cluster id, falling back to 0.

    Blank strings, ``None``, ``NaN``, infinities and non-numeric text all map
    to 0; numeric strings such as ``"3.0"`` are accepted. A single malformed
    cell must not abort the whole submission.
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        pass
    try:
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return 0


def on_submit_review(table_data, history):
    """Send the reviewer's table decisions to the agent as JSON.

    Args:
        table_data: Review table content, either a ``pandas.DataFrame`` or a
            list of rows (each row a list in header order, or a dict keyed
            by header name).
        history: Chat history as a list of ``{"role", "content"}`` dicts.

    Returns:
        ``(history, phase_html, review_rows, download_files)``. When the
        table is missing or empty the agent is not called and the history
        is returned unchanged.
    """
    # Handle both DataFrame (Gradio 5) and list formats
    if table_data is None:
        rows_list = []
    elif isinstance(table_data, pd.DataFrame):
        rows_list = [] if table_data.empty else table_data.values.tolist()
    else:
        rows_list = table_data or []
    if not rows_list:
        return history, get_phase_html(), load_review_table(), get_download_files()

    headers = ["#", "Topic Label", "Top Evidence",
               "Sentences", "Papers", "Approve", "Rename To", "Reasoning"]
    rows_out = []
    for row in rows_list:
        if not row:
            continue
        # zip() truncates short rows; missing cells fall back to the
        # defaults passed to .get() below.
        d = row if isinstance(row, dict) else dict(zip(headers, row))
        rows_out.append({
            "cluster_id": _to_cluster_id(d.get("#", 0)),
            "label": str(d.get("Topic Label", "")),
            # Strip stray whitespace so " yes " is reported as "YES".
            "approve": str(d.get("Approve", "YES")).strip().upper(),
            "rename_to": str(d.get("Rename To", "")),
            "reasoning": str(d.get("Reasoning", "")),
        })

    message = f"I have reviewed the table. Here are my decisions (JSON):\n{json.dumps(rows_out)}"
    try:
        response = invoke_agent(message, _THREAD_ID)
    except Exception as e:
        # Surface agent failures in the chat instead of crashing the UI.
        response = f"❌ Error: {e}"
    # The chat shows a short placeholder rather than the full JSON payload.
    history = history + [{"role": "user", "content": "[Submit Review]"},
                         {"role": "assistant", "content": response}]
    return history, get_phase_html(), load_review_table(), get_download_files()
# ─── GRADIO 5.x UI ────────────────────────────────────────────────────────────
with gr.Blocks(title="BERTopic Agentic AI") as demo:
    # NOTE(review): SOURCE carried no indentation, so the nesting below is a
    # reconstruction — confirm that both button rows belong inside the
    # Step 2 group.

    # Title banner (three text lines, no markup).
    gr.HTML(
        "\n🤖 BERTopic Agentic AI\n"
        "RQ5–RQ7: Abstract vs Title Theme Comparison & PAJAIS Taxonomy Mapping\n"
        "LangGraph · Mistral Small · all-MiniLM-L6-v2 · Braun & Clarke (2006) · PAJAIS 2019\n"
    )
    # Progress indicator, rendered once at build time and refreshed by the
    # handlers wired at the bottom of this block.
    phase_bar = gr.HTML(value=get_phase_html())

    # Step 1: CSV upload.
    with gr.Group():
        gr.Markdown("### 📁 Step 1: Upload Your Scopus CSV")
        csv_file = gr.File(file_types=[".csv"], label="Upload Scopus CSV (.csv)")

    # Step 2: chat with the agent plus the review/refresh buttons.
    with gr.Group():
        gr.Markdown("### 💬 Step 2: Agent Conversation")
        chatbot = gr.Chatbot(
            type="messages",
            placeholder="Upload your CSV first, then type 'run abstract' or 'run title'...",
            show_label=False,
            height=380,
        )
        with gr.Row():
            msg_box = gr.Textbox(
                label="Your message",
                placeholder="Type 'run abstract', 'run title', or a question...",
                show_label=False,
                scale=5,
            )
            send_btn = gr.Button("Send ➤", scale=1, variant="primary")
        with gr.Row():
            submit_btn = gr.Button("📋 Submit Review", variant="secondary")
            refresh_btn = gr.Button("🔄 Refresh", variant="secondary")

    # Step 3: editable review table.
    with gr.Group():
        gr.Markdown("### 📊 Step 3: Topic Review Table")
        gr.Markdown("_Edit **Approve** (YES/NO) and **Rename To** inline, then click Submit Review._")
        _review_headers = [
            "#", "Topic Label", "Top Evidence",
            "Sentences", "Papers", "Approve", "Rename To", "Reasoning",
        ]
        review_table = gr.Dataframe(
            value=load_review_table(),
            headers=_review_headers,
            interactive=True,
        )

    # Step 4: read-only list of downloadable deliverables.
    with gr.Group():
        gr.Markdown("### 📥 Step 4: Download Deliverables")
        gr.Markdown("_Click Refresh after each phase to see new files._")
        download_box = gr.File(
            label="Deliverable Files",
            value=get_download_files(),
            interactive=False,
        )

    # Footer.
    gr.Markdown(
        "\n---\n"
        "**Stack:** Mistral Small · all-MiniLM-L6-v2 · AgglomerativeClustering (cosine, 0.7) · LangGraph ReAct · MemorySaver · PAJAIS 2019\n"
        "> ⚙️ Set `MISTRAL_API_KEY` in Space **Settings → Variables and secrets**\n"
    )

    # ── Event Wiring ──────────────────────────────────────────────────────────
    # Upload, submit-review and refresh all update the same four widgets.
    _status_outputs = [chatbot, phase_bar, review_table, download_box]
    # The Send button and pressing Enter in the textbox share one handler.
    _chat_event = dict(
        fn=on_send,
        inputs=[msg_box, chatbot],
        outputs=[chatbot, msg_box],
    )

    csv_file.upload(fn=on_csv_upload, inputs=[csv_file, chatbot], outputs=_status_outputs)
    send_btn.click(**_chat_event)
    msg_box.submit(**_chat_event)
    submit_btn.click(fn=on_submit_review, inputs=[review_table, chatbot], outputs=_status_outputs)
    refresh_btn.click(fn=on_refresh, inputs=[chatbot], outputs=_status_outputs)
if __name__ == "__main__":
    # Only start the server when executed as a script, not on import.
    # Listens on every network interface, port 7860.
    demo.launch(server_port=7860, server_name="0.0.0.0")