try / app.py
ronitsonawane24's picture
Upload 5 files
4e60557 verified
"""
app.py — Gradio UI for BERTopic Agentic AI
Assignment: Text Analysis & Topic Modelling (Prof. Shailaja Jha)
Generated via: Anthropic Claude Sonnet 4.5
Architecture: LangGraph ReAct Agent + Gradio 5.x UI
"""
import os
import json
import pandas as pd
import gradio as gr
from agent import invoke_agent
# Directory that the helper functions in this module read result files from.
# It is created at import time (no error if it already exists).
OUTPUT_DIR = "./outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Use a simple global for thread ID — avoids gr.State schema issues.
# NOTE(review): every handler passes this same ID to invoke_agent, so all
# browser sessions presumably share one agent conversation — confirm against
# the agent module before deploying for multiple users.
_THREAD_ID = "main-session"
# ─── HELPERS ──────────────────────────────────────────────────────────────────
def _exists(name: str) -> bool:
    """Report whether a file called *name* is present inside ``OUTPUT_DIR``."""
    candidate = os.path.join(OUTPUT_DIR, name)
    return os.path.exists(candidate)
def _load(name: str):
    """Read the UTF-8 JSON file *name* from ``OUTPUT_DIR`` and return its decoded content."""
    path = os.path.join(OUTPUT_DIR, name)
    with open(path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)
    return payload
def get_phase_html() -> str:
    """Render the pipeline progress bar as an HTML snippet.

    Each phase becomes a pill-shaped ``<span>``: green with a check mark when
    its marker file(s) exist in the output directory (as reported by
    ``_exists``), grey with an empty box otherwise.

    Returns:
        One ``<div>`` string wrapping a ``<span>`` per phase.
    """
    # Themes, Saturation and Names are all keyed to the same artifact, so the
    # filesystem is queried once and the result reused.
    themes_done = _exists("abstract_themes.json")
    phases = [
        ("① Load", _exists("corpus_config.json")),
        ("② Codes", _exists("abstract_labels.json")),
        ("③ Themes", themes_done),
        ("④ Saturation", themes_done),
        ("⑤ Names", themes_done),
        ("⑤½ PAJAIS", _exists("taxonomy_map.json")),
        # The report phase needs both deliverables to be present.
        ("⑥ Report", _exists("comparison.csv") and _exists("narrative.txt")),
    ]

    badges = []
    for name, done in phases:
        background = "#22c55e" if done else "#374151"
        icon = "✅" if done else "⬜"
        badges.append(
            '<span style="padding:6px 14px;border-radius:20px;margin:3px;font-size:13px;'
            f'background:{background};color:white;font-weight:600;">'
            f'{icon} {name}</span>'
        )
    items = "".join(badges)
    return f'<div style="display:flex;flex-wrap:wrap;gap:4px;padding:8px;">{items}</div>'
def load_review_table():
    """Return the review-table rows as a list of lists.

    Every row has eight cells, in the order of the review table's headers:
    ``#``, ``Topic Label``, ``Top Evidence``, ``Sentences``, ``Papers``,
    ``Approve``, ``Rename To``, ``Reasoning``.

    Sources are tried in priority order and the first file that exists wins,
    even if it yields no rows:

    1. ``taxonomy_map.json`` — one row per entry of its ``taxonomy_mapping``
       dict; sentence and paper counts are reported as 0.
    2. ``abstract_themes.json`` — one row per item, labelled by ``theme_name``.
    3. ``abstract_labels.json`` — one row per item, labelled by ``label``.

    Returns:
        The list of rows, or an empty list when none of the files exists.
    """
    if _exists("taxonomy_map.json"):
        # ``or {}`` tolerates an explicit null mapping in the JSON file.
        mapping = _load("taxonomy_map.json").get("taxonomy_mapping") or {}
        rows = []
        for i, (theme, v) in enumerate(mapping.items()):
            # A null reasoning would otherwise fail on the slice below.
            reasoning = v.get("reasoning") or ""
            evidence = f"→ {v.get('pajais_match', '?')} | {reasoning[:80]}"
            rows.append([i, theme, evidence, 0, 0, "YES", theme, reasoning])
        return rows

    for fname, key in (("abstract_themes.json", "theme_name"),
                       ("abstract_labels.json", "label")):
        if not _exists(fname):
            continue
        rows = []
        for i, d in enumerate(_load(fname)):
            # An empty or null ``top_sentences`` must not raise on ``[0]``.
            sentences = d.get("top_sentences") or [""]
            evidence = (sentences[0] or "")[:120]
            rows.append([
                i,
                d.get(key, str(i)),
                evidence,
                d.get("sentence_count", 0),
                d.get("paper_count", 0),
                "YES",
                d.get(key, ""),
                d.get("reasoning", ""),
            ])
        return rows

    return []
def get_download_files():
    """Collect the deliverables that are currently present in ``OUTPUT_DIR``.

    Returns:
        Paths of the known deliverable files that exist, in a fixed order, or
        ``None`` when none of them exists.
    """
    deliverables = (
        "comparison.csv",
        "taxonomy_map.json",
        "narrative.txt",
        "abstract_labels.json",
        "abstract_themes.json",
        "title_labels.json",
        "title_themes.json",
    )
    found = []
    for filename in deliverables:
        if _exists(filename):
            found.append(os.path.join(OUTPUT_DIR, filename))
    if not found:
        return None
    return found
# ─── EVENT HANDLERS ───────────────────────────────────────────────────────────
def on_csv_upload(file_obj, history):
    """Handle a CSV upload by asking the agent to analyse the uploaded file.

    Args:
        file_obj: Either a path string or an object exposing the path as
            ``.name``; ``None`` means nothing was uploaded.
        history: Chat history as a list of ``{"role", "content"}`` dicts.

    Returns:
        ``(history, phase_html, review_rows, download_files)``. When a file
        was given, the history gains the request and the agent's reply (or an
        error message if the agent call raised).
    """
    if file_obj is not None:
        if isinstance(file_obj, str):
            filepath = file_obj
        else:
            filepath = file_obj.name
        message = f"Analyze my Scopus CSV at: {filepath}"
        try:
            response = invoke_agent(message, _THREAD_ID)
        except Exception as exc:
            # Surface the failure in the chat instead of breaking the UI.
            response = f"❌ Error: {exc}"
        user_turn = {"role": "user", "content": message}
        agent_turn = {"role": "assistant", "content": response}
        history = history + [user_turn, agent_turn]
    return history, get_phase_html(), load_review_table(), get_download_files()
def on_send(message, history):
    """Forward a typed chat message to the agent.

    Args:
        message: Text from the message box; whitespace-only input is ignored.
        history: Chat history as a list of ``{"role", "content"}`` dicts.

    Returns:
        ``(history, "")`` — the history (extended with the message and the
        agent's reply when the message was not blank) and an empty string.
    """
    if message.strip():
        try:
            reply = invoke_agent(message, _THREAD_ID)
        except Exception as exc:
            # Surface the failure in the chat instead of breaking the UI.
            reply = f"❌ Error: {exc}"
        user_turn = {"role": "user", "content": message}
        agent_turn = {"role": "assistant", "content": reply}
        history = history + [user_turn, agent_turn]
    return history, ""
def on_refresh(history):
    """Recompute the phase bar, review table and download list; history passes through unchanged."""
    phase_html = get_phase_html()
    review_rows = load_review_table()
    downloads = get_download_files()
    return history, phase_html, review_rows, downloads
def _review_cell_text(value, default: str = "") -> str:
    """Return a table cell as text, mapping blank cells (None/NaN) to *default*."""
    # ``value != value`` is true only for NaN, which is how pandas represents
    # an empty cell.
    if value is None or (isinstance(value, float) and value != value):
        return default
    return str(value)


def _review_cluster_id(value) -> int:
    """Convert a "#" cell to an int, falling back to 0 when it is not numeric."""
    if isinstance(value, int):
        return int(value)
    try:
        # Going through float accepts "3", "3.0" and 3.0 alike; NaN, blanks,
        # None and infinities raise and fall through to the default.
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return 0


def on_submit_review(table_data, history):
    """Send the user's review-table decisions to the agent as JSON.

    Args:
        table_data: The review table as a ``pandas.DataFrame`` or as a list
            of rows; each row is either a dict keyed by header name or a
            sequence in header order. ``None`` or an empty table is a no-op.
        history: Chat history as a list of ``{"role", "content"}`` dicts.

    Returns:
        ``(history, phase_html, review_rows, download_files)``. When decisions
        were submitted, the history gains a ``[Submit Review]`` marker and the
        agent's reply (or an error message if the agent call raised).
    """
    # Handle both DataFrame (Gradio 5) and list formats.
    if isinstance(table_data, pd.DataFrame):
        rows_list = [] if table_data.empty else table_data.values.tolist()
    else:
        rows_list = table_data or []
    if not rows_list:
        return history, get_phase_html(), load_review_table(), get_download_files()

    headers = ["#", "Topic Label", "Top Evidence",
               "Sentences", "Papers", "Approve", "Rename To", "Reasoning"]
    rows_out = []
    for row in rows_list:
        if not row:
            continue
        d = row if isinstance(row, dict) else dict(zip(headers, row))
        rows_out.append({
            # A newly added or hand-edited row may carry a blank or
            # non-numeric "#" cell; that must not abort the whole submission.
            "cluster_id": _review_cluster_id(d.get("#", 0)),
            "label": _review_cell_text(d.get("Topic Label")),
            "approve": _review_cell_text(d.get("Approve"), "YES").strip().upper(),
            "rename_to": _review_cell_text(d.get("Rename To")),
            "reasoning": _review_cell_text(d.get("Reasoning")),
        })

    message = f"I have reviewed the table. Here are my decisions (JSON):\n{json.dumps(rows_out)}"
    try:
        response = invoke_agent(message, _THREAD_ID)
    except Exception as e:
        # Surface the failure in the chat instead of breaking the UI.
        response = f"❌ Error: {e}"
    # The chat shows a short marker rather than the full JSON payload.
    history = history + [{"role": "user", "content": "[Submit Review]"},
                         {"role": "assistant", "content": response}]
    return history, get_phase_html(), load_review_table(), get_download_files()
# ─── GRADIO 5.x UI ────────────────────────────────────────────────────────────
# Build the page at import time so ``demo`` is available as a module attribute.
with gr.Blocks(title="BERTopic Agentic AI") as demo:
    # Static banner.
    gr.HTML("""
    <div style="text-align:center;padding:16px;background:linear-gradient(135deg,#1e1b4b,#312e81);border-radius:12px;margin-bottom:12px;">
    <h1 style="color:white;margin:0;font-size:1.8em;">🤖 BERTopic Agentic AI</h1>
    <p style="color:#a5b4fc;margin:4px 0 0;">RQ5–RQ7: Abstract vs Title Theme Comparison &amp; PAJAIS Taxonomy Mapping</p>
    <p style="color:#818cf8;font-size:0.85em;margin:4px 0 0;">LangGraph · Mistral Small · all-MiniLM-L6-v2 · Braun &amp; Clarke (2006) · PAJAIS 2019</p>
    </div>
    """)

    # Progress pills; re-rendered by every handler that returns phase HTML.
    phase_bar = gr.HTML(value=get_phase_html())

    with gr.Group():
        gr.Markdown("### 📁 Step 1: Upload Your Scopus CSV")
        csv_file = gr.File(label="Upload Scopus CSV (.csv)", file_types=[".csv"])

    with gr.Group():
        gr.Markdown("### 💬 Step 2: Agent Conversation")
        chatbot = gr.Chatbot(
            height=380,
            show_label=False,
            # "messages" matches the role/content dicts the handlers append.
            type="messages",
            placeholder="Upload your CSV first, then type 'run abstract' or 'run title'...",
        )
        with gr.Row():
            msg_box = gr.Textbox(
                placeholder="Type 'run abstract', 'run title', or a question...",
                label="Your message",
                scale=5,
                show_label=False,
            )
            send_btn = gr.Button("Send ➤", variant="primary", scale=1)
        with gr.Row():
            submit_btn = gr.Button("📋 Submit Review", variant="secondary")
            refresh_btn = gr.Button("🔄 Refresh", variant="secondary")

    with gr.Group():
        gr.Markdown("### 📊 Step 3: Topic Review Table")
        gr.Markdown("_Edit **Approve** (YES/NO) and **Rename To** inline, then click Submit Review._")
        # Header order must stay in sync with the header list that
        # on_submit_review uses to interpret positional rows.
        review_table = gr.Dataframe(
            headers=["#", "Topic Label", "Top Evidence",
                     "Sentences", "Papers", "Approve", "Rename To", "Reasoning"],
            value=load_review_table(),
            interactive=True,
        )

    with gr.Group():
        gr.Markdown("### 📥 Step 4: Download Deliverables")
        gr.Markdown("_Click Refresh after each phase to see new files._")
        download_box = gr.File(
            value=get_download_files(),
            label="Deliverable Files",
            interactive=False,
        )

    # Footer; written as joined literals so the Markdown text carries no
    # source-code indentation.
    gr.Markdown(
        "\n"
        "---\n"
        "**Stack:** Mistral Small · all-MiniLM-L6-v2 · AgglomerativeClustering (cosine, 0.7) · LangGraph ReAct · MemorySaver · PAJAIS 2019\n"
        "> ⚙️ Set `MISTRAL_API_KEY` in Space **Settings → Variables and secrets**\n"
    )

    # ── Event Wiring ──────────────────────────────────────────────────────────
    # Uploading a file triggers the analysis request and refreshes all panels.
    csv_file.upload(
        fn=on_csv_upload,
        inputs=[csv_file, chatbot],
        outputs=[chatbot, phase_bar, review_table, download_box],
    )
    # The Send button and pressing Enter in the text box behave identically;
    # on_send's second return value clears the text box.
    send_btn.click(
        fn=on_send,
        inputs=[msg_box, chatbot],
        outputs=[chatbot, msg_box],
    )
    msg_box.submit(
        fn=on_send,
        inputs=[msg_box, chatbot],
        outputs=[chatbot, msg_box],
    )
    submit_btn.click(
        fn=on_submit_review,
        inputs=[review_table, chatbot],
        outputs=[chatbot, phase_bar, review_table, download_box],
    )
    refresh_btn.click(
        fn=on_refresh,
        inputs=[chatbot],
        outputs=[chatbot, phase_bar, review_table, download_box],
    )

if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)