""" app.py — Gradio UI for BERTopic Agentic AI Assignment: Text Analysis & Topic Modelling (Prof. Shailaja Jha) Generated via: Anthropic Claude Sonnet 4.5 Architecture: LangGraph ReAct Agent + Gradio 5.x UI """ import os import json import pandas as pd import gradio as gr from agent import invoke_agent OUTPUT_DIR = "./outputs" os.makedirs(OUTPUT_DIR, exist_ok=True) # Use a simple global for thread ID — avoids gr.State schema issues _THREAD_ID = "main-session" # ─── HELPERS ────────────────────────────────────────────────────────────────── def _exists(name: str) -> bool: return os.path.exists(os.path.join(OUTPUT_DIR, name)) def _load(name: str): with open(os.path.join(OUTPUT_DIR, name), "r", encoding="utf-8") as f: return json.load(f) def get_phase_html() -> str: phases = [ ("① Load", _exists("corpus_config.json")), ("② Codes", _exists("abstract_labels.json")), ("③ Themes", _exists("abstract_themes.json")), ("④ Saturation", _exists("abstract_themes.json")), ("⑤ Names", _exists("abstract_themes.json")), ("⑤½ PAJAIS", _exists("taxonomy_map.json")), ("⑥ Report", _exists("comparison.csv") and _exists("narrative.txt")), ] items = "".join( f'' f'{"✅" if done else "⬜"} {name}' for name, done in phases ) return f'
{items}
' def load_review_table(): """Return table rows as list-of-lists.""" if _exists("taxonomy_map.json"): tax = _load("taxonomy_map.json") mapping = tax.get("taxonomy_mapping", {}) rows = [ [i, theme, f"→ {v.get('pajais_match','?')} | {v.get('reasoning','')[:80]}", 0, 0, "YES", theme, v.get("reasoning", "")] for i, (theme, v) in enumerate(mapping.items()) ] return rows if rows else [] for fname, key in [("abstract_themes.json", "theme_name"), ("abstract_labels.json", "label")]: if _exists(fname): data = _load(fname) rows = [ [i, d.get(key, str(i)), (d.get("top_sentences", [""])[0] or "")[:120], d.get("sentence_count", 0), d.get("paper_count", 0), "YES", d.get(key, ""), d.get("reasoning", "")] for i, d in enumerate(data) ] return rows if rows else [] return [] def get_download_files(): targets = ["comparison.csv", "taxonomy_map.json", "narrative.txt", "abstract_labels.json", "abstract_themes.json", "title_labels.json", "title_themes.json"] paths = [os.path.join(OUTPUT_DIR, f) for f in targets if _exists(f)] return paths if paths else None # ─── EVENT HANDLERS ─────────────────────────────────────────────────────────── def on_csv_upload(file_obj, history): if file_obj is None: return history, get_phase_html(), load_review_table(), get_download_files() filepath = file_obj if isinstance(file_obj, str) else file_obj.name message = f"Analyze my Scopus CSV at: {filepath}" try: response = invoke_agent(message, _THREAD_ID) except Exception as e: response = f"❌ Error: {e}" history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": response}] return history, get_phase_html(), load_review_table(), get_download_files() def on_send(message, history): if not message.strip(): return history, "" try: response = invoke_agent(message, _THREAD_ID) except Exception as e: response = f"❌ Error: {e}" history = history + [{"role": "user", "content": message}, {"role": "assistant", "content": response}] return history, "" def on_refresh(history): return history, get_phase_html(), load_review_table(), get_download_files() def on_submit_review(table_data, history): # Handle both DataFrame (Gradio 5) and list formats if table_data is None: return history, get_phase_html(), load_review_table(), get_download_files() if isinstance(table_data, pd.DataFrame): if table_data.empty: return history, get_phase_html(), load_review_table(), get_download_files() rows_list = table_data.values.tolist() else: if not table_data: return history, get_phase_html(), load_review_table(), get_download_files() rows_list = table_data headers = ["#", "Topic Label", "Top Evidence", "Sentences", "Papers", "Approve", "Rename To", "Reasoning"] rows_out = [] for row in rows_list: if not row: continue if isinstance(row, dict): d = row else: d = dict(zip(headers, row)) rows_out.append({ "cluster_id": int(d.get("#", 0) or 0), "label": str(d.get("Topic Label", "")), "approve": str(d.get("Approve", "YES")).upper(), "rename_to": str(d.get("Rename To", "")), "reasoning": str(d.get("Reasoning", "")), }) message = f"I have reviewed the table. Here are my decisions (JSON):\n{json.dumps(rows_out)}" try: response = invoke_agent(message, _THREAD_ID) except Exception as e: response = f"❌ Error: {e}" history = history + [{"role": "user", "content": "[Submit Review]"}, {"role": "assistant", "content": response}] return history, get_phase_html(), load_review_table(), get_download_files() # ─── GRADIO 5.x UI ──────────────────────────────────────────────────────────── with gr.Blocks(title="BERTopic Agentic AI") as demo: gr.HTML("""

🤖 BERTopic Agentic AI

RQ5–RQ7: Abstract vs Title Theme Comparison & PAJAIS Taxonomy Mapping

LangGraph · Mistral Small · all-MiniLM-L6-v2 · Braun & Clarke (2006) · PAJAIS 2019

""") phase_bar = gr.HTML(value=get_phase_html()) with gr.Group(): gr.Markdown("### 📁 Step 1: Upload Your Scopus CSV") csv_file = gr.File(label="Upload Scopus CSV (.csv)", file_types=[".csv"]) with gr.Group(): gr.Markdown("### 💬 Step 2: Agent Conversation") chatbot = gr.Chatbot( height=380, show_label=False, type="messages", placeholder="Upload your CSV first, then type 'run abstract' or 'run title'...", ) with gr.Row(): msg_box = gr.Textbox( placeholder="Type 'run abstract', 'run title', or a question...", label="Your message", scale=5, show_label=False, ) send_btn = gr.Button("Send ➤", variant="primary", scale=1) with gr.Row(): submit_btn = gr.Button("📋 Submit Review", variant="secondary") refresh_btn = gr.Button("🔄 Refresh", variant="secondary") with gr.Group(): gr.Markdown("### 📊 Step 3: Topic Review Table") gr.Markdown("_Edit **Approve** (YES/NO) and **Rename To** inline, then click Submit Review._") review_table = gr.Dataframe( headers=["#", "Topic Label", "Top Evidence", "Sentences", "Papers", "Approve", "Rename To", "Reasoning"], value=load_review_table(), interactive=True, ) with gr.Group(): gr.Markdown("### 📥 Step 4: Download Deliverables") gr.Markdown("_Click Refresh after each phase to see new files._") download_box = gr.File( value=get_download_files(), label="Deliverable Files", interactive=False, ) gr.Markdown(""" --- **Stack:** Mistral Small · all-MiniLM-L6-v2 · AgglomerativeClustering (cosine, 0.7) · LangGraph ReAct · MemorySaver · PAJAIS 2019 > ⚙️ Set `MISTRAL_API_KEY` in Space **Settings → Variables and secrets** """) # ── Event Wiring ────────────────────────────────────────────────────────── csv_file.upload( fn=on_csv_upload, inputs=[csv_file, chatbot], outputs=[chatbot, phase_bar, review_table, download_box], ) send_btn.click( fn=on_send, inputs=[msg_box, chatbot], outputs=[chatbot, msg_box], ) msg_box.submit( fn=on_send, inputs=[msg_box, chatbot], outputs=[chatbot, msg_box], ) submit_btn.click( fn=on_submit_review, inputs=[review_table, chatbot], outputs=[chatbot, phase_bar, review_table, download_box], ) refresh_btn.click( fn=on_refresh, inputs=[chatbot], outputs=[chatbot, phase_bar, review_table, download_box], ) if __name__ == "__main__": demo.launch(server_name="0.0.0.0", server_port=7860)