Spaces:
Build error
Build error
"""
app.py — Gradio UI for BERTopic Agentic AI
Assignment: Text Analysis & Topic Modelling (Prof. Shailaja Jha)
Generated via: Anthropic Claude Sonnet 4.5
Architecture: LangGraph ReAct Agent + Gradio 5.x UI
"""
| import os | |
| import json | |
| import pandas as pd | |
| import gradio as gr | |
| from agent import invoke_agent | |
# Folder the helpers below read pipeline artifacts from; created at import
# time so the existence checks never hit a missing directory.
OUTPUT_DIR = "./outputs"
os.makedirs(name=OUTPUT_DIR, exist_ok=True)

# One module-level thread ID is passed to every agent call. A plain global is
# used here to avoid gr.State schema issues.
_THREAD_ID = "main-session"
# ─── HELPERS ─────────────────────────────────────────────────────────────────
def _exists(name: str) -> bool:
    """Report whether ``name`` is present inside ``OUTPUT_DIR``."""
    candidate = os.path.join(OUTPUT_DIR, name)
    return os.path.exists(candidate)
def _load(name: str):
    """Decode the UTF-8 JSON file ``name`` found in ``OUTPUT_DIR``.

    Returns whatever ``json.load`` produces for the file's content.
    """
    target = os.path.join(OUTPUT_DIR, name)
    with open(target, "r", encoding="utf-8") as handle:
        payload = json.load(handle)
    return payload
def get_phase_html() -> str:
    """Render the phase progress bar as an HTML snippet.

    Every phase becomes a pill-shaped ``<span>``. A phase counts as done when
    its output file(s) exist in ``OUTPUT_DIR``; done pills are green
    (``#22c55e``), pending ones grey (``#374151``).
    """

    def _pill(label: str, finished: bool) -> str:
        # One rounded badge; colour and marker depend on completion.
        colour = "#22c55e" if finished else "#374151"
        marker = "β " if finished else "β¬"
        return (
            '<span style="padding:6px 14px;border-radius:20px;margin:3px;font-size:13px;'
            f'background:{colour};color:white;font-weight:600;">'
            f'{marker} {label}</span>'
        )

    # Several phases share abstract_themes.json as their completion marker.
    status = [
        ("β Load", _exists("corpus_config.json")),
        ("β‘ Codes", _exists("abstract_labels.json")),
        ("β’ Themes", _exists("abstract_themes.json")),
        ("β£ Saturation", _exists("abstract_themes.json")),
        ("β€ Names", _exists("abstract_themes.json")),
        ("β€Β½ PAJAIS", _exists("taxonomy_map.json")),
        ("β₯ Report", _exists("comparison.csv") and _exists("narrative.txt")),
    ]
    pills = [_pill(label, finished) for label, finished in status]
    return (
        '<div style="display:flex;flex-wrap:wrap;gap:4px;padding:8px;">'
        + "".join(pills)
        + '</div>'
    )
def load_review_table():
    """Build the rows displayed in the topic review table.

    Sources are tried in priority order and the first file that exists wins:
    ``taxonomy_map.json``, then ``abstract_themes.json``, then
    ``abstract_labels.json``.

    Returns:
        A list of 8-element rows ordered as ``[#, Topic Label, Top Evidence,
        Sentences, Papers, Approve, Rename To, Reasoning]``. The list is empty
        when no source file exists or the chosen file has no entries.
    """
    if _exists("taxonomy_map.json"):
        mapping = _load("taxonomy_map.json").get("taxonomy_mapping") or {}
        rows = []
        for idx, (theme, info) in enumerate(mapping.items()):
            info = info or {}
            # JSON null (or a missing key) must not break the [:80] slice.
            reasoning = str(info.get("reasoning") or "")
            match = info.get("pajais_match", "?")
            rows.append([
                idx, theme,
                f"β {match} | {reasoning[:80]}",
                0, 0, "YES", theme, reasoning,
            ])
        # An existing taxonomy file takes precedence even when it is empty.
        return rows

    sources = (
        ("abstract_themes.json", "theme_name"),
        ("abstract_labels.json", "label"),
    )
    for fname, key in sources:
        if not _exists(fname):
            continue
        rows = []
        for idx, entry in enumerate(_load(fname)):
            # An empty or null "top_sentences" falls back to a blank evidence
            # cell instead of raising IndexError / TypeError.
            sentences = entry.get("top_sentences") or [""]
            evidence = (sentences[0] or "")[:120]
            rows.append([
                idx, entry.get(key, str(idx)),
                evidence,
                entry.get("sentence_count", 0), entry.get("paper_count", 0),
                "YES", entry.get(key, ""), entry.get("reasoning", ""),
            ])
        return rows
    return []
def get_download_files():
    """List the deliverable files that currently exist in ``OUTPUT_DIR``.

    Returns:
        Paths of the known deliverables that are present, in a fixed order,
        or ``None`` when none of them exists yet.
    """
    deliverables = (
        "comparison.csv", "taxonomy_map.json", "narrative.txt",
        "abstract_labels.json", "abstract_themes.json",
        "title_labels.json", "title_themes.json",
    )
    found = []
    for fname in deliverables:
        if _exists(fname):
            found.append(os.path.join(OUTPUT_DIR, fname))
    return found or None
# ─── EVENT HANDLERS ──────────────────────────────────────────────────────────
def on_csv_upload(file_obj, history):
    """Handle a CSV upload by asking the agent to analyse the file.

    ``file_obj`` may be a path string or an object exposing ``.name``. When it
    is ``None`` the agent is not called and the chat history is unchanged.

    Returns:
        ``(history, phase_html, review_rows, download_files)``.
    """
    if file_obj is not None:
        filepath = file_obj if isinstance(file_obj, str) else file_obj.name
        message = f"Analyze my Scopus CSV at: {filepath}"
        try:
            reply = invoke_agent(message, _THREAD_ID)
        except Exception as err:
            # Show agent failures in the chat instead of crashing the UI.
            reply = f"β Error: {err}"
        history = history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": reply},
        ]
    return history, get_phase_html(), load_review_table(), get_download_files()
def on_send(message, history):
    """Forward a chat message to the agent and append the exchange.

    Whitespace-only messages are ignored. The second return value is always
    an empty string, which is wired to clear the message box.

    Returns:
        ``(history, "")``.
    """
    if message.strip():
        try:
            reply = invoke_agent(message, _THREAD_ID)
        except Exception as err:
            # Show agent failures in the chat instead of crashing the UI.
            reply = f"β Error: {err}"
        history = history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": reply},
        ]
    return history, ""
def on_refresh(history):
    """Recompute the phase bar, review table and download list.

    The chat history is passed through untouched.
    """
    phase_html = get_phase_html()
    table_rows = load_review_table()
    downloads = get_download_files()
    return history, phase_html, table_rows, downloads
def _is_blank_cell(value) -> bool:
    """Return True for None, NaN, or whitespace-only text."""
    if value is None:
        return True
    if isinstance(value, float) and value != value:  # NaN never equals itself
        return True
    return isinstance(value, str) and not value.strip()


def _cell_text(value) -> str:
    """Return a table cell as text, mapping None/NaN to an empty string."""
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return str(value)


def _cell_int(value, default: int = 0) -> int:
    """Parse a table cell as an int, returning ``default`` when it is not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        pass
    try:
        # Accept numeric text such as "3.0", which int() alone rejects.
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return default


def _collect_review_decisions(table_data) -> list:
    """Convert the review table into a list of JSON-serialisable decisions."""
    if table_data is None:
        return []
    # The table may arrive as a DataFrame or as a list of rows.
    if isinstance(table_data, pd.DataFrame):
        rows = table_data.values.tolist()
    else:
        rows = list(table_data)

    headers = ("#", "Topic Label", "Top Evidence",
               "Sentences", "Papers", "Approve", "Rename To", "Reasoning")
    decisions = []
    for row in rows:
        if not row:
            continue
        cells = row if isinstance(row, dict) else dict(zip(headers, row))
        # A row with nothing in it carries no decision; skip it rather than
        # report it to the agent as cluster 0.
        if all(_is_blank_cell(value) for value in cells.values()):
            continue
        decisions.append({
            "cluster_id": _cell_int(cells.get("#", 0)),
            "label": _cell_text(cells.get("Topic Label", "")),
            "approve": _cell_text(cells.get("Approve", "YES")).strip().upper(),
            "rename_to": _cell_text(cells.get("Rename To", "")),
            "reasoning": _cell_text(cells.get("Reasoning", "")),
        })
    return decisions


def on_submit_review(table_data, history):
    """Send the user's review-table decisions to the agent.

    Rows are read positionally against the table headers (dict rows are used
    as-is). Wholly blank rows are skipped, and a non-numeric ``#`` cell falls
    back to ``0`` instead of raising. When no decision remains, the agent is
    not called and the chat history is unchanged.

    Args:
        table_data: Review table content: a ``pandas.DataFrame``, a list of
            rows, or ``None``.
        history: Chat history as a list of role/content message dicts.

    Returns:
        ``(history, phase_html, review_rows, download_files)``.
    """
    decisions = _collect_review_decisions(table_data)
    if decisions:
        message = (
            "I have reviewed the table. Here are my decisions (JSON):\n"
            f"{json.dumps(decisions)}"
        )
        try:
            response = invoke_agent(message, _THREAD_ID)
        except Exception as e:
            # Show agent failures in the chat instead of crashing the UI.
            response = f"β Error: {e}"
        # The chat shows a short marker instead of the full JSON payload.
        history = history + [
            {"role": "user", "content": "[Submit Review]"},
            {"role": "assistant", "content": response},
        ]
    return history, get_phase_html(), load_review_table(), get_download_files()
# ─── GRADIO 5.x UI ───────────────────────────────────────────────────────────
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a source that had lost its indentation — confirm against the running app.
with gr.Blocks(title="BERTopic Agentic AI") as demo:
    gr.HTML("""
    <div style="text-align:center;padding:16px;background:linear-gradient(135deg,#1e1b4b,#312e81);border-radius:12px;margin-bottom:12px;">
    <h1 style="color:white;margin:0;font-size:1.8em;">π€ BERTopic Agentic AI</h1>
    <p style="color:#a5b4fc;margin:4px 0 0;">RQ5βRQ7: Abstract vs Title Theme Comparison & PAJAIS Taxonomy Mapping</p>
    <p style="color:#818cf8;font-size:0.85em;margin:4px 0 0;">LangGraph Β· Mistral Small Β· all-MiniLM-L6-v2 Β· Braun & Clarke (2006) Β· PAJAIS 2019</p>
    </div>
    """)

    # Progress pills, computed once at build time and refreshed by the handlers.
    phase_bar = gr.HTML(value=get_phase_html())

    with gr.Group():
        gr.Markdown("### π Step 1: Upload Your Scopus CSV")
        csv_file = gr.File(label="Upload Scopus CSV (.csv)", file_types=[".csv"])

    with gr.Group():
        gr.Markdown("### π¬ Step 2: Agent Conversation")
        chatbot = gr.Chatbot(
            height=380,
            show_label=False,
            type="messages",
            placeholder="Upload your CSV first, then type 'run abstract' or 'run title'...",
        )
        with gr.Row():
            msg_box = gr.Textbox(
                placeholder="Type 'run abstract', 'run title', or a question...",
                label="Your message",
                scale=5,
                show_label=False,
            )
            send_btn = gr.Button("Send β€", variant="primary", scale=1)
        with gr.Row():
            submit_btn = gr.Button("π Submit Review", variant="secondary")
            refresh_btn = gr.Button("π Refresh", variant="secondary")

    with gr.Group():
        gr.Markdown("### π Step 3: Topic Review Table")
        gr.Markdown("_Edit **Approve** (YES/NO) and **Rename To** inline, then click Submit Review._")
        _review_headers = [
            "#", "Topic Label", "Top Evidence",
            "Sentences", "Papers", "Approve", "Rename To", "Reasoning",
        ]
        review_table = gr.Dataframe(
            headers=_review_headers,
            value=load_review_table(),
            interactive=True,
        )

    with gr.Group():
        gr.Markdown("### π₯ Step 4: Download Deliverables")
        gr.Markdown("_Click Refresh after each phase to see new files._")
        download_box = gr.File(
            value=get_download_files(),
            label="Deliverable Files",
            interactive=False,
        )

    gr.Markdown("""
    ---
    **Stack:** Mistral Small Β· all-MiniLM-L6-v2 Β· AgglomerativeClustering (cosine, 0.7) Β· LangGraph ReAct Β· MemorySaver Β· PAJAIS 2019
    > βοΈ Set `MISTRAL_API_KEY` in Space **Settings β Variables and secrets**
    """)

    # Event wiring.
    # Upload, review submission and refresh all update the same four panels.
    _panel_outputs = [chatbot, phase_bar, review_table, download_box]

    csv_file.upload(
        fn=on_csv_upload,
        inputs=[csv_file, chatbot],
        outputs=_panel_outputs,
    )

    # The Send button and pressing Enter in the textbox share one handler.
    send_btn.click(fn=on_send, inputs=[msg_box, chatbot], outputs=[chatbot, msg_box])
    msg_box.submit(fn=on_send, inputs=[msg_box, chatbot], outputs=[chatbot, msg_box])

    submit_btn.click(
        fn=on_submit_review,
        inputs=[review_table, chatbot],
        outputs=_panel_outputs,
    )
    refresh_btn.click(
        fn=on_refresh,
        inputs=[chatbot],
        outputs=_panel_outputs,
    )
if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)