BERTopic_AG_final

Running

App Files Community

anujjuna committed on Apr 26

Commit

8c6e466

verified ·

1 Parent(s): 5a7ae5f

Update app.py

Browse files

Files changed (1) hide show

app.py +261 -607

app.py CHANGED Viewed

@@ -1,607 +1,261 @@
-"""
-app.py
-------
-Streamlit UI for the BERTopic + Dual LLM (Groq + Mistral) research paper analysis pipeline.
-Redesigned with a clean, dark editorial aesthetic.
-"""
-import os
-import json
-import tempfile
-import pandas as pd
-import streamlit as st
-from tools import run_topic_modeling
-from agent import run_agent
-# ---------------------------------------------------------------------------
-# Page Config & Global Styles
-# ---------------------------------------------------------------------------
-st.set_page_config(
-    page_title="Arxiv Lens · Topic Analyzer",
-    page_icon="🔬",
-    layout="wide",
-    initial_sidebar_state="expanded",
-)
-st.markdown("""
-<style>
-@import url('https://fonts.googleapis.com/css2?family=DM+Serif+Display:ital@0;1&family=DM+Mono:wght@400;500&family=DM+Sans:wght@300;400;500&display=swap');
-/* ── Global Reset ─────────────────────────────────────────── */
-html, body, [class*="css"] {
-    font-family: 'DM Sans', sans-serif;
-}
-.stApp {
-    background-color: #0d0f14;
-    color: #e8e4dc;
-}
-/* ── Sidebar ──────────────────────────────────────────────── */
-[data-testid="stSidebar"] {
-    background-color: #111318 !important;
-    border-right: 1px solid #1e2028;
-}
-[data-testid="stSidebar"] * {
-    color: #c8c4bc !important;
-}
-.sidebar-logo {
-    font-family: 'DM Serif Display', serif;
-    font-size: 1.5rem;
-    color: #f0ebe0 !important;
-    letter-spacing: -0.02em;
-    margin-bottom: 0.2rem;
-}
-.sidebar-tagline {
-    font-size: 0.72rem;
-    color: #5a5f6e !important;
-    text-transform: uppercase;
-    letter-spacing: 0.12em;
-    margin-bottom: 1.5rem;
-}
-/* ── Header ───────────────────────────────────────────────── */
-.hero {
-    padding: 2.5rem 0 1.5rem 0;
-    border-bottom: 1px solid #1e2028;
-    margin-bottom: 2rem;
-}
-.hero-title {
-    font-family: 'DM Serif Display', serif;
-    font-size: 2.8rem;
-    color: #f0ebe0;
-    letter-spacing: -0.03em;
-    line-height: 1.1;
-    margin: 0;
-}
-.hero-title em {
-    font-style: italic;
-    color: #c8a97e;
-}
-.hero-sub {
-    font-size: 0.88rem;
-    color: #5a5f6e;
-    margin-top: 0.5rem;
-    text-transform: uppercase;
-    letter-spacing: 0.1em;
-}
-/* ── Key Pill ─────────────────────────────────────────────── */
-.key-required {
-    display: inline-block;
-    background: #1a1d25;
-    border: 1px solid #2e3240;
-    border-radius: 4px;
-    padding: 0.15rem 0.5rem;
-    font-family: 'DM Mono', monospace;
-    font-size: 0.72rem;
-    color: #c8a97e;
-    margin-bottom: 0.4rem;
-}
-.key-optional {
-    display: inline-block;
-    background: #1a1d25;
-    border: 1px solid #2e3240;
-    border-radius: 4px;
-    padding: 0.15rem 0.5rem;
-    font-family: 'DM Mono', monospace;
-    font-size: 0.72rem;
-    color: #5a8a6e;
-    margin-bottom: 0.4rem;
-}
-/* ── Section Headers ──────────────────────────────────────── */
-.section-label {
-    font-family: 'DM Mono', monospace;
-    font-size: 0.68rem;
-    color: #5a5f6e;
-    text-transform: uppercase;
-    letter-spacing: 0.14em;
-    margin-bottom: 0.75rem;
-    padding-bottom: 0.4rem;
-    border-bottom: 1px solid #1e2028;
-}
-/* ── Stat Cards ───────────────────────────────────────────── */
-.stat-card {
-    background: #111318;
-    border: 1px solid #1e2028;
-    border-radius: 8px;
-    padding: 1.2rem 1.4rem;
-    margin-bottom: 0.75rem;
-}
-.stat-number {
-    font-family: 'DM Serif Display', serif;
-    font-size: 2.4rem;
-    color: #c8a97e;
-    line-height: 1;
-}
-.stat-label {
-    font-size: 0.75rem;
-    color: #5a5f6e;
-    text-transform: uppercase;
-    letter-spacing: 0.1em;
-    margin-top: 0.3rem;
-}
-/* ── Pipeline Step Badges ─────────────────────────────────── */
-.step-row {
-    display: flex;
-    align-items: center;
-    gap: 1rem;
-    margin-bottom: 0.5rem;
-}
-.step-num {
-    font-family: 'DM Mono', monospace;
-    font-size: 0.7rem;
-    color: #0d0f14;
-    background: #c8a97e;
-    border-radius: 50%;
-    width: 1.4rem;
-    height: 1.4rem;
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    flex-shrink: 0;
-    font-weight: 500;
-}
-.step-text {
-    font-size: 0.82rem;
-    color: #8a8f9e;
-}
-/* ── Buttons ──────────────────────────────────────────────── */
-.stButton > button {
-    background: #c8a97e !important;
-    color: #0d0f14 !important;
-    border: none !important;
-    border-radius: 6px !important;
-    font-family: 'DM Mono', monospace !important;
-    font-size: 0.8rem !important;
-    font-weight: 500 !important;
-    letter-spacing: 0.08em !important;
-    text-transform: uppercase !important;
-    padding: 0.6rem 1.4rem !important;
-    transition: all 0.15s ease !important;
-}
-.stButton > button:hover {
-    background: #debb94 !important;
-    transform: translateY(-1px) !important;
-}
-/* ── Inputs ───────────────────────────────────────────────── */
-.stTextInput > div > div > input,
-.stSelectbox > div > div,
-.stSlider {
-    background-color: #111318 !important;
-    border-color: #2e3240 !important;
-    color: #e8e4dc !important;
-}
-/* ── Dataframe ────────────────────────────────────────────── */
-[data-testid="stDataFrame"] {
-    border: 1px solid #1e2028 !important;
-    border-radius: 8px !important;
-    overflow: hidden;
-}
-/* ── Upload zone ──────────────────────────────────────────── */
-[data-testid="stFileUploader"] {
-    background: #111318;
-    border: 1px dashed #2e3240 !important;
-    border-radius: 8px;
-}
-/* ── Expanders ────────────────────────────────────────────── */
-.streamlit-expanderHeader {
-    background-color: #111318 !important;
-    border: 1px solid #1e2028 !important;
-    border-radius: 6px !important;
-    color: #c8c4bc !important;
-    font-size: 0.82rem !important;
-}
-/* ── Tabs ─────────────────────────────────────────────────── */
-.stTabs [data-baseweb="tab-list"] {
-    gap: 0;
-    border-bottom: 1px solid #1e2028;
-    background: transparent;
-}
-.stTabs [data-baseweb="tab"] {
-    font-family: 'DM Mono', monospace;
-    font-size: 0.75rem;
-    text-transform: uppercase;
-    letter-spacing: 0.1em;
-    color: #5a5f6e !important;
-    background: transparent !important;
-    border: none !important;
-    padding: 0.6rem 1.2rem;
-}
-.stTabs [aria-selected="true"] {
-    color: #c8a97e !important;
-    border-bottom: 2px solid #c8a97e !important;
-}
-/* ── Success / Error ──────────────────────────────────────── */
-.stSuccess {
-    background: #0d1f16 !important;
-    border-left: 3px solid #4caf7d !important;
-    border-radius: 4px !important;
-}
-.stError {
-    background: #1f0d0d !important;
-    border-left: 3px solid #cf4f4f !important;
-    border-radius: 4px !important;
-}
-/* ── Download buttons ─────────────────────────────────────── */
-.stDownloadButton > button {
-    background: transparent !important;
-    color: #c8a97e !important;
-    border: 1px solid #c8a97e !important;
-    border-radius: 6px !important;
-    font-family: 'DM Mono', monospace !important;
-    font-size: 0.75rem !important;
-    letter-spacing: 0.08em !important;
-}
-.stDownloadButton > button:hover {
-    background: #c8a97e22 !important;
-}
-/* ── Divider ──────────────────────────────────────────────── */
-hr {
-    border-color: #1e2028 !important;
-    margin: 1.5rem 0 !important;
-}
-</style>
-""", unsafe_allow_html=True)
-# ---------------------------------------------------------------------------
-# Sidebar
-# ---------------------------------------------------------------------------
-with st.sidebar:
-    st.markdown('<div class="sidebar-logo">Arxiv Lens</div>', unsafe_allow_html=True)
-    st.markdown('<div class="sidebar-tagline">Research Topic Analyzer</div>', unsafe_allow_html=True)
-    st.markdown('<div class="section-label">API Keys</div>', unsafe_allow_html=True)
-    st.markdown('<span class="key-required">REQUIRED · GROQ</span>', unsafe_allow_html=True)
-    groq_key_input = st.text_input(
-        "Groq API Key",
-        value="",
-        type="password",
-        placeholder="gsk_...",
-        label_visibility="collapsed",
-    )
-    st.markdown('<span class="key-optional">OPTIONAL · MISTRAL</span>', unsafe_allow_html=True)
-    mistral_key_input = st.text_input(
-        "Mistral API Key",
-        value="",
-        type="password",
-        placeholder="For dual-LLM validation",
-        label_visibility="collapsed",
-    )
-    st.caption("Keys are never stored. Falls back to env vars if blank.")
-    st.markdown("---")
-    st.markdown('<div class="section-label">Model Settings</div>', unsafe_allow_html=True)
-    min_topic_size = st.slider("Min Topic Size", min_value=3, max_value=30, value=5)
-    st.markdown("---")
-    st.markdown('<div class="section-label">Pipeline</div>', unsafe_allow_html=True)
-    for i, step in enumerate([
-        "BERTopic clusters abstracts + titles",
-        "Groq LLM labels each cluster",
-        "Mistral validates Groq's labels",
-        "Cross-source diff report generated",
-    ], 1):
-        st.markdown(f"""
-        <div class="step-row">
-            <div class="step-num">{i}</div>
-            <div class="step-text">{step}</div>
-        </div>
-        """, unsafe_allow_html=True)
-    st.markdown("---")
-    if st.button("↺ Reset Results", use_container_width=True):
-        if "agent_results" in st.session_state:
-            del st.session_state["agent_results"]
-        st.rerun()
-groq_api_key = groq_key_input.strip() or os.getenv("GROQ_API_KEY")
-mistral_api_key = mistral_key_input.strip() or os.getenv("MISTRAL_API_KEY")
-# ---------------------------------------------------------------------------
-# Hero
-# ---------------------------------------------------------------------------
-st.markdown("""
-<div class="hero">
-    <h1 class="hero-title">Research<br><em>Topic Intelligence</em></h1>
-    <p class="hero-sub">BERTopic · Groq llama-3.1 · Mistral Validation</p>
-</div>
-""", unsafe_allow_html=True)
-# ---------------------------------------------------------------------------
-# Dataset Input
-# ---------------------------------------------------------------------------
-st.markdown('<div class="section-label">Dataset</div>', unsafe_allow_html=True)
-col_a, col_b = st.columns([3, 1])
-with col_a:
-    uploaded_file = st.file_uploader(
-        "Upload a CSV with **title** and **abstract** columns",
-        type=["csv"],
-        help="Must have at minimum 'title' and 'abstract' columns. More rows = richer topics.",
-    )
-with col_b:
-    st.markdown("<br>", unsafe_allow_html=True)
-    use_sample = st.checkbox("Use built-in sample dataset", value=False)
-st.markdown("---")
-# ---------------------------------------------------------------------------
-# Run Pipeline
-# ---------------------------------------------------------------------------
-run_btn = st.button("▶  Run Analysis Pipeline", use_container_width=False)
-if run_btn:
-    if not groq_api_key:
-        st.error("**Groq API key required.** Enter it in the sidebar or set `GROQ_API_KEY` in your environment.")
-        st.stop()
-    if not use_sample and uploaded_file is None:
-        st.error("**No dataset.** Upload a CSV or enable the sample dataset.")
-        st.stop()
-    # Resolve CSV path
-    if use_sample:
-        sample_data = {
-            "title": [
-                "Deep Learning for Image Classification",
-                "Neural Networks in Healthcare",
-                "Transformer Models for NLP",
-                "BERT in Question Answering",
-                "Blockchain and Distributed Ledger Technology",
-                "Smart Contracts in Finance",
-                "Federated Learning for Privacy",
-                "Differential Privacy in ML",
-                "Graph Neural Networks",
-                "Knowledge Graph Embeddings",
-            ],
-            "abstract": [
-                "We propose a deep learning model achieving state-of-the-art accuracy on image benchmarks.",
-                "A convolutional network trained for medical image classification tasks.",
-                "We introduce a transformer-based approach for text understanding.",
-                "Fine-tuning BERT achieves strong results on reading comprehension datasets.",
-                "This paper surveys blockchain consensus mechanisms and distributed ledger architectures.",
-                "We implement smart contracts for automated financial transactions on a public blockchain.",
-                "Federated learning enables collaborative model training without sharing raw data.",
-                "Differential privacy provides formal privacy guarantees for machine learning models.",
-                "Graph neural networks learn from relational data structures effectively.",
-                "Knowledge graph embeddings enable link prediction and entity classification.",
-            ],
-        }
-        df_sample = pd.DataFrame(sample_data)
-        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
-        df_sample.to_csv(tmp.name, index=False)
-        csv_path = tmp.name
-    else:
-        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
-        tmp.write(uploaded_file.read())
-        tmp.flush()
-        csv_path = tmp.name
-    # Step 1 — BERTopic
-    with st.spinner("🔬 Running BERTopic clustering…"):
-        try:
-            topic_results = run_topic_modeling(csv_path, min_topic_size=min_topic_size)
-        except Exception as exc:
-            st.error(f"**Topic modeling failed:** {exc}")
-            st.stop()
-    abstract_res = topic_results["abstracts"]
-    title_res = topic_results["titles"]
-    df = pd.read_csv(csv_path)
-    df.columns = df.columns.str.lower()
-    raw_titles = df["title"].fillna("").tolist()
-    raw_abstracts = df["abstract"].fillna("").tolist()
-    # Step 2 — Agent
-    with st.spinner("🤖 LLM interpretation + Mistral validation…"):
-        try:
-            st.session_state["agent_results"] = run_agent(
-                title_topic_keywords=title_res["topic_keywords"],
-                abstract_topic_keywords=abstract_res["topic_keywords"],
-                title_topic_assignments=title_res["topics"],
-                abstract_topic_assignments=abstract_res["topics"],
-                raw_titles=raw_titles,
-                raw_abstracts=raw_abstracts,
-                api_key=groq_api_key,
-                mistral_api_key=mistral_api_key,
-            )
-            st.success("Pipeline complete.")
-        except Exception as exc:
-            st.error(f"**Agent pipeline failed:** {exc}")
-            st.stop()
-# ---------------------------------------------------------------------------
-# Results
-# ---------------------------------------------------------------------------
-agent_results = st.session_state.get("agent_results")
-if agent_results:
-    title_interps = agent_results.get("title_interpretations", {})
-    abstract_interps = agent_results.get("abstract_interpretations", {})
-    comparison_rows = agent_results.get("comparison_rows", [])
-    taxonomy_map = agent_results.get("taxonomy_map", {})
-    # ── Stats Row ──────────────────────────────────────────────────────────
-    c1, c2, c3, c4 = st.columns(4)
-    with c1:
-        st.markdown(f"""
-        <div class="stat-card">
-            <div class="stat-number">{len(title_interps)}</div>
-            <div class="stat-label">Title Topics</div>
-        </div>
-        """, unsafe_allow_html=True)
-    with c2:
-        st.markdown(f"""
-        <div class="stat-card">
-            <div class="stat-number">{len(abstract_interps)}</div>
-            <div class="stat-label">Abstract Topics</div>
-        </div>
-        """, unsafe_allow_html=True)
-    with c3:
-        agreed = sum(
-            1 for i in list(title_interps.values()) + list(abstract_interps.values())
-            if i.validation_status == "AGREED"
-        )
-        st.markdown(f"""
-        <div class="stat-card">
-            <div class="stat-number">{agreed}</div>
-            <div class="stat-label">LLM Agreements</div>
-        </div>
-        """, unsafe_allow_html=True)
-    with c4:
-        novel = sum(
-            1 for i in list(title_interps.values()) + list(abstract_interps.values())
-            if i.classification == "NOVEL"
-        )
-        st.markdown(f"""
-        <div class="stat-card">
-            <div class="stat-number">{novel}</div>
-            <div class="stat-label">Novel Topics</div>
-        </div>
-        """, unsafe_allow_html=True)
-    st.markdown("---")
-    # ── Main Tabs ──────────────────────────────────────────────────────────
-    tab1, tab2, tab3, tab4 = st.tabs([
-        "Title Topics",
-        "Abstract Topics",
-        "Taxonomy Map",
-        "Comparison",
-    ])
-    def _interp_rows(interps):
-        return [
-            {
-                "ID": tid,
-                "Label": i.label,
-                "Category": i.taxonomy_category,
-                "Class": i.classification,
-                "Validation": i.validation_status,
-                "Confidence": i.confidence,
-                "Keywords": ", ".join(i.keywords[:8]),
-                "Reasoning": i.reasoning,
-            }
-            for tid, i in sorted(interps.items())
-        ]
-    with tab1:
-        st.markdown('<div class="section-label">Topics derived from paper titles</div>', unsafe_allow_html=True)
-        if title_interps:
-            st.dataframe(pd.DataFrame(_interp_rows(title_interps)), use_container_width=True, hide_index=True)
-        else:
-            st.info("No title topics found.")
-    with tab2:
-        st.markdown('<div class="section-label">Topics derived from paper abstracts</div>', unsafe_allow_html=True)
-        if abstract_interps:
-            st.dataframe(pd.DataFrame(_interp_rows(abstract_interps)), use_container_width=True, hide_index=True)
-        else:
-            st.info("No abstract topics found.")
-    with tab3:
-        st.markdown('<div class="section-label">Full taxonomy classification</div>', unsafe_allow_html=True)
-        inner_tabs = st.tabs(["Titles", "Abstracts"])
-        for itab, section in zip(inner_tabs, ["titles", "abstracts"]):
-            with itab:
-                entries = taxonomy_map.get(section, [])
-                if entries:
-                    st.dataframe(
-                        pd.DataFrame(entries)[[
-                            "topic_id", "label", "taxonomy_category",
-                            "classification", "validation_status", "confidence", "reasoning"
-                        ]],
-                        use_container_width=True,
-                        hide_index=True,
-                    )
-                else:
-                    st.info(f"No {section} entries.")
-    with tab4:
-        st.markdown('<div class="section-label">Side-by-side title vs abstract topic comparison</div>', unsafe_allow_html=True)
-        if comparison_rows:
-            from dataclasses import asdict
-            st.dataframe(pd.DataFrame([asdict(r) for r in comparison_rows]), use_container_width=True, hide_index=True)
-        else:
-            st.info("No overlapping topic IDs between title and abstract sources.")
-    st.markdown("---")
-    # ── Downloads ──────────────────────────────────────────────────────────
-    st.markdown('<div class="section-label">Export Results</div>', unsafe_allow_html=True)
-    dl1, dl2 = st.columns(2)
-    with dl1:
-        st.download_button(
-            "⬇ taxonomy_map.json",
-            json.dumps(agent_results["taxonomy_map"], indent=2),
-            file_name="taxonomy_map.json",
-            mime="application/json",
-            key="dl_json",
-            use_container_width=True,
-        )
-    with dl2:
-        from dataclasses import asdict
-        comp_df = pd.DataFrame([asdict(r) for r in agent_results["comparison_rows"]])
-        st.download_button(
-            "⬇ comparison.csv",
-            comp_df.to_csv(index=False),
-            file_name="comparison.csv",
-            mime="text/csv",
-            key="dl_csv",
-            use_container_width=True,
-        )

+"""
+app.py
+------
+Streamlit UI for the BERTopic + Dual LLM (Groq + Mistral) research paper analysis pipeline.
+"""
+import os
+import json
+import tempfile
+import pandas as pd
+import streamlit as st
+from tools import run_topic_modeling
+from agent import run_agent
+# ---------------------------------------------------------------------------
+# Page Config
+# ---------------------------------------------------------------------------
+st.set_page_config(page_title="Research Topic Analyzer", layout="wide")
+st.title("Research Topic Analyzer")
+st.caption("BERTopic + Groq + Mistral dual-validation pipeline")
+# ---------------------------------------------------------------------------
+# API Key Handling (sidebar input first, environment variable as fallback)
+# ---------------------------------------------------------------------------
+with st.sidebar:
+    st.header("API Keys")
+    groq_key_input = st.text_input(
+        "Groq API Key",
+        value="",
+        type="password",
+        placeholder="Uses GROQ_API_KEY env var if blank",
+    )
+    mistral_key_input = st.text_input(
+        "Mistral API Key (optional)",
+        value="",
+        type="password",
+        placeholder="Uses MISTRAL_API_KEY env var if blank",
+    )
+    st.caption("Keys are never stored. Leave blank to use environment variables.")
+    st.divider()
+    min_topic_size = st.slider("Min Topic Size", min_value=3, max_value=30, value=5)
+    if st.button("Reset Results"):
+        if "agent_results" in st.session_state:
+            del st.session_state["agent_results"]
+        st.rerun()
+groq_api_key = groq_key_input.strip() or os.getenv("GROQ_API_KEY")
+mistral_api_key = mistral_key_input.strip() or os.getenv("MISTRAL_API_KEY")
+# ---------------------------------------------------------------------------
+# Dataset Loading
+# ---------------------------------------------------------------------------
+st.subheader("Dataset")
+use_sample = st.checkbox("Use sample dataset", value=False)
+uploaded_file = None
+if not use_sample:
+    uploaded_file = st.file_uploader(
+        "Upload CSV with 'title' and 'abstract' columns",
+        type=["csv"],
+    )
+# ---------------------------------------------------------------------------
+# Run Pipeline
+# ---------------------------------------------------------------------------
+run_btn = st.button("Run Pipeline", type="primary")
+if run_btn:
+    # --- Validate inputs ---
+    if not groq_api_key:
+        st.error("Groq API key is required. Provide it in the sidebar or set GROQ_API_KEY.")
+        st.stop()
+    if not use_sample and uploaded_file is None:
+        st.error("Please upload a CSV file or enable the sample dataset.")
+        st.stop()
+    # --- Resolve CSV path ---
+    if use_sample:
+        # Inline sample data
+        sample_data = {
+            "title": [
+                "Deep Learning for Image Classification",
+                "Neural Networks in Healthcare",
+                "Transformer Models for NLP",
+                "BERT in Question Answering",
+                "Blockchain and Distributed Ledger Technology",
+                "Smart Contracts in Finance",
+                "Federated Learning for Privacy",
+                "Differential Privacy in ML",
+                "Graph Neural Networks",
+                "Knowledge Graph Embeddings",
+            ],
+            "abstract": [
+                "We propose a deep learning model achieving state-of-the-art accuracy on image benchmarks.",
+                "A convolutional network trained for medical image classification tasks.",
+                "We introduce a transformer-based approach for text understanding.",
+                "Fine-tuning BERT achieves strong results on reading comprehension datasets.",
+                "This paper surveys blockchain consensus mechanisms and distributed ledger architectures.",
+                "We implement smart contracts for automated financial transactions on a public blockchain.",
+                "Federated learning enables collaborative model training without sharing raw data.",
+                "Differential privacy provides formal privacy guarantees for machine learning models.",
+                "Graph neural networks learn from relational data structures effectively.",
+                "Knowledge graph embeddings enable link prediction and entity classification.",
+            ],
+        }
+        df_sample = pd.DataFrame(sample_data)
+        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
+        df_sample.to_csv(tmp.name, index=False)
+        csv_path = tmp.name
+    else:
+        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
+        tmp.write(uploaded_file.read())
+        tmp.flush()
+        csv_path = tmp.name
+    # ---------------------------------------------------------------------------
+    # Step 1: Topic Modeling
+    # ---------------------------------------------------------------------------
+    with st.spinner("Running BERTopic (this may take a minute)…"):
+        try:
+            topic_results = run_topic_modeling(csv_path, min_topic_size=min_topic_size)
+        except Exception as exc:
+            st.error(f"Topic modeling failed: {exc}")
+            st.stop()
+    abstract_res = topic_results["abstracts"]
+    title_res = topic_results["titles"]
+    # Re-read the CSV to get the raw title/abstract texts passed to the agent
+    df = pd.read_csv(csv_path)
+    df.columns = df.columns.str.lower()
+    raw_titles = df["title"].fillna("").tolist()
+    raw_abstracts = df["abstract"].fillna("").tolist()
+    # ---------------------------------------------------------------------------
+    # Step 2: Agent (LLM interpretation + dual validation)
+    # ---------------------------------------------------------------------------
+    with st.spinner("Running LLM interpretation and Mistral validation…"):
+        try:
+            st.session_state["agent_results"] = run_agent(
+                title_topic_keywords=title_res["topic_keywords"],
+                abstract_topic_keywords=abstract_res["topic_keywords"],
+                title_topic_assignments=title_res["topics"],
+                abstract_topic_assignments=abstract_res["topics"],
+                raw_titles=raw_titles,
+                raw_abstracts=raw_abstracts,
+                api_key=groq_api_key,
+                mistral_api_key=mistral_api_key,
+            )
+            st.success("Pipeline complete!")
+        except Exception as exc:
+            st.error(f"Agent pipeline failed: {exc}")
+            st.stop()
+# ---------------------------------------------------------------------------
+# Display Logic (kept outside `if run_btn` so results stored in session state persist across reruns)
+# ---------------------------------------------------------------------------
+agent_results = st.session_state.get("agent_results")
+if agent_results:
+    # ---------------------------------------------------------------------------
+    # Display: Title Topics
+    # ---------------------------------------------------------------------------
+    st.subheader("Title Topics")
+    title_interps = agent_results.get("title_interpretations", {})
+    if title_interps:
+        title_rows = []
+        for tid, interp in sorted(title_interps.items()):
+            title_rows.append({
+                "Topic ID": tid,
+                "Label": interp.label,
+                "Category": interp.taxonomy_category,
+                "Classification": interp.classification,
+                "Validation Status": interp.validation_status,
+                "Confidence": interp.confidence,
+                "Keywords": ", ".join(interp.keywords[:8]),
+            })
+        st.dataframe(pd.DataFrame(title_rows), use_container_width=True)
+    else:
+        st.info("No title topics found.")
+    # ---------------------------------------------------------------------------
+    # Display: Abstract Topics
+    # ---------------------------------------------------------------------------
+    st.subheader("Abstract Topics")
+    abstract_interps = agent_results.get("abstract_interpretations", {})
+    if abstract_interps:
+        abstract_rows = []
+        for tid, interp in sorted(abstract_interps.items()):
+            abstract_rows.append({
+                "Topic ID": tid,
+                "Label": interp.label,
+                "Category": interp.taxonomy_category,
+                "Classification": interp.classification,
+                "Validation Status": interp.validation_status,
+                "Confidence": interp.confidence,
+                "Keywords": ", ".join(interp.keywords[:8]),
+            })
+        st.dataframe(pd.DataFrame(abstract_rows), use_container_width=True)
+    else:
+        st.info("No abstract topics found.")
+    # ---------------------------------------------------------------------------
+    # Display: Taxonomy Map
+    # ---------------------------------------------------------------------------
+    st.subheader("Taxonomy Map")
+    taxonomy_map = agent_results.get("taxonomy_map", {})
+    tabs = st.tabs(["Titles", "Abstracts"])
+    for tab, section in zip(tabs, ["titles", "abstracts"]):
+        with tab:
+            entries = taxonomy_map.get(section, [])
+            if entries:
+                st.dataframe(
+                    pd.DataFrame(entries)[[
+                        "topic_id", "label", "taxonomy_category",
+                        "classification", "validation_status", "confidence", "reasoning"
+                    ]],
+                    use_container_width=True,
+                )
+            else:
+                st.info(f"No {section} taxonomy entries.")
+    # ---------------------------------------------------------------------------
+    # Display: Comparison Table
+    # ---------------------------------------------------------------------------
+    st.subheader("Title vs Abstract Comparison")
+    comparison_rows = agent_results.get("comparison_rows", [])
+    if comparison_rows:
+        from dataclasses import asdict
+        comp_df = pd.DataFrame([asdict(r) for r in comparison_rows])
+        st.dataframe(comp_df, use_container_width=True)
+    else:
+        st.info("No overlapping topics to compare.")
+    # ---------------------------------------------------------------------------
+    # Downloads
+    # ---------------------------------------------------------------------------
+    st.subheader("Downloads")
+    col1, col2 = st.columns(2)
+    with col1:
+        st.download_button(
+            "Download taxonomy_map.json",
+            json.dumps(agent_results["taxonomy_map"], indent=2),
+            file_name="taxonomy_map.json",
+            mime="application/json",
+            key="dl_json"
+        )
+    with col2:
+        from dataclasses import asdict
+        comp_df = pd.DataFrame([asdict(r) for r in agent_results["comparison_rows"]])
+        st.download_button(
+            "Download comparison.csv",
+            comp_df.to_csv(index=False),
+            file_name="comparison.csv",
+            mime="text/csv",
+            key="dl_csv"
+        )