BERTopic_AG_final

Running

App Files Community

anujjuna committed 17 days ago

Commit

05df72c

verified ·

1 Parent(s): 9c754e7

Update app.py

Browse files

Files changed (1) hide show

app.py +360 -563

app.py CHANGED Viewed

@@ -1,659 +1,456 @@
 """
 app.py
 ------
-Streamlit UI — SPECTER2 + BERTopic + 3-LLM Council
-Research Topic Analyzer for SPJIMR × SPIT Group 14
 """
-import os
-import json
-import tempfile
 import pandas as pd
 import streamlit as st
 from tools import run_topic_modeling
 from agent import run_agent
-# ── Page setup ──────────────────────────────────────────────────────────────
-st.set_page_config(
-    page_title="TMIS Topic Analyzer",
-    page_icon="📐",
-    layout="wide",
-    initial_sidebar_state="expanded",
-)
-# ── Custom CSS ───────────────────────────────────────────────────────────────
 st.markdown("""
 <style>
 @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600&family=IBM+Plex+Sans:wght@300;400;500;600&display=swap');
-html, body, [class*="css"] {
-    font-family: 'IBM Plex Sans', sans-serif;
-}
-/* App background */
-.stApp {
-    background: #0d0f14;
-    color: #e8eaf0;
-}
-/* Sidebar */
-[data-testid="stSidebar"] {
-    background: #13161e;
-    border-right: 1px solid #1f2333;
-}
-[data-testid="stSidebar"] * {
-    color: #b0b8cc !important;
-}
-[data-testid="stSidebar"] h1,
-[data-testid="stSidebar"] h2,
-[data-testid="stSidebar"] h3 {
-    color: #e8eaf0 !important;
-    font-family: 'IBM Plex Mono', monospace !important;
-    font-size: 0.8rem !important;
-    letter-spacing: 0.12em !important;
-    text-transform: uppercase !important;
-}
-/* Header */
-.site-header {
-    padding: 2.5rem 0 1.5rem 0;
-    border-bottom: 1px solid #1f2333;
-    margin-bottom: 2rem;
-}
-.site-header h1 {
-    font-family: 'IBM Plex Mono', monospace;
-    font-size: 1.6rem;
-    font-weight: 600;
-    color: #e8eaf0;
-    letter-spacing: -0.01em;
-    margin: 0 0 0.3rem 0;
-}
-.site-header p {
-    font-size: 0.82rem;
-    color: #5a6480;
-    font-family: 'IBM Plex Mono', monospace;
-    margin: 0;
-    letter-spacing: 0.04em;
-}
-/* Pills / badges */
-.pill {
-    display: inline-block;
-    font-family: 'IBM Plex Mono', monospace;
-    font-size: 0.68rem;
-    font-weight: 600;
-    letter-spacing: 0.08em;
-    text-transform: uppercase;
-    padding: 3px 10px;
-    border-radius: 2px;
-    margin-right: 6px;
-}
-.pill-blue  { background: #0f2a4a; color: #4d9de0; border: 1px solid #1a4070; }
-.pill-green { background: #0a2a1a; color: #3dba7a; border: 1px solid #1a4a2a; }
-.pill-amber { background: #2a1f00; color: #e8a020; border: 1px solid #4a3500; }
-.pill-red   { background: #2a0f0f; color: #e04d4d; border: 1px solid #4a1a1a; }
-.pill-gray  { background: #1a1e2a; color: #7a8090; border: 1px solid #2a2e3a; }
-/* Stats row */
-.stat-grid {
-    display: grid;
-    grid-template-columns: repeat(4, 1fr);
-    gap: 1px;
-    background: #1f2333;
-    border: 1px solid #1f2333;
-    border-radius: 6px;
-    overflow: hidden;
-    margin-bottom: 2rem;
-}
-.stat-card {
-    background: #13161e;
-    padding: 1.25rem 1.5rem;
-    text-align: center;
-}
-.stat-val {
-    font-family: 'IBM Plex Mono', monospace;
-    font-size: 1.9rem;
-    font-weight: 600;
-    color: #e8eaf0;
-    line-height: 1;
-    margin-bottom: 0.3rem;
-}
-.stat-label {
-    font-size: 0.7rem;
-    color: #5a6480;
-    text-transform: uppercase;
-    letter-spacing: 0.1em;
-    font-family: 'IBM Plex Mono', monospace;
-}
-/* Section titles */
-.section-title {
-    font-family: 'IBM Plex Mono', monospace;
-    font-size: 0.7rem;
-    font-weight: 600;
-    letter-spacing: 0.15em;
-    text-transform: uppercase;
-    color: #5a6480;
-    padding-bottom: 0.6rem;
-    border-bottom: 1px solid #1f2333;
-    margin-bottom: 1.2rem;
-}
-/* Topic cards */
-.topic-card {
-    background: #13161e;
-    border: 1px solid #1f2333;
-    border-left: 3px solid #4d9de0;
-    border-radius: 4px;
-    padding: 1rem 1.25rem;
-    margin-bottom: 0.6rem;
-    transition: border-color 0.15s;
-}
-.topic-card:hover { border-left-color: #3dba7a; }
-.topic-card.novel { border-left-color: #e8a020; }
-.topic-label {
-    font-size: 0.92rem;
-    font-weight: 500;
-    color: #e8eaf0;
-    margin-bottom: 0.35rem;
-}
-.topic-meta {
-    font-family: 'IBM Plex Mono', monospace;
-    font-size: 0.7rem;
-    color: #5a6480;
-}
-.topic-kw {
-    font-family: 'IBM Plex Mono', monospace;
-    font-size: 0.68rem;
-    color: #3d6480;
-    margin-top: 0.4rem;
-    line-height: 1.5;
-}
-/* Validation panel */
-.val-box {
-    background: #0a2a1a;
-    border: 1px solid #1a4a2a;
-    border-radius: 6px;
-    padding: 1.25rem 1.5rem;
-    margin-bottom: 1.5rem;
-}
-.val-box h4 {
-    font-family: 'IBM Plex Mono', monospace;
-    font-size: 0.72rem;
-    font-weight: 600;
-    letter-spacing: 0.1em;
-    text-transform: uppercase;
-    color: #3dba7a;
-    margin: 0 0 0.75rem 0;
-}
-.val-row {
-    display: flex;
-    justify-content: space-between;
-    align-items: center;
-    padding: 0.4rem 0;
-    border-bottom: 1px solid #1a3a2a;
-    font-size: 0.8rem;
-    color: #a0b8a8;
-}
-.val-row:last-child { border-bottom: none; }
-.val-key { color: #5a7a6a; }
-.val-num { font-family: 'IBM Plex Mono', monospace; color: #3dba7a; font-weight: 600; }
-/* LLM council badge row */
-.council-row {
-    display: flex;
-    gap: 8px;
-    margin-bottom: 1rem;
-    flex-wrap: wrap;
-}
-/* Run button */
-.stButton > button {
-    background: #4d9de0 !important;
-    color: #0d0f14 !important;
-    border: none !important;
-    border-radius: 3px !important;
-    font-family: 'IBM Plex Mono', monospace !important;
-    font-size: 0.78rem !important;
-    font-weight: 600 !important;
-    letter-spacing: 0.08em !important;
-    text-transform: uppercase !important;
-    padding: 0.6rem 2rem !important;
-    transition: background 0.15s !important;
-}
-.stButton > button:hover {
-    background: #3d8ed0 !important;
-}
-/* Input overrides */
-.stTextInput input, .stSelectbox select {
-    background: #13161e !important;
-    border: 1px solid #1f2333 !important;
-    color: #e8eaf0 !important;
-    font-family: 'IBM Plex Mono', monospace !important;
-    font-size: 0.82rem !important;
-    border-radius: 3px !important;
-}
-/* Dataframe */
-.stDataFrame {
-    background: #13161e;
-    border: 1px solid #1f2333;
-    border-radius: 4px;
-}
-/* Download buttons */
-.stDownloadButton > button {
-    background: transparent !important;
-    color: #4d9de0 !important;
-    border: 1px solid #1a4070 !important;
-    border-radius: 3px !important;
-    font-family: 'IBM Plex Mono', monospace !important;
-    font-size: 0.72rem !important;
-    letter-spacing: 0.08em !important;
-}
-/* Expander */
-.streamlit-expanderHeader {
-    background: #13161e !important;
-    border: 1px solid #1f2333 !important;
-    font-family: 'IBM Plex Mono', monospace !important;
-    font-size: 0.78rem !important;
-    color: #a0a8c0 !important;
-}
-/* Progress / spinner */
-.stSpinner > div { border-top-color: #4d9de0 !important; }
-/* Divider */
-hr { border-color: #1f2333 !important; }
-/* Alerts */
-.stAlert { border-radius: 4px !important; }
 </style>
 """, unsafe_allow_html=True)
 # ── Header ───────────────────────────────────────────────────────────────────
 st.markdown("""
 <div class="site-header">
-  <h1>Research Topic Analyzer</h1>
-  <p>SPECTER2 embeddings &nbsp;·&nbsp; HDBSCAN/UMAP clustering &nbsp;·&nbsp; 3-LLM Council (Groq + Mistral + Gemini) &nbsp;·&nbsp; PAJAIS validation</p>
 </div>
 """, unsafe_allow_html=True)
 # ── Sidebar ──────────────────────────────────────────────────────────────────
 with st.sidebar:
     st.markdown("### API Keys")
-    groq_key_input    = st.text_input("Groq API Key",    type="password", placeholder="GROQ_API_KEY env var")
-    mistral_key_input = st.text_input("Mistral API Key", type="password", placeholder="MISTRAL_API_KEY env var")
-    gemini_key_input  = st.text_input("Gemini API Key",  type="password", placeholder="GEMINI_API_KEY env var")
     st.caption("Keys are never stored. Leave blank to use env vars.")
     st.markdown("---")
-    st.markdown("### Clustering Parameters")
-    min_topic_size = st.slider("Min papers per cluster", min_value=3, max_value=20, value=5,
-                               help="Prof. Kamat spec: min=5")
-    st.markdown(
-        "<span class='pill pill-blue'>Min clusters: 15</span>"
-        "<span class='pill pill-blue'>Max clusters: 30</span>",
-        unsafe_allow_html=True
-    )
     st.markdown(
-        "<span class='pill pill-gray'>Cosine sim: 0.50–0.55</span>",
-        unsafe_allow_html=True
-    )
     st.markdown("---")
     st.markdown("### LLM Council")
     st.markdown("""
-<div class="council-row">
   <span class="pill pill-blue">Groq / LLaMA-3.1</span>
   <span class="pill pill-green">Mistral Small</span>
   <span class="pill pill-amber">Gemini 2.5 Flash</span>
 </div>
-<p style="font-size:0.72rem;color:#5a6480;font-family:'IBM Plex Mono',monospace;">
-Majority vote → best label selected.<br>
-Keyword-overlap fallback if no consensus.
 </p>
 """, unsafe_allow_html=True)
     st.markdown("---")
     if st.button("Reset Results", use_container_width=True):
-        for key in ["agent_results", "topic_stats"]:
-            st.session_state.pop(key, None)
         st.rerun()
-groq_api_key    = groq_key_input.strip()    or os.getenv("GROQ_API_KEY")
-mistral_api_key = mistral_key_input.strip() or os.getenv("MISTRAL_API_KEY")
-gemini_api_key  = gemini_key_input.strip()  or os.getenv("GEMINI_API_KEY")
-# ── Dataset upload ────────────────────────────────────────────────────────────
 st.markdown("<div class='section-title'>Dataset</div>", unsafe_allow_html=True)
-col_up, col_sample = st.columns([3, 1])
 with col_up:
-    uploaded_file = st.file_uploader(
-        "Upload Scopus CSV — must contain 'title' and 'abstract' columns",
-        type=["csv"],
-        help="Export your corpus from Scopus as CSV. The tool will combine Title + Abstract into one SPECTER2 vector per paper."
-    )
-with col_sample:
     st.markdown("<br>", unsafe_allow_html=True)
-    use_sample = st.checkbox("Use sample dataset (50 papers)", value=False)
-if uploaded_file and not use_sample:
-    try:
-        df_preview = pd.read_csv(uploaded_file)
-        uploaded_file.seek(0)
-        col_a, col_b, col_c = st.columns(3)
-        col_a.metric("Papers detected", len(df_preview))
-        col_b.metric("Columns", len(df_preview.columns))
-        has_both = {"title", "abstract"}.issubset(set(df_preview.columns.str.lower()))
-        col_c.metric("Title + Abstract", "✓ present" if has_both else "✗ missing")
-        if not has_both:
-            st.error("CSV must have both 'title' and 'abstract' columns.")
-    except Exception as e:
-        st.error(f"Could not preview CSV: {e}")
-# ── Run Pipeline ─────────────────────────────────────────────────────────────
 st.markdown("<br>", unsafe_allow_html=True)
 run_btn = st.button("▶  Run Full Pipeline", type="primary")
 if run_btn:
-    # Validation
-    missing_keys = []
-    if not groq_api_key:    missing_keys.append("Groq")
-    if not mistral_api_key: missing_keys.append("Mistral")
-    if not gemini_api_key:  missing_keys.append("Gemini")
-    if missing_keys:
-        st.error(f"Missing API key(s): {', '.join(missing_keys)}. All three are required for the LLM council.")
         st.stop()
-    if not use_sample and uploaded_file is None:
-        st.error("Please upload a CSV file or enable the sample dataset.")
         st.stop()
-    # Prepare CSV path
     if use_sample:
-        import numpy as np
         rng = np.random.default_rng(42)
-        topics_pool = [
-            ("Deep Learning for Healthcare Prediction", "We apply LSTM networks to predict patient readmission from EHR data."),
-            ("Process Mining in Enterprise Systems", "Event log analysis using Petri nets for conformance checking in ERP workflows."),
-            ("Recommender Systems Collaborative Filtering", "Matrix factorization techniques applied to e-commerce product recommendation."),
-            ("LLM Applications in Information Systems", "GPT-4 used for automated requirements extraction from stakeholder documents."),
-            ("Blockchain Smart Contract Security", "Formal verification of Solidity smart contracts for financial transaction safety."),
-            ("Federated Learning Privacy Preservation", "Differential privacy mechanisms for distributed model training across hospitals."),
-            ("Cybersecurity Intrusion Detection", "Random forest classifiers for network anomaly detection in enterprise environments."),
-            ("Natural Language Processing Sentiment", "BERT fine-tuning for aspect-level sentiment analysis in product reviews."),
-            ("Knowledge Graph Embedding", "TransE and RotatE models for biomedical entity relation prediction."),
-            ("Computer Vision Medical Imaging", "CNN architectures for diabetic retinopathy grading from fundus photographs."),
         ]
-        rows = []
-        for i in range(50):
-            t, a = topics_pool[i % len(topics_pool)]
-            rows.append({"title": t, "abstract": a + f" Study {i+1}.", "doi": f"10.1145/sample.{i+1}"})
-        df_s = pd.DataFrame(rows)
         tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
-        df_s.to_csv(tmp.name, index=False)
-        csv_path = tmp.name
     else:
         tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
-        tmp.write(uploaded_file.read())
-        tmp.flush()
-        csv_path = tmp.name
-    # Step 1: Topic modeling
-    progress_bar = st.progress(0, text="Step 1/2 — SPECTER2 embeddings + HDBSCAN clustering (15–30 clusters)…")
     try:
-        topic_results = run_topic_modeling(csv_path, min_topic_size=min_topic_size)
-        n_clusters = len(topic_results["documents"]["topic_keywords"])
-        progress_bar.progress(50, text=f"Step 1/2 — Done. {n_clusters} clusters found.")
-    except Exception as exc:
-        st.error(f"Topic modeling failed: {exc}")
         st.stop()
-    # Step 2: LLM Council
-    progress_bar.progress(55, text="Step 2/2 — 3-LLM Council labelling (Groq + Mistral + Gemini)…")
     try:
-        agent_results = run_agent(
-            topic_results=topic_results,
-            groq_key=groq_api_key,
-            mistral_key=mistral_api_key,
-            gemini_key=gemini_api_key,
-        )
-        progress_bar.progress(100, text="Pipeline complete.")
-        st.session_state["agent_results"] = agent_results
-        # Compute summary stats
-        interps = agent_results.get("interpretations", {})
-        novel_count   = sum(1 for i in interps.values() if i.classification == "NOVEL")
-        mapped_count  = sum(1 for i in interps.values() if i.classification == "MAPPED")
-        total_papers  = sum(i.paper_count for i in interps.values())
-        st.session_state["topic_stats"] = {
-            "n_topics": len(interps),
-            "novel": novel_count,
-            "mapped": mapped_count,
-            "total_papers": total_papers,
-        }
-        st.success(f"Pipeline complete — {len(interps)} topics labelled by 3-LLM council.")
-    except Exception as exc:
-        st.error(f"LLM council failed: {exc}")
         st.stop()
-# ── Results Display ────────────────────────────────────────────────────────────
-results = st.session_state.get("agent_results")
-stats   = st.session_state.get("topic_stats")
-if results and stats:
-    interps = results.get("interpretations", {})
-    # ── Summary stats ─────────────────────────────────────────────────────────
-    st.markdown("<div class='section-title'>Pipeline Summary</div>", unsafe_allow_html=True)
     st.markdown(f"""
 <div class="stat-grid">
-  <div class="stat-card">
-    <div class="stat-val">{stats['n_topics']}</div>
-    <div class="stat-label">Topics Found</div>
-  </div>
-  <div class="stat-card">
-    <div class="stat-val">{stats['total_papers']}</div>
-    <div class="stat-label">Papers Assigned</div>
-  </div>
-  <div class="stat-card">
-    <div class="stat-val">{stats['novel']}</div>
-    <div class="stat-label">NOVEL (no PAJAIS home)</div>
-  </div>
-  <div class="stat-card">
-    <div class="stat-val">{stats['mapped']}</div>
-    <div class="stat-label">MAPPED to PAJAIS</div>
-  </div>
 </div>
 """, unsafe_allow_html=True)
-    # ── Validation panel ──────────────────────────────────────────────────────
-    st.markdown("<div class='section-title'>LLM Council Validation</div>", unsafe_allow_html=True)
-    novel_pct  = round(stats['novel']  / stats['n_topics'] * 100) if stats['n_topics'] else 0
-    mapped_pct = round(stats['mapped'] / stats['n_topics'] * 100) if stats['n_topics'] else 0
     st.markdown(f"""
 <div class="val-box">
-  <h4>Instructor Spec Compliance</h4>
-  <div class="val-row"><span class="val-key">Embedding model</span><span class="val-num">SPECTER2 (allenai/specter2_base)</span></div>
-  <div class="val-row"><span class="val-key">Input column</span><span class="val-num">Title + Abstract (combined)</span></div>
-  <div class="val-row"><span class="val-key">Clustering</span><span class="val-num">UMAP → HDBSCAN (min=5, max=100 per cluster)</span></div>
-  <div class="val-row"><span class="val-key">Cosine similarity range</span><span class="val-num">0.50 – 0.55 (merge / outlier reassign)</span></div>
-  <div class="val-row"><span class="val-key">Total clusters</span><span class="val-num">{stats['n_topics']} (target: 15–30)</span></div>
-  <div class="val-row"><span class="val-key">LLM council</span><span class="val-num">Groq (LLaMA-3.1) + Mistral Small + Gemini 2.5 Flash</span></div>
-  <div class="val-row"><span class="val-key">Label selection</span><span class="val-num">Majority vote → keyword-overlap fallback</span></div>
-  <div class="val-row"><span class="val-key">Rep. docs per topic</span><span class="val-num">Top-3 by cosine similarity to centroid</span></div>
-  <div class="val-row"><span class="val-key">NOVEL themes (no PAJAIS home)</span><span class="val-num">{novel_pct}% ({stats['novel']} topics)</span></div>
-  <div class="val-row"><span class="val-key">MAPPED to PAJAIS taxonomy</span><span class="val-num">{mapped_pct}% ({stats['mapped']} topics)</span></div>
 </div>
 """, unsafe_allow_html=True)
-    # ── Filters ───────────────────────────────────────────────────────────────
-    st.markdown("<div class='section-title'>Topic Results</div>", unsafe_allow_html=True)
     rows = []
-    for tid, interp in sorted(interps.items()):
         rows.append({
-            "Topic ID":       tid,
-            "Label":          interp.label,
-            "Classification": interp.classification,
-            "Category":       interp.category,
-            "Papers":         interp.paper_count,
-            "Keywords":       ", ".join(interp.keywords[:8]),
         })
-    df_res = pd.DataFrame(rows).sort_values("Papers", ascending=False).reset_index(drop=True)
-    col_f1, col_f2, col_f3 = st.columns([2, 2, 1])
-    with col_f1:
-        cats = ["All"] + sorted(df_res["Category"].unique().tolist())
-        sel_cat = st.selectbox("Filter by category", cats)
-    with col_f2:
-        clsf = ["All", "NOVEL", "MAPPED"]
-        sel_cls = st.selectbox("Filter by classification", clsf)
-    with col_f3:
-        sort_by = st.selectbox("Sort by", ["Papers ↓", "Papers ↑", "Label A–Z"])
-    df_f = df_res.copy()
-    if sel_cat != "All":
-        df_f = df_f[df_f["Category"] == sel_cat]
-    if sel_cls != "All":
-        df_f = df_f[df_f["Classification"] == sel_cls]
-    if sort_by == "Papers ↓":
-        df_f = df_f.sort_values("Papers", ascending=False)
-    elif sort_by == "Papers ↑":
-        df_f = df_f.sort_values("Papers", ascending=True)
-    else:
-        df_f = df_f.sort_values("Label")
-    df_f = df_f.reset_index(drop=True)
-    st.caption(f"Showing {len(df_f)} of {len(df_res)} topics")
-    # ── Topic cards ───────────────────────────────────────────────────────────
-    view_mode = st.radio("View as", ["Table", "Cards"], horizontal=True)
-    if view_mode == "Table":
-        st.dataframe(df_f, use_container_width=True, height=420)
-    else:
-        for _, row in df_f.iterrows():
-            cls_pill = (
-                "<span class='pill pill-amber'>NOVEL</span>"
-                if row["Classification"] == "NOVEL"
-                else "<span class='pill pill-green'>MAPPED</span>"
-            )
-            card_cls = "topic-card novel" if row["Classification"] == "NOVEL" else "topic-card"
             st.markdown(f"""
-<div class="{card_cls}">
   <div class="topic-label">{row['Label']}</div>
   <div class="topic-meta">
-    {cls_pill}
-    <span class="pill pill-gray">{row['Category']}</span>
-    <span class="pill pill-blue">{row['Papers']} papers</span>
   </div>
-  <div class="topic-kw">{row['Keywords']}</div>
-</div>
-""", unsafe_allow_html=True)
-    # ── Bar chart ─────────────────────────────────────────────────────────────
-    st.markdown("<br>", unsafe_allow_html=True)
-    with st.expander("Topic frequency chart", expanded=False):
-        chart_df = df_f[["Label", "Papers"]].copy()
-        chart_df["Label"] = chart_df["Label"].apply(lambda x: x[:35] + "…" if len(x) > 35 else x)
-        chart_df = chart_df.set_index("Label")
-        st.bar_chart(chart_df, height=380)
-    # ── NOVEL / PAJAIS breakdown ───────────────────────────────────────────────
-    with st.expander("NOVEL vs PAJAIS breakdown — for paper §4.6", expanded=False):
-        col_n, col_m = st.columns(2)
-        with col_n:
-            st.markdown("**NOVEL topics (no PAJAIS home)**")
-            novel_df = df_f[df_f["Classification"] == "NOVEL"][["Label", "Papers", "Category"]].reset_index(drop=True)
-            st.dataframe(novel_df, use_container_width=True)
-        with col_m:
-            st.markdown("**MAPPED topics (PAJAIS match)**")
-            mapped_df = df_f[df_f["Classification"] == "MAPPED"][["Label", "Papers", "Category"]].reset_index(drop=True)
-            st.dataframe(mapped_df, use_container_width=True)
-    # ── Representative documents ──────────────────────────────────────────────
-    with st.expander("Representative papers per topic (top-3 by centroid proximity)", expanded=False):
-        rep_docs = results.get("rep_docs_raw", {})
-        # Pull from topic_results stored in session if available
-        for tid, interp in sorted(interps.items()):
-            st.markdown(f"**Topic {tid} — {interp.label}**")
-            docs = interp.keywords  # fallback; actual rep_docs wired below
-            st.caption("See topics.json for full representative document titles.")
-        st.info("Download topics.json below to see the 3 representative paper titles per cluster used for LLM labelling.")
-    # ── Downloads ─────────────────────────────────────────────────────────────
-    st.markdown("<div class='section-title'>Downloads</div>", unsafe_allow_html=True)
-    col_d1, col_d2, col_d3 = st.columns(3)
-    with col_d1:
-        try:
-            with open(results["json_path"], "r") as f:
-                st.download_button(
-                    "⬇  topics.json",
-                    f.read(),
-                    file_name="tmis_topics.json",
-                    mime="application/json",
-                    use_container_width=True,
-                )
-        except Exception:
-            st.warning("JSON file not found.")
-    with col_d2:
         try:
-            df_dl = pd.read_csv(results["csv_path"])
-            st.download_button(
-                "⬇  topics.csv",
-                df_dl.to_csv(index=False),
-                file_name="tmis_topics.csv",
-                mime="text/csv",
-                use_container_width=True,
-            )
         except Exception:
-            st.warning("CSV file not found.")
-    with col_d3:
-        st.download_button(
-            "⬇  results table",
-            df_res.to_csv(index=False),
-            file_name="tmis_topic_results.csv",
-            mime="text/csv",
-            use_container_width=True,
-        )
-    # ── Method note for paper ─────────────────────────────────────────────────
-    st.markdown("<br>", unsafe_allow_html=True)
-    with st.expander("§3.4 methodology note — paste into paper", expanded=False):
-        st.code(f"""Pipeline A (Unsupervised Discovery): SPECTER2 (allenai/specter2_base) generates one
-768-dimensional document embedding per paper from a combined Title + Abstract column.
-UMAP (n_neighbors=15, n_components=5, metric=cosine) reduces dimensionality; HDBSCAN
-(min_cluster_size={min_topic_size}, metric=euclidean, cluster_selection=eom) clusters embeddings.
-Cosine similarity threshold 0.50–0.55 governs cluster merging and outlier reassignment.
-Total clusters constrained to 15–30 via iterative split/merge.
-Pipeline B (LLM Council Validation): For each cluster, the 3 papers nearest the centroid
-(by cosine similarity) are passed as representative titles to 3 independent LLMs:
-Groq/LLaMA-3.1-8b, Mistral-Small-Latest, and Gemini-2.5-Flash. Each LLM returns a
-structured JSON with label, taxonomy_category, and classification (MAPPED/NOVEL).
-Majority vote selects the final label; keyword-overlap fallback applies when no consensus.
-This is the 3-LLM Council approach validating AI output without using the same model
-for self-validation (per Carlsen & Ralund, 2022 CALM principle).
-Results: {stats['n_topics']} clusters discovered. {novel_pct}% classified as NOVEL
-(no PAJAIS 2019 home). {mapped_pct}% MAPPED to existing PAJAIS categories.""", language="text")
-# ── Empty state ───────────────────────────────────────────────────────────────
-elif not results:
     st.markdown("""
 <div style="text-align:center;padding:4rem 2rem;border:1px dashed #1f2333;border-radius:6px;margin-top:2rem;">
-  <p style="font-family:'IBM Plex Mono',monospace;font-size:0.8rem;color:#3a4060;letter-spacing:0.1em;">
     UPLOAD CSV → ENTER API KEYS → RUN PIPELINE
   </p>
-  <p style="font-size:0.75rem;color:#2a3050;margin-top:0.5rem;">
-    SPECTER2 embeddings · HDBSCAN · 3-LLM council · PAJAIS validation
   </p>
 </div>
-""", unsafe_allow_html=True)

 """
 app.py
 ------
+Streamlit UI — SPECTER-2 + UMAP + HDBSCAN Bayesian Pipeline
+with 2-D UMAP scatter, Pareto front, strong/weak members,
+trial log, and LLM Council Sheets 1-4.
 """
+import os, json, tempfile
 import pandas as pd
+import numpy as np
 import streamlit as st
+import plotly.express as px
+import plotly.graph_objects as go
 from tools import run_topic_modeling
 from agent import run_agent
+# ── Page ─────────────────────────────────────────────────────────────────────
+st.set_page_config(page_title="SPECTER-2 Topic Analyzer", page_icon="📐",
+                   layout="wide", initial_sidebar_state="expanded")
+# ── CSS ──────────────────────────────────────────────────────────────────────
 st.markdown("""
 <style>
 @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600&family=IBM+Plex+Sans:wght@300;400;500;600&display=swap');
+html, body, [class*="css"] { font-family:'IBM Plex Sans',sans-serif; }
+.stApp { background:#0d0f14; color:#e8eaf0; }
+[data-testid="stSidebar"] { background:#13161e; border-right:1px solid #1f2333; }
+[data-testid="stSidebar"] * { color:#b0b8cc !important; }
+[data-testid="stSidebar"] h1,[data-testid="stSidebar"] h2,[data-testid="stSidebar"] h3 {
+  color:#e8eaf0!important; font-family:'IBM Plex Mono',monospace!important;
+  font-size:.8rem!important; letter-spacing:.12em!important; text-transform:uppercase!important; }
+.site-header { padding:2.5rem 0 1.5rem; border-bottom:1px solid #1f2333; margin-bottom:2rem; }
+.site-header h1 { font-family:'IBM Plex Mono',monospace; font-size:1.6rem; font-weight:600;
+  color:#e8eaf0; letter-spacing:-.01em; margin:0 0 .3rem; }
+.site-header p { font-size:.82rem; color:#5a6480; font-family:'IBM Plex Mono',monospace; margin:0; }
+.pill { display:inline-block; font-family:'IBM Plex Mono',monospace; font-size:.68rem;
+  font-weight:600; letter-spacing:.08em; text-transform:uppercase; padding:3px 10px;
+  border-radius:2px; margin-right:6px; }
+.pill-blue  { background:#0f2a4a; color:#4d9de0; border:1px solid #1a4070; }
+.pill-green { background:#0a2a1a; color:#3dba7a; border:1px solid #1a4a2a; }
+.pill-amber { background:#2a1f00; color:#e8a020; border:1px solid #4a3500; }
+.pill-red   { background:#2a0f0f; color:#e04d4d; border:1px solid #4a1a1a; }
+.pill-gray  { background:#1a1e2a; color:#7a8090; border:1px solid #2a2e3a; }
+.stat-grid { display:grid; grid-template-columns:repeat(5,1fr); gap:1px;
+  background:#1f2333; border:1px solid #1f2333; border-radius:6px; overflow:hidden; margin-bottom:2rem; }
+.stat-card { background:#13161e; padding:1.25rem 1.5rem; text-align:center; }
+.stat-val { font-family:'IBM Plex Mono',monospace; font-size:1.9rem; font-weight:600;
+  color:#e8eaf0; line-height:1; margin-bottom:.3rem; }
+.stat-label { font-size:.7rem; color:#5a6480; text-transform:uppercase; letter-spacing:.1em;
+  font-family:'IBM Plex Mono',monospace; }
+.section-title { font-family:'IBM Plex Mono',monospace; font-size:.7rem; font-weight:600;
+  letter-spacing:.15em; text-transform:uppercase; color:#5a6480;
+  padding-bottom:.6rem; border-bottom:1px solid #1f2333; margin-bottom:1.2rem; }
+.topic-card { background:#13161e; border:1px solid #1f2333; border-left:3px solid #4d9de0;
+  border-radius:4px; padding:1rem 1.25rem; margin-bottom:.6rem; transition:border-color .15s; }
+.topic-card:hover { border-left-color:#3dba7a; }
+.topic-label { font-size:.92rem; font-weight:500; color:#e8eaf0; margin-bottom:.35rem; }
+.topic-meta { font-family:'IBM Plex Mono',monospace; font-size:.7rem; color:#5a6480; }
+.topic-kw { font-family:'IBM Plex Mono',monospace; font-size:.68rem; color:#3d6480;
+  margin-top:.4rem; line-height:1.5; }
+.val-box { background:#0a2a1a; border:1px solid #1a4a2a; border-radius:6px;
+  padding:1.25rem 1.5rem; margin-bottom:1.5rem; }
+.val-box h4 { font-family:'IBM Plex Mono',monospace; font-size:.72rem; font-weight:600;
+  letter-spacing:.1em; text-transform:uppercase; color:#3dba7a; margin:0 0 .75rem; }
+.val-row { display:flex; justify-content:space-between; align-items:center;
+  padding:.4rem 0; border-bottom:1px solid #1a3a2a; font-size:.8rem; color:#a0b8a8; }
+.val-row:last-child { border-bottom:none; }
+.val-key { color:#5a7a6a; } .val-num { font-family:'IBM Plex Mono',monospace; color:#3dba7a; font-weight:600; }
+.stButton > button { background:#4d9de0!important; color:#0d0f14!important; border:none!important;
+  border-radius:3px!important; font-family:'IBM Plex Mono',monospace!important;
+  font-size:.78rem!important; font-weight:600!important; letter-spacing:.08em!important;
+  text-transform:uppercase!important; padding:.6rem 2rem!important; }
+.stButton > button:hover { background:#3d8ed0!important; }
+.stDownloadButton > button { background:transparent!important; color:#4d9de0!important;
+  border:1px solid #1a4070!important; border-radius:3px!important;
+  font-family:'IBM Plex Mono',monospace!important; font-size:.72rem!important; }
 </style>
 """, unsafe_allow_html=True)
 # ── Header ───────────────────────────────────────────────────────────────────
 st.markdown("""
 <div class="site-header">
+  <h1>SPECTER-2 Topic Analyzer</h1>
+  <p>SPECTER-2 embeddings &nbsp;·&nbsp; Bayesian UMAP+HDBSCAN &nbsp;·&nbsp;
+     3-LLM Council (Groq + Mistral + Gemini)</p>
 </div>
 """, unsafe_allow_html=True)
 # ── Sidebar ──────────────────────────────────────────────────────────────────
 # The sidebar collects the three provider API keys and the Optuna trial count,
 # shows static descriptive pills, and offers a button that clears cached results.
 with st.sidebar:
     st.markdown("### API Keys")
     # Password-type inputs: the typed value is masked in the browser.
+    groq_key_in    = st.text_input("Groq API Key",    type="password")
+    mistral_key_in = st.text_input("Mistral API Key", type="password")
+    gemini_key_in  = st.text_input("Gemini API Key",  type="password")
     st.caption("Keys are never stored. Leave blank to use env vars.")
     st.markdown("---")
+    st.markdown("### Bayesian Optimisation")
     # Slider arguments are (label, min=20, max=100, default=50).
+    n_trials = st.slider("Optuna trials", 20, 100, 50,
+                         help="§3.4: 50–100 trials recommended")
     # Display-only pills; they are plain text and not wired to any control here.
     st.markdown(
+        "<span class='pill pill-blue'>Max mass ≤ 25%</span>"
+        "<span class='pill pill-blue'>Min size ≥ 5</span>",
+        unsafe_allow_html=True)
     st.markdown("---")
     st.markdown("### LLM Council")
     st.markdown("""
+<div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:1rem;">
   <span class="pill pill-blue">Groq / LLaMA-3.1</span>
   <span class="pill pill-green">Mistral Small</span>
   <span class="pill pill-amber">Gemini 2.5 Flash</span>
 </div>
+<p style="font-size:.72rem;color:#5a6480;font-family:'IBM Plex Mono',monospace;">
+Sheet 1–3 per LLM · Sheet 4 consolidation<br>
+Triple / Two / Single agreement tags<br>
+Defence prompt for disagreement clusters
 </p>
 """, unsafe_allow_html=True)
     st.markdown("---")
     if st.button("Reset Results", use_container_width=True):
         # pop(..., None) ignores keys that were never set; the rerun then
         # redraws the page without the cached results.
+        for k in ["results", "agent_out", "topic_data"]:
+            st.session_state.pop(k, None)
         st.rerun()
 # A non-empty (whitespace-stripped) sidebar entry wins; otherwise fall back to
 # the environment variable, which yields None when it is not set.
+groq_key    = groq_key_in.strip()    or os.getenv("GROQ_API_KEY")
+mistral_key = mistral_key_in.strip() or os.getenv("MISTRAL_API_KEY")
+gemini_key  = gemini_key_in.strip()  or os.getenv("GEMINI_API_KEY")
+# ── Upload ───────────────────────────────────────────────────────────────────
 # Dataset input: a CSV uploader and a "use sample" checkbox placed side by side
 # in a 3:1 column layout.
 st.markdown("<div class='section-title'>Dataset</div>", unsafe_allow_html=True)
+col_up, col_s = st.columns([3, 1])
 with col_up:
+    uploaded = st.file_uploader(
+        "Upload Scopus CSV (must have 'title' + 'abstract')", type=["csv"])
+with col_s:
     st.markdown("<br>", unsafe_allow_html=True)
+    use_sample = st.checkbox("Use sample dataset (50 papers)")
 # Preview of the uploaded file; skipped when the sample dataset is selected.
+if uploaded and not use_sample:
     # Parse once for the preview, then rewind so the upload can be read again.
+    dfp = pd.read_csv(uploaded); uploaded.seek(0)
+    c1, c2, c3 = st.columns(3)
+    c1.metric("Papers", len(dfp))
+    c2.metric("Columns", len(dfp.columns))
     # Column names are lower-cased first, so the check is case-insensitive.
+    ok = {"title","abstract"}.issubset(set(dfp.columns.str.lower()))
+    c3.metric("Title+Abstract", "✓" if ok else "✗")
     # NOTE(review): a failed check only displays an error; this block does not
     # stop the script, so nothing here prevents a later run on this file.
+    if not ok:
+        st.error("CSV must have 'title' and 'abstract' columns.")
+# ── Run ──────────────────────────────────────────────────────────────────────
+# Full pipeline, triggered by the button below: check keys and input, write the
+# dataset to a temporary CSV, run topic modelling, then the 3-LLM council.
+# Each failure is reported with st.error and the script run is halted.
 st.markdown("<br>", unsafe_allow_html=True)
 run_btn = st.button("▶  Run Full Pipeline", type="primary")
 if run_btn:
+    # Collect every missing provider key so the user sees them all at once.
+    missing = []
+    if not groq_key:    missing.append("Groq")
+    if not mistral_key: missing.append("Mistral")
+    if not gemini_key:  missing.append("Gemini")
+    if missing:
+        st.error(f"Missing key(s): {', '.join(missing)}")
         st.stop()
+    if not use_sample and not uploaded:
+        st.error("Upload a CSV or enable sample dataset.")
         st.stop()
+    # Prepare CSV
+    # run_topic_modeling is given a file path, so both input modes are written
+    # to a temporary CSV. The handle is closed (via `with`) before the path is
+    # used; delete=False keeps the file on disk after closing.
+    # NOTE(review): the temporary file is never removed — confirm whether it can
+    # be deleted once run_topic_modeling has returned.
     if use_sample:
+        # Ten seed (title, abstract) pairs, repeated five times -> 50 rows.
+        pool = [
+            ("Deep Learning for Healthcare Prediction",
+             "We apply LSTM networks to predict patient readmission from EHR data."),
+            ("Process Mining in Enterprise Systems",
+             "Event log analysis using Petri nets for conformance checking in ERP workflows."),
+            ("Recommender Systems Collaborative Filtering",
+             "Matrix factorization techniques applied to e-commerce product recommendation."),
+            ("LLM Applications in Information Systems",
+             "GPT-4 used for automated requirements extraction from stakeholder documents."),
+            ("Blockchain Smart Contract Security",
+             "Formal verification of Solidity smart contracts for financial transaction safety."),
+            ("Federated Learning Privacy Preservation",
+             "Differential privacy mechanisms for distributed model training across hospitals."),
+            ("Cybersecurity Intrusion Detection",
+             "Random forest classifiers for network anomaly detection in enterprise environments."),
+            ("NLP Sentiment Analysis",
+             "BERT fine-tuning for aspect-level sentiment analysis in product reviews."),
+            ("Knowledge Graph Embedding",
+             "TransE and RotatE models for biomedical entity relation prediction."),
+            ("Computer Vision Medical Imaging",
+             "CNN architectures for diabetic retinopathy grading from fundus photographs."),
         ]
+        # Each repeated row gets a distinct abstract suffix and DOI.
+        rows = [{"title": t, "abstract": a + f" Study {i+1}.",
+                 "doi": f"10.1145/sample.{i+1}"}
+                for i, (t, a) in enumerate(pool * 5)]
+        dfs = pd.DataFrame(rows)
+        # Reserve a path and close the handle before pandas writes to it.
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
+            csv_path = tmp.name
+        dfs.to_csv(csv_path, index=False)
     else:
+        # getvalue() returns the whole upload regardless of the current
+        # stream position; leaving the `with` block flushes and closes.
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
+            tmp.write(uploaded.getvalue())
+            csv_path = tmp.name
+    # Step 1 — Topic modelling + Bayesian optimisation
+    pbar = st.progress(0, text="Step 1/2 — SPECTER-2 embed + Bayesian UMAP/HDBSCAN…")
+    def _progress(cur, total, entry):
+        """Show trial `cur` of `total` on the progress bar.
+
+        `entry` is read for its 'discipline_pass' and 'n_clusters' keys.
+        """
+        # Trials are scaled onto 0-45% of the bar and capped at 49, below the
+        # 50% mark that is set once step 1 has finished.
+        pct = int(cur / total * 45)
+        txt = (f"Trial {cur}/{total} — "
+               f"{'PASS' if entry['discipline_pass'] else 'FAIL'} — "
+               f"{entry['n_clusters']} clusters")
+        pbar.progress(min(pct, 49), text=txt)
     try:
+        topic_data = run_topic_modeling(csv_path, n_trials=n_trials,
+                                       progress_callback=_progress)
+        nc = topic_data["discipline"]["n_clusters"]
+        pbar.progress(50, text=f"Step 1 done — {nc} clusters, "
+                      f"{topic_data['n_trials_run']} trials.")
+    except Exception as e:
+        # Top-level UI boundary: show the message and halt this script run.
+        st.error(f"Topic modelling failed: {e}")
         st.stop()
+    # Step 2 — LLM Council
+    pbar.progress(55, text="Step 2/2 — 3-LLM Council labelling…")
     try:
+        agent_out = run_agent(topic_data, groq_key, mistral_key, gemini_key)
+        pbar.progress(100, text="Pipeline complete.")
+        # Results are cached only after both steps succeed.
+        st.session_state["topic_data"] = topic_data
+        st.session_state["agent_out"]  = agent_out
+        st.success(f"Done — {len(agent_out['interpretations'])} clusters labelled.")
+    except Exception as e:
+        st.error(f"LLM Council failed: {e}")
         st.stop()
+# ── Results ──────────────────────────────────────────────────────────────────
 # Everything in this branch renders from the two objects cached in session
 # state, and only when both are present.
+td = st.session_state.get("topic_data")
+ao = st.session_state.get("agent_out")
+if td and ao:
+    interps = ao["interpretations"]
+    disc    = td["discipline"]
+    met     = td["metrics"]
+    # ── Summary stats ────────────────────────────────────────────────────
+    st.markdown("<div class='section-title'>Pipeline Summary</div>",
+                unsafe_allow_html=True)
+    n_topics = disc["n_clusters"]
     # Paper totals are summed over the per-cluster interpretation objects.
+    strong_total = sum(i.strong_count for i in interps.values())
+    weak_total   = sum(i.weak_count for i in interps.values())
+    total_papers = strong_total + weak_total
     # max(..., 1) avoids dividing by zero when no papers were counted.
+    strong_pct   = round(strong_total / max(total_papers, 1) * 100)
     st.markdown(f"""
 <div class="stat-grid">
+  <div class="stat-card"><div class="stat-val">{n_topics}</div>
+    <div class="stat-label">Clusters</div></div>
+  <div class="stat-card"><div class="stat-val">{total_papers}</div>
+    <div class="stat-label">Papers assigned</div></div>
+  <div class="stat-card"><div class="stat-val">{strong_pct}%</div>
+    <div class="stat-label">Strong members</div></div>
+  <div class="stat-card"><div class="stat-val">{round(met['persistence'],3)}</div>
+    <div class="stat-label">Persistence</div></div>
+  <div class="stat-card"><div class="stat-val">{round(met['dbcv'],3)}</div>
+    <div class="stat-label">DBCV</div></div>
 </div>
 """, unsafe_allow_html=True)
+    # ── Discipline + metrics panel ───────────────────────────────────────
     # Pass/fail flags and measured values come from `disc` and `met`; the
     # max_mass_pct value is multiplied by 100 for display.
+    st.markdown("<div class='section-title'>Discipline & Quality</div>",
+                unsafe_allow_html=True)
     st.markdown(f"""
 <div class="val-box">
+  <h4>§3.2 Hard Constraints + §3.4 Quality Criteria</h4>
+  <div class="val-row"><span class="val-key">Max cluster mass ≤ 25%</span>
+    <span class="val-num">{'✅ PASS' if disc['max_mass_ok'] else '❌ FAIL'}
+    ({round(disc['max_mass_pct']*100,1)}%)</span></div>
+  <div class="val-row"><span class="val-key">Min cluster size ≥ 5</span>
+    <span class="val-num">{'✅ PASS' if disc['min_size_ok'] else '❌ FAIL'}
+    (min={disc['min_size']})</span></div>
+  <div class="val-row"><span class="val-key">HDBSCAN Persistence</span>
+    <span class="val-num">{round(met['persistence'],4)}</span></div>
+  <div class="val-row"><span class="val-key">DBCV</span>
+    <span class="val-num">{round(met['dbcv'],4)}</span></div>
+  <div class="val-row"><span class="val-key">Stability (ARI, 5 seeds)</span>
+    <span class="val-num">{round(met['stability'],4)}</span></div>
+  <div class="val-row"><span class="val-key">Bayesian trials run</span>
+    <span class="val-num">{td['n_trials_run']} (best = #{td['best_trial']})</span></div>
+  <div class="val-row"><span class="val-key">Noise papers (−1)</span>
+    <span class="val-num">{disc['n_noise']}</span></div>
 </div>
 """, unsafe_allow_html=True)
+    # ── Best params ──────────────────────────────────────────────────────
     # Two-column table of the selected parameters. The UMAP.min_dist and
     # UMAP.metric rows are literals in this table, not read from best_params.
+    with st.expander("Winning UMAP + HDBSCAN parameters", expanded=False):
+        bp = td["best_params"]
+        pdf = pd.DataFrame([
+            {"Parameter": "UMAP.n_neighbors", "Value": bp["n_neighbors"]},
+            {"Parameter": "UMAP.n_components", "Value": bp["n_components"]},
+            {"Parameter": "UMAP.min_dist", "Value": 0.0},
+            {"Parameter": "UMAP.metric", "Value": "cosine"},
+            {"Parameter": "HDBSCAN.min_cluster_size",
+             "Value": bp["min_cluster_size"]},
+            {"Parameter": "HDBSCAN.min_samples", "Value": bp["min_samples"]},
+            {"Parameter": "HDBSCAN.cluster_selection_method",
+             "Value": bp["csm"]},
+            {"Parameter": "HDBSCAN.cluster_selection_epsilon",
+             "Value": bp["cse"]},
+        ])
+        st.dataframe(pdf, use_container_width=True, hide_index=True)
+    # ── 2-D UMAP scatter ────────────────────────────────────────────────
     # Scatter of the stored 2-D coordinates, coloured by cluster label, with a
     # truncated document text shown on hover.
+    st.markdown("<div class='section-title'>2-D UMAP Visualisation</div>",
+                unsafe_allow_html=True)
+    umap2d = np.array(td["umap_2d"])
+    labels_arr = np.array(td["labels"])
     # Columns 0 and 1 of umap2d are the plot axes. Labels are cast to str —
     # presumably so Plotly colours them as discrete categories; confirm.
     # Every document is cut to 80 characters and always gets a trailing "…".
+    scatter_df = pd.DataFrame({
+        "UMAP-1": umap2d[:, 0], "UMAP-2": umap2d[:, 1],
+        "Cluster": [str(l) for l in labels_arr],
+        "Doc": [d[:80]+"…" for d in td["documents"]],
+    })
     # NOTE(review): the title states min_dist=0.1 while the parameter table in
     # this file lists UMAP.min_dist as 0.0 — presumably the 2-D projection uses
     # its own UMAP settings; confirm against the code that builds umap_2d.
+    fig = px.scatter(scatter_df, x="UMAP-1", y="UMAP-2", color="Cluster",
+                     hover_data=["Doc"], opacity=0.75,
+                     title="SPECTER-2 embeddings (2-D UMAP, min_dist=0.1)")
+    fig.update_layout(
+        template="plotly_dark",
+        paper_bgcolor="#0d0f14", plot_bgcolor="#13161e",
+        font=dict(family="IBM Plex Mono", size=11),
+        height=520,
+    )
+    st.plotly_chart(fig, use_container_width=True)
+    # ── Pareto front ─────────────────────────────────────────────────────
     # Plots every logged trial as persistence vs DBCV, coloured by whether it
     # passed the discipline check, followed by the raw trial table.
+    with st.expander("Bayesian trial log & Pareto front", expanded=False):
+        tl = td["trial_log"]
+        tl_df = pd.DataFrame(tl)
         # Nothing is drawn when the trial log is empty.
+        if not tl_df.empty:
             # Map the boolean flag to the PASS/FAIL strings used as colour keys.
+            tl_df["colour"] = tl_df["discipline_pass"].map(
+                {True: "PASS", False: "FAIL"})
+            fig2 = px.scatter(
+                tl_df, x="persistence", y="dbcv", color="colour",
+                hover_data=["trial", "n_clusters", "max_mass_pct"],
+                color_discrete_map={"PASS": "#3dba7a", "FAIL": "#e04d4d"},
+                title="Pareto front — Persistence vs DBCV",
+            )
             # Dashed vertical reference line at persistence = 0.
+            fig2.add_vline(x=0, line_dash="dash", line_color="#5a6480")
+            fig2.update_layout(
+                template="plotly_dark",
+                paper_bgcolor="#0d0f14", plot_bgcolor="#13161e",
+                font=dict(family="IBM Plex Mono", size=11), height=400)
+            st.plotly_chart(fig2, use_container_width=True)
+            st.dataframe(tl_df[["trial", "discipline_pass", "n_clusters",
+                                "persistence", "dbcv", "max_mass_pct",
+                                "min_size", "n_noise"]],
+                         use_container_width=True, height=300)
+    # ── Cluster table (strong / weak) ────────────────────────────────────
     # One row per cluster, built from the interpretation objects in `interps`
     # and shown sorted by total paper count (largest first).
+    st.markdown("<div class='section-title'>Cluster Results</div>",
+                unsafe_allow_html=True)
     rows = []
+    for cid in sorted(interps.keys()):
+        i = interps[cid]
         rows.append({
+            "Cluster": cid,
+            "Label": i.final_label,
+            "Agreement": i.agreement,
+            "PAJAIS": i.final_pacis_match,
+            "Strong": i.strong_count,
+            "Weak": i.weak_count,
+            "Total": i.paper_count,
+            "Confidence": round(i.final_confidence, 2),
             # "?" is shown when the grounding check carries no verdict.
+            "Grounding": i.grounding_check.get("verdict", "?"),
             # Only the first five keyphrases are listed.
+            "Keyphrases": ", ".join(i.keyphrases[:5]),
         })
+    df_res = pd.DataFrame(rows).sort_values("Total", ascending=False
+                                            ).reset_index(drop=True)
+    st.dataframe(df_res, use_container_width=True, height=420)
+    # ── Topic cards ──────────────────────────────────────────────────────
+    # One HTML card per row of df_res: label, agreement pill, PAJAIS match,
+    # strong/weak counts, grounding verdict and keyphrases.
+    with st.expander("Topic cards (detailed)", expanded=False):
+        # Local import so this section brings its own dependency into scope.
+        from html import escape as html_escape
+        for _, row in df_res.iterrows():
+            # Pill colour follows the agreement tag; unknown tags fall back to grey.
+            ag_pill = {"Triple": "pill-green", "Two": "pill-blue",
+                       "Single": "pill-amber"}.get(row["Agreement"], "pill-gray")
+            # The card is rendered as raw HTML, so text fields are escaped
+            # first: a '<' or '&' in a label or keyphrase would otherwise be
+            # parsed as markup.
+            label, agreement, pajais, grounding, keyphrases = (
+                html_escape(str(row[col]))
+                for col in ("Label", "Agreement", "PAJAIS",
+                            "Grounding", "Keyphrases"))
             st.markdown(f"""
+<div class="topic-card">
+  <div class="topic-label">{label}</div>
   <div class="topic-meta">
+    <span class="pill {ag_pill}">{agreement}</span>
+    <span class="pill pill-gray">{pajais}</span>
+    <span class="pill pill-blue">{row['Strong']}S / {row['Weak']}W</span>
+    <span class="pill pill-gray">Ground: {grounding}</span>
   </div>
+  <div class="topic-kw">{keyphrases}</div>
+</div>""", unsafe_allow_html=True)
+    # ── LLM Council Sheets ───────────────────────────────────────────────
     # Long-format table: for each cluster, one row per model sheet (1-3)
     # followed by one row for the consolidated Sheet 4 result.
+    with st.expander("LLM Council — Sheets 1-4", expanded=False):
+        sheet_rows = []
+        for cid in sorted(interps.keys()):
+            i = interps[cid]
+            for sn, sheet in [("Sheet 1 (Groq)", i.sheet1),
+                              ("Sheet 2 (Mistral)", i.sheet2),
+                              ("Sheet 3 (Gemini)", i.sheet3)]:
                 # Missing fields in a sheet are shown as "—".
+                sheet_rows.append({
+                    "Cluster": cid, "Sheet": sn,
+                    "Label": sheet.get("label", "—"),
+                    "PAJAIS": sheet.get("pacis_match", "—"),
+                    "Conf": sheet.get("confidence", "—"),
+                })
             # The final confidence is passed through unrounded here.
+            sheet_rows.append({
+                "Cluster": cid, "Sheet": "Sheet 4 (Final)",
+                "Label": i.final_label,
+                "PAJAIS": i.final_pacis_match,
+                "Conf": i.final_confidence,
+            })
+        st.dataframe(pd.DataFrame(sheet_rows), use_container_width=True,
+                     height=400)
+    # ── Downloads ────────────────────────────────────────────────────────
     # Four download buttons in a row: the topics JSON read from disk, the
     # results table, the trial log, and the best parameters.
+    st.markdown("<div class='section-title'>Downloads</div>",
+                unsafe_allow_html=True)
+    c1, c2, c3, c4 = st.columns(4)
+    with c1:
         # The JSON is read from the path stored under ao["json_path"]; any
         # failure while reading or rendering is reported as "JSON not found."
         try:
+            with open(ao["json_path"]) as f:
+                st.download_button("⬇ topics.json", f.read(),
+                                   "topics.json", "application/json",
+                                   use_container_width=True)
         except Exception:
+            st.warning("JSON not found.")
+    with c2:
         # CSV export of the displayed cluster table, without the index column.
+        st.download_button("⬇ results.csv",
+                           df_res.to_csv(index=False),
+                           "results.csv", "text/csv",
+                           use_container_width=True)
+    with c3:
+        tl_csv = pd.DataFrame(td["trial_log"]).to_csv(index=False)
+        st.download_button("⬇ trial_log.csv", tl_csv,
+                           "trial_log.csv", "text/csv",
+                           use_container_width=True)
+    with c4:
+        bp_json = json.dumps(td["best_params"], indent=2)
+        st.download_button("⬇ best_params.json", bp_json,
+                           "best_params.json", "application/json",
+                           use_container_width=True)
+elif not td:
     # Placeholder shown while no topic_data is cached. If topic_data exists
     # but agent_out does not, neither branch renders anything.
     st.markdown("""
 <div style="text-align:center;padding:4rem 2rem;border:1px dashed #1f2333;border-radius:6px;margin-top:2rem;">
+  <p style="font-family:'IBM Plex Mono',monospace;font-size:.8rem;color:#3a4060;letter-spacing:.1em;">
     UPLOAD CSV → ENTER API KEYS → RUN PIPELINE
   </p>
+  <p style="font-size:.75rem;color:#2a3050;margin-top:.5rem;">
+    SPECTER-2 → Bayesian UMAP+HDBSCAN (50–100 trials) → 3-LLM Council
   </p>
 </div>
+""", unsafe_allow_html=True)