Spaces:

atharvthite05
/

BERTopic_Thematic_Analysis_Agent

Sleeping

App Files Files Community

atharvthite05 committed 10 days ago

Commit

ff66bd7

verified ·

1 Parent(s): 6cd378e

Update app.py

Browse files

Files changed (1) hide show

app.py +516 -2

app.py CHANGED Viewed

@@ -35,6 +35,19 @@ import uuid
 from pathlib import Path
 from urllib.parse import quote
 # ---------------------------------------------------------------------------
 # Agent import — graceful stub when agent.py is absent during dev/testing
 # ---------------------------------------------------------------------------
@@ -86,6 +99,7 @@ EMPTY_REVIEW_DF = pd.DataFrame(columns=REVIEW_COLUMNS)
 MISTRAL_KEY_MISSING = not bool(os.environ.get("MISTRAL_API_KEY", ""))
 GROQ_KEY_MISSING = not bool(os.environ.get("GROQ_API_KEY", ""))
 UPLOADS_DIR = Path("uploads")
 OUTPUTS_DIR = Path(__file__).resolve().parent / "outputs"
 # ---------------------------------------------------------------------------
@@ -596,6 +610,198 @@ def build_file_list_html(paths: list[str]) -> str:
     return "\n".join(items)
 # ---------------------------------------------------------------------------
 # Helper — placeholder chart HTML
 # ---------------------------------------------------------------------------
@@ -619,6 +825,158 @@ def build_placeholder_chart(chart_type: str) -> str:
     <style>@keyframes grow {{ from{{width:0%}} to{{width:75%}} }}</style>"""
 # ---------------------------------------------------------------------------
 # Core interaction handlers
 # ---------------------------------------------------------------------------
@@ -719,12 +1077,24 @@ def submit_review(review_df, agent_state: dict, chat_history: list):
     FIX BUG 3 — write parsed review rows into agent_state["review_df"]
     BEFORE calling the agent, so _parse_review_df() receives the populated list.
     """
     # Store the review table in state so agent.py can read it
     agent_state["review_df"] = review_df.to_dict(orient="records")
     agent_state["review_submitted"] = True
     # Send a short trigger message — the agent reads state, not the payload
-    msg = "Review table submitted. Please proceed to Phase 3 and consolidate themes."
     results = []
     for state in handle_chat(msg, chat_history, agent_state):
         results = state
@@ -732,6 +1102,39 @@ def submit_review(review_df, agent_state: dict, chat_history: list):
     return new_history, new_state, phase_html
 def refresh_downloads(agent_state: dict):
     """Return downloadable artefact paths from agent state."""
     files = agent_state.get("output_files", [])
@@ -868,6 +1271,10 @@ def build_app() -> gr.Blocks:
             with gr.Column(elem_classes=["panel-card", "panel-results"]):
                 gr.HTML("""<div class="card-title"><span>Results</span></div>""")
                 with gr.Tabs(elem_classes=["tabs"]):
                     # ── Tab 1: Review Table ─────────────────────────────
@@ -875,8 +1282,11 @@ def build_app() -> gr.Blocks:
                         gr.HTML("""
                         <p style='font-size:0.78rem;color:var(--text-muted);margin:0 0 12px;'>
                             Edit <b>Approve</b>, <b>Rename To</b>, and <b>Reasoning</b> columns inline,
                             then click <b>Submit Review</b>. Use <b>verify</b> in chat at Phase 2
                             or Phase 5.5 to see Mistral vs Groq comparisons directly in chat output.
                         </p>""")
                         review_table = gr.Dataframe(
@@ -905,6 +1315,11 @@ def build_app() -> gr.Blocks:
                                 elem_classes=["btn-success"],
                             )
                     # ── Tab 2: Charts ───────────────────────────────────
                     with gr.TabItem("Charts", elem_classes=["tabitem"]):
                         chart_selector = gr.Dropdown(
@@ -941,6 +1356,70 @@ def build_app() -> gr.Blocks:
                             elem_classes=["btn-secondary"],
                         )
         # ────────────────────────────────────────────────────────────────
         # Event wiring
         # ────────────────────────────────────────────────────────────────
@@ -1010,9 +1489,44 @@ def build_app() -> gr.Blocks:
                 refresh_review_table(a),
                 *refresh_downloads(a),
                 get_chart_html(selected_chart, a),
             ),
             inputs=[chart_selector, agent_state],
-            outputs=[review_table, download_file_list_html, download_files, chart_display],
         )
     return app

 from pathlib import Path
 from urllib.parse import quote
+# ---------------------------------------------------------------------------
+# Method extraction tools — direct invocation (standalone tab, no agent)
+# ---------------------------------------------------------------------------
+try:
+    from tools import (
+        extract_methods_from_pdfs,
+        OUTPUT_DIR as TOOLS_OUTPUT_DIR,
+        _load_json as tools_load_json,
+    )
+    METHOD_TOOLS_AVAILABLE = True
+except ImportError:
+    METHOD_TOOLS_AVAILABLE = False
 # ---------------------------------------------------------------------------
 # Agent import — graceful stub when agent.py is absent during dev/testing
 # ---------------------------------------------------------------------------
 MISTRAL_KEY_MISSING = not bool(os.environ.get("MISTRAL_API_KEY", ""))
 GROQ_KEY_MISSING = not bool(os.environ.get("GROQ_API_KEY", ""))
 UPLOADS_DIR = Path("uploads")
+PDF_UPLOADS_DIR = Path("uploads") / "pdfs"
 OUTPUTS_DIR = Path(__file__).resolve().parent / "outputs"
 # ---------------------------------------------------------------------------
     return "\n".join(items)
+# ---------------------------------------------------------------------------
+# Helper — cluster stats HTML
+# ---------------------------------------------------------------------------
def build_cluster_stats_html(agent_state: dict) -> str:
    """Render a before/after summary of the clustering optimization rounds.

    Reads ``optimization.json`` from ``OUTPUTS_DIR / <run_key>``, where
    ``run_key`` comes from ``agent_state`` (falling back to ``"abstract"``),
    and compares the first and the last usable round. Whichever of the two
    has more clusters is shown in the "before" card.

    Args:
        agent_state: Agent state mapping; only ``"run_key"`` is read.

    Returns:
        An HTML fragment: a muted placeholder paragraph when the file is
        missing, unreadable or holds no usable rounds, otherwise a two-card
        stats grid.
    """
    from html import escape

    run_key = str(agent_state.get("run_key") or "abstract")
    opt_path = OUTPUTS_DIR / run_key / "optimization.json"
    if not opt_path.exists():
        return (
            "<p style='color:var(--text-muted);font-size:0.83rem;padding:6px 0 2px;'>"
            "No clustering stats yet. Run topic discovery to generate optimization stats."
            "</p>"
        )
    try:
        loaded = json.loads(opt_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        loaded = []
    # Only dict entries can be rendered; anything else in the file is ignored.
    rounds = (
        [entry for entry in loaded if isinstance(entry, dict)]
        if isinstance(loaded, list)
        else []
    )
    if not rounds:
        return (
            "<p style='color:var(--text-muted);font-size:0.83rem;padding:6px 0 2px;'>"
            "Optimization stats are unavailable or empty."
            "</p>"
        )

    def _to_int(value: object) -> int:
        """Coerce *value* to int, treating unusable values as 0."""
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return 0

    def _to_float(value: object) -> float:
        """Coerce *value* to float, treating unusable values as 0.0."""
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    def _cluster_count(entry: dict) -> int:
        """Return the round's cluster count, or 0 when metrics are unusable."""
        metrics = entry.get("metrics")
        if not isinstance(metrics, dict):
            return 0
        return _to_int(metrics.get("n_clusters", 0))

    def _metrics_block(metrics: object) -> str:
        """Render the metrics of one round as a small grid."""
        if not isinstance(metrics, dict):
            return "<div style='color:var(--text-muted);'>No metrics</div>"
        return (
            "<div style='display:grid;gap:4px;font-size:0.78rem;'>"
            f"<div>Clusters: <b>{_to_int(metrics.get('n_clusters', 0))}</b></div>"
            f"<div>Noise ratio: <b>{_to_float(metrics.get('noise_ratio', 0.0)):.2f}</b></div>"
            f"<div>Min/Med/Mean/Max size: <b>{_to_float(metrics.get('min_size', 0)):.0f}</b> / "
            f"<b>{_to_float(metrics.get('median_size', 0)):.0f}</b> / "
            f"<b>{_to_float(metrics.get('mean_size', 0)):.0f}</b> / "
            f"<b>{_to_float(metrics.get('max_size', 0)):.0f}</b></div>"
            "</div>"
        )

    def _params_line(params: object) -> str:
        """Render the clustering parameters of one round as a single line."""
        if not isinstance(params, dict):
            return ""
        shown = {
            key: escape(str(params.get(key, "")), quote=False)
            for key in ("min_cluster_size", "max_cluster_size", "min_samples")
        }
        return (
            f"min_cluster_size={shown['min_cluster_size']}, "
            f"max_cluster_size={shown['max_cluster_size']}, "
            f"min_samples={shown['min_samples']}"
        )

    before_round, after_round = rounds[0], rounds[-1]
    if _cluster_count(after_round) > _cluster_count(before_round):
        # Keep the round with more clusters on the "before" side.
        before_round, after_round = after_round, before_round

    before_label = "Before optimization (more)"
    after_label = "After optimization (less)" if len(rounds) > 1 else "After optimization (no change)"
    before_params = _params_line(before_round.get("params", {}))
    before_metrics = _metrics_block(before_round.get("metrics", {}))
    after_params = _params_line(after_round.get("params", {}))
    after_metrics = _metrics_block(after_round.get("metrics", {}))
    return f"""
    <div style='display:grid;gap:10px;'>
        <div style='font-size:0.82rem;color:var(--text-secondary);font-weight:600;'>Cluster stats</div>
        <div style='display:grid;grid-template-columns:1fr 1fr;gap:12px;'>
            <div style='background:var(--bg-elevated);border:1px solid var(--border);border-radius:10px;padding:10px 12px;'>
                <div style='font-size:0.78rem;color:var(--text-secondary);margin-bottom:6px;'>{before_label}</div>
                <div style='font-size:0.74rem;color:var(--text-muted);margin-bottom:6px;'>
                    {before_params}
                </div>
                {before_metrics}
            </div>
            <div style='background:var(--bg-elevated);border:1px solid var(--border);border-radius:10px;padding:10px 12px;'>
                <div style='font-size:0.78rem;color:var(--text-secondary);margin-bottom:6px;'>{after_label}</div>
                <div style='font-size:0.74rem;color:var(--text-muted);margin-bottom:6px;'>
                    {after_params}
                </div>
                {after_metrics}
            </div>
        </div>
    </div>"""
+# ---------------------------------------------------------------------------
+# Helper — cluster info HTML
+# ---------------------------------------------------------------------------
def build_cluster_info_html(agent_state: dict) -> str:
    """Render one collapsible card per labeled cluster.

    Reads ``summaries.json`` and, when readable, ``labels.json`` from
    ``OUTPUTS_DIR / <run_key>``, where ``run_key`` comes from ``agent_state``
    (falling back to ``"abstract"``). A cluster's label is the first
    non-empty value among ``adjudicated_label``, ``mistral_label`` and
    ``label``; clusters without a label are omitted.

    Args:
        agent_state: Agent state mapping; only ``"run_key"`` is read.

    Returns:
        An HTML fragment: a muted placeholder paragraph when there is nothing
        to show, otherwise a grid of ``<details>`` cards.
    """
    from html import escape

    run_key = str(agent_state.get("run_key") or "abstract")
    run_dir = OUTPUTS_DIR / run_key
    summaries_path = run_dir / "summaries.json"
    labels_path = run_dir / "labels.json"
    if not summaries_path.exists():
        return (
            "<p style='color:var(--text-muted);font-size:0.83rem;padding:6px 0 2px;'>"
            "No clusters yet. Run topic discovery to generate cluster summaries."
            "</p>"
        )

    def _read_json_list(path) -> list:
        """Load a JSON array from *path*; anything unusable yields ``[]``."""
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Missing/unreadable file or malformed JSON / bad encoding.
            return []
        return data if isinstance(data, list) else []

    def _to_int(value: object, default: int) -> int:
        """Coerce *value* to int, returning *default* when that is impossible."""
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return default

    def _escape_html(text: object) -> str:
        """Escape ``&``, ``<`` and ``>``; falsy values become an empty string."""
        return escape(str(text or ""), quote=False)

    summaries = _read_json_list(summaries_path)
    if not summaries:
        return (
            "<p style='color:var(--text-muted);font-size:0.83rem;padding:6px 0 2px;'>"
            "Cluster summaries are empty."
            "</p>"
        )

    # Later rows win when the same cluster id appears more than once.
    label_by_id = {
        _to_int(row.get("cluster_id", -1), -1): (
            row.get("adjudicated_label")
            or row.get("mistral_label")
            or row.get("label")
            or ""
        )
        for row in _read_json_list(labels_path)
        if isinstance(row, dict)
    }

    def _format_papers(papers: object) -> str:
        """Join up to three paper titles, each with its count when present."""
        if not isinstance(papers, list) or not papers:
            return ""
        items = []
        for entry in papers[:3]:
            if not isinstance(entry, dict):
                continue
            title = str(entry.get("paper_title") or entry.get("title") or "").strip()
            if not title:
                continue
            count = entry.get("count")
            if count:
                items.append(f"{_escape_html(title)} ({escape(str(count), quote=False)})")
            else:
                items.append(_escape_html(title))
        return "; ".join(items)

    def _cluster_card(summary: object) -> str:
        """Render one cluster card, or ``""`` when the cluster has no label."""
        if not isinstance(summary, dict):
            return ""
        cid = _to_int(summary.get("cluster_id", -1), -1)
        label = _escape_html(label_by_id.get(cid, ""))
        if not label:
            return ""
        size = _to_int(summary.get("size", 0), 0)
        evidence = summary.get("evidence", [])
        top_evidence = (
            _escape_html(evidence[0]) if isinstance(evidence, list) and evidence else ""
        )
        paper_count = escape(str(summary.get("paper_count", "")), quote=False)
        top_papers = _format_papers(summary.get("top_papers", []))
        return (
            "<details style='background:var(--bg-elevated);border:1px solid var(--border);"
            "border-radius:10px;padding:10px 12px;'>"
            "<summary style='cursor:pointer;font-size:0.84rem;font-weight:600;color:var(--text-primary);'>"
            f"Cluster {cid} — {label} ({size} sentences)</summary>"
            "<div style='margin-top:8px;font-size:0.78rem;color:var(--text-secondary);display:grid;gap:6px;'>"
            f"<div><b>Top evidence:</b> {top_evidence}</div>"
            f"<div><b>Papers:</b> {paper_count} | {top_papers}</div>"
            "</div>"
            "</details>"
        )

    cards = "\n".join(filter(None, map(_cluster_card, summaries)))
    if not cards:
        return (
            "<p style='color:var(--text-muted);font-size:0.83rem;padding:6px 0 2px;'>"
            "No labeled clusters yet. Run labeling or VERIFY to populate labels."
            "</p>"
        )
    return (
        "<div style='display:grid;gap:10px;'>"
        "<div style='font-size:0.82rem;color:var(--text-secondary);font-weight:600;'>"
        "Cluster details</div>"
        f"{cards}"
        "</div>"
    )
 # ---------------------------------------------------------------------------
 # Helper — placeholder chart HTML
 # ---------------------------------------------------------------------------
     <style>@keyframes grow {{ from{{width:0%}} to{{width:75%}} }}</style>"""
+# ---------------------------------------------------------------------------
+# Method Extraction — helper functions
+# ---------------------------------------------------------------------------
def build_method_stats_html(result: dict) -> str:
    """Build the stats HTML for a method-extraction result.

    Args:
        result: Extraction result mapping. ``"n_papers"`` and
            ``"n_extracted"`` are displayed (each defaulting to 0). ``None``,
            an empty mapping, a mapping with a truthy ``"error"`` entry, or
            any value that is not a mapping produces the idle prompt instead.

    Returns:
        An HTML fragment: either the idle prompt paragraph or a two-card
        stats grid.
    """
    from html import escape

    # The extraction tool is not guaranteed to hand back a dict, so anything
    # that cannot be queried with .get() is treated like "no result yet".
    if not isinstance(result, dict) or not result or result.get("error"):
        return (
            "<p style='color:var(--text-muted);font-size:0.83rem;padding:6px 0;'>"
            # Names the button as it is labelled in the UI.
            "Upload PDFs and click <b>Extract Computational Methods</b> to start."
            "</p>"
        )
    n_papers = escape(str(result.get("n_papers", 0)), quote=False)
    n_extracted = escape(str(result.get("n_extracted", 0)), quote=False)
    return f"""
    <div class="stats-grid fade-in" style="grid-template-columns:1fr 1fr;">
        <div class="stat-card accent">
            <div class="stat-value">{n_papers}</div>
            <div class="stat-label">PDFs Processed</div>
        </div>
        <div class="stat-card success">
            <div class="stat-value">{n_extracted}</div>
            <div class="stat-label">Methods Identified</div>
        </div>
    </div>
    """
def _load_methods_csv(filename: str, columns: list[str]) -> pd.DataFrame:
    """Load ``OUTPUTS_DIR / "methods" / filename`` restricted to *columns*.

    Columns absent from the file are added filled with empty strings. A
    missing or unreadable file yields an empty frame with the same columns.
    """
    csv_path = OUTPUTS_DIR / "methods" / filename
    if not csv_path.exists():
        return pd.DataFrame(columns=columns)
    try:
        df = pd.read_csv(csv_path)
    except Exception:
        # Deliberately best-effort: a broken report must not take the UI down.
        return pd.DataFrame(columns=columns)
    return df.reindex(columns=columns, fill_value="")


def get_method_results_df() -> pd.DataFrame:
    """Return the per-paper method summary dataframe."""
    return _load_methods_csv(
        "method_summary.csv",
        [
            "Paper ID",
            "Paper Title",
            "Computational Methods",
        ],
    )


def get_method_technique_df() -> pd.DataFrame:
    """Return the technique-to-papers summary dataframe."""
    return _load_methods_csv(
        "technique_to_papers.csv",
        ["Main Computational Technique", "Algorithms", "Papers"],
    )
def get_method_download_file() -> "list[str] | None":
    """Return the downloadable technique CSV, if it has been generated.

    Returns:
        A one-element list holding the path of
        ``OUTPUTS_DIR / "methods" / "technique_to_papers.csv"`` when that file
        exists, otherwise ``None``.
    """
    technique_path = OUTPUTS_DIR / "methods" / "technique_to_papers.csv"
    if technique_path.exists():
        return [str(technique_path)]
    return None
+# ---------------------------------------------------------------------------
+# Method Extraction — interaction handlers
+# ---------------------------------------------------------------------------
def handle_pdf_upload(file_objs):
    """Copy uploaded PDFs into ``PDF_UPLOADS_DIR``, replacing earlier uploads.

    Args:
        file_objs: Uploaded files; each item is either an object exposing a
            ``name`` attribute holding its path, or a path-like value. Items
            whose suffix is not ``.pdf`` (case-insensitive) are ignored.

    Returns:
        A ``(status_html, stats_html)`` tuple for the status pill and the
        stats panel.
    """
    if not file_objs:
        return (
            "<div class='status-pill idle'><div class='dot'></div>No PDFs uploaded</div>",
            "<p style='color:var(--text-muted);font-size:0.83rem;'>Upload PDF research papers to extract methods.</p>",
        )
    PDF_UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
    # Clear previous uploads with the same case-insensitive suffix test used
    # when copying below, so files such as "paper.Pdf" do not pile up.
    for stale in PDF_UPLOADS_DIR.iterdir():
        if stale.is_file() and stale.suffix.lower() == ".pdf":
            stale.unlink()
    count = 0
    for f in file_objs:
        src = Path(f.name) if hasattr(f, "name") else Path(f)
        if src.suffix.lower() != ".pdf":
            continue
        # A short random prefix keeps same-named uploads from overwriting each other.
        dst = PDF_UPLOADS_DIR / f"{uuid.uuid4().hex[:8]}_{src.name}"
        shutil.copy2(src, dst)
        count += 1
    status = f"<div class='status-pill ready'><div class='dot'></div>{count} PDFs ready</div>"
    stats = f"""
    <div class="stats-grid fade-in">
        <div class="stat-card accent">
            <div class="stat-value">{count}</div>
            <div class="stat-label">PDFs Uploaded</div>
        </div>
    </div>"""
    return status, stats
def run_method_extraction_pipeline():
    """Run method extraction over the uploaded PDFs and build the UI outputs.

    Returns:
        A ``(stats_html, status_html, technique_df, download_files)`` tuple.
        The last two elements always come from ``get_method_technique_df()``
        and ``get_method_download_file()``, whatever the outcome.
    """
    from html import escape

    def _ui_outputs(stats_html: str, status_html: str) -> tuple:
        """Pair the two HTML fragments with the current table and download."""
        return (
            stats_html,
            status_html,
            get_method_technique_df(),
            get_method_download_file(),
        )

    if not METHOD_TOOLS_AVAILABLE:
        return _ui_outputs(
            build_method_stats_html({"error": True}),
            "<div class='status-pill idle'><div class='dot'></div>Tools unavailable</div>",
        )

    # Same case-insensitive suffix test the upload handler applies, so a file
    # accepted at upload time is also recognised here.
    has_pdfs = PDF_UPLOADS_DIR.is_dir() and any(
        entry.is_file() and entry.suffix.lower() == ".pdf"
        for entry in PDF_UPLOADS_DIR.iterdir()
    )
    if not has_pdfs:
        return _ui_outputs(
            "<p style='color:var(--danger);font-size:0.83rem;'>No PDFs found. Upload PDFs first.</p>",
            "<div class='status-pill idle'><div class='dot'></div>No PDFs</div>",
        )

    # Step 1: Extract + LLM Processing
    pdf_dir = str(PDF_UPLOADS_DIR.resolve())
    result = extract_methods_from_pdfs.invoke({"pdf_dir": pdf_dir})
    if isinstance(result, dict) and result.get("error"):
        # The error text is free-form, so escape it before embedding in HTML.
        error_text = escape(str(result["error"]), quote=False)
        return _ui_outputs(
            f"<p style='color:var(--danger);font-size:0.83rem;'>{error_text}</p>",
            "<div class='status-pill idle'><div class='dot'></div>Extraction failed</div>",
        )

    # Build UI outputs; only a dict result carries stats that can be rendered.
    stats_source = result if isinstance(result, dict) else None
    return _ui_outputs(
        build_method_stats_html(stats_source),
        "<div class='status-pill ready'><div class='dot'></div>Extraction complete</div>",
    )
 # ---------------------------------------------------------------------------
 # Core interaction handlers
 # ---------------------------------------------------------------------------
     FIX BUG 3 — write parsed review rows into agent_state["review_df"]
     BEFORE calling the agent, so _parse_review_df() receives the populated list.
     """
+    def _next_phase_message(state: dict) -> str:
+        gate = state.get("stop_gate")
+        if gate == "STOP_GATE_1_AWAIT_REVIEW_TABLE":
+            return "Review table submitted. Please proceed to Phase 3 and consolidate themes."
+        if gate == "STOP_GATE_2_AWAIT_THEME_MERGE":
+            return "Theme merge confirmed. Please proceed to Phase 4 for saturation check."
+        if gate == "STOP_GATE_3_AWAIT_SATURATION_SIGNOFF":
+            return "Saturation sign-off confirmed. Please proceed to Phase 5 for naming themes."
+        if gate == "STOP_GATE_4_AWAIT_TAXONOMY_REVIEW":
+            return "Taxonomy review confirmed. Please proceed to Phase 6 to finalize outputs."
+        return "Review table submitted. Please proceed to the next phase."
     # Store the review table in state so agent.py can read it
     agent_state["review_df"] = review_df.to_dict(orient="records")
     agent_state["review_submitted"] = True
     # Send a short trigger message — the agent reads state, not the payload
+    msg = _next_phase_message(agent_state)
     results = []
     for state in handle_chat(msg, chat_history, agent_state):
         results = state
     return new_history, new_state, phase_html
def auto_accept_review(agent_state: dict, chat_history: list, enabled: bool):
    """Approve every Phase 2 review row and submit it, when auto-accept is on.

    Nothing is submitted (the inputs are handed back together with the phase
    bar for the current phase) when the toggle is off, the agent is not
    waiting at the review-table gate, a review was already submitted, this
    gate was already auto-accepted, or there are no review rows in state.

    Returns:
        A ``(chat_history, agent_state, phase_html)`` tuple.
    """
    review_gate = "STOP_GATE_1_AWAIT_REVIEW_TABLE"

    def _unchanged():
        # Hand the inputs straight back with a freshly rendered phase bar.
        return chat_history, agent_state, build_phase_html(agent_state.get("phase", 0))

    if not enabled:
        return _unchanged()
    gate = agent_state.get("stop_gate")
    already_handled = (
        gate != review_gate
        or agent_state.get("review_submitted")
        or agent_state.get("auto_accept_last_gate") == gate
    )
    if already_handled:
        return _unchanged()
    pending_rows = agent_state.get("review_df", [])
    if not pending_rows:
        return _unchanged()

    review = pd.DataFrame(pending_rows)
    if "Approve" in review.columns:
        review["Approve"] = True
    if all(col in review.columns for col in ("Rename To", "Topic Label")):

        def _final_name(row):
            # An empty rename falls back to the existing topic label.
            return row["Rename To"] or row["Topic Label"]

        review["Rename To"] = review["Rename To"].fillna("").astype(str)
        review["Rename To"] = review.apply(_final_name, axis=1)

    new_history, new_state, phase_html = submit_review(review, agent_state, chat_history)
    # Remember the gate so the chatbot change event does not resubmit it.
    new_state["auto_accept_last_gate"] = gate
    return new_history, new_state, phase_html
 def refresh_downloads(agent_state: dict):
     """Return downloadable artefact paths from agent state."""
     files = agent_state.get("output_files", [])
             with gr.Column(elem_classes=["panel-card", "panel-results"]):
                 gr.HTML("""<div class="card-title"><span>Results</span></div>""")
+                cluster_stats = gr.HTML(
+                    value=build_cluster_stats_html({}),
+                )
                 with gr.Tabs(elem_classes=["tabs"]):
                     # ── Tab 1: Review Table ─────────────────────────────
                         gr.HTML("""
                         <p style='font-size:0.78rem;color:var(--text-muted);margin:0 0 12px;'>
                             Edit <b>Approve</b>, <b>Rename To</b>, and <b>Reasoning</b> columns inline,
+                            and use the <b>Papers</b> column to see the top 3 paper titles per cluster.
                             then click <b>Submit Review</b>. Use <b>verify</b> in chat at Phase 2
                             or Phase 5.5 to see Mistral vs Groq comparisons directly in chat output.
+                            Phase 2 verification also adds an adjudicated best label.
+                            Enable <b>Auto-accept Phase 2 review</b> to skip manual submission.
                         </p>""")
                         review_table = gr.Dataframe(
                                 elem_classes=["btn-success"],
                             )
+                        auto_accept_toggle = gr.Checkbox(
+                            label="Auto-accept Phase 2 review and continue",
+                            value=False,
+                        )
                     # ── Tab 2: Charts ───────────────────────────────────
                     with gr.TabItem("Charts", elem_classes=["tabitem"]):
                         chart_selector = gr.Dropdown(
                             elem_classes=["btn-secondary"],
                         )
+                    # ── Tab 4: Clusters ─────────────────────────────────
+                    with gr.TabItem("Clusters", elem_classes=["tabitem"]):
+                        cluster_info_html = gr.HTML(
+                            value=build_cluster_info_html({}),
+                        )
+            # ── METHOD EXTRACTION — Standalone panel ──────────────────────
+            with gr.Column(elem_classes=["panel-card"]):
+                gr.HTML("""
+                <div class="card-title">
+                    <span>📄 Computational Methodology Extraction</span>
+                </div>
+                <p style='font-size:0.78rem;color:var(--text-muted);margin:0 0 12px;'>
+                    Upload research PDFs to identify the specific computational methods
+                    used in each paper (text-only extraction via PyMuPDF + LLM).
+                </p>
+                """)
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        pdf_upload = gr.File(
+                            label="Upload Research PDFs",
+                            file_types=[".pdf"],
+                            file_count="multiple",
+                            interactive=True,
+                            elem_id="pdf-upload",
+                        )
+                    with gr.Column(scale=1):
+                        method_status = gr.HTML(
+                            value="<div class='status-pill idle'><div class='dot'></div>Awaiting PDF upload</div>"
+                        )
+                        method_stats = gr.HTML(
+                            value="<p style='color:var(--text-muted);font-size:0.83rem;'>"
+                                  "Upload PDF research papers to extract methods.</p>"
+                        )
+                run_methods_btn = gr.Button(
+                    "🚀 Extract Computational Methods",
+                    variant="primary",
+                    elem_classes=["btn-primary"],
+                )
+                gr.HTML("<hr style='border:none;border-top:1px solid var(--border);margin:12px 0;'>")
+                # Results Dataframe
+                gr.HTML("""
+                <div style='font-size:0.82rem;color:var(--text-secondary);font-weight:600;margin-bottom:8px;'>
+                    Computational Techniques → Algorithms → Papers
+                </div>""")
+                method_technique_df = gr.Dataframe(
+                    headers=["Main Computational Technique", "Algorithms", "Papers"],
+                    interactive=False,
+                    wrap=True,
+                )
+                gr.HTML("<hr style='border:none;border-top:1px solid var(--border);margin:12px 0;'>")
+                # CSV Download
+                method_dl_files = gr.File(
+                    label="Download CSV Report",
+                    file_count="multiple",
+                    interactive=False,
+                )
         # ────────────────────────────────────────────────────────────────
         # Event wiring
         # ────────────────────────────────────────────────────────────────
                 refresh_review_table(a),
                 *refresh_downloads(a),
                 get_chart_html(selected_chart, a),
+                build_cluster_stats_html(a),
+                build_cluster_info_html(a),
             ),
             inputs=[chart_selector, agent_state],
+            outputs=[
+                review_table,
+                download_file_list_html,
+                download_files,
+                chart_display,
+                cluster_stats,
+                cluster_info_html,
+            ],
+        )
+        # Auto-accept Phase 2 review when enabled.
+        chatbot.change(
+            fn=auto_accept_review,
+            inputs=[agent_state, chatbot, auto_accept_toggle],
+            outputs=[chatbot, agent_state, phase_bar],
+        )
+        # ── Method Extraction event wiring ─────────────────────────────
+        pdf_upload.change(
+            fn=handle_pdf_upload,
+            inputs=[pdf_upload],
+            outputs=[method_status, method_stats],
+        )
+        run_methods_btn.click(
+            fn=run_method_extraction_pipeline,
+            inputs=[],
+            outputs=[
+                method_stats,
+                method_status,
+                method_technique_df,
+                method_dl_files,
+            ],
         )
     return app