BERTopic_AG_final

Running

App Files Community

anujjuna committed 8 days ago

Commit

34345fd

verified ·

1 Parent(s): ee50027

Update agent.py

Browse files

Files changed (1) hide show

agent.py +218 -371

agent.py CHANGED Viewed

@@ -1,414 +1,261 @@
 """
-agent.py
---------
-LLM Council labelling module (§3.5).
-Three independent LLMs label each cluster, producing Sheets 1–3.
-Sheet 4 consolidates with Triple / Two / Single agreement tags.
-Disagreement clusters get a fourth-round defence prompt.
-Labels not grounded in keyphrases are rejected.
 """
 from __future__ import annotations
-import json
-import logging
-import os
-import re
-import time
 from dataclasses import dataclass, field, asdict
-from typing import Optional
-import pandas as pd
-import numpy as np
-import requests
 from groq import Groq
-# ---------------------------------------------------------------------------
-# Logging
-# ---------------------------------------------------------------------------
 logging.basicConfig(level=logging.INFO, format="%(levelname)s | %(message)s")
 logger = logging.getLogger(__name__)
-# ---------------------------------------------------------------------------
-# Constants
-# ---------------------------------------------------------------------------
-GROQ_MODEL   = "llama-3.1-8b-instant"
 MISTRAL_MODEL = "mistral-small-latest"
-DEFAULT_TAXONOMY = [
-    "Artificial Intelligence", "Machine Learning",
-    "Natural Language Processing", "Computer Vision",
-    "Information Systems", "Healthcare & Bioinformatics",
-    "Finance & Economics", "Cybersecurity",
-    "Human-Computer Interaction", "Robotics & Automation",
-    "Education Technology", "Environmental Science",
-    "Social Sciences", "Data Engineering", "Other",
-]
 # ---------------------------------------------------------------------------
-# Data classes
 # ---------------------------------------------------------------------------
-@dataclass
-class LLMVote:
-    """One LLM's response for one cluster."""
-    llm_name: str
-    label: str = ""
-    description: str = ""
-    pacis_match: str = ""
-    confidence: float = 0.0
-    raw: dict = field(default_factory=dict)
-@dataclass
-class ClusterInterpretation:
-    """Consolidated interpretation for a single cluster."""
-    cluster_id: int
-    final_label: str = ""
-    final_description: str = ""
-    final_pacis_match: str = ""
-    final_confidence: float = 0.0
-    agreement: str = ""          # Triple / Two / Single
-    sheet1: dict = field(default_factory=dict)
-    sheet2: dict = field(default_factory=dict)
-    sheet3: dict = field(default_factory=dict)
-    defence: dict = field(default_factory=dict)  # 4th-round if needed
-    keyphrases: list = field(default_factory=list)
-    strong_count: int = 0
-    weak_count: int = 0
-    paper_count: int = 0
-    grounding_check: dict = field(default_factory=dict)
 # ---------------------------------------------------------------------------
-# API Clients
 # ---------------------------------------------------------------------------
-def build_groq_client(api_key: Optional[str] = None):
-    key = api_key or os.getenv("GROQ_API_KEY")
-    if not key:
-        raise ValueError("No Groq API key.")
-    return Groq(api_key=key, max_retries=0)
-def _call_groq(client, prompt: str) -> dict:
-    try:
-        r = client.chat.completions.create(
-            model=GROQ_MODEL,
-            messages=[{"role": "user", "content": prompt}],
-            temperature=0.2, timeout=15,
-        )
-        return _parse_json(r.choices[0].message.content)
-    except Exception as e:
-        logger.warning("Groq failed: %s", e)
-        return {}
-def _call_mistral(prompt: str, api_key: str) -> dict:
-    if not api_key:
-        return {}
     try:
-        r = requests.post(
-            "https://api.mistral.ai/v1/chat/completions",
-            headers={"Authorization": f"Bearer {api_key}",
-                     "Content-Type": "application/json"},
-            json={"model": MISTRAL_MODEL,
-                  "messages": [{"role": "user", "content": prompt}],
-                  "temperature": 0.2},
-            timeout=15,
-        )
-        return _parse_json(r.json()["choices"][0]["message"]["content"])
-    except Exception as e:
-        logger.warning("Mistral failed: %s", e)
-        return {}
-def _call_gemini(prompt: str, api_key: str) -> dict:
-    if not api_key:
-        return {}
-    url = (f"https://generativelanguage.googleapis.com/v1beta/models/"
-           f"gemini-2.5-flash:generateContent?key={api_key}")
     try:
-        r = requests.post(url,
-                          headers={"Content-Type": "application/json"},
-                          json={"contents": [{"parts": [{"text": prompt}]}],
-                                "generationConfig": {"temperature": 0.2}},
-                          timeout=15)
-        data = r.json()
-        if "candidates" not in data:
-            return {}
-        raw = data["candidates"][0]["content"]["parts"][0]["text"]
-        return _parse_json(raw)
-    except Exception as e:
-        logger.warning("Gemini failed: %s", e)
-        return {}
-def _parse_json(raw: str) -> dict:
-    raw = raw.strip().replace("```json", "").replace("```", "").strip()
-    s, e = raw.find("{"), raw.rfind("}") + 1
-    if s != -1 and e > 0:
-        raw = raw[s:e]
     try:
-        return json.loads(raw)
-    except Exception:
-        return {}
 # ---------------------------------------------------------------------------
-# Prompt builders
 # ---------------------------------------------------------------------------
-def _build_label_prompt(keyphrases: list, rep_abstracts: list) -> str:
-    kp_str = ", ".join(k if isinstance(k, str) else k[0]
-                       for k in keyphrases[:5])
-    abs_str = " | ".join(a[:300] for a in rep_abstracts[:3])
-    return f"""You are a research-topic classifier.
 A SPECTER-2 + HDBSCAN pipeline produced a topic cluster.
-KEYPHRASES: {kp_str}
-REPRESENTATIVE ABSTRACTS (truncated): {abs_str}
-Return ONLY valid JSON (no markdown, no other text):
-{{
-  "label": "<concise 5-8 word topic label>",
-  "description": "<one-sentence description of the topic>",
-  "pacis_match": "<closest PAJAIS 2019 category, or NOVEL if none>",
-  "confidence": <0.0-1.0 float>
-}}"""
-def _build_defence_prompt(
-    keyphrases: list,
-    rep_abstracts: list,
-    votes: list[dict],
-) -> str:
-    kp_str = ", ".join(k if isinstance(k, str) else k[0]
-                       for k in keyphrases[:5])
-    abs_str = " | ".join(a[:300] for a in rep_abstracts[:3])
-    v_str = "\n".join(
-        f"  LLM {i+1}: label=\"{v.get('label','?')}\", "
-        f"pacis=\"{v.get('pacis_match','?')}\""
-        for i, v in enumerate(votes)
-    )
-    return f"""You are a research-topic adjudicator resolving a labelling disagreement.
-KEYPHRASES: {kp_str}
-REPRESENTATIVE ABSTRACTS: {abs_str}
-Three LLMs proposed different labels:
-{v_str}
-Your task: pick the single best label from the three, or synthesise a
-better one.  Justify your choice in one sentence.
 Return ONLY valid JSON:
 {{
-  "label": "<best 5-8 word label>",
-  "description": "<one sentence>",
-  "pacis_match": "<PAJAIS category or NOVEL>",
-  "confidence": <0.0-1.0>,
-  "reasoning": "<one sentence justification>"
 }}"""
 # ---------------------------------------------------------------------------
-# Grounding check — reject labels not supported by keyphrases (§3.5)
 # ---------------------------------------------------------------------------
-def grounding_check(label: str, keyphrases: list) -> dict:
-    """Non-LLM regex check: label tokens must overlap keyphrases."""
-    if not label or not keyphrases:
-        return {"verdict": "FAIL", "score": 0, "matched": []}
-    label_toks = set(re.findall(r"\b[a-z]{3,}\b", label.lower()))
-    kp_toks = set()
-    for kp in keyphrases:
-        phrase = kp if isinstance(kp, str) else kp[0]
-        kp_toks.update(re.findall(r"\b[a-z]{3,}\b", phrase.lower()))
-    noise = {"the", "and", "for", "with", "using", "based", "from", "that",
-             "are", "this", "into", "its"}
-    label_toks -= noise
-    kp_toks -= noise
-    matched = list(label_toks & kp_toks)
-    # stem-level
-    stems = []
-    for lt in label_toks:
-        for kt in kp_toks:
-            if len(lt) >= 4 and (kt.startswith(lt[:4]) or lt.startswith(kt[:4])):
-                stems.append(f"{lt}≈{kt}")
-    score = min(1.0, len(matched) / max(len(label_toks), 1)
-                + 0.15 * len(stems))
-    verdict = "PASS" if (matched or stems) else "FAIL"
-    return {"verdict": verdict, "score": round(score, 3),
-            "matched": matched, "stems": stems[:5]}
 # ---------------------------------------------------------------------------
-# Core — interpret one cluster via 3-LLM council (§3.5)
 # ---------------------------------------------------------------------------
-def interpret_cluster(
-    cluster_id: int,
-    keyphrases: list,
-    rep_docs: list,
-    strong: int,
-    weak: int,
-    groq_client,
-    mistral_key: str,
-    gemini_key: str,
-) -> ClusterInterpretation:
-    prompt = _build_label_prompt(keyphrases, rep_docs)
-    # Sheet 1 — Groq / LLaMA-3.1
-    s1 = _call_groq(groq_client, prompt)
-    time.sleep(1)
-    # Sheet 2 — Mistral
-    s2 = _call_mistral(prompt, mistral_key)
-    time.sleep(1)
-    # Sheet 3 — Gemini
-    s3 = _call_gemini(prompt, gemini_key)
-    votes = [s1, s2, s3]
-    valid = [v for v in votes if v and "label" in v]
-    # --- Sheet 4: consolidate agreement ---
-    labels_lower = [_clean(v.get("label", "")).lower() for v in valid]
-    counts = {}
-    for l in labels_lower:
-        counts[l] = counts.get(l, 0) + 1
-    best_label = ""
-    agreement = "Single"
-    defence = {}
-    if any(c >= 3 for c in counts.values()):
-        agreement = "Triple"
-        winner = max(counts, key=counts.get)
-        best_label = next(v["label"] for v in valid
-                          if _clean(v["label"]).lower() == winner)
-    elif any(c >= 2 for c in counts.values()):
-        agreement = "Two"
-        winner = max(counts, key=counts.get)
-        best_label = next(v["label"] for v in valid
-                          if _clean(v["label"]).lower() == winner)
-    else:
-        agreement = "Single"
-        # Fourth-round defence prompt (§3.5)
-        defence_prompt = _build_defence_prompt(keyphrases, rep_docs, votes)
-        defence = _call_groq(groq_client, defence_prompt)
-        if defence and "label" in defence:
-            best_label = defence["label"]
-        elif valid:
-            best_label = valid[0]["label"]
-    best_label = _clean(best_label)
-    # Grounding check — reject if not supported by keyphrases
-    gc = grounding_check(best_label, keyphrases)
-    if gc["verdict"] == "FAIL" and valid:
-        # Fall back to most keyphrase-grounded label
-        scored = [(v, len(set(re.findall(r"\b[a-z]{3,}\b",
-                     v.get("label", "").lower()))
-                     & set(re.findall(r"\b[a-z]{3,}\b",
-                     " ".join(k if isinstance(k, str) else k[0]
-                              for k in keyphrases).lower()))))
-                  for v in valid]
-        scored.sort(key=lambda x: -x[1])
-        best_label = _clean(scored[0][0]["label"])
-        gc = grounding_check(best_label, keyphrases)
-        logger.info("Cluster %d: label rejected by grounding, "
-                     "fell back to '%s'", cluster_id, best_label)
-    # Best metadata
-    best_v = next((v for v in valid
-                   if _clean(v.get("label", "")).lower()
-                   == best_label.lower()), valid[0] if valid else {})
-    return ClusterInterpretation(
-        cluster_id=cluster_id,
-        final_label=best_label,
-        final_description=best_v.get("description", ""),
-        final_pacis_match=best_v.get("pacis_match", ""),
-        final_confidence=best_v.get("confidence", 0.0),
-        agreement=agreement,
-        sheet1=s1, sheet2=s2, sheet3=s3,
-        defence=defence,
-        keyphrases=[k if isinstance(k, str) else k[0]
-                    for k in keyphrases[:5]],
-        strong_count=strong,
-        weak_count=weak,
-        paper_count=strong + weak,
-        grounding_check=gc,
-    )
-def _clean(s: str) -> str:
-    s = str(s or "").replace("\n", " ").strip()
-    s = " ".join(s.split())
-    if len(s) > 60:
-        s = s[:60].rsplit(" ", 1)[0] if " " in s[:60] else s[:60]
-    return s.rstrip(" .")
 # ---------------------------------------------------------------------------
-# Numpy-safe serialisation
 # ---------------------------------------------------------------------------
-def _convert(obj):
-    if isinstance(obj, dict):
-        return {k: _convert(v) for k, v in obj.items()}
-    if isinstance(obj, list):
-        return [_convert(v) for v in obj]
-    if isinstance(obj, (np.integer,)):
-        return int(obj)
-    if isinstance(obj, (np.floating,)):
-        return float(obj)
-    return obj
 # ---------------------------------------------------------------------------
-# Run agent — orchestrate all clusters
 # ---------------------------------------------------------------------------
-def run_agent(
-    topic_results: dict,
-    groq_key: str,
-    mistral_key: str,
-    gemini_key: str,
-    output_json: str = "topics.json",
-    output_csv: str = "topics.csv",
-) -> dict:
-    client = build_groq_client(groq_key)
-    labels_list   = topic_results["labels"]
-    keyphrases    = topic_results["keyphrases"]
-    rep_docs      = topic_results["representative_docs"]
-    membership    = topic_results["membership"]
-    cluster_ids = sorted(keyphrases.keys())
-    interpretations = {}
-    for cid in cluster_ids:
-        sw = membership.get(cid, {"strong": 0, "weak": 0})
-        interp = interpret_cluster(
-            cluster_id=cid,
-            keyphrases=keyphrases.get(cid, []),
-            rep_docs=rep_docs.get(cid, []),
-            strong=sw["strong"],
-            weak=sw["weak"],
-            groq_client=client,
-            mistral_key=mistral_key,
-            gemini_key=gemini_key,
-        )
-        interpretations[cid] = interp
-        logger.info("Cluster %d → %s [%s] (%d strong, %d weak)",
-                    cid, interp.final_label, interp.agreement,
-                    interp.strong_count, interp.weak_count)
-    # Serialise
-    records = [_convert(asdict(i)) for i in interpretations.values()]
-    with open(output_json, "w") as f:
-        json.dump(records, f, indent=2)
-    df = pd.DataFrame(records)
-    if not df.empty:
-        for col in ["sheet1", "sheet2", "sheet3", "defence",
-                     "keyphrases", "grounding_check"]:
-            if col in df.columns:
-                df[col] = df[col].apply(str)
-        df.to_csv(output_csv, index=False)
-    return dict(interpretations=interpretations,
-                json_path=output_json, csv_path=output_csv)

 """
+agent.py — LangGraph-based topic analysis agent (§11).
+3-LLM Council for topic modelling, 4 sheets, triple-agreement tracking.
 """
 from __future__ import annotations
+import json, logging, os, re, time
 from dataclasses import dataclass, field, asdict
+from typing import TypedDict, Optional
+from collections import Counter
+import pandas as pd, numpy as np, requests
 from groq import Groq
+from langgraph.graph import StateGraph, END
 logging.basicConfig(level=logging.INFO, format="%(levelname)s | %(message)s")
 logger = logging.getLogger(__name__)
+GROQ_MODEL = "llama-3.1-8b-instant"
 MISTRAL_MODEL = "mistral-small-latest"
 # ---------------------------------------------------------------------------
+# LangGraph state
 # ---------------------------------------------------------------------------
+class PipelineState(TypedDict, total=False):
+    filepath: str
+    groq_key: str
+    mistral_key: str
+    gemini_key: str
+    n_trials: int
+    topic_data: dict
+    interpretations: dict
+    sheets: dict          # {1: [...], 2: [...], 3: [...], 4: [...]}
+    agreement_rates: dict
+    mismatch_table: list
+    json_path: str
+    csv_path: str
+    error: str
 # ---------------------------------------------------------------------------
+# API helpers
 # ---------------------------------------------------------------------------
+def _parse(raw: str) -> dict:
+    raw = raw.strip().replace("```json","").replace("```","").strip()
+    s, e = raw.find("{"), raw.rfind("}")+1
+    if s != -1 and e > 0: raw = raw[s:e]
+    try: return json.loads(raw)
+    except: return {}
+def _groq(client, prompt):
     try:
+        r = client.chat.completions.create(model=GROQ_MODEL,
+            messages=[{"role":"user","content":prompt}], temperature=0.2, timeout=15)
+        return _parse(r.choices[0].message.content)
+    except Exception as e: logger.warning("Groq: %s",e); return {}
+def _mistral(prompt, key):
+    if not key: return {}
     try:
+        r = requests.post("https://api.mistral.ai/v1/chat/completions",
+            headers={"Authorization":f"Bearer {key}","Content-Type":"application/json"},
+            json={"model":MISTRAL_MODEL,"messages":[{"role":"user","content":prompt}],
+                  "temperature":0.2}, timeout=15)
+        return _parse(r.json()["choices"][0]["message"]["content"])
+    except Exception as e: logger.warning("Mistral: %s",e); return {}
+def _gemini(prompt, key):
+    if not key: return {}
     try:
+        r = requests.post(
+            f"https://generativelanguage.googleapis.com/v1beta/models/"
+            f"gemini-2.5-flash:generateContent?key={key}",
+            headers={"Content-Type":"application/json"},
+            json={"contents":[{"parts":[{"text":prompt}]}],
+                  "generationConfig":{"temperature":0.2}}, timeout=15)
+        d = r.json()
+        if "candidates" not in d: return {}
+        return _parse(d["candidates"][0]["content"]["parts"][0]["text"])
+    except Exception as e: logger.warning("Gemini: %s",e); return {}
 # ---------------------------------------------------------------------------
+# Topic labelling prompt
 # ---------------------------------------------------------------------------
+def _label_prompt(keyphrases, rep_docs):
+    kp = ", ".join(k[0] if isinstance(k,tuple) else k for k in keyphrases[:5])
+    ab = " | ".join(a[:250] for a in rep_docs[:3])
+    return f"""You are a research topic classifier.
 A SPECTER-2 + HDBSCAN pipeline produced a topic cluster.
+KEYPHRASES: {kp}
+REPRESENTATIVE ABSTRACTS: {ab}
 Return ONLY valid JSON:
 {{
+  "label": "<5-8 word topic label>",
+  "description": "<one sentence description>",
+  "pacis_match": "<closest PAJAIS 2019 category, or NOVEL if none>",
+  "confidence": <0.0-1.0>
 }}"""
 # ---------------------------------------------------------------------------
+# Defence prompt for disagreements
 # ---------------------------------------------------------------------------
+def _defence_prompt(keyphrases, rep_docs, votes):
+    kp = ", ".join(k[0] if isinstance(k,tuple) else k for k in keyphrases[:5])
+    v_str = "\n".join(f"  LLM{i+1}: {v.get('label','?')}" for i,v in enumerate(votes))
+    return f"""Resolve this labelling disagreement.
+KEYPHRASES: {kp}
+Votes:\n{v_str}
+Pick the best label or synthesise a better one.
+Return ONLY JSON: {{"label":"...","description":"...","pacis_match":"...","confidence":0.0}}"""
 # ---------------------------------------------------------------------------
+# Grounding check
 # ---------------------------------------------------------------------------
+def _grounding(label, keyphrases):
+    if not label or not keyphrases: return {"verdict":"FAIL","score":0}
+    lt = set(re.findall(r"\b[a-z]{3,}\b", label.lower()))
+    kt = set()
+    for k in keyphrases:
+        kt.update(re.findall(r"\b[a-z]{3,}\b", (k[0] if isinstance(k,tuple) else k).lower()))
+    noise = {"the","and","for","with","using","based","from","that","are","this"}
+    lt -= noise; kt -= noise
+    m = list(lt & kt)
+    return {"verdict":"PASS" if m else "FAIL", "score":len(m)/max(len(lt),1), "matched":m}
+def _clean(s):
+    s = str(s or "").replace("\n"," ").strip()
+    return s[:60].rsplit(" ",1)[0] if len(s)>60 else s
+# ---------------------------------------------------------------------------
+# LangGraph node: run topic modelling
+# ---------------------------------------------------------------------------
+def embed_and_cluster(state: PipelineState) -> dict:
+    from tools import run_topic_modeling
+    try:
+        td = run_topic_modeling(state["filepath"], state.get("n_trials", 50))
+        return {"topic_data": td}
+    except Exception as e:
+        return {"error": str(e)}
 # ---------------------------------------------------------------------------
+# LangGraph node: LLM Council — 4 sheets for topic modelling
 # ---------------------------------------------------------------------------
+def llm_council(state: PipelineState) -> dict:
+    td = state["topic_data"]
+    if not td: return {"error": "No topic data"}
+    client = Groq(api_key=state["groq_key"], max_retries=0)
+    mk, gk = state["mistral_key"], state["gemini_key"]
+    sheets = {1:[], 2:[], 3:[], 4:[]}  # 1=Groq, 2=Mistral, 3=Gemini, 4=Consolidated
+    interps = {}
+    for cid in sorted(td["keyphrases"].keys()):
+        kps = td["keyphrases"][cid]
+        rds = td["representative_docs"].get(cid, [])
+        sw = td["membership"].get(cid, {"strong":0,"weak":0})
+        prompt = _label_prompt(kps, rds)
+        s1 = _groq(client, prompt); time.sleep(1)
+        s2 = _mistral(prompt, mk); time.sleep(1)
+        s3 = _gemini(prompt, gk)
+        votes = [s1, s2, s3]
+        # Sheets 1-3
+        for si, (sheet_n, resp) in enumerate([(1,s1),(2,s2),(3,s3)]):
+            sheets[sheet_n].append({"cluster":cid, **{k:resp.get(k,"—")
+                for k in ["label","description","pacis_match","confidence"]}})
+        # Sheet 4: consolidate
+        valid = [v for v in votes if v and "label" in v]
+        labels_l = [_clean(v.get("label","")).lower() for v in valid]
+        counts = Counter(labels_l)
+        if any(c>=3 for c in counts.values()):
+            agreement = "Triple"
+            winner = max(counts, key=counts.get)
+            best = next(v for v in valid if _clean(v["label"]).lower()==winner)
+        elif any(c>=2 for c in counts.values()):
+            agreement = "Two"
+            winner = max(counts, key=counts.get)
+            best = next(v for v in valid if _clean(v["label"]).lower()==winner)
+        else:
+            agreement = "Single"
+            d = _groq(client, _defence_prompt(kps, rds, votes))
+            best = d if d and "label" in d else (valid[0] if valid else {})
+        label = _clean(best.get("label",""))
+        gc = _grounding(label, kps)
+        if gc["verdict"]=="FAIL" and valid:
+            label = _clean(valid[0].get("label",""))
+        cp = td.get("cluster_persistence",{}).get(cid, 0.0)
+        sheets[4].append({"cluster":cid, "label":label, "agreement":agreement,
+            "description":best.get("description",""),
+            "pacis_match":best.get("pacis_match",""),
+            "strong":sw["strong"], "weak":sw["weak"],
+            "persistence":round(cp,4), "grounding":gc["verdict"]})
+        interps[cid] = {"label":label, "agreement":agreement,
+            "strong":sw["strong"], "weak":sw["weak"],
+            "persistence":cp, "description":best.get("description",""),
+            "pacis_match":best.get("pacis_match",""),
+            "keyphrases":[k[0] if isinstance(k,tuple) else k for k in kps[:5]]}
+        logger.info("Cluster %d → %s [%s]", cid, label, agreement)
+    # Agreement rate on labels
+    total = len(sheets[4]) or 1
+    n_triple = sum(1 for r in sheets[4] if r.get("agreement")=="Triple")
+    n_two = sum(1 for r in sheets[4] if r.get("agreement")=="Two")
+    rates = {
+        "triple": round(n_triple / total * 100),
+        "two_or_more": round((n_triple + n_two) / total * 100),
+        "single": round((total - n_triple - n_two) / total * 100),
+    }
+    # Save outputs
+    records = sheets[4]
+    with open("topics.json","w") as f: json.dump(records, f, indent=2)
+    pd.DataFrame(records).to_csv("topics.csv", index=False)
+    return {"interpretations":interps, "sheets":sheets,
+            "agreement_rates":rates, "json_path":"topics.json", "csv_path":"topics.csv"}
+# ---------------------------------------------------------------------------
+# LangGraph node: build mismatch table
+# ---------------------------------------------------------------------------
+def build_mismatch(state: PipelineState) -> dict:
+    from tools import build_mismatch_table
+    td = state["topic_data"]
+    interps = state.get("interpretations", {})
+    labels_map = {cid: v["label"] for cid, v in interps.items()}
+    mt = build_mismatch_table(td["keyphrases"], labels_map)
+    return {"mismatch_table": mt}
 # ---------------------------------------------------------------------------
+# Build the LangGraph
 # ---------------------------------------------------------------------------
+def build_graph() -> StateGraph:
+    g = StateGraph(PipelineState)
+    g.add_node("embed_and_cluster", embed_and_cluster)
+    g.add_node("llm_council", llm_council)
+    g.add_node("build_mismatch", build_mismatch)
+    g.set_entry_point("embed_and_cluster")
+    g.add_edge("embed_and_cluster", "llm_council")
+    g.add_edge("llm_council", "build_mismatch")
+    g.add_edge("build_mismatch", END)
+    return g.compile()
+# Compiled graph — importable
+pipeline_graph = build_graph()
+def run_pipeline(filepath, groq_key, mistral_key, gemini_key, n_trials=50):
+    """Convenience wrapper."""
+    result = pipeline_graph.invoke({
+        "filepath": filepath,
+        "groq_key": groq_key,
+        "mistral_key": mistral_key,
+        "gemini_key": gemini_key,
+        "n_trials": n_trials,
+    })
+    return result