Spaces:

atharvthite05
/

BERTopic_Thematic_Analysis_Agent

Sleeping

App Files Community

atharvthite05 committed on Apr 28

Commit

60bc73f

verified ·

1 Parent(s): f376c71

Update tools.py

Browse files

Files changed (1) hide show

tools.py +176 -56

tools.py CHANGED Viewed

@@ -46,10 +46,11 @@ import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
 import plotly.figure_factory as ff
-from sklearn.cluster import AgglomerativeClustering
 from sklearn.metrics.pairwise import cosine_similarity
 from sklearn.preprocessing import normalize
 from sentence_transformers import SentenceTransformer
 from langchain_core.tools import tool
 from langchain_core.prompts import PromptTemplate
@@ -69,7 +70,9 @@ MISTRAL_API_KEY: str   = os.environ.get("MISTRAL_API_KEY", "")
 MODEL_NAME:      str   = "mistral-small-latest"
 GROQ_API_KEY: str      = os.environ.get("GROQ_API_KEY", "")
 GROQ_MODEL_NAME: str   = os.environ.get("GROQ_MODEL_NAME", "llama-3.3-70b-versatile")
-EMBED_MODEL:     str   = "all-MiniLM-L6-v2"
 BASE_DIR:        Path  = Path(__file__).resolve().parent
 OUTPUT_DIR:      Path  = BASE_DIR / "outputs"
 N_EVIDENCE:      int   = 5       # sentences kept per cluster centroid
@@ -78,10 +81,17 @@ RANDOM_SEED:     int   = 42
 LLM_TIMEOUT_S:   int   = 45
 LLM_MAX_RETRIES: int   = 3
 MAX_LABEL_CLUSTERS: int = 60
-MIN_CLUSTER_SIZE_FOR_LABEL: int = 3
 MAX_TOOL_RETURN_PREVIEW: int = 12
 PROVIDER_RETRY_ATTEMPTS: int = 3
 PROVIDER_RETRY_BASE_DELAY_S: float = 1.5
 # Run configurations — keys map to source columns
 RUN_CONFIGS: dict[str, list[str]] = {
@@ -198,18 +208,33 @@ def _texts_for_candidates(df: pd.DataFrame, candidates: list[str]) -> tuple[list
 def _embed(sentences: list[str]) -> np.ndarray:
-    """Encode sentences to L2-normalised 384-d vectors."""
-    model = SentenceTransformer(EMBED_MODEL)
     raw   = model.encode(sentences, show_progress_bar=False, batch_size=64)
     return normalize(raw, norm="l2")   # unit-norm -> cosine = dot product
-def _cluster(embeddings: np.ndarray, threshold: float) -> np.ndarray:
-    return AgglomerativeClustering(
         metric="cosine",
-        linkage="average",
-        distance_threshold=threshold,
-        n_clusters=None,
     ).fit_predict(embeddings)
@@ -234,14 +259,14 @@ def _llm() -> ChatMistralAI:
     )
-def _llm_groq():
     if ChatGroq is None:
         raise RuntimeError(
             "langchain-groq is not installed. Install dependencies from requirements.txt "
             "to enable Groq topic-label verification."
         )
     return ChatGroq(
-        model=GROQ_MODEL_NAME,
         api_key=GROQ_API_KEY,
         temperature=0.2,
         timeout=LLM_TIMEOUT_S,
@@ -249,8 +274,12 @@ def _llm_groq():
     )
-def _groq_enabled() -> bool:
-    return bool(GROQ_API_KEY) and ChatGroq is not None
 def _to_float(value: object, fallback: float = 0.0) -> float:
@@ -345,7 +374,11 @@ def _chart_top_words(summaries: list[dict]) -> go.Figure:
 def _chart_hierarchy(labels: list[int], embeddings: np.ndarray) -> go.Figure:
-    unique     = sorted(set(labels))
     labels_arr = np.array(labels)
     centroids  = np.vstack([
         _centroid(embeddings[labels_arr == lbl])
@@ -362,7 +395,11 @@ def _chart_hierarchy(labels: list[int], embeddings: np.ndarray) -> go.Figure:
 def _chart_heatmap(labels: list[int], embeddings: np.ndarray) -> go.Figure:
-    unique     = sorted(set(labels))
     labels_arr = np.array(labels)
     centroids  = np.vstack([
         _centroid(embeddings[labels_arr == lbl])
@@ -460,21 +497,30 @@ def load_scopus_csv(filepath: str) -> dict:
 # ============================================================================
 @tool
-def run_bertopic_discovery(run_key: str, threshold: float = DISTANCE_THRESH) -> dict:
     """
-    Embed sentences, cluster with AgglomerativeClustering, extract evidence,
     and generate four Plotly charts.
     Saved artefacts
     ---------------
-    emb.npy         : (N, 384) float32  L2-normalised embeddings
     sent_labels.npy : (N,)     int32    per-sentence cluster label  [BUG 1 FIX]
     summaries.json  : list of cluster dicts with evidence sentences
     Parameters
     ----------
     run_key   : str   — "abstract" or "title" or "keywords"
-    threshold : float — cosine distance threshold for AgglomerativeClustering
     Returns
     -------
@@ -527,8 +573,16 @@ def run_bertopic_discovery(run_key: str, threshold: float = DISTANCE_THRESH) ->
     embeddings = _embed(sentences)
     np.save(str(rdir / "emb.npy"), embeddings)
-    labels     = _cluster(embeddings, threshold).tolist()
-    unique_ids = sorted(set(labels))
     # FIX BUG 1 — persist per-sentence label array so Tool 4 can build
     # correct cluster masks without any guesswork or scaffolding.
@@ -536,17 +590,39 @@ def run_bertopic_discovery(run_key: str, threshold: float = DISTANCE_THRESH) ->
     labels_arr = np.array(labels)
     def _cluster_summary(cid: int) -> dict:
         mask    = labels_arr == cid
         c_emb   = embeddings[mask]
         c_sent  = list(np.array(sentences)[mask])
         ctroid  = _centroid(c_emb)
         top_idx = _top_k_indices(c_emb, ctroid, N_EVIDENCE)
         return {
             "cluster_id": int(cid),
             "size":       int(mask.sum()),
-            "cx":         float(ctroid[0]),
-            "cy":         float(ctroid[1]),
             "evidence":   list(np.array(c_sent)[top_idx]),
         }
@@ -565,6 +641,9 @@ def run_bertopic_discovery(run_key: str, threshold: float = DISTANCE_THRESH) ->
         "n_clusters":      int(len(unique_ids)),
         "n_sentences":     int(len(sentences)),
         "threshold":       threshold,
         "chart_paths":     chart_paths,
         "summaries_path":  str(rdir / "summaries.json"),
         "embeddings_path": str(rdir / "emb.npy"),
@@ -663,8 +742,22 @@ def label_topics_with_llm(run_key: str) -> dict:
             "groq_confidence":    0.0,
             "groq_reasoning":     "",
             "groq_niche":         False,
             "verification_done":  False,
-            "verification_note":  "Run VERIFY in Phase 2 to compare with Groq labels.",
         }
     labelled = list(map(_label_one, selected))
@@ -679,6 +772,8 @@ def label_topics_with_llm(run_key: str) -> dict:
             "confidence":    r.get("confidence"),
             "mistral_label": r.get("mistral_label", ""),
             "groq_label":    r.get("groq_label", ""),
             "size":          r.get("size"),
             "niche":         r.get("niche", False),
         },
@@ -692,8 +787,8 @@ def label_topics_with_llm(run_key: str) -> dict:
         "total_clusters":    len(summaries),
         "selected_clusters": len(selected),
         "skipped_clusters":  max(0, len(summaries) - len(selected)),
-        "groq_enabled":      _groq_enabled(),
-        "mode_note":         "Single-model labeling complete (Mistral). Send VERIFY in Phase 2 to run Groq verification.",
         "labels_preview":    preview,
     }
@@ -702,7 +797,7 @@ def label_topics_with_llm(run_key: str) -> dict:
 def verify_topic_labels_with_groq(run_key: str) -> dict:
     """
     Run Groq topic labeling for already-labeled topics and append comparison fields
-    into labels.json so UI review table can show both Mistral and Groq labels.
     Parameters
     ----------
@@ -717,15 +812,16 @@ def verify_topic_labels_with_groq(run_key: str) -> dict:
     labels_path   = rdir / "labels.json"
     summaries_path = rdir / "summaries.json"
-    if not _groq_enabled():
         return {
             "run_key": run_key,
             "labels_path": str(labels_path),
             "verified_count": 0,
             "labels_preview": [],
             "error": (
-                "GROQ_API_KEY is missing or langchain-groq is unavailable. "
-                "Set GROQ_API_KEY and install requirements to use VERIFY."
             ),
         }
@@ -765,7 +861,8 @@ def verify_topic_labels_with_groq(run_key: str) -> dict:
         labels_data,
     ))
-    chain_groq = _LABEL_PROMPT | _llm_groq() | JsonOutputParser()
     def _evidence_block(summary: dict) -> str:
         return "\n".join(
@@ -773,29 +870,35 @@ def verify_topic_labels_with_groq(run_key: str) -> dict:
             for i, s in enumerate(summary.get("evidence", []))
         )
-    def _label_with_groq(row: dict) -> tuple[int, dict]:
         cid = int(row.get("cluster_id", -1))
         summary = summary_by_id[cid]
-        result = _invoke_with_retries(lambda: chain_groq.invoke({
             "cluster_id": summary["cluster_id"],
             "size":       summary["size"],
             "evidence":   _evidence_block(summary),
-        }))
-        return cid, result
     groq_pairs = list(map(_label_with_groq, target_rows))
-    groq_by_id = {cid: data for cid, data in groq_pairs}
     def _merge_row(row: dict) -> dict:
         cid = int(row.get("cluster_id", -1))
-        groq = groq_by_id.get(cid, {})
-        has_groq = bool(groq)
         mistral_label = str(row.get("mistral_label") or row.get("label", "")).strip()
-        groq_label = str(groq.get("label", "")).strip()
         is_agreement = (
-            mistral_label.lower() == groq_label.lower()
-            if has_groq and mistral_label and groq_label
-            else False
         )
         return {
@@ -808,18 +911,30 @@ def verify_topic_labels_with_groq(run_key: str) -> dict:
             ),
             "mistral_reasoning":  row.get("mistral_reasoning") or row.get("reasoning", ""),
             "mistral_niche":      bool(row.get("mistral_niche", row.get("niche", False))),
-            "groq_label":         groq.get("label", ""),
-            "groq_category":      groq.get("category", ""),
-            "groq_confidence":    _to_float(groq.get("confidence"), 0.0),
-            "groq_reasoning":     groq.get("reasoning", ""),
-            "groq_niche":         bool(groq.get("niche", False)),
-            "verification_done":  has_groq,
             "verification_note": (
-                "Mistral and Groq labels match."
                 if is_agreement
-                else "Mistral and Groq labels differ. Review before approval."
             )
-            if has_groq
             else "Groq labeling unavailable for this topic.",
         }
@@ -832,13 +947,18 @@ def verify_topic_labels_with_groq(run_key: str) -> dict:
         lambda r: {
             "cluster_id":    r.get("cluster_id"),
             "mistral_label": r.get("mistral_label", ""),
-            "groq_label":    r.get("groq_label", ""),
             "verification_note": r.get("verification_note", ""),
         },
         verified_rows[:MAX_TOOL_RETURN_PREVIEW],
     ))
-    verified_count = sum(1 for row in verified_rows if row.get("groq_label"))
     return {
         "run_key":           run_key,
@@ -1044,7 +1164,7 @@ def verify_taxonomy_mapping_with_groq(run_key: str) -> dict:
         run_key, taxonomy_path, verification_path,
         verified_count, mapping_preview
     """
-    if not _groq_enabled():
         return {
             "run_key": run_key,
             "taxonomy_path": str(_run_dir(run_key) / "taxonomy_map.json"),
@@ -1088,7 +1208,7 @@ def verify_taxonomy_mapping_with_groq(run_key: str) -> dict:
     taxonomy_map = _load_json(taxonomy_path)
     taxonomy_str = "\n".join(f"  - {cat}" for cat in PAJAIS_TAXONOMY)
-    chain_groq = _TAXONOMY_PROMPT | _llm_groq() | JsonOutputParser()
     def _map_theme_with_groq(theme: dict) -> dict:
         return _invoke_with_retries(lambda: chain_groq.invoke({

 import plotly.express as px
 import plotly.graph_objects as go
 import plotly.figure_factory as ff
 from sklearn.metrics.pairwise import cosine_similarity
 from sklearn.preprocessing import normalize
 from sentence_transformers import SentenceTransformer
+import hdbscan
+import umap
 from langchain_core.tools import tool
 from langchain_core.prompts import PromptTemplate
 MODEL_NAME:      str   = "mistral-small-latest"
 GROQ_API_KEY: str      = os.environ.get("GROQ_API_KEY", "")
 GROQ_MODEL_NAME: str   = os.environ.get("GROQ_MODEL_NAME", "llama-3.3-70b-versatile")
+GROQ_OLLAMA_MODEL_NAME: str = os.environ.get("GROQ_OLLAMA_MODEL_NAME", "llama-3.3-70b-versatile")
+GROQ_GPT_MODEL_NAME: str    = os.environ.get("GROQ_GPT_MODEL_NAME", "openai/gpt-oss-120b")
+EMBED_MODEL:     str   = "allenai/specter2_base"
 BASE_DIR:        Path  = Path(__file__).resolve().parent
 OUTPUT_DIR:      Path  = BASE_DIR / "outputs"
 N_EVIDENCE:      int   = 5       # sentences kept per cluster centroid
 LLM_TIMEOUT_S:   int   = 45
 LLM_MAX_RETRIES: int   = 3
 MAX_LABEL_CLUSTERS: int = 60
+MIN_CLUSTER_SIZE_FOR_LABEL: int = 20
 MAX_TOOL_RETURN_PREVIEW: int = 12
 PROVIDER_RETRY_ATTEMPTS: int = 3
 PROVIDER_RETRY_BASE_DELAY_S: float = 1.5
+HDBSCAN_MIN_CLUSTER_SIZE: int = 20
+HDBSCAN_MIN_SAMPLES: int = 5
+HDBSCAN_MAX_CLUSTER_SIZE: int = 120
+UMAP_N_NEIGHBORS: int = 15
+UMAP_MIN_DIST: float = 0.0
+UMAP_N_COMPONENTS_CLUSTER: int = 5
+UMAP_N_COMPONENTS_VIZ: int = 2
 # Run configurations — keys map to source columns
 RUN_CONFIGS: dict[str, list[str]] = {
 def _embed(sentences: list[str]) -> np.ndarray:
+    """Encode sentences to L2-normalised SPECTER2 vectors."""
+    model = SentenceTransformer(EMBED_MODEL, trust_remote_code=True)
     raw   = model.encode(sentences, show_progress_bar=False, batch_size=64)
     return normalize(raw, norm="l2")   # unit-norm -> cosine = dot product
+def _umap_reduce(embeddings: np.ndarray, n_components: int) -> np.ndarray:
+    reducer = umap.UMAP(
+        n_neighbors=UMAP_N_NEIGHBORS,
+        min_dist=UMAP_MIN_DIST,
+        n_components=n_components,
         metric="cosine",
+        random_state=RANDOM_SEED,
+    )
+    return reducer.fit_transform(embeddings)
+def _cluster(embeddings: np.ndarray,
+             min_cluster_size: int,
+             max_cluster_size: int,
+             min_samples: int) -> np.ndarray:
+    return hdbscan.HDBSCAN(
+        min_cluster_size=min_cluster_size,
+        min_samples=min_samples,
+        metric="euclidean",
+        cluster_selection_method="eom",
+        max_cluster_size=max_cluster_size,
     ).fit_predict(embeddings)
     )
+def _llm_groq(model_name: str):
     if ChatGroq is None:
         raise RuntimeError(
             "langchain-groq is not installed. Install dependencies from requirements.txt "
             "to enable Groq topic-label verification."
         )
     return ChatGroq(
+        model=model_name,
         api_key=GROQ_API_KEY,
         temperature=0.2,
         timeout=LLM_TIMEOUT_S,
     )
+def _groq_ollama_enabled() -> bool:
+    return bool(GROQ_API_KEY) and ChatGroq is not None and bool(GROQ_OLLAMA_MODEL_NAME)
+def _groq_gpt_enabled() -> bool:
+    return bool(GROQ_API_KEY) and ChatGroq is not None and bool(GROQ_GPT_MODEL_NAME)
 def _to_float(value: object, fallback: float = 0.0) -> float:
 def _chart_hierarchy(labels: list[int], embeddings: np.ndarray) -> go.Figure:
+    unique     = sorted(filter(lambda v: v != -1, set(labels)))
+    if not unique:
+        fig = go.Figure()
+        fig.update_layout(title="Cluster Hierarchy", template="plotly_dark")
+        return fig
     labels_arr = np.array(labels)
     centroids  = np.vstack([
         _centroid(embeddings[labels_arr == lbl])
 def _chart_heatmap(labels: list[int], embeddings: np.ndarray) -> go.Figure:
+    unique     = sorted(filter(lambda v: v != -1, set(labels)))
+    if not unique:
+        fig = go.Figure()
+        fig.update_layout(title="Cluster Similarity Heatmap", template="plotly_dark")
+        return fig
     labels_arr = np.array(labels)
     centroids  = np.vstack([
         _centroid(embeddings[labels_arr == lbl])
 # ============================================================================
 @tool
+def run_bertopic_discovery(
+    run_key: str,
+    threshold: float = DISTANCE_THRESH,
+    min_cluster_size: int = HDBSCAN_MIN_CLUSTER_SIZE,
+    max_cluster_size: int = HDBSCAN_MAX_CLUSTER_SIZE,
+    min_samples: int = HDBSCAN_MIN_SAMPLES,
+) -> dict:
     """
+    Embed sentences, cluster with UMAP + HDBSCAN, extract evidence,
     and generate four Plotly charts.
     Saved artefacts
     ---------------
+    emb.npy         : (N, D)   float32  L2-normalised embeddings
     sent_labels.npy : (N,)     int32    per-sentence cluster label  [BUG 1 FIX]
     summaries.json  : list of cluster dicts with evidence sentences
     Parameters
     ----------
     run_key   : str   — "abstract" or "title" or "keywords"
+    threshold : float — legacy arg (ignored by HDBSCAN)
+    min_cluster_size : int — HDBSCAN minimum cluster size
+    max_cluster_size : int — HDBSCAN maximum cluster size
+    min_samples : int — HDBSCAN min_samples
     Returns
     -------
     embeddings = _embed(sentences)
     np.save(str(rdir / "emb.npy"), embeddings)
+    cluster_space = _umap_reduce(embeddings, UMAP_N_COMPONENTS_CLUSTER)
+    umap_2d = _umap_reduce(embeddings, UMAP_N_COMPONENTS_VIZ)
+    labels     = _cluster(
+        cluster_space,
+        min_cluster_size=min_cluster_size,
+        max_cluster_size=max_cluster_size,
+        min_samples=min_samples,
+    ).tolist()
+    unique_ids = sorted(filter(lambda v: v != -1, set(labels)))
     # FIX BUG 1 — persist per-sentence label array so Tool 4 can build
     # correct cluster masks without any guesswork or scaffolding.
     labels_arr = np.array(labels)
+    if not unique_ids:
+        _save_json(rdir / "summaries.json", [])
+        return {
+            "run_key":         run_key,
+            "n_clusters":      0,
+            "n_sentences":     int(len(sentences)),
+            "threshold":       threshold,
+            "min_cluster_size": int(min_cluster_size),
+            "max_cluster_size": int(max_cluster_size),
+            "min_samples":     int(min_samples),
+            "chart_paths":     {},
+            "summaries_path":  str(rdir / "summaries.json"),
+            "embeddings_path": str(rdir / "emb.npy"),
+            "error": "HDBSCAN produced no clusters (all points labeled as noise).",
+        }
     def _cluster_summary(cid: int) -> dict:
         mask    = labels_arr == cid
         c_emb   = embeddings[mask]
+        c_umap  = umap_2d[mask]
         c_sent  = list(np.array(sentences)[mask])
         ctroid  = _centroid(c_emb)
         top_idx = _top_k_indices(c_emb, ctroid, N_EVIDENCE)
+        coords  = (
+            c_umap.mean(axis=0)
+            if c_umap.shape[0] > 0
+            else np.zeros(UMAP_N_COMPONENTS_VIZ, dtype=np.float32)
+        )
         return {
             "cluster_id": int(cid),
             "size":       int(mask.sum()),
+            "cx":         float(coords[0]),
+            "cy":         float(coords[1]),
             "evidence":   list(np.array(c_sent)[top_idx]),
         }
         "n_clusters":      int(len(unique_ids)),
         "n_sentences":     int(len(sentences)),
         "threshold":       threshold,
+        "min_cluster_size": int(min_cluster_size),
+        "max_cluster_size": int(max_cluster_size),
+        "min_samples":     int(min_samples),
         "chart_paths":     chart_paths,
         "summaries_path":  str(rdir / "summaries.json"),
         "embeddings_path": str(rdir / "emb.npy"),
             "groq_confidence":    0.0,
             "groq_reasoning":     "",
             "groq_niche":         False,
+            "groq_ollama_label":  "",
+            "groq_ollama_category": "",
+            "groq_ollama_confidence": 0.0,
+            "groq_ollama_reasoning": "",
+            "groq_ollama_niche":  False,
+            "groq_gpt_label":     "",
+            "groq_gpt_category":  "",
+            "groq_gpt_confidence": 0.0,
+            "groq_gpt_reasoning": "",
+            "groq_gpt_niche":     False,
             "verification_done":  False,
+            "verification_done_ollama": False,
+            "verification_done_gpt": False,
+            "verification_note":  (
+                "Run VERIFY in Phase 2 to compare with Groq-Ollama and Groq-GPT labels."
+            ),
         }
     labelled = list(map(_label_one, selected))
             "confidence":    r.get("confidence"),
             "mistral_label": r.get("mistral_label", ""),
             "groq_label":    r.get("groq_label", ""),
+            "groq_ollama_label": r.get("groq_ollama_label", r.get("groq_label", "")),
+            "groq_gpt_label": r.get("groq_gpt_label", ""),
             "size":          r.get("size"),
             "niche":         r.get("niche", False),
         },
         "total_clusters":    len(summaries),
         "selected_clusters": len(selected),
         "skipped_clusters":  max(0, len(summaries) - len(selected)),
+        "groq_enabled":      _groq_ollama_enabled() and _groq_gpt_enabled(),
+        "mode_note":         "Single-model labeling complete (Mistral). Send VERIFY in Phase 2 to run Groq-Ollama and Groq-GPT verification.",
         "labels_preview":    preview,
     }
 def verify_topic_labels_with_groq(run_key: str) -> dict:
     """
     Run Groq topic labeling for already-labeled topics and append comparison fields
+    into labels.json so UI review table can show Mistral vs Groq-Ollama vs Groq-GPT labels.
     Parameters
     ----------
     labels_path   = rdir / "labels.json"
     summaries_path = rdir / "summaries.json"
+    if not _groq_ollama_enabled() or not _groq_gpt_enabled():
         return {
             "run_key": run_key,
             "labels_path": str(labels_path),
             "verified_count": 0,
             "labels_preview": [],
             "error": (
+                "GROQ_API_KEY or Groq model config is missing, or langchain-groq is unavailable. "
+                "Set GROQ_API_KEY and GROQ_GPT_MODEL_NAME (and optionally GROQ_OLLAMA_MODEL_NAME) "
+                "and install requirements to use VERIFY."
             ),
         }
         labels_data,
     ))
+    chain_groq_ollama = _LABEL_PROMPT | _llm_groq(GROQ_OLLAMA_MODEL_NAME) | JsonOutputParser()
+    chain_groq_gpt = _LABEL_PROMPT | _llm_groq(GROQ_GPT_MODEL_NAME) | JsonOutputParser()
     def _evidence_block(summary: dict) -> str:
         return "\n".join(
             for i, s in enumerate(summary.get("evidence", []))
         )
+    def _label_with_groq(row: dict) -> tuple[int, dict, dict]:
         cid = int(row.get("cluster_id", -1))
         summary = summary_by_id[cid]
+        payload = {
             "cluster_id": summary["cluster_id"],
             "size":       summary["size"],
             "evidence":   _evidence_block(summary),
+        }
+        groq_ollama = _invoke_with_retries(lambda: chain_groq_ollama.invoke(payload))
+        groq_gpt = _invoke_with_retries(lambda: chain_groq_gpt.invoke(payload))
+        return cid, groq_ollama, groq_gpt
     groq_pairs = list(map(_label_with_groq, target_rows))
+    groq_ollama_by_id = {cid: data for cid, data, _ in groq_pairs}
+    groq_gpt_by_id = {cid: data for cid, _, data in groq_pairs}
     def _merge_row(row: dict) -> dict:
         cid = int(row.get("cluster_id", -1))
+        groq_ollama = groq_ollama_by_id.get(cid, {})
+        groq_gpt = groq_gpt_by_id.get(cid, {})
+        has_groq_ollama = bool(groq_ollama)
+        has_groq_gpt = bool(groq_gpt)
         mistral_label = str(row.get("mistral_label") or row.get("label", "")).strip()
+        groq_ollama_label = str(groq_ollama.get("label", "")).strip()
+        groq_gpt_label = str(groq_gpt.get("label", "")).strip()
         is_agreement = (
+            all([mistral_label, groq_ollama_label, groq_gpt_label])
+            and mistral_label.lower() == groq_ollama_label.lower()
+            and mistral_label.lower() == groq_gpt_label.lower()
         )
         return {
             ),
             "mistral_reasoning":  row.get("mistral_reasoning") or row.get("reasoning", ""),
             "mistral_niche":      bool(row.get("mistral_niche", row.get("niche", False))),
+            "groq_label":         groq_ollama_label,
+            "groq_category":      groq_ollama.get("category", ""),
+            "groq_confidence":    _to_float(groq_ollama.get("confidence"), 0.0),
+            "groq_reasoning":     groq_ollama.get("reasoning", ""),
+            "groq_niche":         bool(groq_ollama.get("niche", False)),
+            "groq_ollama_label":  groq_ollama_label,
+            "groq_ollama_category": groq_ollama.get("category", ""),
+            "groq_ollama_confidence": _to_float(groq_ollama.get("confidence"), 0.0),
+            "groq_ollama_reasoning": groq_ollama.get("reasoning", ""),
+            "groq_ollama_niche":  bool(groq_ollama.get("niche", False)),
+            "groq_gpt_label":     groq_gpt_label,
+            "groq_gpt_category":  groq_gpt.get("category", ""),
+            "groq_gpt_confidence": _to_float(groq_gpt.get("confidence"), 0.0),
+            "groq_gpt_reasoning": groq_gpt.get("reasoning", ""),
+            "groq_gpt_niche":     bool(groq_gpt.get("niche", False)),
+            "verification_done":  has_groq_ollama and has_groq_gpt,
+            "verification_done_ollama": has_groq_ollama,
+            "verification_done_gpt": has_groq_gpt,
             "verification_note": (
+                "Mistral, Groq-Ollama, and Groq-GPT labels match."
                 if is_agreement
+                else "Model labels differ. Review before approval."
             )
+            if has_groq_ollama and has_groq_gpt
             else "Groq labeling unavailable for this topic.",
         }
         lambda r: {
             "cluster_id":    r.get("cluster_id"),
             "mistral_label": r.get("mistral_label", ""),
+            "groq_ollama_label": r.get("groq_ollama_label", r.get("groq_label", "")),
+            "groq_gpt_label": r.get("groq_gpt_label", ""),
             "verification_note": r.get("verification_note", ""),
         },
         verified_rows[:MAX_TOOL_RETURN_PREVIEW],
     ))
+    verified_count = sum(
+        1
+        for row in verified_rows
+        if row.get("groq_ollama_label") and row.get("groq_gpt_label")
+    )
     return {
         "run_key":           run_key,
         run_key, taxonomy_path, verification_path,
         verified_count, mapping_preview
     """
+    if not _groq_ollama_enabled():
         return {
             "run_key": run_key,
             "taxonomy_path": str(_run_dir(run_key) / "taxonomy_map.json"),
     taxonomy_map = _load_json(taxonomy_path)
     taxonomy_str = "\n".join(f"  - {cat}" for cat in PAJAIS_TAXONOMY)
+    chain_groq = _TAXONOMY_PROMPT | _llm_groq(GROQ_OLLAMA_MODEL_NAME) | JsonOutputParser()
     def _map_theme_with_groq(theme: dict) -> dict:
         return _invoke_with_retries(lambda: chain_groq.invoke({