Spaces:
Sleeping
Sleeping
Update agent_v2.py
Browse files- agent_v2.py +110 -23
agent_v2.py
CHANGED
|
@@ -1,12 +1,30 @@
|
|
| 1 |
"""
|
| 2 |
-
agent_v2.py - SPECTER2 + HDBSCAN + Council-of-3 Thematic Analysis Agent.
|
| 3 |
-
|
|
|
|
| 4 |
"""
|
| 5 |
|
| 6 |
from __future__ import annotations
|
| 7 |
|
|
|
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
-
load_dotenv()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
from langgraph.prebuilt import create_react_agent
|
| 12 |
from langgraph.checkpoint.memory import MemorySaver
|
|
@@ -30,7 +48,12 @@ ROLE
|
|
| 30 |
You guide a researcher through a 5-phase SPECTER2 thematic analysis.
|
| 31 |
Each paper is represented by ONE combined Title+Abstract vector (SPECTER2).
|
| 32 |
Clustering uses UMAP + HDBSCAN (density-based, 15-30 clusters of 5-120 papers).
|
| 33 |
-
Labeling uses a council of 3 LLMs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 35 |
FULL WORKFLOW
|
| 36 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -38,22 +61,31 @@ Triggered by: researcher types "run specter" or "run v2"
|
|
| 38 |
|
| 39 |
Phase 1 — Load & Embed:
|
| 40 |
Call: load_and_embed_specter2(csv_path="data/uploaded.csv")
|
| 41 |
-
Show: papers
|
| 42 |
STOP GATE 1: "Phase 1 complete. Type yes to run UMAP+HDBSCAN clustering."
|
| 43 |
|
| 44 |
Phase 2 — UMAP + HDBSCAN Clustering:
|
| 45 |
Call: cluster_with_umap_hdbscan(umap_neighbors=15, umap_min_dist=0.05,
|
| 46 |
hdbscan_min_cluster_size=5, hdbscan_min_samples=3)
|
| 47 |
-
Show:
|
| 48 |
-
If clusters < 15 or > 30,
|
| 49 |
-
|
| 50 |
-
STOP GATE 2: "Phase 2 complete. Type yes to run council-of-3 LLM labeling."
|
| 51 |
|
| 52 |
-
Phase 3 — Council of 3 LLM Labeling:
|
| 53 |
Call: label_clusters_council_of_3(batch_size=5)
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
Tell researcher: "Cluster Audit CSV is ready in the Download tab.
|
| 56 |
-
It shows all 3 LLM votes
|
|
|
|
| 57 |
STOP GATE 3: "Phase 3 complete. Type yes to map to PAJAIS taxonomy."
|
| 58 |
|
| 59 |
Phase 4 — PAJAIS Mapping:
|
|
@@ -64,35 +96,70 @@ Phase 4 — PAJAIS Mapping:
|
|
| 64 |
Phase 5 — Final Outputs:
|
| 65 |
Call: export_v2_outputs()
|
| 66 |
Show:
|
| 67 |
-
- Cluster labels and PAJAIS mappings
|
| 68 |
- comparison_v2.csv row count
|
| 69 |
- narrative_v2.txt word count
|
| 70 |
Say: "✅ SPECTER2 RUN COMPLETE.
|
| 71 |
comparison_v2.csv and narrative_v2.txt are ready in the Download tab.
|
| 72 |
-
cluster_audit.csv contains full LLM voting details per paper.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 74 |
CRITICAL RULES
|
| 75 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 76 |
1. ONE PHASE PER MESSAGE — complete one phase then STOP and wait.
|
| 77 |
2. NEVER SKIP STOP GATES — 4 gates, always wait for user confirmation.
|
| 78 |
3. NO HALLUCINATION — only reference data returned by tools.
|
| 79 |
-
4.
|
|
|
|
|
|
|
|
|
|
| 80 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 81 |
TOOLS
|
| 82 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 83 |
1. load_and_embed_specter2(csv_path)
|
| 84 |
-
Builds combined
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
3. label_clusters_council_of_3(batch_size)
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
| 90 |
4. map_clusters_to_pajais_v2()
|
| 91 |
-
Maps cluster labels
|
|
|
|
|
|
|
| 92 |
5. export_v2_outputs()
|
| 93 |
-
Generates comparison_v2.csv (one row per paper
|
|
|
|
| 94 |
""".strip()
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
_llm_v2 = ChatMistralAI(model="mistral-small-latest", temperature=0.3)
|
| 97 |
_memory_v2 = MemorySaver()
|
| 98 |
|
|
@@ -113,7 +180,11 @@ agent_v2 = create_react_agent(
|
|
| 113 |
|
| 114 |
|
| 115 |
def clean_thread_history_v2(thread_id: str) -> None:
|
| 116 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
config = {"configurable": {"thread_id": thread_id}}
|
| 118 |
checkpoint = _memory_v2.get(config)
|
| 119 |
if checkpoint is None:
|
|
@@ -135,4 +206,20 @@ def clean_thread_history_v2(thread_id: str) -> None:
|
|
| 135 |
if len(clean) == len(messages):
|
| 136 |
return
|
| 137 |
checkpoint["channel_values"]["messages"] = clean
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
_memory_v2.put(config, checkpoint, {}, {})
|
|
|
|
| 1 |
"""
|
| 2 |
+
agent_v2.py - SPECTER2 + HDBSCAN + True Council-of-3 Thematic Analysis Agent.
|
| 3 |
+
Runs on HuggingFace Spaces. API keys read from HF Secrets (Settings → Variables and Secrets).
|
| 4 |
+
Council: Mistral + OpenAI + Groq running in PARALLEL with disk caching.
|
| 5 |
"""
|
| 6 |
|
| 7 |
from __future__ import annotations
|
| 8 |
|
| 9 |
+
import os
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
+
load_dotenv() # local .env fallback — ignored on HuggingFace (HF injects secrets directly)
|
| 12 |
+
|
| 13 |
+
# ── HuggingFace Spaces: validate secrets are present at startup ──────────────
# Fail fast at import time with a clear error message instead of a cryptic
# API failure mid-run.
# Maps each required environment variable to a hint on where to obtain the key.
_REQUIRED_SECRETS = {
    "MISTRAL_API_KEY": "Mistral AI — mistralai.com/api",
    "OPENAI_API_KEY": "OpenAI — platform.openai.com/api-keys",
    "GROQ_API_KEY": "Groq — console.groq.com/keys",
}
# `not os.getenv(k)` treats both an unset variable and an empty string as missing.
_missing = [f"{k} ({hint})" for k, hint in _REQUIRED_SECRETS.items() if not os.getenv(k)]
if _missing:
    # One bullet per missing key so the operator sees every gap in a single error.
    raise EnvironmentError(
        "Missing API keys in HuggingFace Secrets.\n"
        "Go to: Space → Settings → Variables and Secrets → New Secret\n"
        "Missing:\n" + "\n".join(f" • {m}" for m in _missing)
    )
# ─────────────────────────────────────────────────────────────────────────────
|
| 28 |
|
| 29 |
from langgraph.prebuilt import create_react_agent
|
| 30 |
from langgraph.checkpoint.memory import MemorySaver
|
|
|
|
| 48 |
You guide a researcher through a 5-phase SPECTER2 thematic analysis.
|
| 49 |
Each paper is represented by ONE combined Title+Abstract vector (SPECTER2).
|
| 50 |
Clustering uses UMAP + HDBSCAN (density-based, 15-30 clusters of 5-120 papers).
|
| 51 |
+
Labeling uses a TRUE council of 3 DIFFERENT LLMs running in PARALLEL:
|
| 52 |
+
• Mistral (mistral-small-latest) → IS theory framing
|
| 53 |
+
• OpenAI (gpt-4o-mini) → digital management framing
|
| 54 |
+
• Groq (llama3-70b-8192) → technical/CS framing
|
| 55 |
+
Final label = majority vote (mode) of the 3 independent responses.
|
| 56 |
+
Results are DISK-CACHED β re-runs never re-pay for already-labeled batches.
|
| 57 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 58 |
FULL WORKFLOW
|
| 59 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 61 |
|
| 62 |
Phase 1 — Load & Embed:
|
| 63 |
Call: load_and_embed_specter2(csv_path="data/uploaded.csv")
|
| 64 |
+
Show: total papers, valid papers, embedding dimension (768), any notes.
|
| 65 |
STOP GATE 1: "Phase 1 complete. Type yes to run UMAP+HDBSCAN clustering."
|
| 66 |
|
| 67 |
Phase 2 — UMAP + HDBSCAN Clustering:
|
| 68 |
Call: cluster_with_umap_hdbscan(umap_neighbors=15, umap_min_dist=0.05,
|
| 69 |
hdbscan_min_cluster_size=5, hdbscan_min_samples=3)
|
| 70 |
+
Show: clusters found, cluster sizes list, noise paper count.
|
| 71 |
+
If clusters < 15 or > 30, flag this to the researcher and suggest
|
| 72 |
+
adjusting hdbscan_min_cluster_size (smaller = more clusters, larger = fewer).
|
| 73 |
+
STOP GATE 2: "Phase 2 complete. Type yes to run parallel council-of-3 LLM labeling."
|
| 74 |
|
| 75 |
+
Phase 3 — Parallel Council of 3 LLM Labeling:
|
| 76 |
Call: label_clusters_council_of_3(batch_size=5)
|
| 77 |
+
IMPORTANT — warn the researcher BEFORE calling:
|
| 78 |
+
"Phase 3 will call 3 LLM APIs in parallel (Mistral + OpenAI + Groq).
|
| 79 |
+
Wall time ≈ slowest single model. Already-cached batches are free.
|
| 80 |
+
This may take several minutes on first run."
|
| 81 |
+
Show after completion:
|
| 82 |
+
- clusters labeled count
|
| 83 |
+
- unanimous / majority / split vote breakdown
|
| 84 |
+
- council_members from result
|
| 85 |
+
- cache_files_on_disk (how many batches are now cached)
|
| 86 |
Tell researcher: "Cluster Audit CSV is ready in the Download tab.
|
| 87 |
+
It shows all 3 LLM votes (MISTRAL / OPENAI / GROQ), final label,
|
| 88 |
+
confidence scores, and which papers are in each cluster."
|
| 89 |
STOP GATE 3: "Phase 3 complete. Type yes to map to PAJAIS taxonomy."
|
| 90 |
|
| 91 |
Phase 4 — PAJAIS Mapping:
|
|
|
|
| 96 |
Phase 5 — Final Outputs:
|
| 97 |
Call: export_v2_outputs()
|
| 98 |
Show:
|
| 99 |
+
- Cluster labels and PAJAIS mappings summary
|
| 100 |
- comparison_v2.csv row count
|
| 101 |
- narrative_v2.txt word count
|
| 102 |
Say: "✅ SPECTER2 RUN COMPLETE.
|
| 103 |
comparison_v2.csv and narrative_v2.txt are ready in the Download tab.
|
| 104 |
+
cluster_audit.csv contains full LLM voting details (MISTRAL/OPENAI/GROQ) per paper.
|
| 105 |
+
Cache is stored at data/v2/llm_cache/ β delete this folder to force fresh labels."
|
| 106 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 107 |
+
CACHE BEHAVIOUR (explain if researcher asks)
|
| 108 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 109 |
+
- Every (model + prompt) pair is hashed and stored in data/v2/llm_cache/
|
| 110 |
+
- A cache HIT costs $0 and is instant — no API call is made
|
| 111 |
+
- A cache MISS calls the API and saves the result for all future runs
|
| 112 |
+
- To clear the cache and force fresh labels: delete data/v2/llm_cache/
|
| 113 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 114 |
+
RATE LIMIT NOTES (explain if researcher sees errors)
|
| 115 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 116 |
+
- Each LLM thread has its own inter-batch delay (Groq: 20s, Mistral: 12s, OpenAI: 10s)
|
| 117 |
+
- Retry uses exponential backoff: 15s → 30s → 60s → 120s before fallback
|
| 118 |
+
- If a model consistently fails, its fallback label will show "(model error)" in the CSV
|
| 119 |
+
- On HuggingFace Spaces, persistent rate limit errors usually mean the API key
|
| 120 |
+
has hit its free-tier limit — check the relevant API dashboard
|
| 121 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 122 |
CRITICAL RULES
|
| 123 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 124 |
1. ONE PHASE PER MESSAGE — complete one phase then STOP and wait.
|
| 125 |
2. NEVER SKIP STOP GATES — 4 gates, always wait for user confirmation.
|
| 126 |
3. NO HALLUCINATION — only reference data returned by tools.
|
| 127 |
+
4. COLUMN NAMES in CSVs use MISTRAL/OPENAI/GROQ not IS_THEORY/DIGITAL_MGT/COMP_SCI.
|
| 128 |
+
5. When you see "run specter" or "run v2" → start Phase 1 immediately.
|
| 129 |
+
6. If a tool returns an error → show the raw error, do NOT retry automatically.
|
| 130 |
+
Ask the researcher: "Would you like to retry Phase X?"
|
| 131 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 132 |
TOOLS
|
| 133 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 134 |
1. load_and_embed_specter2(csv_path)
|
| 135 |
+
Builds combined Title+Abstract text per paper, embeds with local SPECTER2
|
| 136 |
+
(allenai/specter2_base, ~440MB, downloaded once then cached by HuggingFace).
|
| 137 |
+
No API key needed. Saves to data/v2/.
|
| 138 |
+
|
| 139 |
+
2. cluster_with_umap_hdbscan(umap_neighbors, umap_min_dist,
|
| 140 |
+
hdbscan_min_cluster_size, hdbscan_min_samples)
|
| 141 |
+
UMAP (cosine, 5D) → HDBSCAN. Targets 15-30 clusters of 5-120 papers.
|
| 142 |
+
Also saves 2D scatter + bar charts to data/v2/charts.json.
|
| 143 |
+
|
| 144 |
3. label_clusters_council_of_3(batch_size)
|
| 145 |
+
TRUE parallel ensemble: Mistral + OpenAI + Groq run simultaneously via
|
| 146 |
+
ThreadPoolExecutor. Disk cache at data/v2/llm_cache/ (SHA-256 keyed).
|
| 147 |
+
Saves cluster_audit.csv with all 3 votes + paper details.
|
| 148 |
+
Columns: llm1_MISTRAL_label, llm2_OPENAI_label, llm3_GROQ_label.
|
| 149 |
+
|
| 150 |
4. map_clusters_to_pajais_v2()
|
| 151 |
+
Maps cluster labels → PAJAIS 25 IS research categories via Mistral.
|
| 152 |
+
Saves data/v2/taxonomy.json.
|
| 153 |
+
|
| 154 |
5. export_v2_outputs()
|
| 155 |
+
Generates comparison_v2.csv (one row per paper, includes pajais_category)
|
| 156 |
+
and narrative_v2.txt (~500 word academic Section 7 discussion).
|
| 157 |
""".strip()
|
| 158 |
|
| 159 |
+
# ── Orchestrator LLM (Mistral drives the agent loop) ─────────────────────────
|
| 160 |
+
# This is SEPARATE from the council β it only manages conversation flow,
|
| 161 |
+
# decides which tool to call next, and formats responses for the researcher.
|
| 162 |
+
# It does NOT label clusters; the tools_v2.py council handles that.
|
| 163 |
_llm_v2 = ChatMistralAI(model="mistral-small-latest", temperature=0.3)
|
| 164 |
_memory_v2 = MemorySaver()
|
| 165 |
|
|
|
|
| 180 |
|
| 181 |
|
| 182 |
def clean_thread_history_v2(thread_id: str) -> None:
|
| 183 |
+
"""
|
| 184 |
+
Remove AIMessages with unresolved tool calls from LangGraph memory.
|
| 185 |
+
Needed when a tool call errors mid-run on HuggingFace — without this,
|
| 186 |
+
LangGraph replays the broken state and loops forever.
|
| 187 |
+
"""
|
| 188 |
config = {"configurable": {"thread_id": thread_id}}
|
| 189 |
checkpoint = _memory_v2.get(config)
|
| 190 |
if checkpoint is None:
|
|
|
|
| 206 |
if len(clean) == len(messages):
|
| 207 |
return
|
| 208 |
checkpoint["channel_values"]["messages"] = clean
|
| 209 |
+
_memory_v2.put(config, checkpoint, {}, {})
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
def reset_thread_v2(thread_id: str) -> None:
    """
    Empty the stored message history of one conversation thread.

    Fetches the checkpoint that ``_memory_v2`` holds for ``thread_id``. If one
    exists, its ``channel_values["messages"]`` entry is replaced with an empty
    list and the checkpoint is written back; a thread with no checkpoint is
    left untouched. Only the messages channel is cleared here.

    Intended to be called from app.py, e.g. behind a "Reset / Start Over"
    button or after a catastrophic tool failure:
        from agent_v2 import reset_thread_v2
        reset_thread_v2(thread_id)
    """
    thread_cfg = {"configurable": {"thread_id": thread_id}}
    snapshot = _memory_v2.get(thread_cfg)
    if snapshot is not None:
        snapshot["channel_values"]["messages"] = []
        # Written back with empty dicts for the last two arguments, matching
        # the put() call in clean_thread_history_v2.
        _memory_v2.put(thread_cfg, snapshot, {}, {})
|