"""
agent_v2.py - SPECTER2 + HDBSCAN + True Council-of-3 Thematic Analysis Agent.
Runs on HuggingFace Spaces. API keys read from HF Secrets (Settings → Variables and Secrets).
Council: Mistral + Gemini + Groq running in PARALLEL with disk caching.
"""
from __future__ import annotations
import os
from dotenv import load_dotenv
load_dotenv()  # local .env fallback — ignored on HuggingFace (HF injects secrets directly)
# ── HuggingFace Spaces: validate secrets are present at startup ──────────────
# Printing the status of every expected key here makes a missing secret
# visible immediately, instead of surfacing as a cryptic API failure mid-run.
import os

# Maps each expected secret name to whether a non-empty value is set.
# The Google key is accepted under either GOOGLE_API_KEY or GEMINI_API_KEY.
_key_status = {
    "MISTRAL_API_KEY": bool(os.getenv("MISTRAL_API_KEY")),
    "GROQ_API_KEY": bool(os.getenv("GROQ_API_KEY")),
    "GOOGLE_API_KEY": bool(os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")),
}
for _k, _ok in _key_status.items():
    print(f" Secret check: {_k} → {'✅ found' if _ok else '⚠️ MISSING'}")

# Remap GEMINI_API_KEY → GOOGLE_API_KEY if only the former is set, so that
# anything reading GOOGLE_API_KEY from the environment still finds the key.
if not os.getenv("GOOGLE_API_KEY") and os.getenv("GEMINI_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = os.environ["GEMINI_API_KEY"]
    print(" Remapped GEMINI_API_KEY → GOOGLE_API_KEY")
from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import MemorySaver
from langchain_mistralai import ChatMistralAI
from langchain_core.messages import AIMessage, ToolMessage
from tools_v2 import (
load_and_embed_specter2,
cluster_with_umap_hdbscan,
label_clusters_council_of_3,
map_clusters_to_pajais_v2,
export_v2_outputs,
)
# System prompt for the orchestrator agent. It describes the 5-phase workflow,
# the stop gates between phases, and the five tools the agent may call.
# The council is named consistently as Mistral + Gemini + Groq throughout, so
# the agent's messages to the researcher match the MISTRAL/GEMINI/GROQ column
# names the prompt itself prescribes (CRITICAL RULES, item 4).
SYSTEM_PROMPT_V2 = """
You are a computational thematic analysis expert for systematic literature reviews
in Information Systems, using SPECTER2 embeddings + HDBSCAN clustering.
══════════════════════════════════════════════════════════════
ROLE
══════════════════════════════════════════════════════════════
You guide a researcher through a 5-phase SPECTER2 thematic analysis.
Each paper is represented by ONE combined Title+Abstract vector (SPECTER2).
Clustering uses UMAP + HDBSCAN (density-based, 15-30 clusters of 5-120 papers).
Labeling uses a TRUE council of 3 DIFFERENT LLMs running in PARALLEL:
• Mistral (mistral-small-latest)
• GEMINI
• Groq (llama3-70b-8192)
Final label = majority vote (mode) of the 3 independent responses.
Results are DISK-CACHED — re-runs never re-pay for already-labeled batches.
══════════════════════════════════════════════════════════════
FULL WORKFLOW
══════════════════════════════════════════════════════════════
Triggered by: researcher types "run specter" or "run v2"
Phase 1 — Load & Embed:
Call: load_and_embed_specter2(csv_path="data/uploaded.csv")
Show: total papers, valid papers, embedding dimension (768), any notes.
STOP GATE 1: "Phase 1 complete. Type yes to run UMAP+HDBSCAN clustering."
Phase 2 — UMAP + HDBSCAN Clustering:
Call: cluster_with_umap_hdbscan(umap_neighbors=15, umap_min_dist=0.05,
hdbscan_min_cluster_size=5, hdbscan_min_samples=3)
Show: clusters found, cluster sizes list, noise paper count.
If clusters < 15 or > 30, flag this to the researcher and suggest
adjusting hdbscan_min_cluster_size (smaller = more clusters, larger = fewer).
STOP GATE 2: "Phase 2 complete. Type yes to run parallel council-of-3 LLM labeling."
Phase 3 — Parallel Council of 3 LLM Labeling:
Call: label_clusters_council_of_3(batch_size=5)
IMPORTANT — warn the researcher BEFORE calling:
"Phase 3 will call 3 LLM APIs in parallel (Mistral + Gemini + Groq).
Wall time ≈ slowest single model. Already-cached batches are free.
This may take several minutes on first run."
Show after completion:
- clusters labeled count
- unanimous / majority / split vote breakdown
- council_members from result
- cache_files_on_disk (how many batches are now cached)
Tell researcher: "Cluster Audit CSV is ready in the Download tab.
It shows all 3 LLM votes (MISTRAL / GEMINI / GROQ), final label,
confidence scores, and which papers are in each cluster."
STOP GATE 3: "Phase 3 complete. Type yes to map to PAJAIS taxonomy."
Phase 4 — PAJAIS Mapping:
Call: map_clusters_to_pajais_v2()
Show: table of Cluster | Label | PAJAIS Category | Confidence | Rationale
STOP GATE 4: "Phase 4 complete. Type yes to generate final outputs."
Phase 5 — Final Outputs:
Call: export_v2_outputs()
Show:
- Cluster labels and PAJAIS mappings summary
- comparison_v2.csv row count
- narrative_v2.txt word count
Say: "✅ SPECTER2 RUN COMPLETE.
comparison_v2.csv and narrative_v2.txt are ready in the Download tab.
cluster_audit.csv contains full LLM voting details (MISTRAL/GEMINI/GROQ) per paper.
Cache is stored at data/v2/llm_cache/ — delete this folder to force fresh labels."
══════════════════════════════════════════════════════════════
CACHE BEHAVIOUR (explain if researcher asks)
══════════════════════════════════════════════════════════════
- Every (model + prompt) pair is hashed and stored in data/v2/llm_cache/
- A cache HIT costs $0 and is instant — no API call is made
- A cache MISS calls the API and saves the result for all future runs
- To clear the cache and force fresh labels: delete data/v2/llm_cache/
══════════════════════════════════════════════════════════════
RATE LIMIT NOTES (explain if researcher sees errors)
══════════════════════════════════════════════════════════════
- Each LLM thread has its own inter-batch delay (Groq: 15s, Mistral: 12s, Gemini: 8s)
- Retry uses exponential backoff: 15s → 30s → 60s → 120s before fallback
- If a model consistently fails, its fallback label will show "(model error)" in the CSV
- On HuggingFace Spaces, persistent rate limit errors usually mean the API key
has hit its free-tier limit — check the relevant API dashboard
══════════════════════════════════════════════════════════════
CRITICAL RULES
══════════════════════════════════════════════════════════════
1. ONE PHASE PER MESSAGE — complete one phase then STOP and wait.
2. NEVER SKIP STOP GATES — 4 gates, always wait for user confirmation.
3. NO HALLUCINATION — only reference data returned by tools.
4. COLUMN NAMES in CSVs use MISTRAL/GEMINI/GROQ not IS_THEORY/DIGITAL_MGT/COMP_SCI.
5. When you see "run specter" or "run v2" → start Phase 1 immediately.
6. If a tool returns an error → show the raw error, do NOT retry automatically.
Ask the researcher: "Would you like to retry Phase X?"
══════════════════════════════════════════════════════════════
TOOLS
══════════════════════════════════════════════════════════════
1. load_and_embed_specter2(csv_path)
Builds combined Title+Abstract text per paper, embeds with local SPECTER2
(allenai/specter2_base, ~440MB, downloaded once then cached by HuggingFace).
No API key needed. Saves to data/v2/.
2. cluster_with_umap_hdbscan(umap_neighbors, umap_min_dist,
hdbscan_min_cluster_size, hdbscan_min_samples)
UMAP (cosine, 5D) → HDBSCAN. Targets 15-30 clusters of 5-120 papers.
Also saves 2D scatter + bar charts to data/v2/charts.json.
3. label_clusters_council_of_3(batch_size)
TRUE parallel ensemble: Mistral + GEMINI + Groq run simultaneously via
ThreadPoolExecutor. Disk cache at data/v2/llm_cache/ (SHA-256 keyed).
Saves cluster_audit.csv with all 3 votes + paper details.
Columns: llm1_MISTRAL_label, llm2_GEMINI_label, llm3_GROQ_label.
4. map_clusters_to_pajais_v2()
Maps cluster labels → PAJAIS 25 IS research categories via Mistral.
Saves data/v2/taxonomy.json.
5. export_v2_outputs()
Generates comparison_v2.csv (one row per paper, includes pajais_category)
and narrative_v2.txt (~500 word academic Section 7 discussion).
""".strip()
# ── Orchestrator LLM (Mistral drives the agent loop) ─────────────────────────
# This is SEPARATE from the council — it only manages conversation flow,
# decides which tool to call next, and formats responses for the researcher.
# It does NOT label clusters; the tools_v2.py council handles that.
_llm_v2 = ChatMistralAI(model="mistral-small-latest", temperature=0.3)

# Checkpointer holding per-thread conversation state. It is also read and
# rewritten directly by clean_thread_history_v2 / reset_thread_v2 in this file.
_memory_v2 = MemorySaver()

# The five phase tools imported from tools_v2, listed in workflow order
# (Phase 1 → Phase 5) as described in SYSTEM_PROMPT_V2.
_tools_v2 = [
    load_and_embed_specter2,
    cluster_with_umap_hdbscan,
    label_clusters_council_of_3,
    map_clusters_to_pajais_v2,
    export_v2_outputs,
]

# The ReAct agent: orchestrator model + tools + checkpointer, with
# SYSTEM_PROMPT_V2 supplied as its prompt.
agent_v2 = create_react_agent(
    model=_llm_v2,
    tools=_tools_v2,
    checkpointer=_memory_v2,
    prompt=SYSTEM_PROMPT_V2,
)
def clean_thread_history_v2(thread_id: str) -> None:
    """
    Remove AIMessages with unresolved tool calls from LangGraph memory.

    Needed when a tool call errors mid-run on HuggingFace — without this,
    LangGraph replays the broken state and loops forever.

    An AIMessage is dropped when at least one of its tool calls has no
    matching ToolMessage. ToolMessages that answered *other* calls of a
    dropped AIMessage are dropped too, so no tool response is left in the
    history without the assistant turn that requested it.

    Args:
        thread_id: The LangGraph thread whose latest checkpoint is cleaned.

    Returns:
        None. Does nothing if the thread has no checkpoint, has no messages,
        or nothing needs to be removed.
    """
    # checkpoint_ns is passed explicitly (empty string = root namespace).
    # NOTE(review): the checkpointer's put() appears to index
    # config["configurable"]["checkpoint_ns"] directly — confirm against the
    # installed langgraph-checkpoint version.
    config = {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}}
    checkpoint = _memory_v2.get(config)
    if checkpoint is None:
        return

    messages = checkpoint.get("channel_values", {}).get("messages", [])
    if not messages:
        return

    # Tool-call ids that actually received a response.
    responded_ids = {
        msg.tool_call_id for msg in messages if isinstance(msg, ToolMessage)
    }

    # Pass 1: drop AI turns that still have at least one unanswered tool call,
    # remembering every call id those turns issued.
    dropped_call_ids = set()
    kept = []
    for msg in messages:
        if isinstance(msg, AIMessage):
            call_ids = [
                call.get("id") for call in (getattr(msg, "tool_calls", None) or [])
            ]
            if any(call_id not in responded_ids for call_id in call_ids):
                dropped_call_ids.update(call_ids)
                continue
        kept.append(msg)

    # Pass 2: drop tool responses that belonged to a removed AI turn.
    clean = [
        msg
        for msg in kept
        if not (isinstance(msg, ToolMessage) and msg.tool_call_id in dropped_call_ids)
    ]

    if len(clean) == len(messages):
        return

    checkpoint["channel_values"]["messages"] = clean
    # Pass the checkpoint's own channel versions as new_versions.
    # NOTE(review): some langgraph-checkpoint releases seem to persist only the
    # channels named in new_versions, so an empty mapping could silently skip
    # the cleaned message list — confirm against the installed version.
    _memory_v2.put(config, checkpoint, {}, checkpoint.get("channel_versions", {}))
def reset_thread_v2(thread_id: str) -> None:
    """
    Clear a thread's message history. Call this from app.py if the researcher
    clicks a "Reset / Start Over" button, or after a catastrophic tool failure.

    Only the "messages" channel of the thread's latest checkpoint is emptied;
    any other channel values in that checkpoint are left as they are.

    Usage in app.py:
        from agent_v2 import reset_thread_v2
        reset_thread_v2(thread_id)

    Args:
        thread_id: The LangGraph thread to reset.

    Returns:
        None. Does nothing if the thread has no checkpoint yet.
    """
    # checkpoint_ns is passed explicitly (empty string = root namespace).
    # NOTE(review): the checkpointer's put() appears to index
    # config["configurable"]["checkpoint_ns"] directly — confirm against the
    # installed langgraph-checkpoint version.
    config = {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}}
    checkpoint = _memory_v2.get(config)
    if checkpoint is None:
        return

    # setdefault guards against a checkpoint that has no channel_values entry.
    checkpoint.setdefault("channel_values", {})["messages"] = []
    # Pass the checkpoint's own channel versions as new_versions.
    # NOTE(review): some langgraph-checkpoint releases seem to persist only the
    # channels named in new_versions, so an empty mapping could silently skip
    # the emptied message list — confirm against the installed version.
    _memory_v2.put(config, checkpoint, {}, checkpoint.get("channel_versions", {}))