aadisawant2912 committed on
Commit
d8e0bc3
·
verified ·
1 Parent(s): 121fcef

Update agent_v2.py

Browse files
Files changed (1) hide show
  1. agent_v2.py +110 -23
agent_v2.py CHANGED
@@ -1,12 +1,30 @@
1
  """
2
- agent_v2.py - SPECTER2 + HDBSCAN + Council-of-3 Thematic Analysis Agent.
3
- Single run on combined Title+Abstract per paper.
 
4
  """
5
 
6
  from __future__ import annotations
7
 
 
8
  from dotenv import load_dotenv
9
- load_dotenv()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  from langgraph.prebuilt import create_react_agent
12
  from langgraph.checkpoint.memory import MemorySaver
@@ -30,7 +48,12 @@ ROLE
30
  You guide a researcher through a 5-phase SPECTER2 thematic analysis.
31
  Each paper is represented by ONE combined Title+Abstract vector (SPECTER2).
32
  Clustering uses UMAP + HDBSCAN (density-based, 15-30 clusters of 5-120 papers).
33
- Labeling uses a council of 3 LLMs — final label is the mode of 3 votes.
 
 
 
 
 
34
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
35
  FULL WORKFLOW
36
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
@@ -38,22 +61,31 @@ Triggered by: researcher types "run specter" or "run v2"
38
 
39
  Phase 1 — Load & Embed:
40
  Call: load_and_embed_specter2(csv_path="data/uploaded.csv")
41
- Show: papers count, embedding dimension, any notes.
42
  STOP GATE 1: "Phase 1 complete. Type yes to run UMAP+HDBSCAN clustering."
43
 
44
  Phase 2 — UMAP + HDBSCAN Clustering:
45
  Call: cluster_with_umap_hdbscan(umap_neighbors=15, umap_min_dist=0.05,
46
  hdbscan_min_cluster_size=5, hdbscan_min_samples=3)
47
- Show: number of clusters found, cluster sizes, noise count.
48
- If clusters < 15 or > 30, note this to researcher and suggest they may
49
- want to re-run with adjusted parameters.
50
- STOP GATE 2: "Phase 2 complete. Type yes to run council-of-3 LLM labeling."
51
 
52
- Phase 3 — Council of 3 LLM Labeling:
53
  Call: label_clusters_council_of_3(batch_size=5)
54
- Show: clusters labeled, unanimous/majority/split vote counts.
 
 
 
 
 
 
 
 
55
  Tell researcher: "Cluster Audit CSV is ready in the Download tab.
56
- It shows all 3 LLM votes, final label, and which papers are in each cluster."
 
57
  STOP GATE 3: "Phase 3 complete. Type yes to map to PAJAIS taxonomy."
58
 
59
  Phase 4 — PAJAIS Mapping:
@@ -64,35 +96,70 @@ Phase 4 — PAJAIS Mapping:
64
  Phase 5 — Final Outputs:
65
  Call: export_v2_outputs()
66
  Show:
67
- - Cluster labels and PAJAIS mappings
68
  - comparison_v2.csv row count
69
  - narrative_v2.txt word count
70
  Say: "✅ SPECTER2 RUN COMPLETE.
71
  comparison_v2.csv and narrative_v2.txt are ready in the Download tab.
72
- cluster_audit.csv contains full LLM voting details per paper."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
74
  CRITICAL RULES
75
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
76
  1. ONE PHASE PER MESSAGE — complete one phase then STOP and wait.
77
  2. NEVER SKIP STOP GATES — 4 gates, always wait for user confirmation.
78
  3. NO HALLUCINATION — only reference data returned by tools.
79
- 4. When you see "run specter" or "run v2" → start Phase 1.
 
 
 
80
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
81
  TOOLS
82
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
83
  1. load_and_embed_specter2(csv_path)
84
- Builds combined T+A text per paper, embeds with SPECTER2, saves to data/v2/
85
- 2. cluster_with_umap_hdbscan(umap_neighbors, umap_min_dist, hdbscan_min_cluster_size, hdbscan_min_samples)
86
- UMAP → HDBSCAN, targets 15-30 clusters of 5-120 papers, cosine metric
 
 
 
 
 
 
87
  3. label_clusters_council_of_3(batch_size)
88
- 3 Mistral-small calls with distinct personas → mode vote for final label
89
- Saves cluster_audit.csv with all 3 votes + paper details
 
 
 
90
  4. map_clusters_to_pajais_v2()
91
- Maps cluster labels to PAJAIS 25 categories
 
 
92
  5. export_v2_outputs()
93
- Generates comparison_v2.csv (one row per paper) + narrative_v2.txt
 
94
  """.strip()
95
 
 
 
 
 
96
  _llm_v2 = ChatMistralAI(model="mistral-small-latest", temperature=0.3)
97
  _memory_v2 = MemorySaver()
98
 
@@ -113,7 +180,11 @@ agent_v2 = create_react_agent(
113
 
114
 
115
  def clean_thread_history_v2(thread_id: str) -> None:
116
- """Remove AIMessages with unresolved tool calls from LangGraph memory."""
 
 
 
 
117
  config = {"configurable": {"thread_id": thread_id}}
118
  checkpoint = _memory_v2.get(config)
119
  if checkpoint is None:
@@ -135,4 +206,20 @@ def clean_thread_history_v2(thread_id: str) -> None:
135
  if len(clean) == len(messages):
136
  return
137
  checkpoint["channel_values"]["messages"] = clean
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  _memory_v2.put(config, checkpoint, {}, {})
 
1
  """
2
+ agent_v2.py - SPECTER2 + HDBSCAN + True Council-of-3 Thematic Analysis Agent.
3
+ Runs on HuggingFace Spaces. API keys read from HF Secrets (Settings → Variables and Secrets).
4
+ Council: Mistral + OpenAI + Groq running in PARALLEL with disk caching.
5
  """
6
 
7
  from __future__ import annotations
8
 
9
+ import os
10
  from dotenv import load_dotenv
11
+ load_dotenv() # local .env fallback — ignored on HuggingFace (HF injects secrets directly)
12
+
13
+ # ── HuggingFace Spaces: validate secrets are present at startup ───────────────
14
+ # This gives a clear error message instead of a cryptic API failure mid-run.
15
+ _REQUIRED_SECRETS = {
16
+ "MISTRAL_API_KEY": "Mistral AI — mistralai.com/api",
17
+ "OPENAI_API_KEY": "OpenAI — platform.openai.com/api-keys",
18
+ "GROQ_API_KEY": "Groq — console.groq.com/keys",
19
+ }
20
+ _missing = [f"{k} ({hint})" for k, hint in _REQUIRED_SECRETS.items() if not os.getenv(k)]
21
+ if _missing:
22
+ raise EnvironmentError(
23
+ "Missing API keys in HuggingFace Secrets.\n"
24
+ "Go to: Space → Settings → Variables and Secrets → New Secret\n"
25
+ "Missing:\n" + "\n".join(f" • {m}" for m in _missing)
26
+ )
27
+ # ─────────────────────────────────────────────────────────────────────────────
28
 
29
  from langgraph.prebuilt import create_react_agent
30
  from langgraph.checkpoint.memory import MemorySaver
 
48
  You guide a researcher through a 5-phase SPECTER2 thematic analysis.
49
  Each paper is represented by ONE combined Title+Abstract vector (SPECTER2).
50
  Clustering uses UMAP + HDBSCAN (density-based, 15-30 clusters of 5-120 papers).
51
+ Labeling uses a TRUE council of 3 DIFFERENT LLMs running in PARALLEL:
52
+ β€’ Mistral (mistral-small-latest) β€” IS theory framing
53
+ β€’ OpenAI (gpt-4o-mini) β€” digital management framing
54
+ β€’ Groq (llama3-70b-8192) β€” technical/CS framing
55
+ Final label = majority vote (mode) of the 3 independent responses.
56
+ Results are DISK-CACHED β€” re-runs never re-pay for already-labeled batches.
57
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
58
  FULL WORKFLOW
59
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 
61
 
62
  Phase 1 — Load & Embed:
63
  Call: load_and_embed_specter2(csv_path="data/uploaded.csv")
64
+ Show: total papers, valid papers, embedding dimension (768), any notes.
65
  STOP GATE 1: "Phase 1 complete. Type yes to run UMAP+HDBSCAN clustering."
66
 
67
  Phase 2 — UMAP + HDBSCAN Clustering:
68
  Call: cluster_with_umap_hdbscan(umap_neighbors=15, umap_min_dist=0.05,
69
  hdbscan_min_cluster_size=5, hdbscan_min_samples=3)
70
+ Show: clusters found, cluster sizes list, noise paper count.
71
+ If clusters < 15 or > 30, flag this to the researcher and suggest
72
+ adjusting hdbscan_min_cluster_size (smaller = more clusters, larger = fewer).
73
+ STOP GATE 2: "Phase 2 complete. Type yes to run parallel council-of-3 LLM labeling."
74
 
75
+ Phase 3 — Parallel Council of 3 LLM Labeling:
76
  Call: label_clusters_council_of_3(batch_size=5)
77
+ IMPORTANT — warn the researcher BEFORE calling:
78
+ "Phase 3 will call 3 LLM APIs in parallel (Mistral + OpenAI + Groq).
79
+ Wall time ≈ slowest single model. Already-cached batches are free.
80
+ This may take several minutes on first run."
81
+ Show after completion:
82
+ - clusters labeled count
83
+ - unanimous / majority / split vote breakdown
84
+ - council_members from result
85
+ - cache_files_on_disk (how many batches are now cached)
86
  Tell researcher: "Cluster Audit CSV is ready in the Download tab.
87
+ It shows all 3 LLM votes (MISTRAL / OPENAI / GROQ), final label,
88
+ confidence scores, and which papers are in each cluster."
89
  STOP GATE 3: "Phase 3 complete. Type yes to map to PAJAIS taxonomy."
90
 
91
  Phase 4 — PAJAIS Mapping:
 
96
  Phase 5 — Final Outputs:
97
  Call: export_v2_outputs()
98
  Show:
99
+ - Cluster labels and PAJAIS mappings summary
100
  - comparison_v2.csv row count
101
  - narrative_v2.txt word count
102
  Say: "✅ SPECTER2 RUN COMPLETE.
103
  comparison_v2.csv and narrative_v2.txt are ready in the Download tab.
104
+ cluster_audit.csv contains full LLM voting details (MISTRAL/OPENAI/GROQ) per paper.
105
+ Cache is stored at data/v2/llm_cache/ — delete this folder to force fresh labels."
106
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
107
+ CACHE BEHAVIOUR (explain if researcher asks)
108
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
109
+ - Every (model + prompt) pair is hashed and stored in data/v2/llm_cache/
110
+ - A cache HIT costs $0 and is instant — no API call is made
111
+ - A cache MISS calls the API and saves the result for all future runs
112
+ - To clear the cache and force fresh labels: delete data/v2/llm_cache/
113
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
114
+ RATE LIMIT NOTES (explain if researcher sees errors)
115
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
116
+ - Each LLM thread has its own inter-batch delay (Groq: 20s, Mistral: 12s, OpenAI: 10s)
117
+ - Retry uses exponential backoff: 15s → 30s → 60s → 120s before fallback
118
+ - If a model consistently fails, its fallback label will show "(model error)" in the CSV
119
+ - On HuggingFace Spaces, persistent rate limit errors usually mean the API key
120
+ has hit its free-tier limit — check the relevant API dashboard
121
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
122
  CRITICAL RULES
123
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
124
  1. ONE PHASE PER MESSAGE — complete one phase then STOP and wait.
125
  2. NEVER SKIP STOP GATES — 4 gates, always wait for user confirmation.
126
  3. NO HALLUCINATION — only reference data returned by tools.
127
+ 4. COLUMN NAMES in CSVs use MISTRAL/OPENAI/GROQ not IS_THEORY/DIGITAL_MGT/COMP_SCI.
128
+ 5. When you see "run specter" or "run v2" → start Phase 1 immediately.
129
+ 6. If a tool returns an error → show the raw error, do NOT retry automatically.
130
+ Ask the researcher: "Would you like to retry Phase X?"
131
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
132
  TOOLS
133
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
134
  1. load_and_embed_specter2(csv_path)
135
+ Builds combined Title+Abstract text per paper, embeds with local SPECTER2
136
+ (allenai/specter2_base, ~440MB, downloaded once then cached by HuggingFace).
137
+ No API key needed. Saves to data/v2/.
138
+
139
+ 2. cluster_with_umap_hdbscan(umap_neighbors, umap_min_dist,
140
+ hdbscan_min_cluster_size, hdbscan_min_samples)
141
+ UMAP (cosine, 5D) → HDBSCAN. Targets 15-30 clusters of 5-120 papers.
142
+ Also saves 2D scatter + bar charts to data/v2/charts.json.
143
+
144
  3. label_clusters_council_of_3(batch_size)
145
+ TRUE parallel ensemble: Mistral + OpenAI + Groq run simultaneously via
146
+ ThreadPoolExecutor. Disk cache at data/v2/llm_cache/ (SHA-256 keyed).
147
+ Saves cluster_audit.csv with all 3 votes + paper details.
148
+ Columns: llm1_MISTRAL_label, llm2_OPENAI_label, llm3_GROQ_label.
149
+
150
  4. map_clusters_to_pajais_v2()
151
+ Maps cluster labels → PAJAIS 25 IS research categories via Mistral.
152
+ Saves data/v2/taxonomy.json.
153
+
154
  5. export_v2_outputs()
155
+ Generates comparison_v2.csv (one row per paper, includes pajais_category)
156
+ and narrative_v2.txt (~500 word academic Section 7 discussion).
157
  """.strip()
158
 
159
+ # ── Orchestrator LLM (Mistral drives the agent loop) ─────────────────────────
160
+ # This is SEPARATE from the council — it only manages conversation flow,
161
+ # decides which tool to call next, and formats responses for the researcher.
162
+ # It does NOT label clusters; the tools_v2.py council handles that.
163
  _llm_v2 = ChatMistralAI(model="mistral-small-latest", temperature=0.3)
164
  _memory_v2 = MemorySaver()
165
 
 
180
 
181
 
182
  def clean_thread_history_v2(thread_id: str) -> None:
183
+ """
184
+ Remove AIMessages with unresolved tool calls from LangGraph memory.
185
+ Needed when a tool call errors mid-run on HuggingFace — without this,
186
+ LangGraph replays the broken state and loops forever.
187
+ """
188
  config = {"configurable": {"thread_id": thread_id}}
189
  checkpoint = _memory_v2.get(config)
190
  if checkpoint is None:
 
206
  if len(clean) == len(messages):
207
  return
208
  checkpoint["channel_values"]["messages"] = clean
209
+ _memory_v2.put(config, checkpoint, {}, {})
210
+
211
+
212
+ def reset_thread_v2(thread_id: str) -> None:
213
+ """
214
+ Fully wipe a thread's memory. Call this from app.py if the researcher
215
+ clicks a "Reset / Start Over" button, or after a catastrophic tool failure.
216
+ Usage in app.py:
217
+ from agent_v2 import reset_thread_v2
218
+ reset_thread_v2(thread_id)
219
+ """
220
+ config = {"configurable": {"thread_id": thread_id}}
221
+ checkpoint = _memory_v2.get(config)
222
+ if checkpoint is None:
223
+ return
224
+ checkpoint["channel_values"]["messages"] = []
225
  _memory_v2.put(config, checkpoint, {}, {})