Spaces:
Sleeping
Sleeping
| """ | |
agent_v2.py - SPECTER2 + HDBSCAN + True Council-of-3 Thematic Analysis Agent.
Runs on HuggingFace Spaces. API keys read from HF Secrets (Settings → Variables and Secrets).
Council: Mistral + Gemini + Groq running in PARALLEL with disk caching.
| """ | |
| from __future__ import annotations | |
| import os | |
| from dotenv import load_dotenv | |
| load_dotenv() # local .env fallback β ignored on HuggingFace (HF injects secrets directly) | |
# ── HuggingFace Spaces: validate secrets are present at startup ──
# Reporting key presence up front gives a clear message instead of a cryptic
# API failure mid-run. Only presence (found / missing) is printed, never the
# secret values themselves.
import os

_key_status = {
    "MISTRAL_API_KEY": bool(os.getenv("MISTRAL_API_KEY")),
    "GROQ_API_KEY": bool(os.getenv("GROQ_API_KEY")),
    # Either spelling of the Google/Gemini key counts as present.
    "GOOGLE_API_KEY": bool(os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")),
}
for _k, _ok in _key_status.items():
    print(f" Secret check: {_k} → {'✓ found' if _ok else '⚠️ MISSING'}")

# Remap GEMINI_API_KEY → GOOGLE_API_KEY if needed, so code that reads only
# GOOGLE_API_KEY still finds the key.
if not os.getenv("GOOGLE_API_KEY") and os.getenv("GEMINI_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = os.environ["GEMINI_API_KEY"]
    print(" Remapped GEMINI_API_KEY → GOOGLE_API_KEY")
| from langgraph.prebuilt import create_react_agent | |
| from langgraph.checkpoint.memory import MemorySaver | |
| from langchain_mistralai import ChatMistralAI | |
| from langchain_core.messages import AIMessage, ToolMessage | |
| from tools_v2 import ( | |
| load_and_embed_specter2, | |
| cluster_with_umap_hdbscan, | |
| label_clusters_council_of_3, | |
| map_clusters_to_pajais_v2, | |
| export_v2_outputs, | |
| ) | |
# System prompt for the orchestrator agent. It fixes the five-phase workflow,
# the four stop gates between phases, and the tool catalogue. The council
# members named in the researcher-facing messages must agree with the
# audit-CSV columns listed under TOOLS (MISTRAL / GEMINI / GROQ).
SYSTEM_PROMPT_V2 = """
You are a computational thematic analysis expert for systematic literature reviews
in Information Systems, using SPECTER2 embeddings + HDBSCAN clustering.
══════════════════════════════════════════════════════════════
ROLE
══════════════════════════════════════════════════════════════
You guide a researcher through a 5-phase SPECTER2 thematic analysis.
Each paper is represented by ONE combined Title+Abstract vector (SPECTER2).
Clustering uses UMAP + HDBSCAN (density-based, 15-30 clusters of 5-120 papers).
Labeling uses a TRUE council of 3 DIFFERENT LLMs running in PARALLEL:
• Mistral (mistral-small-latest)
• GEMINI
• Groq (llama3-70b-8192)
Final label = majority vote (mode) of the 3 independent responses.
Results are DISK-CACHED — re-runs never re-pay for already-labeled batches.
══════════════════════════════════════════════════════════════
FULL WORKFLOW
══════════════════════════════════════════════════════════════
Triggered by: researcher types "run specter" or "run v2"
Phase 1 — Load & Embed:
Call: load_and_embed_specter2(csv_path="data/uploaded.csv")
Show: total papers, valid papers, embedding dimension (768), any notes.
STOP GATE 1: "Phase 1 complete. Type yes to run UMAP+HDBSCAN clustering."
Phase 2 — UMAP + HDBSCAN Clustering:
Call: cluster_with_umap_hdbscan(umap_neighbors=15, umap_min_dist=0.05,
hdbscan_min_cluster_size=5, hdbscan_min_samples=3)
Show: clusters found, cluster sizes list, noise paper count.
If clusters < 15 or > 30, flag this to the researcher and suggest
adjusting hdbscan_min_cluster_size (smaller = more clusters, larger = fewer).
STOP GATE 2: "Phase 2 complete. Type yes to run parallel council-of-3 LLM labeling."
Phase 3 — Parallel Council of 3 LLM Labeling:
Call: label_clusters_council_of_3(batch_size=5)
IMPORTANT — warn the researcher BEFORE calling:
"Phase 3 will call 3 LLM APIs in parallel (Mistral + Gemini + Groq).
Wall time ≈ slowest single model. Already-cached batches are free.
This may take several minutes on first run."
Show after completion:
- clusters labeled count
- unanimous / majority / split vote breakdown
- council_members from result
- cache_files_on_disk (how many batches are now cached)
Tell researcher: "Cluster Audit CSV is ready in the Download tab.
It shows all 3 LLM votes (MISTRAL / GEMINI / GROQ), final label,
confidence scores, and which papers are in each cluster."
STOP GATE 3: "Phase 3 complete. Type yes to map to PAJAIS taxonomy."
Phase 4 — PAJAIS Mapping:
Call: map_clusters_to_pajais_v2()
Show: table of Cluster | Label | PAJAIS Category | Confidence | Rationale
STOP GATE 4: "Phase 4 complete. Type yes to generate final outputs."
Phase 5 — Final Outputs:
Call: export_v2_outputs()
Show:
- Cluster labels and PAJAIS mappings summary
- comparison_v2.csv row count
- narrative_v2.txt word count
Say: "✅ SPECTER2 RUN COMPLETE.
comparison_v2.csv and narrative_v2.txt are ready in the Download tab.
cluster_audit.csv contains full LLM voting details (MISTRAL/GEMINI/GROQ) per paper.
Cache is stored at data/v2/llm_cache/ — delete this folder to force fresh labels."
══════════════════════════════════════════════════════════════
CACHE BEHAVIOUR (explain if researcher asks)
══════════════════════════════════════════════════════════════
- Every (model + prompt) pair is hashed and stored in data/v2/llm_cache/
- A cache HIT costs $0 and is instant — no API call is made
- A cache MISS calls the API and saves the result for all future runs
- To clear the cache and force fresh labels: delete data/v2/llm_cache/
══════════════════════════════════════════════════════════════
RATE LIMIT NOTES (explain if researcher sees errors)
══════════════════════════════════════════════════════════════
- Each LLM thread has its own inter-batch delay (Groq: 15s, Mistral: 12s, Gemini: 8s)
- Retry uses exponential backoff: 15s → 30s → 60s → 120s before fallback
- If a model consistently fails, its fallback label will show "(model error)" in the CSV
- On HuggingFace Spaces, persistent rate limit errors usually mean the API key
has hit its free-tier limit — check the relevant API dashboard
══════════════════════════════════════════════════════════════
CRITICAL RULES
══════════════════════════════════════════════════════════════
1. ONE PHASE PER MESSAGE — complete one phase then STOP and wait.
2. NEVER SKIP STOP GATES — 4 gates, always wait for user confirmation.
3. NO HALLUCINATION — only reference data returned by tools.
4. COLUMN NAMES in CSVs use MISTRAL/GEMINI/GROQ not IS_THEORY/DIGITAL_MGT/COMP_SCI.
5. When you see "run specter" or "run v2" → start Phase 1 immediately.
6. If a tool returns an error → show the raw error, do NOT retry automatically.
Ask the researcher: "Would you like to retry Phase X?"
══════════════════════════════════════════════════════════════
TOOLS
══════════════════════════════════════════════════════════════
1. load_and_embed_specter2(csv_path)
Builds combined Title+Abstract text per paper, embeds with local SPECTER2
(allenai/specter2_base, ~440MB, downloaded once then cached by HuggingFace).
No API key needed. Saves to data/v2/.
2. cluster_with_umap_hdbscan(umap_neighbors, umap_min_dist,
hdbscan_min_cluster_size, hdbscan_min_samples)
UMAP (cosine, 5D) → HDBSCAN. Targets 15-30 clusters of 5-120 papers.
Also saves 2D scatter + bar charts to data/v2/charts.json.
3. label_clusters_council_of_3(batch_size)
TRUE parallel ensemble: Mistral + GEMINI + Groq run simultaneously via
ThreadPoolExecutor. Disk cache at data/v2/llm_cache/ (SHA-256 keyed).
Saves cluster_audit.csv with all 3 votes + paper details.
Columns: llm1_MISTRAL_label, llm2_GEMINI_label, llm3_GROQ_label.
4. map_clusters_to_pajais_v2()
Maps cluster labels → PAJAIS 25 IS research categories via Mistral.
Saves data/v2/taxonomy.json.
5. export_v2_outputs()
Generates comparison_v2.csv (one row per paper, includes pajais_category)
and narrative_v2.txt (~500 word academic Section 7 discussion).
""".strip()
# ── Orchestrator wiring (Mistral drives the agent loop) ──
# The orchestrator model is SEPARATE from the labeling council: it only
# manages conversation flow, decides which tool to call next, and formats
# responses for the researcher. Cluster labeling itself happens inside the
# tools imported from tools_v2.py.

# Checkpointer holding per-thread conversation state; the thread helpers
# defined later in this module read and write through it.
_memory_v2 = MemorySaver()

_llm_v2 = ChatMistralAI(model="mistral-small-latest", temperature=0.3)

# Tools exposed to the agent, in the order the workflow phases call them.
_tools_v2 = [
    load_and_embed_specter2,
    cluster_with_umap_hdbscan,
    label_clusters_council_of_3,
    map_clusters_to_pajais_v2,
    export_v2_outputs,
]

agent_v2 = create_react_agent(
    _llm_v2,
    _tools_v2,
    prompt=SYSTEM_PROMPT_V2,
    checkpointer=_memory_v2,
)
def clean_thread_history_v2(thread_id: str) -> None:
    """
    Remove dangling tool-call exchanges from a thread's saved messages.

    An AIMessage whose tool calls were not ALL answered by a ToolMessage is
    dropped, together with any ToolMessage that answered one of its calls, so
    the stored history never contains half of a tool-call exchange.
    Needed when a tool call errors mid-run on HuggingFace — without this,
    LangGraph replays the broken state and loops forever.

    Args:
        thread_id: Conversation thread whose checkpoint in ``_memory_v2``
            should be repaired.

    Returns:
        None. The checkpoint is written back only if a message was removed.
    """
    config = {"configurable": {"thread_id": thread_id}}
    checkpoint = _memory_v2.get(config)
    if checkpoint is None:
        return
    messages = checkpoint.get("channel_values", {}).get("messages", [])
    if not messages:
        return

    # Tool-call ids that received a response somewhere in the history.
    responded_ids = {
        msg.tool_call_id for msg in messages if isinstance(msg, ToolMessage)
    }

    # Pass 1: drop AI messages with at least one unanswered tool call and
    # remember every call id they issued.
    dropped_call_ids = set()
    kept = []
    for msg in messages:
        if isinstance(msg, AIMessage):
            call_ids = [
                call.get("id") for call in (getattr(msg, "tool_calls", None) or [])
            ]
            if any(call_id not in responded_ids for call_id in call_ids):
                dropped_call_ids.update(call_ids)
                continue
        kept.append(msg)

    # Pass 2: a ToolMessage answering a call of a dropped AI message would be
    # left without its originating request, so remove it as well.
    clean = [
        msg
        for msg in kept
        if not (isinstance(msg, ToolMessage) and msg.tool_call_id in dropped_call_ids)
    ]

    if len(clean) == len(messages):
        return
    checkpoint["channel_values"]["messages"] = clean
    # NOTE(review): this writes straight to the checkpointer with empty
    # metadata and empty new_versions; confirm against the installed langgraph
    # version that the edited messages are actually persisted this way.
    _memory_v2.put(config, checkpoint, {}, {})
def reset_thread_v2(thread_id: str) -> None:
    """
    Empty the saved message history of a thread.

    Call this from app.py if the researcher clicks a "Reset / Start Over"
    button, or after a catastrophic tool failure. Only the ``messages`` entry
    of the checkpoint's channel values is cleared; a thread with no
    checkpoint is left alone.

    Usage in app.py:
        from agent_v2 import reset_thread_v2
        reset_thread_v2(thread_id)

    Args:
        thread_id: Conversation thread whose checkpoint in ``_memory_v2``
            should be cleared.
    """
    config = {"configurable": {"thread_id": thread_id}}
    checkpoint = _memory_v2.get(config)
    if checkpoint is None:
        return
    # setdefault: tolerate a checkpoint that has no "channel_values" entry
    # instead of raising KeyError.
    checkpoint.setdefault("channel_values", {})["messages"] = []
    # NOTE(review): this writes straight to the checkpointer with empty
    # metadata and empty new_versions; confirm against the installed langgraph
    # version that the cleared history is actually persisted this way.
    _memory_v2.put(config, checkpoint, {}, {})