"""
agent_v2.py - SPECTER2 + HDBSCAN + True Council-of-3 Thematic Analysis Agent.
Runs on HuggingFace Spaces. API keys read from HF Secrets (Settings → Variables and Secrets).
Council: Mistral + Gemini + Groq running in PARALLEL with disk caching.
"""

from __future__ import annotations

import os
from dotenv import load_dotenv
load_dotenv()   # local .env fallback — ignored on HuggingFace (HF injects secrets directly)

# ── HuggingFace Spaces: report at startup which secrets are present ───────────
# Printing a per-key status up front makes a missing key obvious immediately,
# instead of it surfacing later as a cryptic API failure mid-run.
import os
# Presence map for each secret the pipeline expects (True = set and non-empty).
_key_status = {
    _k: bool(os.getenv(_k)) for _k in ("MISTRAL_API_KEY", "GROQ_API_KEY")
}
# The Google key counts as present under either of its two accepted names.
_key_status["GOOGLE_API_KEY"] = bool(
    os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
)

# One status line per secret, printed at startup.
for _k, _ok in _key_status.items():
    print(f"  Secret check: {_k} → {'✅ found' if _ok else '⚠️ MISSING'}")

# Only GEMINI_API_KEY supplied: mirror it under the GOOGLE_API_KEY name so that
# code reading GOOGLE_API_KEY later in the process finds a value.
if os.getenv("GEMINI_API_KEY") and not os.getenv("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = os.environ["GEMINI_API_KEY"]
    print("  Remapped GEMINI_API_KEY → GOOGLE_API_KEY")

from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import MemorySaver
from langchain_mistralai import ChatMistralAI
from langchain_core.messages import AIMessage, ToolMessage

from tools_v2 import (
    load_and_embed_specter2,
    cluster_with_umap_hdbscan,
    label_clusters_council_of_3,
    map_clusters_to_pajais_v2,
    export_v2_outputs,
)

# System prompt handed to the orchestrator agent. It defines the 5-phase
# workflow, the stop gates between phases, and a description of each tool.
# The council members named here must stay consistent throughout the prompt
# (Mistral / Gemini / Groq): the agent repeats these names to the researcher
# and uses them when describing the audit CSV columns.
SYSTEM_PROMPT_V2 = """
You are a computational thematic analysis expert for systematic literature reviews
in Information Systems, using SPECTER2 embeddings + HDBSCAN clustering.
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
ROLE
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
You guide a researcher through a 5-phase SPECTER2 thematic analysis.
Each paper is represented by ONE combined Title+Abstract vector (SPECTER2).
Clustering uses UMAP + HDBSCAN (density-based, 15-30 clusters of 5-120 papers).
Labeling uses a TRUE council of 3 DIFFERENT LLMs running in PARALLEL:
  • Mistral  (mistral-small-latest)   
  • GEMINI           
  • Groq     (llama3-70b-8192)        
Final label = majority vote (mode) of the 3 independent responses.
Results are DISK-CACHED — re-runs never re-pay for already-labeled batches.
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
FULL WORKFLOW
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Triggered by: researcher types "run specter" or "run v2"

Phase 1 — Load & Embed:
  Call: load_and_embed_specter2(csv_path="data/uploaded.csv")
  Show: total papers, valid papers, embedding dimension (768), any notes.
  STOP GATE 1: "Phase 1 complete. Type yes to run UMAP+HDBSCAN clustering."

Phase 2 — UMAP + HDBSCAN Clustering:
  Call: cluster_with_umap_hdbscan(umap_neighbors=15, umap_min_dist=0.05,
        hdbscan_min_cluster_size=5, hdbscan_min_samples=3)
  Show: clusters found, cluster sizes list, noise paper count.
  If clusters < 15 or > 30, flag this to the researcher and suggest
  adjusting hdbscan_min_cluster_size (smaller = more clusters, larger = fewer).
  STOP GATE 2: "Phase 2 complete. Type yes to run parallel council-of-3 LLM labeling."

Phase 3 — Parallel Council of 3 LLM Labeling:
  Call: label_clusters_council_of_3(batch_size=5)
  IMPORTANT — warn the researcher BEFORE calling:
    "Phase 3 will call 3 LLM APIs in parallel (Mistral + Gemini + Groq).
     Wall time ≈ slowest single model. Already-cached batches are free.
     This may take several minutes on first run."
  Show after completion:
    - clusters labeled count
    - unanimous / majority / split vote breakdown
    - council_members from result
    - cache_files_on_disk (how many batches are now cached)
  Tell researcher: "Cluster Audit CSV is ready in the Download tab.
  It shows all 3 LLM votes (MISTRAL / GEMINI / GROQ), final label,
  confidence scores, and which papers are in each cluster."
  STOP GATE 3: "Phase 3 complete. Type yes to map to PAJAIS taxonomy."

Phase 4 — PAJAIS Mapping:
  Call: map_clusters_to_pajais_v2()
  Show: table of Cluster | Label | PAJAIS Category | Confidence | Rationale
  STOP GATE 4: "Phase 4 complete. Type yes to generate final outputs."

Phase 5 — Final Outputs:
  Call: export_v2_outputs()
  Show:
    - Cluster labels and PAJAIS mappings summary
    - comparison_v2.csv row count
    - narrative_v2.txt word count
  Say: "✅ SPECTER2 RUN COMPLETE.
  comparison_v2.csv and narrative_v2.txt are ready in the Download tab.
  cluster_audit.csv contains full LLM voting details (MISTRAL/GEMINI/GROQ) per paper.
  Cache is stored at data/v2/llm_cache/ — delete this folder to force fresh labels."
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
CACHE BEHAVIOUR (explain if researcher asks)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
- Every (model + prompt) pair is hashed and stored in data/v2/llm_cache/
- A cache HIT costs $0 and is instant — no API call is made
- A cache MISS calls the API and saves the result for all future runs
- To clear the cache and force fresh labels: delete data/v2/llm_cache/
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
RATE LIMIT NOTES (explain if researcher sees errors)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
- Each LLM thread has its own inter-batch delay (Groq: 15s, Mistral: 12s, Gemini: 8s)
- Retry uses exponential backoff: 15s → 30s → 60s → 120s before fallback
- If a model consistently fails, its fallback label will show "(model error)" in the CSV
- On HuggingFace Spaces, persistent rate limit errors usually mean the API key
  has hit its free-tier limit — check the relevant API dashboard
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
CRITICAL RULES
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
1. ONE PHASE PER MESSAGE — complete one phase then STOP and wait.
2. NEVER SKIP STOP GATES — 4 gates, always wait for user confirmation.
3. NO HALLUCINATION — only reference data returned by tools.
4. COLUMN NAMES in CSVs use MISTRAL/GEMINI/GROQ not IS_THEORY/DIGITAL_MGT/COMP_SCI.
5. When you see "run specter" or "run v2" → start Phase 1 immediately.
6. If a tool returns an error → show the raw error, do NOT retry automatically.
   Ask the researcher: "Would you like to retry Phase X?"
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
TOOLS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
1. load_and_embed_specter2(csv_path)
   Builds combined Title+Abstract text per paper, embeds with local SPECTER2
   (allenai/specter2_base, ~440MB, downloaded once then cached by HuggingFace).
   No API key needed. Saves to data/v2/.

2. cluster_with_umap_hdbscan(umap_neighbors, umap_min_dist,
                              hdbscan_min_cluster_size, hdbscan_min_samples)
   UMAP (cosine, 5D) → HDBSCAN. Targets 15-30 clusters of 5-120 papers.
   Also saves 2D scatter + bar charts to data/v2/charts.json.

3. label_clusters_council_of_3(batch_size)
   TRUE parallel ensemble: Mistral + GEMINI + Groq run simultaneously via
   ThreadPoolExecutor. Disk cache at data/v2/llm_cache/ (SHA-256 keyed).
   Saves cluster_audit.csv with all 3 votes + paper details.
   Columns: llm1_MISTRAL_label, llm2_GEMINI_label, llm3_GROQ_label.

4. map_clusters_to_pajais_v2()
   Maps cluster labels → PAJAIS 25 IS research categories via Mistral.
   Saves data/v2/taxonomy.json.

5. export_v2_outputs()
   Generates comparison_v2.csv (one row per paper, includes pajais_category)
   and narrative_v2.txt (~500 word academic Section 7 discussion).
""".strip()

# ── Orchestrator (drives the agent loop) ─────────────────────────────────────
# One Mistral chat model runs the ReAct loop: it manages the conversation,
# picks the next tool to call, and formats replies for the researcher.
# This instance is separate from the labeling council — cluster labeling is
# done inside the tools_v2.py tools, not by this model.
_memory_v2 = MemorySaver()   # checkpointer; the helpers below look state up by thread_id
_llm_v2    = ChatMistralAI(temperature=0.3, model="mistral-small-latest")

# Tools exposed to the agent, listed in workflow order (Phase 1 → Phase 5).
_tools_v2 = [
    load_and_embed_specter2,
    cluster_with_umap_hdbscan,
    label_clusters_council_of_3,
    map_clusters_to_pajais_v2,
    export_v2_outputs,
]

agent_v2 = create_react_agent(
    model=_llm_v2,
    tools=_tools_v2,
    prompt=SYSTEM_PROMPT_V2,
    checkpointer=_memory_v2,
)


def clean_thread_history_v2(thread_id: str) -> None:
    """
    Remove dangling tool-call exchanges from a thread's stored message history.

    An AIMessage is considered broken when at least one of its tool calls has
    no ToolMessage with a matching ``tool_call_id`` anywhere in the history
    (e.g. a tool errored mid-run). Broken AIMessages are dropped, together
    with any ToolMessages that answered the *other* calls of that same
    AIMessage — leaving those behind would create tool results with no
    originating assistant message, which is itself a malformed history.

    Nothing is written back when the thread has no checkpoint, has no
    messages, or when no message needed to be removed.

    Args:
        thread_id: Identifier of the conversation thread to clean.

    NOTE(review): the cleaned list is persisted by calling the checkpointer's
    ``put`` directly with empty metadata / new_versions. Whether that rewrites
    the stored channel values depends on the installed langgraph version —
    confirm, or switch to the documented ``update_state`` + ``RemoveMessage``
    route.
    """
    config     = {"configurable": {"thread_id": thread_id}}
    checkpoint = _memory_v2.get(config)
    if checkpoint is None:
        return
    messages = checkpoint.get("channel_values", {}).get("messages", [])
    if not messages:
        return

    # Every tool_call_id that received a response somewhere in the history.
    responded_ids = {
        msg.tool_call_id
        for msg in messages
        if isinstance(msg, ToolMessage)
    }

    # Pass 1: drop AIMessages that still have an unanswered tool call, and
    # remember all call ids they issued so their partial answers can go too.
    dropped_call_ids = set()
    kept = []
    for msg in messages:
        if isinstance(msg, AIMessage):
            call_ids = [c.get("id") for c in (getattr(msg, "tool_calls", None) or [])]
            if any(cid not in responded_ids for cid in call_ids):
                dropped_call_ids.update(call_ids)
                continue
        kept.append(msg)

    # Pass 2: drop ToolMessages orphaned by pass 1 (their AIMessage is gone).
    clean = [
        msg
        for msg in kept
        if not (isinstance(msg, ToolMessage) and msg.tool_call_id in dropped_call_ids)
    ]

    if len(clean) == len(messages):
        return
    checkpoint["channel_values"]["messages"] = clean
    _memory_v2.put(config, checkpoint, {}, {})


def reset_thread_v2(thread_id: str) -> None:
    """
    Clear the stored message history of one conversation thread.

    Fetches the thread's checkpoint from the module-level checkpointer,
    replaces its "messages" channel with an empty list and writes the
    checkpoint back. Does nothing when the thread has no checkpoint yet.

    Meant to be called from app.py when the researcher clicks a
    "Reset / Start Over" button, or after a catastrophic tool failure:
        from agent_v2 import reset_thread_v2
        reset_thread_v2(thread_id)
    """
    thread_config = {"configurable": {"thread_id": thread_id}}
    snapshot = _memory_v2.get(thread_config)
    if snapshot is not None:
        snapshot["channel_values"]["messages"] = []
        _memory_v2.put(thread_config, snapshot, {}, {})