"""
Epistemic Metrics — RC+xi tension and coherence measurement for the Reasoning Forge.

Implements the core RC+xi equations within the forge context:
  - Epistemic tension (Eq. 2): xi_n = ||A_{n+1} - A_n||^2
  - Phase coherence (Eq. 11): Gamma = mean(|cos(theta_i - theta_bar)|)
  - Perspective coverage scoring
  - Tension decay tracking across debate rounds

These metrics let the forge quantify whether multi-agent reasoning actually
converges (productive tension resolution) or stalls (tension suppression).
"""

from __future__ import annotations

import math
import re
from collections import Counter
from typing import Dict, List, Optional, Tuple


# ---------------------------------------------------------------------------
# Text -> vector helpers (lightweight, no external deps)
# ---------------------------------------------------------------------------

_STOP_WORDS = {
    "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did", "will", "would", "shall",
    "should", "may", "might", "must", "can", "could", "to", "of", "in",
    "for", "on", "with", "at", "by", "from", "as", "into", "through",
    "during", "before", "after", "and", "but", "or", "nor", "not", "so",
    "yet", "both", "this", "that", "these", "those", "it", "its", "they",
    "them", "their", "we", "our", "you", "your", "he", "she", "his", "her",
}


def _tokenize(text: str) -> List[str]:
    return [w for w in re.findall(r"[a-z]{3,}", text.lower()) if w not in _STOP_WORDS]


def _term_vector(text: str) -> Counter:
    return Counter(_tokenize(text))


def _cosine_similarity(vec_a: Counter, vec_b: Counter) -> float:
    keys = set(vec_a) | set(vec_b)
    if not keys:
        return 0.0
    dot = sum(vec_a.get(k, 0) * vec_b.get(k, 0) for k in keys)
    mag_a = math.sqrt(sum(v * v for v in vec_a.values()))
    mag_b = math.sqrt(sum(v * v for v in vec_b.values()))
    if mag_a == 0 or mag_b == 0:
        return 0.0
    return dot / (mag_a * mag_b)


# ---------------------------------------------------------------------------
# Perspective vocabulary banks (for coverage scoring)
# ---------------------------------------------------------------------------

_PERSPECTIVE_VOCAB = {
    "Newton": {
        "force", "energy", "momentum", "conservation", "equilibrium", "dynamics",
        "causality", "mass", "acceleration", "entropy", "thermodynamic",
        "symmetry", "invariance", "field", "potential", "kinetic",
    },
    "Quantum": {
        "probability", "superposition", "uncertainty", "complementarity",
        "entanglement", "wave", "particle", "observer", "collapse",
        "interference", "tunneling", "decoherence", "amplitude",
    },
    "Ethics": {
        "ethical", "moral", "fairness", "justice", "rights", "duty",
        "consequence", "harm", "benefit", "stakeholder", "autonomy",
        "consent", "accountability", "responsibility", "welfare",
    },
    "Philosophy": {
        "epistemology", "ontology", "metaphysics", "assumption", "paradox",
        "dialectic", "phenomenology", "consciousness", "existence", "meaning",
        "truth", "knowledge", "belief", "certainty", "skepticism",
    },
    "DaVinci": {
        "creative", "invention", "analogy", "design", "innovation",
        "prototype", "biomimicry", "synthesis", "novel", "interdisciplinary",
        "combination", "reimagine", "solution", "insight",
    },
    "Empathy": {
        "emotional", "experience", "feeling", "compassion", "support",
        "community", "relationship", "wellbeing", "vulnerability",
        "understanding", "perspective", "human", "care", "dignity",
    },
    "Consciousness": {
        "awareness", "recursive", "self-referential", "metacognition",
        "emergence", "cognition", "reflection", "introspection",
        "sentience", "subjective", "qualia", "binding", "attention",
        "intentionality", "phenomenal",
    },
    "SystemsArchitecture": {
        "modular", "scalable", "interface", "pattern", "component",
        "microservice", "pipeline", "throughput", "latency", "resilience",
        "abstraction", "coupling", "cohesion", "architecture",
    },
}


# ---------------------------------------------------------------------------
# EpistemicMetrics
# ---------------------------------------------------------------------------

class EpistemicMetrics:
    """Measure RC+xi epistemic tension and coherence across agent analyses."""

    def score_pairwise_tension(
        self, analyses: Dict[str, str],
    ) -> Dict[str, float]:
        """Compute epistemic tension between each pair of agent analyses.

        Tension is 1 - cosine_similarity: high when perspectives diverge,
        low when they repeat each other.

        Returns:
            Dict with keys like "Newton_vs_Ethics" -> tension float 0-1.
        """
        agents = list(analyses.keys())
        vectors = {name: _term_vector(text) for name, text in analyses.items()}
        tensions = {}
        for i in range(len(agents)):
            for j in range(i + 1, len(agents)):
                sim = _cosine_similarity(vectors[agents[i]], vectors[agents[j]])
                tensions[f"{agents[i]}_vs_{agents[j]}"] = round(1.0 - sim, 4)
        return tensions

    def score_ensemble_coherence(
        self, analyses: Dict[str, str],
    ) -> float:
        """Phase coherence Gamma across the agent ensemble.

        Analogous to Eq. 11 in the embodied sim:
          Gamma = mean(cos(theta_i - theta_bar))

        Here 'theta' is the term-vector direction, and coherence measures
        how much all agents point in a similar semantic direction.

        Returns:
            Gamma in [0, 1] where 1 = all agents semantically aligned.
        """
        vectors = [_term_vector(text) for text in analyses.values()]
        if len(vectors) < 2:
            return 1.0

        # Build centroid
        centroid: Counter = Counter()
        for v in vectors:
            centroid.update(v)

        similarities = [_cosine_similarity(v, centroid) for v in vectors]
        return round(sum(similarities) / len(similarities), 4)

    def score_tension_magnitude(
        self, analyses: Dict[str, str],
    ) -> float:
        """Overall epistemic tension magnitude (mean pairwise tension).

        Analogous to Eq. 2 xi_n but measured across agents rather than
        across time steps.

        Returns:
            Mean tension 0-1 where 0 = all identical, 1 = fully orthogonal.
        """
        tensions = self.score_pairwise_tension(analyses)
        if not tensions:
            return 0.0
        return round(sum(tensions.values()) / len(tensions), 4)

    def score_tension_productivity(
        self,
        analyses: Dict[str, str],
        synthesis: str,
    ) -> Dict[str, float]:
        """Evaluate whether tension is productive (resolved in synthesis)
        or destructive (suppressed or ignored).

        Productive tension: agents diverge but synthesis addresses the
        divergence explicitly. Destructive: synthesis ignores disagreements.

        Returns:
            Dict with tension_magnitude, coherence_gain, productivity score.
        """
        tension = self.score_tension_magnitude(analyses)
        ensemble_coherence = self.score_ensemble_coherence(analyses)

        # How much of each agent's unique vocabulary appears in synthesis
        synthesis_vec = _term_vector(synthesis)
        agent_coverage_in_synthesis = []
        for name, text in analyses.items():
            agent_vec = _term_vector(text)
            unique_to_agent = set(agent_vec) - set().union(
                *(_term_vector(t) for n, t in analyses.items() if n != name)
            )
            if unique_to_agent:
                covered = sum(1 for w in unique_to_agent if w in synthesis_vec)
                agent_coverage_in_synthesis.append(covered / len(unique_to_agent))
            else:
                agent_coverage_in_synthesis.append(1.0)

        synthesis_coverage = sum(agent_coverage_in_synthesis) / max(len(agent_coverage_in_synthesis), 1)

        # Productivity = high tension + high synthesis coverage
        # (divergent views that get integrated = productive)
        productivity = tension * synthesis_coverage
        # Coherence gain: synthesis should be more coherent than raw ensemble
        synthesis_vs_agents = _cosine_similarity(synthesis_vec, _term_vector(" ".join(analyses.values())))
        coherence_gain = max(0.0, synthesis_vs_agents - ensemble_coherence)

        return {
            "tension_magnitude": round(tension, 4),
            "ensemble_coherence": round(ensemble_coherence, 4),
            "synthesis_coverage": round(synthesis_coverage, 4),
            "coherence_gain": round(coherence_gain, 4),
            "productivity": round(productivity, 4),
        }

    def score_perspective_coverage(
        self, analyses: Dict[str, str],
    ) -> Dict[str, float]:
        """Score how deeply each RC+xi perspective is actually engaged.

        Returns:
            Dict mapping perspective name -> coverage score 0-1.
        """
        all_text_lower = {name: text.lower() for name, text in analyses.items()}
        coverage = {}
        for perspective, vocab in _PERSPECTIVE_VOCAB.items():
            # Check across all agents, not just the named agent
            all_words = " ".join(all_text_lower.values())
            hits = sum(1 for term in vocab if term in all_words)
            coverage[perspective] = round(hits / len(vocab), 4)
        return coverage

    def score_debate_convergence(
        self,
        round_analyses: List[Dict[str, str]],
    ) -> Dict[str, object]:
        """Track tension decay across multiple debate rounds.

        Takes a list of analyses dicts (one per round). Measures whether
        tension decreases (convergence) or increases (divergence).

        Returns:
            Dict with per-round tension, decay_rate, is_converging.
        """
        if not round_analyses:
            return {"per_round_tension": [], "decay_rate": 0.0, "is_converging": False}

        per_round = [self.score_tension_magnitude(a) for a in round_analyses]

        if len(per_round) >= 2:
            initial = per_round[0]
            final = per_round[-1]
            decay_rate = (initial - final) / max(initial, 1e-6)
        else:
            decay_rate = 0.0

        return {
            "per_round_tension": per_round,
            "decay_rate": round(decay_rate, 4),
            "is_converging": decay_rate > 0.05,
        }

    def full_epistemic_report(
        self,
        analyses: Dict[str, str],
        synthesis: str,
    ) -> Dict[str, object]:
        """Complete RC+xi metrics report for a single forge cycle."""
        return {
            "pairwise_tension": self.score_pairwise_tension(analyses),
            "tension_magnitude": self.score_tension_magnitude(analyses),
            "ensemble_coherence": self.score_ensemble_coherence(analyses),
            "perspective_coverage": self.score_perspective_coverage(analyses),
            "tension_productivity": self.score_tension_productivity(analyses, synthesis),
        }