| """ |
| Epistemic Metrics — RC+xi tension and coherence measurement for the Reasoning Forge. |
| |
| Implements the core RC+xi equations within the forge context: |
| - Epistemic tension (Eq. 2): xi_n = ||A_{n+1} - A_n||^2 |
| - Phase coherence (Eq. 11): Gamma = mean(|cos(theta_i - theta_bar)|) |
| - Perspective coverage scoring |
| - Tension decay tracking across debate rounds |
| |
| These metrics let the forge quantify whether multi-agent reasoning actually |
| converges (productive tension resolution) or stalls (tension suppression). |
| """ |
|
|
| from __future__ import annotations |
|
|
| import math |
| import re |
| from collections import Counter |
| from typing import Dict, List, Optional, Tuple |
|
|
|
|
| |
| |
| |
|
|
| _STOP_WORDS = { |
| "the", "a", "an", "is", "are", "was", "were", "be", "been", "being", |
| "have", "has", "had", "do", "does", "did", "will", "would", "shall", |
| "should", "may", "might", "must", "can", "could", "to", "of", "in", |
| "for", "on", "with", "at", "by", "from", "as", "into", "through", |
| "during", "before", "after", "and", "but", "or", "nor", "not", "so", |
| "yet", "both", "this", "that", "these", "those", "it", "its", "they", |
| "them", "their", "we", "our", "you", "your", "he", "she", "his", "her", |
| } |
|
|
|
|
| def _tokenize(text: str) -> List[str]: |
| return [w for w in re.findall(r"[a-z]{3,}", text.lower()) if w not in _STOP_WORDS] |
|
|
|
|
| def _term_vector(text: str) -> Counter: |
| return Counter(_tokenize(text)) |
|
|
|
|
| def _cosine_similarity(vec_a: Counter, vec_b: Counter) -> float: |
| keys = set(vec_a) | set(vec_b) |
| if not keys: |
| return 0.0 |
| dot = sum(vec_a.get(k, 0) * vec_b.get(k, 0) for k in keys) |
| mag_a = math.sqrt(sum(v * v for v in vec_a.values())) |
| mag_b = math.sqrt(sum(v * v for v in vec_b.values())) |
| if mag_a == 0 or mag_b == 0: |
| return 0.0 |
| return dot / (mag_a * mag_b) |
|
|
|
|
| |
| |
| |
|
|
| _PERSPECTIVE_VOCAB = { |
| "Newton": { |
| "force", "energy", "momentum", "conservation", "equilibrium", "dynamics", |
| "causality", "mass", "acceleration", "entropy", "thermodynamic", |
| "symmetry", "invariance", "field", "potential", "kinetic", |
| }, |
| "Quantum": { |
| "probability", "superposition", "uncertainty", "complementarity", |
| "entanglement", "wave", "particle", "observer", "collapse", |
| "interference", "tunneling", "decoherence", "amplitude", |
| }, |
| "Ethics": { |
| "ethical", "moral", "fairness", "justice", "rights", "duty", |
| "consequence", "harm", "benefit", "stakeholder", "autonomy", |
| "consent", "accountability", "responsibility", "welfare", |
| }, |
| "Philosophy": { |
| "epistemology", "ontology", "metaphysics", "assumption", "paradox", |
| "dialectic", "phenomenology", "consciousness", "existence", "meaning", |
| "truth", "knowledge", "belief", "certainty", "skepticism", |
| }, |
| "DaVinci": { |
| "creative", "invention", "analogy", "design", "innovation", |
| "prototype", "biomimicry", "synthesis", "novel", "interdisciplinary", |
| "combination", "reimagine", "solution", "insight", |
| }, |
| "Empathy": { |
| "emotional", "experience", "feeling", "compassion", "support", |
| "community", "relationship", "wellbeing", "vulnerability", |
| "understanding", "perspective", "human", "care", "dignity", |
| }, |
| "Consciousness": { |
| "awareness", "recursive", "self-referential", "metacognition", |
| "emergence", "cognition", "reflection", "introspection", |
| "sentience", "subjective", "qualia", "binding", "attention", |
| "intentionality", "phenomenal", |
| }, |
| "SystemsArchitecture": { |
| "modular", "scalable", "interface", "pattern", "component", |
| "microservice", "pipeline", "throughput", "latency", "resilience", |
| "abstraction", "coupling", "cohesion", "architecture", |
| }, |
| } |
|
|
|
|
| |
| |
| |
|
|
| class EpistemicMetrics: |
| """Measure RC+xi epistemic tension and coherence across agent analyses.""" |
|
|
| def score_pairwise_tension( |
| self, analyses: Dict[str, str], |
| ) -> Dict[str, float]: |
| """Compute epistemic tension between each pair of agent analyses. |
| |
| Tension is 1 - cosine_similarity: high when perspectives diverge, |
| low when they repeat each other. |
| |
| Returns: |
| Dict with keys like "Newton_vs_Ethics" -> tension float 0-1. |
| """ |
| agents = list(analyses.keys()) |
| vectors = {name: _term_vector(text) for name, text in analyses.items()} |
| tensions = {} |
| for i in range(len(agents)): |
| for j in range(i + 1, len(agents)): |
| sim = _cosine_similarity(vectors[agents[i]], vectors[agents[j]]) |
| tensions[f"{agents[i]}_vs_{agents[j]}"] = round(1.0 - sim, 4) |
| return tensions |
|
|
| def score_ensemble_coherence( |
| self, analyses: Dict[str, str], |
| ) -> float: |
| """Phase coherence Gamma across the agent ensemble. |
| |
| Analogous to Eq. 11 in the embodied sim: |
| Gamma = mean(cos(theta_i - theta_bar)) |
| |
| Here 'theta' is the term-vector direction, and coherence measures |
| how much all agents point in a similar semantic direction. |
| |
| Returns: |
| Gamma in [0, 1] where 1 = all agents semantically aligned. |
| """ |
| vectors = [_term_vector(text) for text in analyses.values()] |
| if len(vectors) < 2: |
| return 1.0 |
|
|
| |
| centroid: Counter = Counter() |
| for v in vectors: |
| centroid.update(v) |
|
|
| similarities = [_cosine_similarity(v, centroid) for v in vectors] |
| return round(sum(similarities) / len(similarities), 4) |
|
|
| def score_tension_magnitude( |
| self, analyses: Dict[str, str], |
| ) -> float: |
| """Overall epistemic tension magnitude (mean pairwise tension). |
| |
| Analogous to Eq. 2 xi_n but measured across agents rather than |
| across time steps. |
| |
| Returns: |
| Mean tension 0-1 where 0 = all identical, 1 = fully orthogonal. |
| """ |
| tensions = self.score_pairwise_tension(analyses) |
| if not tensions: |
| return 0.0 |
| return round(sum(tensions.values()) / len(tensions), 4) |
|
|
| def score_tension_productivity( |
| self, |
| analyses: Dict[str, str], |
| synthesis: str, |
| ) -> Dict[str, float]: |
| """Evaluate whether tension is productive (resolved in synthesis) |
| or destructive (suppressed or ignored). |
| |
| Productive tension: agents diverge but synthesis addresses the |
| divergence explicitly. Destructive: synthesis ignores disagreements. |
| |
| Returns: |
| Dict with tension_magnitude, coherence_gain, productivity score. |
| """ |
| tension = self.score_tension_magnitude(analyses) |
| ensemble_coherence = self.score_ensemble_coherence(analyses) |
|
|
| |
| synthesis_vec = _term_vector(synthesis) |
| agent_coverage_in_synthesis = [] |
| for name, text in analyses.items(): |
| agent_vec = _term_vector(text) |
| unique_to_agent = set(agent_vec) - set().union( |
| *(_term_vector(t) for n, t in analyses.items() if n != name) |
| ) |
| if unique_to_agent: |
| covered = sum(1 for w in unique_to_agent if w in synthesis_vec) |
| agent_coverage_in_synthesis.append(covered / len(unique_to_agent)) |
| else: |
| agent_coverage_in_synthesis.append(1.0) |
|
|
| synthesis_coverage = sum(agent_coverage_in_synthesis) / max(len(agent_coverage_in_synthesis), 1) |
|
|
| |
| |
| productivity = tension * synthesis_coverage |
| |
| synthesis_vs_agents = _cosine_similarity(synthesis_vec, _term_vector(" ".join(analyses.values()))) |
| coherence_gain = max(0.0, synthesis_vs_agents - ensemble_coherence) |
|
|
| return { |
| "tension_magnitude": round(tension, 4), |
| "ensemble_coherence": round(ensemble_coherence, 4), |
| "synthesis_coverage": round(synthesis_coverage, 4), |
| "coherence_gain": round(coherence_gain, 4), |
| "productivity": round(productivity, 4), |
| } |
|
|
| def score_perspective_coverage( |
| self, analyses: Dict[str, str], |
| ) -> Dict[str, float]: |
| """Score how deeply each RC+xi perspective is actually engaged. |
| |
| Returns: |
| Dict mapping perspective name -> coverage score 0-1. |
| """ |
| all_text_lower = {name: text.lower() for name, text in analyses.items()} |
| coverage = {} |
| for perspective, vocab in _PERSPECTIVE_VOCAB.items(): |
| |
| all_words = " ".join(all_text_lower.values()) |
| hits = sum(1 for term in vocab if term in all_words) |
| coverage[perspective] = round(hits / len(vocab), 4) |
| return coverage |
|
|
| def score_debate_convergence( |
| self, |
| round_analyses: List[Dict[str, str]], |
| ) -> Dict[str, object]: |
| """Track tension decay across multiple debate rounds. |
| |
| Takes a list of analyses dicts (one per round). Measures whether |
| tension decreases (convergence) or increases (divergence). |
| |
| Returns: |
| Dict with per-round tension, decay_rate, is_converging. |
| """ |
| if not round_analyses: |
| return {"per_round_tension": [], "decay_rate": 0.0, "is_converging": False} |
|
|
| per_round = [self.score_tension_magnitude(a) for a in round_analyses] |
|
|
| if len(per_round) >= 2: |
| initial = per_round[0] |
| final = per_round[-1] |
| decay_rate = (initial - final) / max(initial, 1e-6) |
| else: |
| decay_rate = 0.0 |
|
|
| return { |
| "per_round_tension": per_round, |
| "decay_rate": round(decay_rate, 4), |
| "is_converging": decay_rate > 0.05, |
| } |
|
|
| def full_epistemic_report( |
| self, |
| analyses: Dict[str, str], |
| synthesis: str, |
| ) -> Dict[str, object]: |
| """Complete RC+xi metrics report for a single forge cycle.""" |
| return { |
| "pairwise_tension": self.score_pairwise_tension(analyses), |
| "tension_magnitude": self.score_tension_magnitude(analyses), |
| "ensemble_coherence": self.score_ensemble_coherence(analyses), |
| "perspective_coverage": self.score_perspective_coverage(analyses), |
| "tension_productivity": self.score_tension_productivity(analyses, synthesis), |
| } |
|
|