# Codette-Reasoning / reasoning_forge/epistemic_metrics.py
# Author: Jonathan Harrison
# Full Codette codebase sync — transparency release (commit 74f2af5)
"""
Epistemic Metrics — RC+xi tension and coherence measurement for the Reasoning Forge.
Implements the core RC+xi equations within the forge context:
- Epistemic tension (Eq. 2): xi_n = ||A_{n+1} - A_n||^2
- Phase coherence (Eq. 11): Gamma = mean(|cos(theta_i - theta_bar)|)
- Perspective coverage scoring
- Tension decay tracking across debate rounds
These metrics let the forge quantify whether multi-agent reasoning actually
converges (productive tension resolution) or stalls (tension suppression).
"""
from __future__ import annotations
import math
import re
from collections import Counter
from typing import Dict, List, Optional, Tuple
# ---------------------------------------------------------------------------
# Text -> vector helpers (lightweight, no external deps)
# ---------------------------------------------------------------------------
_STOP_WORDS = {
"the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
"have", "has", "had", "do", "does", "did", "will", "would", "shall",
"should", "may", "might", "must", "can", "could", "to", "of", "in",
"for", "on", "with", "at", "by", "from", "as", "into", "through",
"during", "before", "after", "and", "but", "or", "nor", "not", "so",
"yet", "both", "this", "that", "these", "those", "it", "its", "they",
"them", "their", "we", "our", "you", "your", "he", "she", "his", "her",
}
def _tokenize(text: str) -> List[str]:
return [w for w in re.findall(r"[a-z]{3,}", text.lower()) if w not in _STOP_WORDS]
def _term_vector(text: str) -> Counter:
return Counter(_tokenize(text))
def _cosine_similarity(vec_a: Counter, vec_b: Counter) -> float:
keys = set(vec_a) | set(vec_b)
if not keys:
return 0.0
dot = sum(vec_a.get(k, 0) * vec_b.get(k, 0) for k in keys)
mag_a = math.sqrt(sum(v * v for v in vec_a.values()))
mag_b = math.sqrt(sum(v * v for v in vec_b.values()))
if mag_a == 0 or mag_b == 0:
return 0.0
return dot / (mag_a * mag_b)
# ---------------------------------------------------------------------------
# Perspective vocabulary banks (for coverage scoring)
# ---------------------------------------------------------------------------
# Hand-curated keyword banks, one per named reasoning perspective.
# Consumed by EpistemicMetrics.score_perspective_coverage, which scores a
# perspective as the fraction of its vocabulary found (by substring match)
# anywhere in the combined lowercase analyses text. All terms must be
# lowercase for that match to work.
_PERSPECTIVE_VOCAB = {
"Newton": {
"force", "energy", "momentum", "conservation", "equilibrium", "dynamics",
"causality", "mass", "acceleration", "entropy", "thermodynamic",
"symmetry", "invariance", "field", "potential", "kinetic",
},
"Quantum": {
"probability", "superposition", "uncertainty", "complementarity",
"entanglement", "wave", "particle", "observer", "collapse",
"interference", "tunneling", "decoherence", "amplitude",
},
"Ethics": {
"ethical", "moral", "fairness", "justice", "rights", "duty",
"consequence", "harm", "benefit", "stakeholder", "autonomy",
"consent", "accountability", "responsibility", "welfare",
},
"Philosophy": {
"epistemology", "ontology", "metaphysics", "assumption", "paradox",
"dialectic", "phenomenology", "consciousness", "existence", "meaning",
"truth", "knowledge", "belief", "certainty", "skepticism",
},
"DaVinci": {
"creative", "invention", "analogy", "design", "innovation",
"prototype", "biomimicry", "synthesis", "novel", "interdisciplinary",
"combination", "reimagine", "solution", "insight",
},
"Empathy": {
"emotional", "experience", "feeling", "compassion", "support",
"community", "relationship", "wellbeing", "vulnerability",
"understanding", "perspective", "human", "care", "dignity",
},
"Consciousness": {
"awareness", "recursive", "self-referential", "metacognition",
"emergence", "cognition", "reflection", "introspection",
"sentience", "subjective", "qualia", "binding", "attention",
"intentionality", "phenomenal",
},
"SystemsArchitecture": {
"modular", "scalable", "interface", "pattern", "component",
"microservice", "pipeline", "throughput", "latency", "resilience",
"abstraction", "coupling", "cohesion", "architecture",
},
}
# ---------------------------------------------------------------------------
# EpistemicMetrics
# ---------------------------------------------------------------------------
class EpistemicMetrics:
    """Measure RC+xi epistemic tension and coherence across agent analyses.

    All methods are stateless: they take ``{agent_name: analysis_text}``
    mappings and compare the texts via lightweight bag-of-words term
    vectors (``_term_vector`` / ``_cosine_similarity``).
    """

    def score_pairwise_tension(
        self, analyses: Dict[str, str],
    ) -> Dict[str, float]:
        """Compute epistemic tension between each pair of agent analyses.

        Tension is ``1 - cosine_similarity``: high when perspectives
        diverge, low when they repeat each other.

        Args:
            analyses: Mapping of agent name -> analysis text.

        Returns:
            Dict with keys like "Newton_vs_Ethics" -> tension float 0-1
            (rounded to 4 decimals). Empty for fewer than two agents.
        """
        agents = list(analyses.keys())
        # Vectorize each analysis exactly once up front.
        vectors = {name: _term_vector(text) for name, text in analyses.items()}
        tensions: Dict[str, float] = {}
        for i in range(len(agents)):
            for j in range(i + 1, len(agents)):
                sim = _cosine_similarity(vectors[agents[i]], vectors[agents[j]])
                tensions[f"{agents[i]}_vs_{agents[j]}"] = round(1.0 - sim, 4)
        return tensions

    def score_ensemble_coherence(
        self, analyses: Dict[str, str],
    ) -> float:
        """Phase coherence Gamma across the agent ensemble.

        Analogous to Eq. 11 in the embodied sim:
            Gamma = mean(cos(theta_i - theta_bar))
        Here 'theta' is the term-vector direction, and coherence measures
        how much all agents point in a similar semantic direction.

        Returns:
            Gamma in [0, 1] where 1 = all agents semantically aligned.
            Degenerate ensembles (fewer than 2 analyses) score 1.0.
        """
        vectors = [_term_vector(text) for text in analyses.values()]
        if len(vectors) < 2:
            return 1.0
        # Centroid = summed term counts across all agents; each agent is
        # then scored against that shared direction.
        centroid: Counter = Counter()
        for v in vectors:
            centroid.update(v)
        similarities = [_cosine_similarity(v, centroid) for v in vectors]
        return round(sum(similarities) / len(similarities), 4)

    def score_tension_magnitude(
        self, analyses: Dict[str, str],
    ) -> float:
        """Overall epistemic tension magnitude (mean pairwise tension).

        Analogous to Eq. 2 xi_n but measured across agents rather than
        across time steps.

        Returns:
            Mean tension 0-1 where 0 = all identical, 1 = fully
            orthogonal. 0.0 when there are fewer than two analyses.
        """
        tensions = self.score_pairwise_tension(analyses)
        if not tensions:
            return 0.0
        return round(sum(tensions.values()) / len(tensions), 4)

    def score_tension_productivity(
        self,
        analyses: Dict[str, str],
        synthesis: str,
    ) -> Dict[str, float]:
        """Evaluate whether tension is productive (resolved in synthesis)
        or destructive (suppressed or ignored).

        Productive tension: agents diverge but synthesis addresses the
        divergence explicitly. Destructive: synthesis ignores disagreements.

        Args:
            analyses: Mapping of agent name -> analysis text.
            synthesis: The merged answer produced from the analyses.

        Returns:
            Dict with tension_magnitude, ensemble_coherence,
            synthesis_coverage, coherence_gain, productivity (all 0-1,
            rounded to 4 decimals).
        """
        tension = self.score_tension_magnitude(analyses)
        ensemble_coherence = self.score_ensemble_coherence(analyses)
        synthesis_vec = _term_vector(synthesis)
        # Fix: vectorize every analysis once. The original re-tokenized
        # all OTHER agents inside the per-agent loop — O(n^2) calls to
        # _term_vector for n agents.
        agent_vecs = {name: _term_vector(text) for name, text in analyses.items()}
        agent_coverage_in_synthesis: List[float] = []
        for name, agent_vec in agent_vecs.items():
            # Vocabulary used by this agent and no other.
            other_terms: set = set()
            for other_name, other_vec in agent_vecs.items():
                if other_name != name:
                    other_terms.update(other_vec)
            unique_to_agent = set(agent_vec) - other_terms
            if unique_to_agent:
                covered = sum(1 for w in unique_to_agent if w in synthesis_vec)
                agent_coverage_in_synthesis.append(covered / len(unique_to_agent))
            else:
                # No unique vocabulary -> nothing for the synthesis to drop.
                agent_coverage_in_synthesis.append(1.0)
        synthesis_coverage = sum(agent_coverage_in_synthesis) / max(
            len(agent_coverage_in_synthesis), 1
        )
        # Productivity = high tension + high synthesis coverage
        # (divergent views that get integrated = productive).
        productivity = tension * synthesis_coverage
        # Coherence gain: synthesis should be more coherent than raw ensemble.
        synthesis_vs_agents = _cosine_similarity(
            synthesis_vec, _term_vector(" ".join(analyses.values()))
        )
        coherence_gain = max(0.0, synthesis_vs_agents - ensemble_coherence)
        return {
            "tension_magnitude": round(tension, 4),
            "ensemble_coherence": round(ensemble_coherence, 4),
            "synthesis_coverage": round(synthesis_coverage, 4),
            "coherence_gain": round(coherence_gain, 4),
            "productivity": round(productivity, 4),
        }

    def score_perspective_coverage(
        self, analyses: Dict[str, str],
    ) -> Dict[str, float]:
        """Score how deeply each RC+xi perspective is actually engaged.

        Coverage is checked across ALL agents' text, not just the agent
        named after the perspective, via case-insensitive substring
        matching (so "force" also hits "forces").

        Returns:
            Dict mapping perspective name -> coverage score 0-1.
        """
        # Fix: the concatenated lowercase corpus is loop-invariant; the
        # original rebuilt the join once per perspective.
        all_words = " ".join(text.lower() for text in analyses.values())
        coverage: Dict[str, float] = {}
        for perspective, vocab in _PERSPECTIVE_VOCAB.items():
            hits = sum(1 for term in vocab if term in all_words)
            coverage[perspective] = round(hits / len(vocab), 4)
        return coverage

    def score_debate_convergence(
        self,
        round_analyses: List[Dict[str, str]],
    ) -> Dict[str, object]:
        """Track tension decay across multiple debate rounds.

        Takes a list of analyses dicts (one per round). Measures whether
        tension decreases (convergence) or increases (divergence).

        Returns:
            Dict with per_round_tension (list of floats), decay_rate
            (fractional drop from first to last round), is_converging
            (True when the drop exceeds 5%).
        """
        if not round_analyses:
            return {"per_round_tension": [], "decay_rate": 0.0, "is_converging": False}
        per_round = [self.score_tension_magnitude(a) for a in round_analyses]
        if len(per_round) >= 2:
            initial = per_round[0]
            final = per_round[-1]
            # max(..., 1e-6) guards against division by zero when the
            # first round already had zero tension.
            decay_rate = (initial - final) / max(initial, 1e-6)
        else:
            decay_rate = 0.0
        return {
            "per_round_tension": per_round,
            "decay_rate": round(decay_rate, 4),
            "is_converging": decay_rate > 0.05,
        }

    def full_epistemic_report(
        self,
        analyses: Dict[str, str],
        synthesis: str,
    ) -> Dict[str, object]:
        """Complete RC+xi metrics report for a single forge cycle."""
        return {
            "pairwise_tension": self.score_pairwise_tension(analyses),
            "tension_magnitude": self.score_tension_magnitude(analyses),
            "ensemble_coherence": self.score_ensemble_coherence(analyses),
            "perspective_coverage": self.score_perspective_coverage(analyses),
            "tension_productivity": self.score_tension_productivity(analyses, synthesis),
        }