# Codette-Reasoning / reasoning_forge / cocoon_stability.py
# Raiff1982's picture
# Upload 120 files
# ed1b365 verified
"""
Cocoon Stability Field — Collapse Detection Engine
===================================================
FFT-based stability validation that detects synthesis loop collapse
BEFORE corrupted output is generated.
Based on Codette_Deep_Simulation_v1.py cocoon_stability_field() equation:
stability = ∫|F(k)|² dk < ε_threshold
Purpose: Halt debate if system enters instability zone (gamma < 0.4,
runaway vocabulary patterns, self-referential cascades).
Recovered from: J:\\codette-training-lab\\new data\\Codette_Deep_Simulation_v1.py
"""
import numpy as np
from typing import Dict, List, Tuple, Optional
import logging
# Module-level logger shared by every CocoonStabilityField instance.
logger = logging.getLogger(__name__)


class CocoonStabilityField:
    """
    FFT-based stability validator for debate coherence.

    Each agent response is turned into a normalized power spectrum of its
    character codes. A response is flagged when:

    * its spectral power is concentrated in a handful of bins,
    * its spectrum closely matches the same agent's previous spectrum,
    * its vocabulary is highly repetitive, or
    * its length is implausibly short or long.

    Every flag subtracts 0.25 from a stability score that starts at 1.0; an
    agent is considered stable while the score stays above COHERENCE_FLOOR.

    Instances are stateful: `frequency_signatures` holds the last spectrum
    seen per agent and `stability_history` holds one entry per validated
    round.
    """

    # Stability threshold parameters (described by the original author as
    # empirically calibrated).
    # Max share of total spectral power allowed in the 10 strongest bins.
    ENERGY_CONCENTRATION_THRESHOLD = 0.85
    # Max cosine similarity allowed between an agent's consecutive spectra.
    SELF_SIMILARITY_THRESHOLD = 0.75
    # An agent is stable only while its stability score is above this value.
    COHERENCE_FLOOR = 0.3
    # Vocabulary is flagged when unique/total words < 1 - this ratio,
    # i.e. when more than this fraction of the words are repeats.
    RUNAWAY_VOCABULARY_RATIO = 0.6
    # Gamma coherence below this value halts the debate.
    GAMMA_COLLAPSE_THRESHOLD = 0.35
    # Upper bound on how many leading characters are ever analyzed.
    MAX_ANALYSIS_CHARS = 1000

    def __init__(self, verbose: bool = False):
        """
        Args:
            verbose: When True, log per-agent flags at INFO level.
        """
        self.verbose = verbose
        # Last spectrum seen for each agent, keyed by agent name.
        self.frequency_signatures: Dict[str, np.ndarray] = {}
        # One dict per validate_round() call, in call order.
        self.stability_history: List[Dict] = []

    def text_to_spectrum(self, text: str, fft_size: int = 256) -> np.ndarray:
        """
        Convert text to a normalized power spectrum.

        Only the leading min(fft_size, MAX_ANALYSIS_CHARS) characters are
        used; shorter input is zero-padded to fft_size.

        Args:
            text: Response text to analyze
            fft_size: FFT size (should be power of 2, must be >= 2)

        Returns:
            Power spectrum of the first fft_size // 2 bins, scaled so the
            largest bin of the full spectrum equals 1 (all zeros for empty
            text).

        Raises:
            ValueError: If fft_size is smaller than 2.
        """
        if fft_size < 2:
            raise ValueError(f"fft_size must be >= 2, got {fft_size}")

        # Truncate to what fits in the FFT buffer. Previously up to 1000
        # codes were assigned into an fft_size-long buffer, which raised a
        # broadcast ValueError for any text longer than fft_size.
        usable = min(fft_size, self.MAX_ANALYSIS_CHARS)
        char_codes = np.array(
            [ord(c) % 256 for c in text[:usable]], dtype=np.float32
        )

        padded = np.zeros(fft_size, dtype=np.float32)
        padded[: char_codes.size] = char_codes

        power_spectrum = np.abs(np.fft.fft(padded)) ** 2

        # Guard against division by zero for empty / all-NUL input.
        max_power = float(power_spectrum.max()) or 1.0
        normalized_spectrum = power_spectrum / max_power

        # NOTE(review): the input samples are non-negative, so bin 0 (the DC
        # term) is always the largest bin and therefore the normalizer.
        # Confirm the thresholds above were calibrated with that in mind.
        # The input is real, so the upper half mirrors the lower half.
        return normalized_spectrum[: fft_size // 2]

    def check_energy_concentration(self, spectrum: np.ndarray) -> Tuple[float, bool]:
        """
        Measure how much of the spectral power sits in the strongest bins.

        Args:
            spectrum: Power spectrum from text_to_spectrum()

        Returns:
            (concentration_ratio, is_concerning) where concentration_ratio
            is the power in the 10 strongest bins divided by total power.
        """
        top_k = 10
        top_sum = float(np.sum(np.sort(spectrum)[-top_k:]))
        # An all-zero spectrum yields a ratio of 0 instead of dividing by 0.
        total_sum = float(np.sum(spectrum)) or 1.0

        concentration = top_sum / total_sum
        # Cast so callers get plain Python types (np.bool_ is not JSON-safe).
        is_concerning = bool(concentration > self.ENERGY_CONCENTRATION_THRESHOLD)
        return concentration, is_concerning

    def check_self_similarity(self, agent_name: str,
                              spectrum: np.ndarray) -> Tuple[float, bool]:
        """
        Compare a spectrum against the agent's previously stored spectrum.

        The stored spectrum for agent_name is always replaced by `spectrum`.

        Args:
            agent_name: Name of agent for history lookup
            spectrum: New response spectrum

        Returns:
            (similarity_score, is_concerning). The score is the cosine
            similarity with the previous spectrum, or 0.0 when there is no
            previous spectrum of the same length to compare against.
        """
        prev_spectrum = self.frequency_signatures.get(agent_name)
        self.frequency_signatures[agent_name] = spectrum

        # No baseline yet, or the baseline was produced with another FFT
        # size and cannot be compared element-wise.
        if prev_spectrum is None or prev_spectrum.shape != spectrum.shape:
            return 0.0, False

        # The small epsilon keeps all-zero spectra from dividing by zero.
        denominator = (
            np.linalg.norm(prev_spectrum) * np.linalg.norm(spectrum) + 1e-8
        )
        similarity = float(np.dot(prev_spectrum, spectrum) / denominator)
        is_concerning = bool(similarity > self.SELF_SIMILARITY_THRESHOLD)
        return similarity, is_concerning

    def check_vocabulary_diversity(self, text: str) -> Tuple[float, bool]:
        """
        Check whether the response keeps repeating the same words.

        Args:
            text: Response text

        Returns:
            (uniqueness_ratio, is_concerning) where uniqueness_ratio is
            unique words / total words (case-insensitive, whitespace split).
            Texts shorter than 20 characters are never flagged.
        """
        if len(text) < 20:
            return 1.0, False

        words = text.lower().split()
        if not words:
            return 1.0, False

        uniqueness = len(set(words)) / len(words)
        is_concerning = uniqueness < (1.0 - self.RUNAWAY_VOCABULARY_RATIO)
        return uniqueness, is_concerning

    def validate_analysis(self, agent_name: str, text: str) -> Dict:
        """
        Full stability validation for a single agent response.

        Updates the stored spectrum for agent_name as a side effect.

        Args:
            agent_name: Name of agent
            text: Response text

        Returns:
            {
                'agent': str,
                'is_stable': bool,
                'stability_score': float (0-1),
                'flags': List[str],
                'spectrum': np.ndarray,
                'concerns': Dict
            }
        """
        spectrum = self.text_to_spectrum(text)
        flags: List[str] = []

        # Check 1: Energy concentration
        conc, conc_concerning = self.check_energy_concentration(spectrum)
        if conc_concerning:
            flags.append('HIGH_ENERGY_CONCENTRATION')

        # Check 2: Self-similarity
        similarity, sim_concerning = self.check_self_similarity(agent_name, spectrum)
        if sim_concerning:
            flags.append('REPEATING_RESPONSE_PATTERN')

        # Check 3: Vocabulary diversity
        uniqueness, vocab_concerning = self.check_vocabulary_diversity(text)
        if vocab_concerning:
            flags.append('LOW_VOCABULARY_DIVERSITY')

        # Check 4: Response length sanity
        if len(text) < 50:
            flags.append('SUSPICIOUSLY_SHORT')
        if len(text) > 10000:
            flags.append('SUSPICIOUSLY_LONG')

        concerns = {
            'energy_concentration': {
                'ratio': float(conc),
                'concerning': bool(conc_concerning),
            },
            'self_similarity': {
                'ratio': float(similarity),
                'concerning': bool(sim_concerning),
            },
            'vocabulary_diversity': {
                'uniqueness': float(uniqueness),
                'concerning': bool(vocab_concerning),
            },
        }

        # Each flag costs 0.25; the score is clamped at 0.
        stability_score = max(0.0, 1.0 - (len(flags) * 0.25))
        is_stable = stability_score > self.COHERENCE_FLOOR

        if self.verbose and flags:
            logger.info(
                " %s: stability=%.2f, flags=%s",
                agent_name, stability_score, flags,
            )

        return {
            'agent': agent_name,
            'is_stable': is_stable,
            'stability_score': stability_score,
            'flags': flags,
            'spectrum': spectrum,
            'concerns': concerns,
        }

    def validate_round(self, analyses: Dict[str, str],
                       round_num: int) -> Tuple[bool, List[Dict], float]:
        """
        Validate all agents' responses in a debate round.

        Appends one entry to `stability_history`.

        Args:
            analyses: Dict mapping agent_name -> response_text
            round_num: Round number (for logging)

        Returns:
            (all_stable, validation_reports, avg_stability). avg_stability
            is 0.5 when `analyses` is empty.
        """
        reports = [
            self.validate_analysis(agent_name, text)
            for agent_name, text in analyses.items()
        ]

        if reports:
            avg_stability = float(np.mean([r['stability_score'] for r in reports]))
        else:
            avg_stability = 0.5

        unstable_agents = [r['agent'] for r in reports if not r['is_stable']]
        all_stable = not unstable_agents

        if unstable_agents:
            logger.warning(
                "Round %s: Unstable agents detected: %s (avg_stability=%.2f)",
                round_num, unstable_agents, avg_stability,
            )

        # Store in history. 'halted' / 'halt_reason' are filled in by
        # should_halt_debate() when the round is evaluated through it.
        self.stability_history.append({
            'round': round_num,
            'all_stable': all_stable,
            'avg_stability': avg_stability,
            'unstable_agents': unstable_agents,
            'reports': reports,
            'halted': False,
            'halt_reason': "",
        })

        return all_stable, reports, avg_stability

    def _find_halt_reason(self, reports: List[Dict], round_num: int,
                          avg_stability: float,
                          gamma: Optional[float]) -> str:
        """Return the first matching halt reason, or "" if none applies."""
        unstable_count = sum(1 for r in reports if not r['is_stable'])
        if unstable_count >= 2:
            return (
                f"Multiple agents unstable ({unstable_count}/{len(reports)}) "
                f"at round {round_num}. Avg stability: {avg_stability:.2f}"
            )

        if gamma is not None and gamma < self.GAMMA_COLLAPSE_THRESHOLD:
            return (
                f"System in collapse zone (gamma={gamma:.2f} < "
                f"{self.GAMMA_COLLAPSE_THRESHOLD:.2f})"
            )

        # Check for repeating response patterns (synthesis loop indicator)
        repeating_count = sum(
            1 for r in reports
            if 'REPEATING_RESPONSE_PATTERN' in r['flags']
        )
        if repeating_count >= 2:
            return (
                f"Multiple agents repeating response patterns ({repeating_count}) "
                f"at round {round_num}. Synthesis loop risk."
            )

        return ""

    def should_halt_debate(self, analyses: Dict[str, str],
                           round_num: int, gamma: Optional[float] = None) -> Tuple[bool, str]:
        """
        Determine if debate should halt before synthesis.

        Runs validate_round() on `analyses` (so history and per-agent
        spectra are updated) and records the decision on that history entry.

        Halt if, checked in this order:
        1. Two or more agents are unstable
        2. Gamma coherence < GAMMA_COLLAPSE_THRESHOLD (system collapse zone)
        3. Two or more agents carry the "REPEATING_RESPONSE_PATTERN" flag

        Args:
            analyses: Current round analyses
            round_num: Current round number
            gamma: Current gamma coherence (optional)

        Returns:
            (should_halt, reason); reason is "" when the debate continues.
        """
        _, reports, avg_stability = self.validate_round(analyses, round_num)

        reason = self._find_halt_reason(reports, round_num, avg_stability, gamma)
        should_halt = bool(reason)

        # Record the real decision so get_summary() can count actual halts
        # rather than every round that merely had an unstable agent.
        entry = self.stability_history[-1]
        entry['halted'] = should_halt
        entry['halt_reason'] = reason

        if should_halt:
            logger.warning("STABILITY CHECK: Halting debate. %s", reason)

        return should_halt, reason

    def get_summary(self) -> Dict:
        """
        Get stability history summary.

        Returns:
            {"message": ...} when nothing has been validated yet, otherwise a
            dict with:
              - total_rounds_checked: number of validated rounds
              - average_stability: mean of the per-round average scores
              - halts_triggered: rounds for which should_halt_debate()
                decided to halt
              - unstable_rounds: rounds with at least one unstable agent
              - recent: up to the last three history entries
        """
        history = self.stability_history
        if not history:
            return {"message": "No stability checks performed"}

        return {
            "total_rounds_checked": len(history),
            "average_stability": float(
                np.mean([h['avg_stability'] for h in history])
            ),
            "halts_triggered": sum(1 for h in history if h.get('halted', False)),
            "unstable_rounds": sum(1 for h in history if not h['all_stable']),
            "recent": history[-3:],
        }