| """
|
| Cocoon Stability Field — Collapse Detection Engine
|
| ===================================================
|
|
|
| FFT-based stability validation that detects synthesis loop collapse
|
| BEFORE corrupted output is generated.
|
|
|
| Based on Codette_Deep_Simulation_v1.py cocoon_stability_field() equation:
|
| stability = ∫|F(k)|² dk < ε_threshold
|
|
|
| Purpose: Halt debate if system enters instability zone (gamma < 0.35,
|
| runaway vocabulary patterns, self-referential cascades).
|
|
|
| Recovered from: J:\codette-training-lab\new data\Codette_Deep_Simulation_v1.py
|
| """
|
|
|
| import numpy as np
|
| from typing import Dict, List, Tuple, Optional
|
| import logging
|
|
|
| logger = logging.getLogger(__name__)
|
|
|
|
|
class CocoonStabilityField:
    """
    FFT-based stability validator for debate coherence.

    Each agent response is turned into a character-code power spectrum and
    run through a set of heuristics:

    * energy concentration: share of spectral power held by the strongest bins
    * self-similarity: cosine similarity with the agent's previous spectrum
    * vocabulary diversity: ratio of unique words to total words
    * length: responses that are suspiciously short or long

    Every triggered heuristic adds a flag; the stability score drops by 0.25
    per flag, and an agent is considered unstable once the score is no longer
    above ``COHERENCE_FLOOR``.
    """

    # Share of power in the strongest bins above which energy is "concentrated".
    ENERGY_CONCENTRATION_THRESHOLD = 0.85
    # Cosine similarity with the previous spectrum above which a response repeats.
    SELF_SIMILARITY_THRESHOLD = 0.75
    # An agent is stable only while its stability score is strictly above this.
    COHERENCE_FLOOR = 0.3
    # Vocabulary is flagged when uniqueness < 1.0 - RUNAWAY_VOCABULARY_RATIO.
    RUNAWAY_VOCABULARY_RATIO = 0.6
    # Gamma coherence below this value halts the debate.
    GAMMA_COLLAPSE_THRESHOLD = 0.35
    # Upper bound on the number of characters fed into the FFT.
    MAX_ANALYZED_CHARS = 1000

    def __init__(self, verbose: bool = False):
        """
        Create an empty stability field.

        Args:
            verbose: When True, flagged responses are logged at INFO level.
        """
        self.verbose = verbose
        # Last spectrum seen per agent, used by the self-similarity check.
        self.frequency_signatures: Dict[str, np.ndarray] = {}
        # One entry per validated round, appended by validate_round().
        self.stability_history: List[Dict] = []

    def text_to_spectrum(self, text: str, fft_size: int = 256) -> np.ndarray:
        """
        Convert text to a normalized power spectrum.

        The text is truncated to at most ``fft_size`` characters (and never
        more than ``MAX_ANALYZED_CHARS``) so it always fits the FFT buffer;
        shorter text is zero-padded.

        NOTE(review): the samples are non-negative character codes, so bin 0
        (the DC component) always carries the maximum power and dominates the
        normalized spectrum. Confirm whether mean removal was intended.

        Args:
            text: Response text to analyze
            fft_size: FFT size (should be power of 2)

        Returns:
            The first ``fft_size // 2`` bins of the power spectrum, scaled so
            the strongest bin is 1.0 (all zeros for empty text).

        Raises:
            ValueError: If ``fft_size`` is smaller than 1.
        """
        if fft_size < 1:
            raise ValueError(f"fft_size must be a positive integer, got {fft_size}")

        # Truncate to the buffer size: writing more samples than the buffer
        # holds would raise a broadcasting error.
        limit = min(fft_size, self.MAX_ANALYZED_CHARS)
        char_codes = np.array([ord(c) % 256 for c in text[:limit]], dtype=np.float32)

        padded = np.zeros(fft_size, dtype=np.float32)
        padded[: len(char_codes)] = char_codes

        power_spectrum = np.abs(np.fft.fft(padded)) ** 2

        max_power = float(np.max(power_spectrum))
        if max_power == 0.0:
            # Empty / all-zero input: avoid dividing by zero.
            max_power = 1.0
        normalized_spectrum = power_spectrum / max_power

        # The input is real, so the upper half of the spectrum mirrors the
        # lower half; keep only the lower half.
        return normalized_spectrum[: max(1, fft_size // 2)]

    def check_energy_concentration(self, spectrum: np.ndarray) -> Tuple[float, bool]:
        """
        Measure how much spectral energy sits in the 10 strongest bins.

        Args:
            spectrum: Power spectrum from FFT

        Returns:
            (concentration_ratio, is_concerning) as built-in float and bool.
            The ratio is 0.0 for an all-zero spectrum.
        """
        top_k = 10
        top_sum = float(np.sum(np.sort(spectrum)[-top_k:]))
        total_sum = float(np.sum(spectrum)) or 1.0

        concentration = top_sum / total_sum
        return concentration, concentration > self.ENERGY_CONCENTRATION_THRESHOLD

    def check_self_similarity(self, agent_name: str,
                              spectrum: np.ndarray) -> Tuple[float, bool]:
        """
        Compare a spectrum with the previous one recorded for the same agent.

        The new spectrum always replaces the stored one. With no previous
        spectrum, or one of a different shape, there is nothing to compare
        against and the result is ``(0.0, False)``.

        Args:
            agent_name: Name of agent for history lookup
            spectrum: New response spectrum

        Returns:
            (similarity_score, is_concerning) where the score is the cosine
            similarity with the previous spectrum, as built-in float and bool.
        """
        prev_spectrum = self.frequency_signatures.get(agent_name)
        self.frequency_signatures[agent_name] = spectrum

        if prev_spectrum is None or np.shape(prev_spectrum) != np.shape(spectrum):
            return 0.0, False

        # The small epsilon keeps the division defined for all-zero spectra.
        denominator = np.linalg.norm(prev_spectrum) * np.linalg.norm(spectrum) + 1e-8
        similarity = float(np.dot(prev_spectrum, spectrum) / denominator)

        return similarity, similarity > self.SELF_SIMILARITY_THRESHOLD

    def check_vocabulary_diversity(self, text: str) -> Tuple[float, bool]:
        """
        Check how repetitive the vocabulary of a response is.

        Words are the whitespace-separated tokens of the lower-cased text.
        Text shorter than 20 characters, or without any words, is not judged.

        Args:
            text: Response text

        Returns:
            (uniqueness_ratio, is_concerning) where the ratio is
            unique words / total words.
        """
        if len(text) < 20:
            return 1.0, False

        words = text.lower().split()
        if not words:
            return 1.0, False

        uniqueness = len(set(words)) / len(words)
        is_concerning = uniqueness < (1.0 - self.RUNAWAY_VOCABULARY_RATIO)

        return uniqueness, is_concerning

    def validate_analysis(self, agent_name: str, text: str) -> Dict:
        """
        Full stability validation for a single agent response.

        Updates the stored spectrum for ``agent_name`` as a side effect.

        Args:
            agent_name: Name of agent
            text: Response text

        Returns:
            {
                'agent': str,
                'is_stable': bool,
                'stability_score': float (0-1),
                'flags': List[str],
                'spectrum': np.ndarray,
                'concerns': Dict
            }
        """
        spectrum = self.text_to_spectrum(text)
        flags: List[str] = []

        conc, conc_concerning = self.check_energy_concentration(spectrum)
        if conc_concerning:
            flags.append('HIGH_ENERGY_CONCENTRATION')

        similarity, sim_concerning = self.check_self_similarity(agent_name, spectrum)
        if sim_concerning:
            flags.append('REPEATING_RESPONSE_PATTERN')

        uniqueness, vocab_concerning = self.check_vocabulary_diversity(text)
        if vocab_concerning:
            flags.append('LOW_VOCABULARY_DIVERSITY')

        if len(text) < 50:
            flags.append('SUSPICIOUSLY_SHORT')
        if len(text) > 10000:
            flags.append('SUSPICIOUSLY_LONG')

        # Built-in float/bool only, so the report stays serializable.
        concerns = {
            'energy_concentration': {
                'ratio': float(conc),
                'concerning': bool(conc_concerning)
            },
            'self_similarity': {
                'ratio': float(similarity),
                'concerning': bool(sim_concerning)
            },
            'vocabulary_diversity': {
                'uniqueness': float(uniqueness),
                'concerning': bool(vocab_concerning)
            }
        }

        # Each flag costs a quarter of the score.
        stability_score = max(0.0, 1.0 - (len(flags) * 0.25))
        is_stable = stability_score > self.COHERENCE_FLOOR

        if self.verbose and flags:
            logger.info(" %s: stability=%.2f, flags=%s", agent_name, stability_score, flags)

        return {
            'agent': agent_name,
            'is_stable': is_stable,
            'stability_score': stability_score,
            'flags': flags,
            'spectrum': spectrum,
            'concerns': concerns
        }

    def validate_round(self, analyses: Dict[str, str],
                       round_num: int) -> Tuple[bool, List[Dict], float]:
        """
        Validate all agents' responses in a debate round.

        Appends one entry to ``stability_history`` and logs a warning when at
        least one agent is unstable.

        Args:
            analyses: Dict mapping agent_name → response_text
            round_num: Round number (for logging)

        Returns:
            (all_stable, validation_reports, avg_stability); the average is
            0.5 when ``analyses`` is empty.
        """
        reports = [
            self.validate_analysis(agent_name, text)
            for agent_name, text in analyses.items()
        ]

        if reports:
            avg_stability = float(np.mean([r['stability_score'] for r in reports]))
        else:
            avg_stability = 0.5

        unstable_agents = [r['agent'] for r in reports if not r['is_stable']]
        all_stable = not unstable_agents

        if unstable_agents:
            logger.warning(
                "Round %s: Unstable agents detected: %s (avg_stability=%.2f)",
                round_num, unstable_agents, avg_stability,
            )

        self.stability_history.append({
            'round': round_num,
            'all_stable': all_stable,
            'avg_stability': avg_stability,
            'unstable_agents': unstable_agents,
            'reports': reports,
            # Filled in by should_halt_debate() when it decides to halt.
            'halted': False,
            'halt_reason': "",
        })

        return all_stable, reports, avg_stability

    def _record_halt(self, reason: str) -> Tuple[bool, str]:
        """Log a halt decision, mark the latest history entry, return (True, reason)."""
        logger.warning("STABILITY CHECK: Halting debate. %s", reason)
        if self.stability_history:
            self.stability_history[-1]['halted'] = True
            self.stability_history[-1]['halt_reason'] = reason
        return True, reason

    def should_halt_debate(self, analyses: Dict[str, str],
                           round_num: int, gamma: Optional[float] = None) -> Tuple[bool, str]:
        """
        Determine if debate should halt before synthesis.

        Runs validate_round() first (which records the round in the history),
        then halts on the first matching rule:
            1. Two or more agents unstable
            2. Gamma coherence < GAMMA_COLLAPSE_THRESHOLD (0.35)
            3. Two or more agents flagged "REPEATING_RESPONSE_PATTERN"

        Args:
            analyses: Current round analyses
            round_num: Current round number
            gamma: Current gamma coherence (optional)

        Returns:
            (should_halt, reason); reason is "" when the debate may continue.
        """
        _, reports, avg_stability = self.validate_round(analyses, round_num)

        unstable_count = sum(1 for r in reports if not r['is_stable'])
        if unstable_count >= 2:
            return self._record_halt(
                f"Multiple agents unstable ({unstable_count}/{len(reports)}) "
                f"at round {round_num}. Avg stability: {avg_stability:.2f}"
            )

        threshold = self.GAMMA_COLLAPSE_THRESHOLD
        if gamma is not None and gamma < threshold:
            return self._record_halt(
                f"System in collapse zone (gamma={gamma:.2f} < {threshold:.2f})"
            )

        repeating_count = sum(
            1 for r in reports
            if 'REPEATING_RESPONSE_PATTERN' in r['flags']
        )
        if repeating_count >= 2:
            return self._record_halt(
                f"Multiple agents repeating response patterns ({repeating_count}) "
                f"at round {round_num}. Synthesis loop risk."
            )

        return False, ""

    def get_summary(self) -> Dict:
        """
        Get stability history summary.

        Returns:
            ``{"message": ...}`` when no round has been validated, otherwise:
                total_rounds_checked: number of validated rounds
                average_stability: mean of the per-round average stability
                halts_triggered: rounds with at least one unstable agent
                    (kept under its historical name for compatibility)
                debate_halts: rounds in which should_halt_debate() halted
                recent: up to the last three history entries
        """
        history = self.stability_history
        if not history:
            return {"message": "No stability checks performed"}

        return {
            "total_rounds_checked": len(history),
            "average_stability": float(np.mean([h['avg_stability'] for h in history])),
            "halts_triggered": sum(1 for h in history if not h['all_stable']),
            "debate_halts": sum(1 for h in history if h.get('halted', False)),
            "recent": history[-3:],
        }
|
|
|