File size: 11,595 Bytes

ed1b365

"""

Cocoon Stability Field — Collapse Detection Engine

===================================================



FFT-based stability validation that detects synthesis loop collapse

BEFORE corrupted output is generated.



Based on Codette_Deep_Simulation_v1.py cocoon_stability_field() equation:

    stability = ∫|F(k)|² dk < ε_threshold



Purpose: Halt debate if system enters instability zone (gamma < 0.4,

runaway vocabulary patterns, self-referential cascades).



Recovered from: J:\codette-training-lab\new data\Codette_Deep_Simulation_v1.py

"""

import numpy as np
from typing import Dict, List, Tuple, Optional
import logging

logger = logging.getLogger(__name__)


class CocoonStabilityField:
    """

    FFT-based stability validator for debate coherence.



    Monitors frequency-domain energy distribution in agent responses.

    If energy becomes too concentrated (self-similarity, repeating patterns)

    or too diffuse (completely incoherent), flags collapse risk.

    """

    # Stability threshold parameters (empirically calibrated)
    ENERGY_CONCENTRATION_THRESHOLD = 0.85  # Max allowed variance in top frequencies
    SELF_SIMILARITY_THRESHOLD = 0.75       # Max allowed cosine similarity between consecutive responses
    COHERENCE_FLOOR = 0.3                   # Minimum coherence before stability alert
    RUNAWAY_VOCABULARY_RATIO = 0.6          # % unique words triggering concern

    def __init__(self, verbose: bool = False):
        self.verbose = verbose
        self.frequency_signatures: Dict[str, np.ndarray] = {}
        self.stability_history: List[Dict] = []

    def text_to_spectrum(self, text: str, fft_size: int = 256) -> np.ndarray:
        """

        Convert text to frequency spectrum for FFT analysis.



        Args:

            text: Response text to analyze

            fft_size: FFT size (should be power of 2)



        Returns:

            Normalized power spectrum [0, 1]

        """
        # Character-based encoding
        char_codes = np.array([ord(c) % 256 for c in text[:1000]], dtype=np.float32)

        # Pad to fft_size
        padded = np.zeros(fft_size, dtype=np.float32)
        padded[: len(char_codes)] = char_codes

        # Apply FFT
        fft_result = np.fft.fft(padded)
        power_spectrum = np.abs(fft_result) ** 2

        # Normalize
        max_power = np.max(power_spectrum) or 1.0
        normalized_spectrum = power_spectrum / max_power

        return normalized_spectrum[:128]  # Return only positive frequencies

    def check_energy_concentration(self, spectrum: np.ndarray) -> Tuple[float, bool]:
        """

        Check if spectral energy is too concentrated (self-similarity syndrome).



        Concentrated energy = agent repeating itself/copying other agents.



        Args:

            spectrum: Power spectrum from FFT



        Returns:

            (concentration_ratio, is_concerning)

        """
        # Get top 10 frequencies
        top_k = 10
        top_powers = np.sort(spectrum)[-top_k:]
        top_sum = np.sum(top_powers)
        total_sum = np.sum(spectrum) or 1.0

        concentration = top_sum / total_sum
        is_concerning = concentration > self.ENERGY_CONCENTRATION_THRESHOLD

        return concentration, is_concerning

    def check_self_similarity(self, agent_name: str,

                             spectrum: np.ndarray) -> Tuple[float, bool]:
        """

        Check if agent is repeating itself (same response shape).



        Args:

            agent_name: Name of agent for history lookup

            spectrum: New response spectrum



        Returns:

            (similarity_score, is_concerning)

        """
        if agent_name not in self.frequency_signatures:
            self.frequency_signatures[agent_name] = spectrum
            return 0.0, False

        prev_spectrum = self.frequency_signatures[agent_name]
        similarity = np.dot(prev_spectrum, spectrum) / (
            np.linalg.norm(prev_spectrum) * np.linalg.norm(spectrum) + 1e-8
        )

        self.frequency_signatures[agent_name] = spectrum  # Update

        is_concerning = similarity > self.SELF_SIMILARITY_THRESHOLD
        return float(similarity), is_concerning

    def check_vocabulary_diversity(self, text: str) -> Tuple[float, bool]:
        """

        Check if response vocabulary is repeating (indicators of "Another perspective on...").



        Args:

            text: Response text



        Returns:

            (uniqueness_ratio, is_concerning)

        """
        if len(text) < 20:
            return 1.0, False

        words = text.lower().split()
        if len(words) == 0:
            return 1.0, False

        unique_words = len(set(words))
        uniqueness = unique_words / len(words)

        is_concerning = uniqueness < (1.0 - self.RUNAWAY_VOCABULARY_RATIO)

        return uniqueness, is_concerning

    def validate_analysis(self, agent_name: str, text: str) -> Dict:
        """

        Full stability validation for a single agent response.



        Args:

            agent_name: Name of agent

            text: Response text



        Returns:

            {

                'agent': str,

                'is_stable': bool,

                'stability_score': float (0-1),

                'flags': List[str],

                'spectrum': np.ndarray,

                'concerns': Dict

            }

        """
        spectrum = self.text_to_spectrum(text)

        flags = []
        concerns = {
            'energy_concentration': None,
            'self_similarity': None,
            'vocabulary_diversity': None
        }

        # Check 1: Energy concentration
        conc, conc_concerning = self.check_energy_concentration(spectrum)
        concerns['energy_concentration'] = {
            'ratio': float(conc),
            'concerning': conc_concerning
        }
        if conc_concerning:
            flags.append('HIGH_ENERGY_CONCENTRATION')

        # Check 2: Self-similarity
        similarity, sim_concerning = self.check_self_similarity(agent_name, spectrum)
        concerns['self_similarity'] = {
            'ratio': float(similarity),
            'concerning': sim_concerning
        }
        if sim_concerning:
            flags.append('REPEATING_RESPONSE_PATTERN')

        # Check 3: Vocabulary diversity
        uniqueness, vocab_concerning = self.check_vocabulary_diversity(text)
        concerns['vocabulary_diversity'] = {
            'uniqueness': float(uniqueness),
            'concerning': vocab_concerning
        }
        if vocab_concerning:
            flags.append('LOW_VOCABULARY_DIVERSITY')

        # Check 4: Response length sanity
        if len(text) < 50:
            flags.append('SUSPICIOUSLY_SHORT')
        if len(text) > 10000:
            flags.append('SUSPICIOUSLY_LONG')

        # Overall stability score
        num_flags = len(flags)
        stability_score = max(0.0, 1.0 - (num_flags * 0.25))

        is_stable = stability_score > self.COHERENCE_FLOOR

        if self.verbose and flags:
            logger.info(f"  {agent_name}: stability={stability_score:.2f}, flags={flags}")

        return {
            'agent': agent_name,
            'is_stable': is_stable,
            'stability_score': stability_score,
            'flags': flags,
            'spectrum': spectrum,
            'concerns': concerns
        }

    def validate_round(self, analyses: Dict[str, str],

                       round_num: int) -> Tuple[bool, List[Dict], float]:
        """

        Validate all agents' responses in a debate round.



        Args:

            analyses: Dict mapping agent_name → response_text

            round_num: Round number (for logging)



        Returns:

            (all_stable, validation_reports, avg_stability)

        """
        reports = []
        stability_scores = []

        for agent_name, text in analyses.items():
            report = self.validate_analysis(agent_name, text)
            reports.append(report)
            stability_scores.append(report['stability_score'])

        avg_stability = np.mean(stability_scores) if stability_scores else 0.5

        all_stable = all(r['is_stable'] for r in reports)

        unstable_agents = [r['agent'] for r in reports if not r['is_stable']]
        if unstable_agents:
            logger.warning(
                f"Round {round_num}: Unstable agents detected: {unstable_agents} "
                f"(avg_stability={avg_stability:.2f})"
            )

        # Store in history
        self.stability_history.append({
            'round': round_num,
            'all_stable': all_stable,
            'avg_stability': avg_stability,
            'unstable_agents': unstable_agents,
            'reports': reports
        })

        return all_stable, reports, avg_stability

    def should_halt_debate(self, analyses: Dict[str, str],

                          round_num: int, gamma: Optional[float] = None) -> Tuple[bool, str]:
        """

        Determine if debate should halt before synthesis.



        Halt if:

        1. Multiple agents unstable

        2. Gamma coherence < 0.35 (system collapse zone)

        3. Too many "REPEATING_RESPONSE_PATTERN" flags



        Args:

            analyses: Current round analyses

            round_num: Current round number

            gamma: Current gamma coherence (optional)



        Returns:

            (should_halt, reason)

        """
        all_stable, reports, avg_stability = self.validate_round(analyses, round_num)

        if not all_stable:
            unstable_count = sum(1 for r in reports if not r['is_stable'])
            if unstable_count >= 2:
                reason = (
                    f"Multiple agents unstable ({unstable_count}/{len(reports)}) "
                    f"at round {round_num}. Avg stability: {avg_stability:.2f}"
                )
                logger.warning(f"STABILITY CHECK: Halting debate. {reason}")
                return True, reason

        if gamma is not None and gamma < 0.35:
            reason = f"System in collapse zone (gamma={gamma:.2f} < 0.35)"
            logger.warning(f"STABILITY CHECK: Halting debate. {reason}")
            return True, reason

        # Check for repeating response patterns (synthesis loop indicator)
        repeating_count = sum(
            1 for r in reports
            if 'REPEATING_RESPONSE_PATTERN' in r['flags']
        )
        if repeating_count >= 2:
            reason = (
                f"Multiple agents repeating response patterns ({repeating_count}) "
                f"at round {round_num}. Synthesis loop risk."
            )
            logger.warning(f"STABILITY CHECK: Halting debate. {reason}")
            return True, reason

        return False, ""

    def get_summary(self) -> Dict:
        """Get stability history summary."""
        if not self.stability_history:
            return {"message": "No stability checks performed"}

        return {
            "total_rounds_checked": len(self.stability_history),
            "average_stability": np.mean([h['avg_stability'] for h in self.stability_history]),
            "halts_triggered": sum(1 for h in self.stability_history if not h['all_stable']),
            "recent": self.stability_history[-3:] if len(self.stability_history) >= 3 else self.stability_history,
        }