"""
Cocoon Stability Field — Collapse Detection Engine
===================================================
FFT-based stability validation that detects synthesis loop collapse
BEFORE corrupted output is generated.
Based on Codette_Deep_Simulation_v1.py cocoon_stability_field() equation:
stability = ∫|F(k)|² dk < ε_threshold
Purpose: Halt debate if system enters instability zone (gamma < 0.4,
runaway vocabulary patterns, self-referential cascades).
Recovered from: J:\\codette-training-lab\\new data\\Codette_Deep_Simulation_v1.py
"""
import numpy as np
from typing import Dict, List, Tuple, Optional
import logging
logger = logging.getLogger(__name__)
class CocoonStabilityField:
    """
    FFT-based stability validator for debate coherence.

    Each agent response is turned into a normalized power spectrum of its
    character codes. A response is then scored with four checks:

    1. share of spectral energy held by the strongest bins,
    2. cosine similarity with the same agent's previous spectrum,
    3. ratio of unique words to total words,
    4. raw text length sanity bounds.

    Every failed check adds one flag; each flag costs 0.25 of the
    stability score. Round-level results are appended to
    ``stability_history``.
    """

    # Flag when the 10 strongest bins hold more than this share of the
    # total spectral energy.
    ENERGY_CONCENTRATION_THRESHOLD = 0.85
    # Flag when cosine similarity with the agent's previous spectrum
    # exceeds this value.
    SELF_SIMILARITY_THRESHOLD = 0.75
    # A response is stable only while its stability score is strictly
    # above this floor.
    COHERENCE_FLOOR = 0.3
    # Flag when the unique-word ratio drops below (1 - this value).
    RUNAWAY_VOCABULARY_RATIO = 0.6
    # Gamma coherence below this value halts the debate.
    GAMMA_COLLAPSE_THRESHOLD = 0.35
    # Upper bound on how many leading characters are ever encoded.
    MAX_ANALYZED_CHARS = 1000

    def __init__(self, verbose: bool = False):
        """
        Args:
            verbose: When True, log per-agent flags at INFO level.
        """
        self.verbose = verbose
        # Last spectrum seen per agent, used by check_self_similarity.
        self.frequency_signatures: Dict[str, np.ndarray] = {}
        # One entry per validate_round call, oldest first.
        self.stability_history: List[Dict] = []

    def text_to_spectrum(self, text: str, fft_size: int = 256) -> np.ndarray:
        """
        Convert text to a normalized power spectrum.

        Args:
            text: Response text to analyze
            fft_size: FFT size (should be power of 2)

        Returns:
            The first ``fft_size // 2`` bins of the power spectrum, scaled
            so the strongest bin of the full spectrum equals 1. All zeros
            for empty text.

        Raises:
            ValueError: If ``fft_size`` is not positive.
        """
        if fft_size < 1:
            raise ValueError(f"fft_size must be a positive integer, got {fft_size}")

        # Only the leading characters that fit in the FFT window are used.
        # Without this cap any text longer than fft_size cannot be copied
        # into the zero-padded buffer and raises a broadcast error.
        window = min(self.MAX_ANALYZED_CHARS, fft_size)
        char_codes = np.array(
            [ord(c) % 256 for c in text[:window]], dtype=np.float32
        )

        # Zero-pad short text up to fft_size.
        padded = np.zeros(fft_size, dtype=np.float32)
        padded[: char_codes.size] = char_codes

        power_spectrum = np.abs(np.fft.fft(padded)) ** 2

        # NOTE(review): the mean of the character codes is not removed, so
        # bin 0 (the plain sum of the codes) is the strongest bin for any
        # non-empty text. Confirm it is meant to take part in the
        # concentration and similarity checks.
        max_power = np.max(power_spectrum) or 1.0  # avoid 0/0 on empty text
        normalized_spectrum = power_spectrum / max_power

        # Keep the positive-frequency half (128 bins for the default size).
        return normalized_spectrum[: max(1, fft_size // 2)]

    def check_energy_concentration(self, spectrum: np.ndarray) -> Tuple[float, bool]:
        """
        Check whether spectral energy sits in only a few bins.

        Args:
            spectrum: Power spectrum from FFT

        Returns:
            (concentration_ratio, is_concerning) where the ratio is the
            energy of the 10 strongest bins divided by the total energy.
        """
        top_k = 10
        top_sum = np.sum(np.sort(spectrum)[-top_k:])
        total_sum = np.sum(spectrum) or 1.0  # all-zero spectrum -> ratio 0
        concentration = float(top_sum / total_sum)
        return concentration, concentration > self.ENERGY_CONCENTRATION_THRESHOLD

    def check_self_similarity(self, agent_name: str,
                              spectrum: np.ndarray) -> Tuple[float, bool]:
        """
        Compare a spectrum with the agent's previous one (cosine similarity).

        The stored spectrum for ``agent_name`` is always replaced by
        ``spectrum``, so each call compares against the immediately
        preceding call for that agent.

        Args:
            agent_name: Name of agent for history lookup
            spectrum: New response spectrum

        Returns:
            (similarity_score, is_concerning). ``(0.0, False)`` when there
            is no comparable previous spectrum.
        """
        prev_spectrum = self.frequency_signatures.get(agent_name)
        self.frequency_signatures[agent_name] = spectrum

        # First response, or spectra of different lengths (different
        # fft_size): nothing meaningful to compare against.
        if prev_spectrum is None or prev_spectrum.shape != spectrum.shape:
            return 0.0, False

        # The small epsilon keeps the division defined for zero spectra.
        similarity = float(
            np.dot(prev_spectrum, spectrum)
            / (np.linalg.norm(prev_spectrum) * np.linalg.norm(spectrum) + 1e-8)
        )
        return similarity, similarity > self.SELF_SIMILARITY_THRESHOLD

    def check_vocabulary_diversity(self, text: str) -> Tuple[float, bool]:
        """
        Check whether the response keeps reusing the same words.

        Args:
            text: Response text

        Returns:
            (uniqueness_ratio, is_concerning) where the ratio is unique
            lower-cased whitespace-separated words over total words.
            Text shorter than 20 characters is never flagged.
        """
        if len(text) < 20:
            return 1.0, False

        words = text.lower().split()
        if not words:
            return 1.0, False

        uniqueness = len(set(words)) / len(words)
        return uniqueness, uniqueness < (1.0 - self.RUNAWAY_VOCABULARY_RATIO)

    def validate_analysis(self, agent_name: str, text: str) -> Dict:
        """
        Full stability validation for a single agent response.

        Updates the stored spectrum for ``agent_name`` as a side effect.

        Args:
            agent_name: Name of agent
            text: Response text (``None`` is treated as empty text)

        Returns:
            {
                'agent': str,
                'is_stable': bool,
                'stability_score': float (0-1),
                'flags': List[str],
                'spectrum': np.ndarray,
                'concerns': Dict
            }
        """
        if text is None:
            # A missing response is scored as an empty one rather than
            # crashing the validator.
            text = ""

        spectrum = self.text_to_spectrum(text)
        flags: List[str] = []

        # Check 1: Energy concentration
        conc, conc_concerning = self.check_energy_concentration(spectrum)
        if conc_concerning:
            flags.append('HIGH_ENERGY_CONCENTRATION')

        # Check 2: Self-similarity
        similarity, sim_concerning = self.check_self_similarity(agent_name, spectrum)
        if sim_concerning:
            flags.append('REPEATING_RESPONSE_PATTERN')

        # Check 3: Vocabulary diversity
        uniqueness, vocab_concerning = self.check_vocabulary_diversity(text)
        if vocab_concerning:
            flags.append('LOW_VOCABULARY_DIVERSITY')

        # Check 4: Response length sanity
        if len(text) < 50:
            flags.append('SUSPICIOUSLY_SHORT')
        if len(text) > 10000:
            flags.append('SUSPICIOUSLY_LONG')

        concerns = {
            'energy_concentration': {
                'ratio': float(conc),
                'concerning': bool(conc_concerning),
            },
            'self_similarity': {
                'ratio': float(similarity),
                'concerning': bool(sim_concerning),
            },
            'vocabulary_diversity': {
                'uniqueness': float(uniqueness),
                'concerning': bool(vocab_concerning),
            },
        }

        # Each flag costs 0.25; three or more flags fall to or below the
        # coherence floor and mark the response unstable.
        stability_score = max(0.0, 1.0 - (len(flags) * 0.25))
        is_stable = stability_score > self.COHERENCE_FLOOR

        if self.verbose and flags:
            logger.info(" %s: stability=%.2f, flags=%s",
                        agent_name, stability_score, flags)

        return {
            'agent': agent_name,
            'is_stable': is_stable,
            'stability_score': stability_score,
            'flags': flags,
            'spectrum': spectrum,
            'concerns': concerns,
        }

    def validate_round(self, analyses: Dict[str, str],
                       round_num: int) -> Tuple[bool, List[Dict], float]:
        """
        Validate all agents' responses in a debate round.

        Appends one entry to ``stability_history``.

        Args:
            analyses: Dict mapping agent_name -> response_text
            round_num: Round number (for logging)

        Returns:
            (all_stable, validation_reports, avg_stability). With no
            responses the result is ``(True, [], 0.5)``.
        """
        reports = [
            self.validate_analysis(agent_name, text)
            for agent_name, text in analyses.items()
        ]

        if reports:
            avg_stability = float(np.mean([r['stability_score'] for r in reports]))
        else:
            avg_stability = 0.5

        unstable_agents = [r['agent'] for r in reports if not r['is_stable']]
        all_stable = not unstable_agents

        if unstable_agents:
            logger.warning(
                "Round %s: Unstable agents detected: %s (avg_stability=%.2f)",
                round_num, unstable_agents, avg_stability,
            )

        # 'halted' / 'halt_reason' are filled in by should_halt_debate when
        # this round actually stops the debate.
        self.stability_history.append({
            'round': round_num,
            'all_stable': all_stable,
            'avg_stability': avg_stability,
            'unstable_agents': unstable_agents,
            'reports': reports,
            'halted': False,
            'halt_reason': "",
        })

        return all_stable, reports, avg_stability

    def should_halt_debate(self, analyses: Dict[str, str],
                           round_num: int, gamma: Optional[float] = None) -> Tuple[bool, str]:
        """
        Determine if debate should halt before synthesis.

        Runs ``validate_round`` (which records the round in history and
        updates per-agent spectra), then halts on the first matching rule:

        1. Two or more agents unstable
        2. Gamma coherence below ``GAMMA_COLLAPSE_THRESHOLD``
        3. Two or more agents flagged 'REPEATING_RESPONSE_PATTERN'

        Args:
            analyses: Current round analyses
            round_num: Current round number
            gamma: Current gamma coherence (optional)

        Returns:
            (should_halt, reason); reason is "" when the debate continues.
        """
        _, reports, avg_stability = self.validate_round(analyses, round_num)

        reason = self._halt_reason(reports, round_num, avg_stability, gamma)
        if not reason:
            return False, ""

        logger.warning("STABILITY CHECK: Halting debate. %s", reason)

        # Record the halt on the entry validate_round just appended so
        # get_summary can report real halts.
        latest = self.stability_history[-1]
        latest['halted'] = True
        latest['halt_reason'] = reason
        return True, reason

    def _halt_reason(self, reports: List[Dict], round_num: int,
                     avg_stability: float, gamma: Optional[float]) -> str:
        """Return the reason the round must halt, or "" if it may continue."""
        unstable_count = sum(1 for r in reports if not r['is_stable'])
        if unstable_count >= 2:
            return (
                f"Multiple agents unstable ({unstable_count}/{len(reports)}) "
                f"at round {round_num}. Avg stability: {avg_stability:.2f}"
            )

        if gamma is not None and gamma < self.GAMMA_COLLAPSE_THRESHOLD:
            return (
                f"System in collapse zone (gamma={gamma:.2f} "
                f"< {self.GAMMA_COLLAPSE_THRESHOLD})"
            )

        # Several agents echoing their own previous response at once is
        # treated as a synthesis-loop indicator.
        repeating_count = sum(
            1 for r in reports if 'REPEATING_RESPONSE_PATTERN' in r['flags']
        )
        if repeating_count >= 2:
            return (
                f"Multiple agents repeating response patterns ({repeating_count}) "
                f"at round {round_num}. Synthesis loop risk."
            )

        return ""

    def get_summary(self) -> Dict:
        """
        Get stability history summary.

        Returns:
            ``{"message": ...}`` when no round was checked, otherwise a dict
            with the number of rounds, the mean of the per-round average
            stability, the number of rounds that halted the debate, the
            number of rounds with at least one unstable agent, and the
            last three history entries.
        """
        history = self.stability_history
        if not history:
            return {"message": "No stability checks performed"}

        return {
            "total_rounds_checked": len(history),
            "average_stability": float(np.mean([h['avg_stability'] for h in history])),
            # Entries lacking the 'halted' key count as not halted.
            "halts_triggered": sum(1 for h in history if h.get('halted', False)),
            "unstable_rounds": sum(1 for h in history if not h['all_stable']),
            "recent": history[-3:],
        }
|