"""
collapse_metrics.py - Metrics for quantifying classifier collapse phenomena

△ OBSERVE: These metrics quantify different aspects of classifier collapse
∞ TRACE: They measure the transition from superposition to definite state
✰ COLLAPSE: They help characterize collapse patterns across different models

This module provides functions for calculating quantitative metrics that
characterize different aspects of classifier collapse. These metrics help
standardize the analysis of collapse phenomena and enable comparisons across
different models and prompting strategies.

Author: Recursion Labs
License: MIT
"""
import logging
from typing import Any, Dict, List, Optional

import numpy as np
from scipy.spatial.distance import cosine, euclidean
from scipy.stats import entropy

logger = logging.getLogger(__name__)


def calculate_collapse_rate(
pre_weights: np.ndarray,
post_weights: np.ndarray
) -> float:
"""
△ OBSERVE: Calculate how quickly state collapsed from superposition
This metric quantifies the speed of collapse by comparing attention
weight distributions before and after the collapse event.
Args:
pre_weights: Attention weights before collapse
post_weights: Attention weights after collapse
Returns:
Collapse rate (0.0 = no collapse, 1.0 = complete collapse)
"""
# Return 0 if arrays are empty
if pre_weights.size == 0 or post_weights.size == 0:
return 0.0
# Handle shape mismatches
if pre_weights.shape != post_weights.shape:
logger.warning(f"Weight shape mismatch: {pre_weights.shape} vs {post_weights.shape}")
# Try to take minimum dimensions if shapes don't match
try:
min_shape = tuple(min(a, b) for a, b in zip(pre_weights.shape, post_weights.shape))
pre_weights = pre_weights[tuple(slice(0, d) for d in min_shape)]
post_weights = post_weights[tuple(slice(0, d) for d in min_shape)]
except Exception as e:
logger.error(f"Failed to reshape weights: {e}")
return 0.0
# Flatten arrays for easier comparison
pre_flat = pre_weights.flatten()
post_flat = post_weights.flatten()
# Calculate normalized distances between distributions
try:
        # Cosine distance (0.0 = identical; 1.0 = orthogonal, the maximum
        # for non-negative attention weights)
cosine_dist = cosine(pre_flat, post_flat) if np.any(pre_flat) and np.any(post_flat) else 0.0
# Euclidean distance normalized by array size
euc_dist = euclidean(pre_flat, post_flat) / np.sqrt(pre_flat.size)
euc_dist_norm = min(1.0, euc_dist) # Cap at 1.0
# Combined metric: average of cosine and normalized euclidean
collapse_rate = (cosine_dist + euc_dist_norm) / 2
return float(collapse_rate)
except Exception as e:
logger.error(f"Error calculating collapse rate: {e}")
return 0.0
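
# Usage sketch (illustrative, not part of the module's API): identical
# pre/post tensors yield a collapse rate of ~0.0, while independent random
# tensors yield a strictly positive rate.
#
#     w = np.random.random((8, 10, 10))
#     assert abs(calculate_collapse_rate(w, w.copy())) < 1e-6
#     assert calculate_collapse_rate(w, np.random.random(w.shape)) > 0.0
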
def measure_path_continuity(
pre_weights: np.ndarray,
post_weights: np.ndarray
) -> float:
"""
∞ TRACE: Measure continuity of attribution paths through collapse
This metric quantifies how well attribution paths maintain their
integrity across the collapse event.
Args:
pre_weights: Attention weights before collapse
post_weights: Attention weights after collapse
Returns:
Continuity score (0.0 = complete fragmentation, 1.0 = perfect continuity)
"""
# Higher collapse rate means lower continuity
collapse_rate = calculate_collapse_rate(pre_weights, post_weights)
# Continuity is inverse of collapse rate
return 1.0 - collapse_rate
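
# By construction, continuity and collapse rate are complementary:
#
#     pre, post = np.random.random((4, 6, 6)), np.random.random((4, 6, 6))
#     rate = calculate_collapse_rate(pre, post)
#     assert abs(measure_path_continuity(pre, post) + rate - 1.0) < 1e-9
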
def measure_attribution_entropy(attention_weights: np.ndarray) -> float:
"""
△ OBSERVE: Measure entropy of attribution paths
This metric quantifies how distributed or concentrated the attribution
is across possible paths. High entropy indicates diffuse attribution,
while low entropy indicates concentrated attribution.
Args:
attention_weights: Attention weight matrix to analyze
Returns:
Attribution entropy (0.0 = concentrated, 1.0 = maximally diffuse)
"""
# Return 0 if array is empty
if attention_weights.size == 0:
return 0.0
# Flatten array for entropy calculation
flat_weights = attention_weights.flatten()
# Normalize weights to create a probability distribution
total_weight = np.sum(flat_weights)
if total_weight <= 0:
return 0.0
prob_dist = flat_weights / total_weight
    # Calculate Shannon entropy in bits (base 2, to match the log2 normalization)
    try:
        raw_entropy = entropy(prob_dist, base=2)
# Normalize by maximum possible entropy (log2(n))
max_entropy = np.log2(flat_weights.size)
normalized_entropy = raw_entropy / max_entropy if max_entropy > 0 else 0.0
return float(normalized_entropy)
except Exception as e:
logger.error(f"Error calculating attribution entropy: {e}")
return 0.0
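
# Worked example (illustrative): a uniform distribution is maximally diffuse
# (normalized entropy ~1.0), while a one-hot distribution is fully
# concentrated (0.0).
#
#     assert abs(measure_attribution_entropy(np.ones(8)) - 1.0) < 1e-9
#     one_hot = np.zeros(8); one_hot[0] = 1.0
#     assert measure_attribution_entropy(one_hot) == 0.0
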
def calculate_ghost_circuit_strength(
ghost_circuits: List[Dict[str, Any]]
) -> float:
"""
✰ COLLAPSE: Calculate overall strength of ghost circuits
This metric quantifies the strength of ghost circuits relative
to the primary activation paths.
Args:
ghost_circuits: List of detected ghost circuits
Returns:
Ghost circuit strength (0.0 = no ghosts, 1.0 = ghosts equal to primary)
"""
if not ghost_circuits:
return 0.0
# Extract activation values
activations = [ghost.get("activation", 0.0) for ghost in ghost_circuits]
    # Average activation across the detected ghost circuits
    avg_activation = np.mean(activations) if activations else 0.0
# Normalize to 0-1 range (assuming activation is already 0-1)
return float(min(1.0, avg_activation))
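
# Usage sketch with hypothetical ghost-circuit dicts (only the "activation"
# key is read; any other keys are ignored):
#
#     ghosts = [{"activation": 0.2}, {"activation": 0.4}]
#     assert abs(calculate_ghost_circuit_strength(ghosts) - 0.3) < 1e-9
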
def calculate_attribution_confidence(
attribution_paths: List[List[Any]],
path_weights: Optional[List[float]] = None
) -> float:
"""
∞ TRACE: Calculate confidence score for attribution paths
This metric quantifies how confidently the model attributes its output
to specific input elements.
Args:
attribution_paths: List of attribution paths (each a list of nodes)
path_weights: Optional weights for each path (defaults to uniform)
Returns:
Attribution confidence (0.0 = uncertain, 1.0 = highly confident)
"""
if not attribution_paths:
return 0.0
# Use uniform weights if none provided
if path_weights is None:
path_weights = [1.0 / len(attribution_paths)] * len(attribution_paths)
else:
# Normalize weights to sum to 1.0
total_weight = sum(path_weights)
path_weights = [w / total_weight for w in path_weights] if total_weight > 0 else path_weights
# Calculate path length variance (more uniform = higher confidence)
path_lengths = [len(path) for path in attribution_paths]
length_variance = np.var(path_lengths) if len(path_lengths) > 1 else 0.0
# Normalize variance to 0-1 range
# Assume max variance is when half paths are length 1 and half are max length
max_length = max(path_lengths) if path_lengths else 1
theoretical_max_var = ((max_length - 1) ** 2) / 4 # Theoretical maximum variance
normalized_variance = min(1.0, length_variance / theoretical_max_var) if theoretical_max_var > 0 else 0.0
# Invert normalized variance to get consistency score (more consistent = higher confidence)
consistency_score = 1.0 - normalized_variance
    # Measure how concentrated the path weights are: base-2 entropy (to match
    # the log2 normalization) is low when a few dominant paths carry most weight
    weight_entropy = entropy(path_weights, base=2)
    max_weight_entropy = np.log2(len(path_weights))
    normalized_weight_entropy = weight_entropy / max_weight_entropy if max_weight_entropy > 0 else 0.0
weight_concentration = 1.0 - normalized_weight_entropy
# Combine consistency and concentration for final confidence score
confidence_score = (consistency_score + weight_concentration) / 2
return float(confidence_score)
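
# Usage sketch (illustrative): equal-length paths with one dominant weight
# score higher than the same paths with uniform weights, since concentrated
# attribution reads as higher confidence.
#
#     uniform = calculate_attribution_confidence([[1, 2], [3, 4]])
#     dominant = calculate_attribution_confidence([[1, 2], [3, 4]], [0.99, 0.01])
#     assert dominant > uniform
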
def calculate_collapse_quantum_uncertainty(
pre_logits: np.ndarray,
post_logits: np.ndarray
) -> float:
"""
✰ COLLAPSE: Calculate Heisenberg-inspired uncertainty metric
This metric applies the quantum-inspired uncertainty principle to
transformer outputs, measuring uncertainty across the collapse.
Args:
pre_logits: Logits before collapse
post_logits: Logits after collapse
Returns:
Quantum uncertainty metric (0.0 = certain, 1.0 = maximally uncertain)
"""
# Return 0 if arrays are empty
if pre_logits.size == 0 or post_logits.size == 0:
return 0.0
# Handle shape mismatches
if pre_logits.shape != post_logits.shape:
logger.warning(f"Logit shape mismatch: {pre_logits.shape} vs {post_logits.shape}")
return 0.0
try:
# Calculate "position" uncertainty (variance in token probabilities)
pre_probs = softmax(pre_logits)
post_probs = softmax(post_logits)
pos_uncertainty = np.mean(np.var(post_probs, axis=-1))
# Calculate "momentum" uncertainty (change rate between states)
mom_uncertainty = np.mean(np.abs(post_probs - pre_probs))
# Combined metric inspired by Heisenberg uncertainty
# Higher values in both dimensions indicate more quantum-like behavior
uncertainty_product = pos_uncertainty * mom_uncertainty
# Normalize to 0-1 range (empirically determined max is around 0.25)
normalized_uncertainty = min(1.0, uncertainty_product * 4)
return float(normalized_uncertainty)
except Exception as e:
logger.error(f"Error calculating quantum uncertainty: {e}")
return 0.0
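
# Usage sketch (illustrative): identical pre/post logits imply zero "momentum"
# uncertainty, so the combined metric is exactly 0.0.
#
#     logits = np.random.random((1, 10, 1000))
#     assert calculate_collapse_quantum_uncertainty(logits, logits.copy()) == 0.0
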
def calculate_collapse_coherence(
attribution_graph: Any,
threshold: float = 0.1
) -> float:
"""
△ OBSERVE: Calculate coherence of attribution paths post-collapse
This metric quantifies how coherent the attribution paths remain
after collapse, reflecting the "quantum coherence" of the system.
Args:
attribution_graph: Graph of attribution paths
threshold: Minimum edge weight to consider
Returns:
Coherence score (0.0 = incoherent, 1.0 = fully coherent)
"""
    # This is a simplified version for when a full graph analysis isn't
    # available; a complete implementation would analyze the graph structure.

    # If no graph is provided, return 0
    if attribution_graph is None:
        return 0.0

    try:
        # If the graph exposes a continuity score, use it as the coherence value
        if hasattr(attribution_graph, 'continuity_score'):
            return float(attribution_graph.continuity_score)

        # Otherwise return a neutral placeholder value
        return 0.5
except Exception as e:
logger.error(f"Error calculating collapse coherence: {e}")
return 0.0
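
# Usage sketch: any object exposing a `continuity_score` attribute works as a
# stand-in attribution graph (here a SimpleNamespace, purely for illustration).
#
#     from types import SimpleNamespace
#     graph = SimpleNamespace(continuity_score=0.8)
#     assert calculate_collapse_coherence(graph) == 0.8
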
def softmax(x: np.ndarray) -> np.ndarray:
    """Apply softmax to convert logits to probabilities along the last axis."""
    # Subtract the per-row max before exponentiating for numerical stability
    exp_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=-1, keepdims=True)
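
# Sanity-check sketch: softmax output is a valid probability distribution
# along the last axis.
#
#     probs = softmax(np.random.random((2, 5)))
#     assert np.allclose(probs.sum(axis=-1), 1.0)
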
def calculate_collapse_metrics_bundle(
pre_state: Dict[str, Any],
post_state: Dict[str, Any],
ghost_circuits: Optional[List[Dict[str, Any]]] = None,
attribution_graph: Optional[Any] = None
) -> Dict[str, float]:
"""
△ OBSERVE: Calculate a complete bundle of collapse metrics
This convenience function calculates multiple collapse metrics
at once, returning a dictionary of results.
Args:
pre_state: Model state before collapse
post_state: Model state after collapse
ghost_circuits: Optional list of detected ghost circuits
attribution_graph: Optional attribution graph
Returns:
Dictionary mapping metric names to values
"""
metrics = {}
# Extract relevant state components
pre_weights = pre_state.get("attention_weights", np.array([]))
post_weights = post_state.get("attention_weights", np.array([]))
pre_logits = pre_state.get("logits", np.array([]))
post_logits = post_state.get("logits", np.array([]))
# Calculate metrics
metrics["collapse_rate"] = calculate_collapse_rate(pre_weights, post_weights)
metrics["path_continuity"] = measure_path_continuity(pre_weights, post_weights)
metrics["attribution_entropy"] = measure_attribution_entropy(post_weights)
if ghost_circuits:
metrics["ghost_circuit_strength"] = calculate_ghost_circuit_strength(ghost_circuits)
if pre_logits.size > 0 and post_logits.size > 0:
metrics["quantum_uncertainty"] = calculate_collapse_quantum_uncertainty(pre_logits, post_logits)
if attribution_graph is not None:
metrics["collapse_coherence"] = calculate_collapse_coherence(attribution_graph)
return metrics
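
# Usage sketch: states are plain dicts; only the keys below are read, and
# omitted optional inputs simply drop the corresponding metrics.
#
#     pre = {"attention_weights": np.random.random((8, 10, 10)),
#            "logits": np.random.random((1, 10, 100))}
#     post = {"attention_weights": np.random.random((8, 10, 10)),
#             "logits": np.random.random((1, 10, 100))}
#     bundle = calculate_collapse_metrics_bundle(pre, post)
#     assert {"collapse_rate", "path_continuity", "attribution_entropy",
#             "quantum_uncertainty"} <= set(bundle)
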
if __name__ == "__main__":
# Simple usage example
# Create synthetic pre and post states
pre_state = {
"attention_weights": np.random.random((8, 10, 10)), # 8 heads, 10 tokens
"logits": np.random.random((1, 10, 1000)) # Batch 1, 10 tokens, 1000 vocab
}
# Create post state with changes to simulate collapse
post_state = {
"attention_weights": pre_state["attention_weights"] * np.random.uniform(0.5, 1.0, pre_state["attention_weights"].shape),
"logits": pre_state["logits"] * 0.2 + np.random.random((1, 10, 1000)) * 0.8 # Shifted logits
}
# Calculate individual metrics
collapse_rate = calculate_collapse_rate(pre_state["attention_weights"], post_state["attention_weights"])
path_continuity = measure_path_continuity(pre_state["attention_weights"], post_state["attention_weights"])
attribution_entropy = measure_attribution_entropy(post_state["attention_weights"])
quantum_uncertainty = calculate_collapse_quantum_uncertainty(pre_state["logits"], post_state["logits"])
print(f"Collapse Rate: {collapse_rate:.3f}")
print(f"Path Continuity: {path_continuity:.3f}")
print(f"Attribution Entropy: {attribution_entropy:.3f}")
print(f"Quantum Uncertainty: {quantum_uncertainty:.3f}")
# Calculate complete metrics bundle
metrics_bundle = calculate_collapse_metrics_bundle(pre_state, post_state)
print("\nMetrics Bundle:")
for metric, value in metrics_bundle.items():
print(f" {metric}: {value:.3f}")