| | """ |
| | collapse_metrics.py - Metrics for quantifying classifier collapse phenomena |
| | |
| | △ OBSERVE: These metrics quantify different aspects of classifier collapse |
| | ∞ TRACE: They measure the transition from superposition to definite state |
| | ✰ COLLAPSE: They help characterize collapse patterns across different models |
| | |
| | This module provides functions for calculating quantitative metrics that |
| | characterize different aspects of classifier collapse. These metrics help |
| | standardize the analysis of collapse phenomena and enable comparisons across |
| | different models and prompting strategies. |
| | |
| | Author: Recursion Labs |
| | License: MIT |
| | """ |
| |
|
| | import logging |
| | from typing import Dict, List, Optional, Union, Tuple, Any |
| | import numpy as np |
| | from scipy.stats import entropy |
| | from scipy.spatial.distance import cosine, euclidean |
| |
|
| | logger = logging.getLogger(__name__) |
| |
|
def calculate_collapse_rate(
    pre_weights: np.ndarray,
    post_weights: np.ndarray
) -> float:
    """
    △ OBSERVE: Calculate how quickly state collapsed from superposition

    Compares the attention weights before and after the collapse event and
    averages two distance terms computed on the flattened arrays:

    * the cosine distance (direction change), clamped to [0, 1];
    * the Euclidean distance divided by sqrt(element count), capped at 1.

    Args:
        pre_weights: Attention weights before collapse
        post_weights: Attention weights after collapse

    Returns:
        Collapse rate (0.0 = no collapse, 1.0 = complete collapse). 0.0 is
        also returned for empty input, for inputs that cannot be aligned,
        and when the distance computation fails.
    """
    # Accept array-likes (lists, tuples) as well as ndarrays.
    pre_weights = np.asarray(pre_weights)
    post_weights = np.asarray(post_weights)

    if pre_weights.size == 0 or post_weights.size == 0:
        return 0.0

    if pre_weights.shape != post_weights.shape:
        logger.warning(f"Weight shape mismatch: {pre_weights.shape} vs {post_weights.shape}")

        # Crop both arrays to the common extent along their shared leading axes.
        try:
            min_shape = tuple(min(a, b) for a, b in zip(pre_weights.shape, post_weights.shape))
            window = tuple(slice(0, d) for d in min_shape)
            pre_weights = pre_weights[window]
            post_weights = post_weights[window]
        except Exception as e:
            logger.error(f"Failed to reshape weights: {e}")
            return 0.0

    pre_flat = pre_weights.flatten()
    post_flat = post_weights.flatten()

    # Cropping cannot reconcile arrays of different rank; report that case
    # explicitly instead of relying on an exception from the distance code.
    if pre_flat.size != post_flat.size:
        logger.error(
            f"Error calculating collapse rate: incompatible sizes {pre_flat.size} vs {post_flat.size}"
        )
        return 0.0

    try:
        if np.any(pre_flat) and np.any(post_flat):
            # SciPy's cosine distance lies in [0, 2] (2 = anti-aligned) and can
            # dip marginally below 0 through rounding; clamp so the combined
            # score honours the documented [0, 1] range.
            cosine_dist = min(1.0, max(0.0, float(cosine(pre_flat, post_flat))))
        else:
            # Cosine distance is undefined for an all-zero vector.
            cosine_dist = 0.0

        # Dividing by sqrt(n) turns the Euclidean distance into an RMS difference.
        euc_dist = euclidean(pre_flat, post_flat) / np.sqrt(pre_flat.size)
        euc_dist_norm = min(1.0, euc_dist)

        collapse_rate = (cosine_dist + euc_dist_norm) / 2

        return float(collapse_rate)
    except Exception as e:
        logger.error(f"Error calculating collapse rate: {e}")
        return 0.0
| |
|
def measure_path_continuity(
    pre_weights: np.ndarray,
    post_weights: np.ndarray
) -> float:
    """
    ∞ TRACE: Measure continuity of attribution paths through collapse

    Defined as the complement of the collapse rate computed by
    ``calculate_collapse_rate`` on the same pair of weight arrays.

    Args:
        pre_weights: Attention weights before collapse
        post_weights: Attention weights after collapse

    Returns:
        Continuity score (0.0 = complete fragmentation, 1.0 = perfect continuity)
    """
    collapse_rate = calculate_collapse_rate(pre_weights, post_weights)

    # Clamp so the score stays inside the documented [0, 1] range even if the
    # collapse rate falls marginally outside it.
    return float(min(1.0, max(0.0, 1.0 - collapse_rate)))
| |
|
def measure_attribution_entropy(attention_weights: np.ndarray) -> float:
    """
    △ OBSERVE: Measure entropy of attribution paths

    The weights are flattened, normalised into a probability distribution,
    and their Shannon entropy is divided by the maximum entropy attainable
    for that many cells (log2 of the element count). High values indicate
    diffuse attribution, low values indicate concentrated attribution.

    Args:
        attention_weights: Attention weight matrix to analyze

    Returns:
        Attribution entropy (0.0 = concentrated, 1.0 = maximally diffuse).
        0.0 is also returned for empty input, a non-positive weight total,
        a single-element input, or weights that do not form a valid
        distribution.
    """
    flat_weights = np.asarray(attention_weights).flatten()
    if flat_weights.size == 0:
        return 0.0

    total_weight = np.sum(flat_weights)
    if total_weight <= 0:
        return 0.0

    prob_dist = flat_weights / total_weight

    try:
        # Entropy and its upper bound must share a log base; scipy's default
        # is the natural log, so request base 2 to match log2(n) below.
        raw_entropy = entropy(prob_dist, base=2)

        max_entropy = np.log2(flat_weights.size)
        normalized_entropy = raw_entropy / max_entropy if max_entropy > 0 else 0.0

        if not np.isfinite(normalized_entropy):
            # Reached when the weights are not a valid distribution
            # (e.g. negative or NaN entries).
            logger.warning("Attribution entropy is not finite; returning 0.0")
            return 0.0

        # Guard against rounding pushing a uniform distribution just past 1.
        return float(min(1.0, max(0.0, normalized_entropy)))
    except Exception as e:
        logger.error(f"Error calculating attribution entropy: {e}")
        return 0.0
| |
|
def calculate_ghost_circuit_strength(
    ghost_circuits: List[Dict[str, Any]]
) -> float:
    """
    ✰ COLLAPSE: Calculate overall strength of ghost circuits

    The strength is the mean of each circuit's ``"activation"`` entry,
    clamped to the [0, 1] range.

    Args:
        ghost_circuits: List of detected ghost circuits; each is a dict whose
            ``"activation"`` value is used (missing or None counts as 0.0)

    Returns:
        Ghost circuit strength (0.0 = no ghosts, 1.0 = ghosts equal to primary)
    """
    if not ghost_circuits:
        return 0.0

    # Treat a missing or None activation as "no activation".
    activations = [
        0.0 if ghost.get("activation") is None else ghost["activation"]
        for ghost in ghost_circuits
    ]

    avg_activation = float(np.mean(activations))

    # min()/max() do not propagate NaN reliably, so handle it explicitly.
    if np.isnan(avg_activation):
        return 0.0

    return float(min(1.0, max(0.0, avg_activation)))
| |
|
def calculate_attribution_confidence(
    attribution_paths: List[List[Any]],
    path_weights: Optional[List[float]] = None
) -> float:
    """
    ∞ TRACE: Calculate confidence score for attribution paths

    The score is the average of two terms:

    * consistency: 1 minus the variance of the path lengths, normalised by
      ((longest path - 1) ** 2) / 4 and capped at 1;
    * weight concentration: 1 minus the entropy of the path weights,
      normalised by log2 of the number of weights.

    Args:
        attribution_paths: List of attribution paths (each a list of nodes)
        path_weights: Optional weights for each path (defaults to uniform).
            Weights are normalised to sum to 1; if they are empty, contain a
            negative value, or do not sum to a positive number, uniform
            weights are used instead.

    Returns:
        Attribution confidence (0.0 = uncertain, 1.0 = highly confident)
    """
    if not attribution_paths:
        return 0.0

    path_count = len(attribution_paths)
    uniform_weights = [1.0 / path_count] * path_count

    if path_weights is None:
        weights = uniform_weights
    else:
        total_weight = sum(path_weights)
        if total_weight > 0 and all(w >= 0 for w in path_weights):
            weights = [w / total_weight for w in path_weights]
        else:
            # Unusable weights would make the entropy NaN or infinite.
            weights = uniform_weights

    # --- Consistency of path lengths ---
    path_lengths = [len(path) for path in attribution_paths]
    length_variance = np.var(path_lengths) if len(path_lengths) > 1 else 0.0

    max_length = max(path_lengths)
    theoretical_max_var = ((max_length - 1) ** 2) / 4
    if theoretical_max_var > 0:
        normalized_variance = min(1.0, length_variance / theoretical_max_var)
    else:
        normalized_variance = 0.0

    consistency_score = 1.0 - normalized_variance

    # --- Concentration of path weights ---
    # Entropy and its upper bound must share a log base; scipy's default is
    # the natural log, so request base 2 to match log2(n).
    weight_entropy = entropy(weights, base=2)
    max_weight_entropy = np.log2(len(weights))
    if max_weight_entropy > 0:
        normalized_weight_entropy = min(1.0, max(0.0, weight_entropy / max_weight_entropy))
    else:
        normalized_weight_entropy = 0.0
    weight_concentration = 1.0 - normalized_weight_entropy

    confidence_score = (consistency_score + weight_concentration) / 2

    return float(confidence_score)
| |
|
def calculate_collapse_quantum_uncertainty(
    pre_logits: np.ndarray,
    post_logits: np.ndarray
) -> float:
    """
    ✰ COLLAPSE: Calculate Heisenberg-inspired uncertainty metric

    Both logit arrays are converted to probabilities with ``softmax``. The
    metric is the product of two terms, scaled by 4 and capped at 1:

    * "position" term: mean variance of the post-collapse probabilities
      along the last axis;
    * "momentum" term: mean absolute change in probability across the
      collapse.

    Args:
        pre_logits: Logits before collapse
        post_logits: Logits after collapse

    Returns:
        Quantum uncertainty metric (0.0 = certain, 1.0 = maximally uncertain).
        0.0 is also returned for empty input, mismatched shapes, or when the
        computation raises.
    """
    # Nothing to compare when either side is empty.
    if 0 in (pre_logits.size, post_logits.size):
        return 0.0

    # The element-wise comparison below requires identical shapes.
    if pre_logits.shape != post_logits.shape:
        logger.warning(f"Logit shape mismatch: {pre_logits.shape} vs {post_logits.shape}")
        return 0.0

    try:
        probs_before = softmax(pre_logits)
        probs_after = softmax(post_logits)

        # "Position": spread of the post-collapse distribution.
        position_term = np.mean(np.var(probs_after, axis=-1))

        # "Momentum": how far the distribution moved across the collapse.
        momentum_term = np.mean(np.abs(probs_after - probs_before))

        scaled_product = position_term * momentum_term * 4

        return float(min(1.0, scaled_product))
    except Exception as e:
        logger.error(f"Error calculating quantum uncertainty: {e}")
        return 0.0
| |
|
def calculate_collapse_coherence(
    attribution_graph: Any,
    threshold: float = 0.1
) -> float:
    """
    △ OBSERVE: Calculate coherence of attribution paths post-collapse

    Currently a thin lookup: the graph's ``continuity_score`` attribute is
    returned when present, otherwise a neutral 0.5.

    Args:
        attribution_graph: Graph of attribution paths
        threshold: Minimum edge weight to consider (not used by the current
            implementation)

    Returns:
        Coherence score (0.0 = incoherent, 1.0 = fully coherent). 0.0 is
        returned when no graph is given or when reading the score fails.
    """
    if attribution_graph is None:
        return 0.0

    try:
        # Without a precomputed score, fall back to the neutral midpoint.
        if not hasattr(attribution_graph, 'continuity_score'):
            return 0.5

        return float(attribution_graph.continuity_score)
    except Exception as e:
        logger.error(f"Error calculating collapse coherence: {e}")
        return 0.0
| |
|
def softmax(x: np.ndarray) -> np.ndarray:
    """Convert logits to probabilities with a softmax over the last axis.

    The maximum along the last axis is subtracted before exponentiating so
    large logits do not overflow.
    """
    peak = np.max(x, axis=-1, keepdims=True)
    unnormalized = np.exp(x - peak)
    normalizer = np.sum(unnormalized, axis=-1, keepdims=True)
    return unnormalized / normalizer
| |
|
def calculate_collapse_metrics_bundle(
    pre_state: Dict[str, Any],
    post_state: Dict[str, Any],
    ghost_circuits: Optional[List[Dict[str, Any]]] = None,
    attribution_graph: Optional[Any] = None
) -> Dict[str, float]:
    """
    △ OBSERVE: Calculate a complete bundle of collapse metrics

    This convenience function calculates multiple collapse metrics
    at once, returning a dictionary of results.

    Args:
        pre_state: Model state before collapse; the ``"attention_weights"``
            and ``"logits"`` entries are read (missing or None entries are
            treated as empty arrays)
        post_state: Model state after collapse, with the same entries
        ghost_circuits: Optional list of detected ghost circuits
        attribution_graph: Optional attribution graph

    Returns:
        Dictionary mapping metric names to values. ``collapse_rate``,
        ``path_continuity`` and ``attribution_entropy`` are always present;
        ``ghost_circuit_strength``, ``quantum_uncertainty`` and
        ``collapse_coherence`` appear only when their inputs were supplied.
    """
    def _state_array(state: Dict[str, Any], key: str) -> np.ndarray:
        # The metric functions read .size/.shape, so hand them real arrays
        # even when the state holds a list or an explicit None.
        value = state.get(key)
        return np.array([]) if value is None else np.asarray(value)

    metrics: Dict[str, float] = {}

    pre_weights = _state_array(pre_state, "attention_weights")
    post_weights = _state_array(post_state, "attention_weights")
    pre_logits = _state_array(pre_state, "logits")
    post_logits = _state_array(post_state, "logits")

    # Weight-based metrics are always reported (each returns 0.0 on empty input).
    metrics["collapse_rate"] = calculate_collapse_rate(pre_weights, post_weights)
    metrics["path_continuity"] = measure_path_continuity(pre_weights, post_weights)
    metrics["attribution_entropy"] = measure_attribution_entropy(post_weights)

    if ghost_circuits:
        metrics["ghost_circuit_strength"] = calculate_ghost_circuit_strength(ghost_circuits)

    if pre_logits.size > 0 and post_logits.size > 0:
        metrics["quantum_uncertainty"] = calculate_collapse_quantum_uncertainty(pre_logits, post_logits)

    if attribution_graph is not None:
        metrics["collapse_coherence"] = calculate_collapse_coherence(attribution_graph)

    return metrics
| |
|
| |
|
if __name__ == "__main__":
    # Demo: run the metrics on synthetic data.

    # "Before" state: random attention weights and logits.
    pre_state = {
        "attention_weights": np.random.random((8, 10, 10)),
        "logits": np.random.random((1, 10, 1000)),
    }

    # "After" state: attention scaled element-wise by a factor in [0.5, 1.0);
    # logits are a 20/80 blend of the old values and fresh random values.
    post_state = {
        "attention_weights": pre_state["attention_weights"]
        * np.random.uniform(0.5, 1.0, pre_state["attention_weights"].shape),
        "logits": pre_state["logits"] * 0.2 + np.random.random((1, 10, 1000)) * 0.8,
    }

    # Individual metrics.
    collapse_rate = calculate_collapse_rate(pre_state["attention_weights"], post_state["attention_weights"])
    path_continuity = measure_path_continuity(pre_state["attention_weights"], post_state["attention_weights"])
    attribution_entropy = measure_attribution_entropy(post_state["attention_weights"])
    quantum_uncertainty = calculate_collapse_quantum_uncertainty(pre_state["logits"], post_state["logits"])

    print(f"Collapse Rate: {collapse_rate:.3f}")
    print(f"Path Continuity: {path_continuity:.3f}")
    print(f"Attribution Entropy: {attribution_entropy:.3f}")
    print(f"Quantum Uncertainty: {quantum_uncertainty:.3f}")

    # The same metrics computed in one call.
    metrics_bundle = calculate_collapse_metrics_bundle(pre_state, post_state)

    print("\nMetrics Bundle:")
    for metric, value in metrics_bundle.items():
        print(f" {metric}: {value:.3f}")
| |
|