Spaces:
Running
Running
| """ | |
| Shared pytest fixtures for the test suite. | |
| Provides reusable mock data structures and synthetic tensors | |
| to test utility functions without loading actual ML models. | |
| """ | |
| # Disable TensorFlow before any other imports (mirrors app.py) | |
| import os | |
| os.environ["USE_TF"] = "0" | |
| import pytest | |
| import torch | |
| import numpy as np | |
| # ============================================================================= | |
| # Synthetic Attention Matrices | |
| # ============================================================================= | |
def uniform_attention_matrix():
    """4x4 uniform attention matrix (each position attends equally to all)."""
    n = 4
    # Every entry is 1/n, so each row is a uniform distribution over positions.
    return torch.full((n, n), 1.0 / n)
def previous_token_attention_matrix():
    """
    4x4 attention matrix where each position attends primarily to the previous token.
    Position 0 attends to itself (no previous token).
    """
    n = 4
    rows = [[0.0] * n for _ in range(n)]
    rows[0][0] = 1.0  # no previous token, so full self-attention
    for pos in range(1, n):
        rows[pos][pos - 1] = 0.8  # strong attention to the previous token
        rows[pos][pos] = 0.2      # some self-attention
    return torch.tensor(rows)
def first_token_attention_matrix():
    """4x4 attention matrix where all positions attend strongly to first token."""
    n = 4
    matrix = torch.zeros(n, n)
    matrix[:, 0] = 0.7        # every position attends strongly to token 0
    matrix.fill_diagonal_(0.3)  # self-attention; overwrites [0, 0] to 0.3
    return matrix
def peaked_attention_matrix():
    """4x4 attention matrix with peaked (low entropy) attention at one position."""
    n = 4
    peak = 2  # the column every position concentrates its attention on
    leak = 0.05 / (n - 1)  # remaining mass spread evenly over the other columns
    rows = [[0.95 if col == peak else leak for col in range(n)] for _ in range(n)]
    return torch.tensor(rows)
| # ============================================================================= | |
| # Mock Activation Data Structures | |
| # ============================================================================= | |
def mock_activation_data():
    """
    Mock activation data structure similar to execute_forward_pass output.
    Used for testing functions that process activation data.
    """
    uniform_row = [0.25, 0.25, 0.25, 0.25]
    ramp_row = [0.1, 0.2, 0.3, 0.4]
    # Each module's 'output' is [hidden_states, attention_weights];
    # weights are nested as [batch, heads, seq, seq].
    attention_outputs = {
        'model.layers.0.self_attn': {
            'output': [
                [[0.1, 0.2, 0.3]],
                [[[uniform_row[:] for _ in range(4)]]],
            ]
        },
        'model.layers.1.self_attn': {
            'output': [
                [[0.1, 0.2, 0.3]],
                [[[ramp_row[:] for _ in range(4)]]],
            ]
        },
    }
    top5 = [
        {'token': 'world', 'probability': 0.85},
        {'token': 'there', 'probability': 0.05},
        {'token': 'friend', 'probability': 0.03},
        {'token': 'everyone', 'probability': 0.02},
        {'token': 'all', 'probability': 0.01},
    ]
    return {
        'model': 'mock-model',
        'prompt': 'Hello world',
        'input_ids': [[1, 2, 3, 4]],
        'attention_modules': list(attention_outputs),
        'attention_outputs': attention_outputs,
        'block_modules': ['model.layers.0', 'model.layers.1'],
        'block_outputs': {
            'model.layers.0': {'output': [[[0.1, 0.2, 0.3, 0.4]]]},
            'model.layers.1': {'output': [[[0.2, 0.3, 0.4, 0.5]]]},
        },
        'norm_parameters': ['model.norm.weight'],
        'norm_data': [[1.0, 1.0, 1.0, 1.0]],
        'actual_output': {'token': ' world', 'probability': 0.85},
        'global_top5_tokens': top5,
    }
| # ============================================================================= | |
| # Mock Module/Parameter Patterns | |
| # ============================================================================= | |
def mock_module_patterns():
    """Mock module patterns as returned by extract_patterns."""
    def expand(template):
        # A '{N}' template maps to its concrete instances for layers 0 and 1.
        return [template.replace('{N}', str(i)) for i in range(2)]

    layered = (
        'model.layers.{N}.self_attn',
        'model.layers.{N}.mlp',
        'model.layers.{N}',
    )
    patterns = {template: expand(template) for template in layered}
    patterns['model.embed_tokens'] = ['model.embed_tokens']
    patterns['model.norm'] = ['model.norm']
    return patterns
def mock_param_patterns():
    """Mock parameter patterns as returned by extract_patterns."""
    # Attention projection weights come templated per layer; layer 0 is the
    # only concrete instance in this mock.
    patterns = {
        f'model.layers.{{N}}.self_attn.{proj}.weight':
            [f'model.layers.0.self_attn.{proj}.weight']
        for proj in ('q_proj', 'k_proj')
    }
    patterns['model.norm.weight'] = ['model.norm.weight']
    patterns['lm_head.weight'] = ['lm_head.weight']
    return patterns
| # ============================================================================= | |
| # Synthetic Logits for Ablation Metrics | |
| # ============================================================================= | |
def identical_logits():
    """Two identical logit tensors for testing KL divergence = 0."""
    base = torch.arange(1.0, 5.0)  # [1, 2, 3, 4]
    # Stack two consecutive ramps and add a batch dim: [1, 2, 4] = [batch, seq, vocab].
    logits = torch.stack((base, base + 1.0)).unsqueeze(0)
    return logits, logits.clone()
def different_logits():
    """Two different logit tensors for testing KL divergence > 0."""
    ascending = torch.tensor([[[1.0, 2.0, 3.0, 4.0],
                               [2.0, 3.0, 4.0, 5.0]]])
    # Reverse each vocab row so the two distributions clearly disagree.
    descending = torch.flip(ascending, dims=[-1])
    return ascending, descending
def prob_delta_data():
    """Data for testing probability delta computation."""
    # Reference favors token 3 at position 0 and token 2 at position 1;
    # the ablated logits shift all of that mass onto token 0.
    ref_rows = [[1.0, 2.0, 3.0, 10.0],
                [1.0, 2.0, 10.0, 3.0]]
    abl_rows = [[10.0, 2.0, 3.0, 1.0],
                [10.0, 2.0, 1.0, 3.0]]
    input_ids = torch.tensor([[0, 3, 2]])  # actual tokens: start, 3, 2
    return torch.tensor([ref_rows]), torch.tensor([abl_rows]), input_ids
| # ============================================================================= | |
| # Attribution Data for Visualization Tests | |
| # ============================================================================= | |
def mock_attribution_result():
    """Mock output from compute_integrated_gradients or compute_simple_gradient_attribution."""
    scores = [0.5, 1.0, 0.2]  # raw per-token attribution scores
    return dict(
        tokens=['Hello', ' world', '!'],
        token_ids=[1, 2, 3],
        attributions=scores,
        # Same values, separate list: already "normalized" for simplicity.
        normalized_attributions=list(scores),
        target_token='next',
        target_token_id=100,
    )