Spaces:

cdpearlman
/

LLMVis

Sleeping

cdpearlman Cursor committed on Feb 3

Commit

2965a7d

1 Parent(s): aabd66e

chore: remove unused code, imports, and deprecated functions

- Delete components/tokenization_panel.py (superseded by pipeline.py)

- Remove 6 unused imports from app.py

- Remove deprecated _get_top_attended_tokens() and references

- Remove unused create_stage_summary() from pipeline.py

- Remove 7 unused utility functions from model_patterns.py and beam_search.py

- Update utils/__init__.py exports and README.md

Total: ~1,087 lines removed. All 81 tests pass.
Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (8) hide show

README.md +1 -2
app.py +1 -10
components/pipeline.py +0 -23
components/tokenization_panel.py +0 -302
todo.md +19 -0
utils/__init__.py +3 -14
utils/beam_search.py +1 -90
utils/model_patterns.py +1 -646

README.md CHANGED Viewed

@@ -77,10 +77,9 @@ Open your browser and navigate to `http://127.0.0.1:8050/`.
 *   `app.py`: Main application entry point and layout orchestration.
 *   `components/`: Modular UI components.
-    *   `pipeline.py`: The core 5-stage visualization.
     *   `investigation_panel.py`: Ablation and attribution interfaces.
     *   `ablation_panel.py`: Specific logic for head ablation UI.
-    *   `tokenization_panel.py`: Token visualization.
 *   `utils/`: Backend logic and helper functions.
     *   `model_patterns.py`: Activation capture and hooking logic.
     *   `model_config.py`: Model family definitions and auto-detection.

 *   `app.py`: Main application entry point and layout orchestration.
 *   `components/`: Modular UI components.
+    *   `pipeline.py`: The core 5-stage visualization with tokenization display.
     *   `investigation_panel.py`: Ablation and attribution interfaces.
     *   `ablation_panel.py`: Specific logic for head ablation UI.
 *   `utils/`: Backend logic and helper functions.
     *   `model_patterns.py`: Activation capture and hooking logic.
     *   `model_config.py`: Model family definitions and auto-detection.

app.py CHANGED Viewed

@@ -10,11 +10,7 @@ from dash import html, dcc, Input, Output, State, callback, no_update, ALL, MATC
 import json
 import torch
 from utils import (load_model_and_get_patterns, execute_forward_pass, extract_layer_data,
-                   categorize_single_layer_heads, perform_beam_search,
-                   execute_forward_pass_with_head_ablation,
-                   execute_forward_pass_with_multi_layer_head_ablation,
-                   evaluate_sequence_ablation, score_sequence,
-                   get_head_category_counts, generate_bertviz_model_view_html)
 from utils.head_detection import categorize_all_heads
 from utils.model_config import get_auto_selections
 from utils.token_attribution import compute_integrated_gradients, compute_simple_gradient_attribution
@@ -523,11 +519,6 @@ def update_pipeline_content(activation_data, model_name):
         else:
             top_tokens = []
-        # Get attention info from first layer
-        top_attended = None
-        if layer_data:
-            top_attended = layer_data[0].get('top_attended_tokens', [])
         # Generate BertViz HTML
         from utils import generate_bertviz_html
         attention_html = None

 import json
 import torch
 from utils import (load_model_and_get_patterns, execute_forward_pass, extract_layer_data,
+                   perform_beam_search, execute_forward_pass_with_multi_layer_head_ablation)
 from utils.head_detection import categorize_all_heads
 from utils.model_config import get_auto_selections
 from utils.token_attribution import compute_integrated_gradients, compute_simple_gradient_attribution
         else:
             top_tokens = []
         # Generate BertViz HTML
         from utils import generate_bertviz_html
         attention_html = None

components/pipeline.py CHANGED Viewed

@@ -809,26 +809,3 @@ def create_output_content(top_tokens=None, predicted_token=None, predicted_prob=
     return html.Div(content_items)
-def create_stage_summary(stage_id, activation_data=None, model_config=None):
-    """
-    Generate summary text for a stage (shown when collapsed).
-    Args:
-        stage_id: Stage identifier ('tokenization', 'embedding', etc.)
-        activation_data: Optional activation data from forward pass
-        model_config: Optional model configuration
-    """
-    if not activation_data:
-        return "Awaiting input..."
-    summaries = {
-        'tokenization': lambda: f"{len(activation_data.get('input_ids', [[]])[0])} tokens",
-        'embedding': lambda: f"{model_config.hidden_size if model_config else 768}-dim vectors" if model_config else "Vectors ready",
-        'attention': lambda: f"{model_config.num_attention_heads if model_config else 12} heads" if model_config else "Context gathered",
-        'mlp': lambda: f"{model_config.num_hidden_layers if model_config else 12} layers" if model_config else "Transformations applied",
-        'output': lambda: f"→ {activation_data.get('actual_output', {}).get('token', '?')}" if activation_data.get('actual_output') else "Output computed"
-    }
-    return summaries.get(stage_id, lambda: "")()


809
810	return html.Div(content_items)
811

components/tokenization_panel.py DELETED Viewed

@@ -1,302 +0,0 @@
-"""
-Tokenization panel component for visualizing the tokenization process.
-Displays tokens in vertical rows: [token] → [ID] → [embedding] per token.
-"""
-from dash import html, dcc
-def create_static_tokenization_diagram():
-    """Create static HTML/CSS diagram showing example tokenization flow."""
-    return html.Div([
-        html.H4("Example: How text becomes model input",
-                style={'marginBottom': '1rem', 'color': '#495057', 'fontSize': '16px'}),
-        # Example flow: text -> tokens -> IDs -> embeddings
-        html.Div([
-            # Input text
-            html.Div([
-                html.Div('"Hello world"',
-                        className='example-text',
-                        style={'padding': '8px 12px', 'backgroundColor': '#e9ecef',
-                               'borderRadius': '4px', 'fontFamily': 'monospace'})
-            ], style={'flex': '1', 'textAlign': 'center'}),
-            html.Div('→', style={'padding': '0 10px', 'fontSize': '20px', 'color': '#6c757d'}),
-            # Tokens
-            html.Div([
-                html.Div([
-                    html.Span('["Hello"', style={'fontFamily': 'monospace'}),
-                    html.Span(', " world"]', style={'fontFamily': 'monospace'})
-                ], style={'padding': '8px 12px', 'backgroundColor': '#d4edff',
-                         'borderRadius': '4px'})
-            ], style={'flex': '1', 'textAlign': 'center'}),
-            html.Div('→', style={'padding': '0 10px', 'fontSize': '20px', 'color': '#6c757d'}),
-            # IDs
-            html.Div([
-                html.Div('[1234, 5678]',
-                        style={'padding': '8px 12px', 'backgroundColor': '#ffe5d4',
-                               'borderRadius': '4px', 'fontFamily': 'monospace'})
-            ], style={'flex': '1', 'textAlign': 'center'}),
-            html.Div('→', style={'padding': '0 10px', 'fontSize': '20px', 'color': '#6c757d'}),
-            # Embeddings
-            html.Div([
-                html.Div('[[ ... ], [ ... ]]',
-                        style={'padding': '8px 12px', 'backgroundColor': '#e5d4ff',
-                               'borderRadius': '4px', 'fontFamily': 'monospace'})
-            ], style={'flex': '1', 'textAlign': 'center'})
-        ], style={'display': 'flex', 'alignItems': 'center', 'justifyContent': 'center',
-                 'padding': '1rem', 'backgroundColor': '#f8f9fa', 'borderRadius': '8px',
-                 'border': '1px solid #dee2e6'})
-    ], style={'marginBottom': '2rem'})
-def create_tokenization_panel():
-    """Create the tokenization visualization panel with three columns."""
-    return html.Div([
-        # Section title and subtitle
-        html.Div([
-            html.H3("Step 1: Tokenization & Embedding",
-                   className="section-title",
-                   style={'marginBottom': '0.5rem'}),
-            html.P("This is the first step in processing text through a transformer model. "
-                  "The input text is broken into tokens, converted to IDs, and embedded as vectors.",
-                  style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '1.5rem'})
-        ]),
-        # Static example diagram (always visible)
-        create_static_tokenization_diagram(),
-        # Dynamic tokenization display container (populated by callback)
-        html.Div(id='tokenization-display-container', children=[
-            # This will be populated after analysis runs
-        ])
-    ], id='tokenization-panel', style={'display': 'none'}, className='tokenization-section')
-def create_tokenization_display(tokens_list, token_ids_list, color_palette=None):
-    """
-    Create a vertical tokenization display showing each token's flow.
-    Args:
-        tokens_list: List of token strings
-        token_ids_list: List of token IDs
-        color_palette: Optional list of colors for each token (auto-generated if None)
-    Returns:
-        Dash HTML component with vertical token rows: [token] → [ID] → [embedding]
-    """
-    if color_palette is None:
-        # Generate distinct colors for each token
-        color_palette = generate_token_colors(len(tokens_list))
-    preview_token = tokens_list[0] if tokens_list else ""
-    preview_id = token_ids_list[0] if token_ids_list else ""
-    preview_color = color_palette[0] if color_palette else '#f8f9fa'
-    return html.Details([
-        html.Summary(
-            html.Div([
-                html.Span("Tokenization preview:", style={'color': '#6c757d', 'fontSize': '13px'}),
-                html.Span(preview_token, style={
-                    'padding': '4px 8px',
-                    'backgroundColor': preview_color,
-                    'borderRadius': '4px',
-                    'fontFamily': 'monospace',
-                    'fontSize': '12px'
-                }),
-                html.Span('→', style={'color': '#6c757d'}),
-                html.Span(str(preview_id), style={
-                    'padding': '4px 8px',
-                    'backgroundColor': '#ffe5d4',
-                    'borderRadius': '4px',
-                    'fontFamily': 'monospace',
-                    'fontSize': '12px'
-                }),
-                html.Span('→', style={'color': '#6c757d'}),
-                html.Span('[ ... ]', style={
-                    'padding': '4px 8px',
-                    'backgroundColor': '#e5d4ff',
-                    'borderRadius': '4px',
-                    'fontFamily': 'monospace',
-                    'fontSize': '12px'
-                }),
-                html.Span('...', style={'color': '#6c757d'}),
-                html.Span("Expand", style={'marginLeft': 'auto', 'color': '#667eea', 'fontWeight': '500'})
-            ], style={'display': 'flex', 'alignItems': 'center', 'gap': '8px', 'flexWrap': 'wrap'})
-        ),
-        html.Div([
-            html.H4("Full Tokenization:",
-                   style={'marginTop': '1.5rem', 'marginBottom': '1rem',
-                          'color': '#495057', 'fontSize': '16px'}),
-            # Column headers row
-            html.Div([
-                html.Span("Token", className='token-header',
-                         style={'flex': '1', 'fontWeight': '600', 'color': '#495057', 'fontSize': '13px'}),
-                html.Span("", style={'width': '32px'}),  # Arrow spacer
-                html.Span("ID", className='token-header',
-                         style={'flex': '1', 'fontWeight': '600', 'color': '#495057', 'fontSize': '13px'}),
-                html.Span("", style={'width': '32px'}),  # Arrow spacer
-                html.Span("Embedding", className='token-header',
-                         style={'flex': '1', 'fontWeight': '600', 'color': '#495057', 'fontSize': '13px'})
-            ], className='tokenization-header-row',
-               style={'display': 'flex', 'alignItems': 'center', 'gap': '4px',
-                      'marginBottom': '0.75rem', 'paddingBottom': '0.5rem',
-                      'borderBottom': '1px solid #e9ecef'}),
-            # Vertical token rows - each row shows [token] → [ID] → [embedding]
-            html.Div([
-                create_token_row(token, token_id, color, idx)
-                for idx, (token, token_id, color) in enumerate(zip(tokens_list, token_ids_list, color_palette))
-            ], className='tokenization-rows')
-        ], style={'padding': '1rem', 'backgroundColor': '#ffffff',
-                  'borderRadius': '8px', 'border': '1px solid #dee2e6'})
-    ], open=False, style={'marginTop': '1rem'})
-def create_token_row(token, token_id, color, idx):
-    """
-    Create a single horizontal row showing: [token] → [ID] → [embedding].
-    Args:
-        token: Token string
-        token_id: Token ID number
-        color: Background color for the token
-        idx: Index of the token (for key uniqueness)
-    Returns:
-        Dash HTML component for a single token row
-    """
-    # Tooltip text for educational purposes
-    tooltips = {
-        'token': "The text is broken into 'tokens' - small pieces like words or parts of words. "
-                "This is how the model reads text. Breaking words into smaller pieces lets the model "
-                "understand new words by combining pieces it already knows.",
-        'id': "Each token gets a unique number (ID) from the model's dictionary. "
-             "Think of it like a phonebook - every token has its own number. "
-             "The model uses these numbers instead of the actual text.",
-        'embedding': "Each token number is turned into a list of numbers called an 'embedding.' "
-                    "These numbers capture the token's meaning. Similar words get similar numbers. "
-                    "This list of numbers is what actually goes into the model's layers."
-    }
-    return html.Div([
-        # Token box
-        html.Div(
-            token,
-            className='token-row-box token-row-token',
-            style={
-                'flex': '1',
-                'padding': '8px 12px',
-                'backgroundColor': color,
-                'borderRadius': '6px',
-                'border': f'2px solid {darken_color(color)}',
-                'fontFamily': 'monospace',
-                'fontSize': '13px',
-                'textAlign': 'center',
-                'wordBreak': 'break-word',
-                'minWidth': '60px'
-            },
-            title=tooltips['token']
-        ),
-        # Arrow
-        html.Span('→', className='token-row-arrow',
-                  style={'color': '#6c757d', 'fontSize': '16px', 'padding': '0 8px'}),
-        # ID box
-        html.Div(
-            str(token_id),
-            className='token-row-box token-row-id',
-            style={
-                'flex': '1',
-                'padding': '8px 12px',
-                'backgroundColor': '#ffe5d4',
-                'borderRadius': '6px',
-                'border': '2px solid #e6cfc0',
-                'fontFamily': 'monospace',
-                'fontSize': '13px',
-                'textAlign': 'center',
-                'minWidth': '60px'
-            },
-            title=tooltips['id']
-        ),
-        # Arrow
-        html.Span('→', className='token-row-arrow',
-                  style={'color': '#6c757d', 'fontSize': '16px', 'padding': '0 8px'}),
-        # Embedding box
-        html.Div(
-            '[ ... ]',
-            className='token-row-box token-row-embedding',
-            style={
-                'flex': '1',
-                'padding': '8px 12px',
-                'backgroundColor': '#e5d4ff',
-                'borderRadius': '6px',
-                'border': '2px solid #cfbfe6',
-                'fontFamily': 'monospace',
-                'fontSize': '13px',
-                'textAlign': 'center',
-                'minWidth': '60px'
-            },
-            title=tooltips['embedding']
-        )
-    ], className='token-row',
-       style={'display': 'flex', 'alignItems': 'center', 'gap': '4px', 'marginBottom': '8px'})
-def generate_token_colors(num_tokens):
-    """Generate a list of distinct colors for tokens."""
-    # Predefined pleasant color palette
-    base_colors = [
-        '#ffcccb',  # Light red
-        '#add8e6',  # Light blue
-        '#90ee90',  # Light green
-        '#ffb6c1',  # Light pink
-        '#ffd700',  # Gold
-        '#dda0dd',  # Plum
-        '#f0e68c',  # Khaki
-        '#ff6347',  # Tomato
-        '#98fb98',  # Pale green
-        '#87ceeb',  # Sky blue
-        '#ffa07a',  # Light salmon
-        '#da70d6',  # Orchid
-    ]
-    # Cycle through colors if we have more tokens than colors
-    colors = []
-    for i in range(num_tokens):
-        colors.append(base_colors[i % len(base_colors)])
-    return colors
-def darken_color(hex_color, factor=0.8):
-    """Darken a hex color by a factor."""
-    # Remove '#' if present
-    hex_color = hex_color.lstrip('#')
-    # Convert to RGB
-    r, g, b = int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16)
-    # Darken
-    r, g, b = int(r * factor), int(g * factor), int(b * factor)
-    # Convert back to hex
-    return f'#{r:02x}{g:02x}{b:02x}'

todo.md CHANGED Viewed

@@ -112,3 +112,22 @@
 - [x] Replace per-layer ablation loop in app.py with single call to new function
 - [x] Add 5 tests for multi-layer ablation in test_model_patterns.py
 - [x] Verify all 78 tests pass

 - [x] Replace per-layer ablation loop in app.py with single call to new function
 - [x] Add 5 tests for multi-layer ablation in test_model_patterns.py
 - [x] Verify all 78 tests pass
+## Completed: Codebase Cleanup
+- [x] Delete unused file: `components/tokenization_panel.py` (302 lines, 6 functions)
+- [x] Remove 6 unused imports from `app.py`
+- [x] Remove deprecated `_get_top_attended_tokens()` function from model_patterns.py
+- [x] Remove `top_attended_tokens` field from extract_layer_data() return values
+- [x] Remove unused `create_stage_summary()` function from pipeline.py
+- [x] Remove 7 unused utility functions from utils/:
+  - `get_check_token_probabilities`
+  - `execute_forward_pass_with_layer_ablation`
+  - `generate_category_bertviz_html`
+  - `generate_head_view_with_categories`
+  - `compute_sequence_trajectory`
+  - `compute_layer_wise_summaries`
+  - `compute_position_layer_matrix`
+- [x] Update `utils/__init__.py` exports
+- [x] Update README.md to remove reference to deleted file
+- [x] Verify all 81 tests pass

utils/__init__.py CHANGED Viewed

@@ -1,17 +1,14 @@
 from .model_patterns import (load_model_and_get_patterns, execute_forward_pass,
                              logit_lens_transformation, extract_layer_data,
-                             generate_bertviz_html, generate_category_bertviz_html,
-                             generate_head_view_with_categories, get_head_category_counts,
-                             get_check_token_probabilities, execute_forward_pass_with_layer_ablation,
                              execute_forward_pass_with_head_ablation,
                              execute_forward_pass_with_multi_layer_head_ablation,
                              merge_token_probabilities,
                              compute_global_top5_tokens, detect_significant_probability_increases,
-                             compute_layer_wise_summaries, evaluate_sequence_ablation,
-                             compute_position_layer_matrix, generate_bertviz_model_view_html)
 from .model_config import get_model_family, get_family_config, get_auto_selections, MODEL_TO_FAMILY, MODEL_FAMILIES
 from .head_detection import categorize_all_heads, categorize_single_layer_heads, format_categorization_summary, HeadCategorizationConfig
-from .beam_search import perform_beam_search, compute_sequence_trajectory
 from .ablation_metrics import compute_kl_divergence, score_sequence, get_token_probability_deltas
 from .token_attribution import compute_integrated_gradients, compute_simple_gradient_attribution, create_attribution_visualization_data
@@ -20,22 +17,15 @@ __all__ = [
     # Model patterns
     'load_model_and_get_patterns',
     'execute_forward_pass',
-    'execute_forward_pass_with_layer_ablation',
     'execute_forward_pass_with_head_ablation',
     'execute_forward_pass_with_multi_layer_head_ablation',
     'evaluate_sequence_ablation',
     'logit_lens_transformation',
     'extract_layer_data',
     'generate_bertviz_html',
-    'generate_category_bertviz_html',
-    'generate_head_view_with_categories',
-    'get_head_category_counts',
-    'get_check_token_probabilities',
     'merge_token_probabilities',
     'compute_global_top5_tokens',
     'detect_significant_probability_increases',
-    'compute_layer_wise_summaries',
-    'compute_position_layer_matrix',
     'generate_bertviz_model_view_html',
     # Model config
@@ -53,7 +43,6 @@ __all__ = [
     # Beam search
     'perform_beam_search',
-    'compute_sequence_trajectory',
     # Ablation metrics
     'compute_kl_divergence',

 from .model_patterns import (load_model_and_get_patterns, execute_forward_pass,
                              logit_lens_transformation, extract_layer_data,
+                             generate_bertviz_html,
                              execute_forward_pass_with_head_ablation,
                              execute_forward_pass_with_multi_layer_head_ablation,
                              merge_token_probabilities,
                              compute_global_top5_tokens, detect_significant_probability_increases,
+                             evaluate_sequence_ablation, generate_bertviz_model_view_html)
 from .model_config import get_model_family, get_family_config, get_auto_selections, MODEL_TO_FAMILY, MODEL_FAMILIES
 from .head_detection import categorize_all_heads, categorize_single_layer_heads, format_categorization_summary, HeadCategorizationConfig
+from .beam_search import perform_beam_search
 from .ablation_metrics import compute_kl_divergence, score_sequence, get_token_probability_deltas
 from .token_attribution import compute_integrated_gradients, compute_simple_gradient_attribution, create_attribution_visualization_data
     # Model patterns
     'load_model_and_get_patterns',
     'execute_forward_pass',
     'execute_forward_pass_with_head_ablation',
     'execute_forward_pass_with_multi_layer_head_ablation',
     'evaluate_sequence_ablation',
     'logit_lens_transformation',
     'extract_layer_data',
     'generate_bertviz_html',
     'merge_token_probabilities',
     'compute_global_top5_tokens',
     'detect_significant_probability_increases',
     'generate_bertviz_model_view_html',
     # Model config
     # Beam search
     'perform_beam_search',
     # Ablation metrics
     'compute_kl_divergence',

utils/beam_search.py CHANGED Viewed

@@ -4,9 +4,7 @@ Beam search utility for text generation and sequence analysis.
 import torch
 import torch.nn.functional as F
-from typing import List, Tuple, Dict, Any, Optional
-import numpy as np
-from utils.model_patterns import get_norm_layer_from_parameter
 import re
 def _make_head_ablation_hook(head_indices: List[int], num_heads: int):
@@ -179,90 +177,3 @@ def perform_beam_search(model, tokenizer, prompt: str, beam_width: int = 3, max_
         # Ensure hooks are removed even if error occurs
         for hook in hooks:
             hook.remove()
-def compute_sequence_trajectory(activation_data: Dict[str, Any], model, tokenizer) -> Dict[int, List[float]]:
-    """
-    Compute the trajectory of the sequence score across layers.
-    For each layer, calculates the probability assigned to the *actual* next token
-    at each step of the sequence.
-    Args:
-        activation_data: Data from execute_forward_pass (must contain block_outputs for all layers)
-        model: HuggingFace model
-        tokenizer: HuggingFace tokenizer
-    Returns:
-        Dict mapping layer_num -> list of scores (one per step in the generated sequence)
-    """
-    if not activation_data or 'block_outputs' not in activation_data:
-        return {}
-    # Extract layer outputs
-    block_outputs = activation_data['block_outputs']
-    input_ids = activation_data['input_ids']
-    if isinstance(input_ids, list):
-        input_ids = torch.tensor(input_ids)
-    # Identify tokens: input_ids shape is [1, seq_len]
-    # The "generated" part starts after the prompt, but here we likely have the full sequence.
-    # We want to evaluate P(token_t | tokens_<t) for the whole sequence or just the new part?
-    # Usually, we visualize the whole sequence.
-    # We need the logits from each layer
-    # block_outputs keys are like "model.layers.0", "model.layers.1", etc.
-    # Sort layers
-    import re
-    layer_info = sorted(
-        [(int(re.findall(r'\d+', name)[0]), name)
-         for name in block_outputs.keys() if re.findall(r'\d+', name)]
-    )
-    # Get norm parameter for logit lens
-    norm_params = activation_data.get('norm_parameters', [])
-    norm_parameter = norm_params[0] if norm_params else None
-    final_norm = get_norm_layer_from_parameter(model, norm_parameter)
-    lm_head = model.get_output_embeddings()
-    trajectories = {}
-    # We only care about predictions for positions 0 to N-1 (predicting 1 to N)
-    target_ids = input_ids[0, 1:]
-    with torch.no_grad():
-        for layer_num, module_name in layer_info:
-            output_data = block_outputs[module_name]['output']
-            # Convert to tensor [batch, seq_len, hidden_dim]
-            hidden = torch.tensor(output_data) if not isinstance(output_data, torch.Tensor) else output_data
-            if hidden.dim() == 4: # PyVene sometimes returns [1, 1, seq_len, dim] ? No usually [1, seq, dim]
-                # If shape is weird, adjust
-                pass
-            # Ensure batch dim
-            if hidden.dim() == 2:
-                hidden = hidden.unsqueeze(0)
-            # Apply final norm
-            if final_norm is not None:
-                hidden = final_norm(hidden)
-            # Project to logits
-            logits = lm_head(hidden) # [batch, seq_len, vocab_size]
-            # We want log probs of the *next* token
-            # Logits at pos t predict token at t+1
-            # So we take logits at [0, :-1, :] and gather targets [0, 1:]
-            shift_logits = logits[0, :-1, :]
-            log_probs = F.log_softmax(shift_logits, dim=-1)
-            # Gather log probs of the actual target tokens
-            # target_ids shape [seq_len-1]
-            target_log_probs = log_probs.gather(1, target_ids.unsqueeze(1)).squeeze(1)
-            trajectories[layer_num] = target_log_probs.tolist()
-    return trajectories

 import torch
 import torch.nn.functional as F
+from typing import List, Dict, Any, Optional
 import re
 def _make_head_ablation_hook(head_indices: List[int], num_heads: int):
         # Ensure hooks are removed even if error occurs
         for hook in hooks:
             hook.remove()

utils/model_patterns.py CHANGED Viewed

@@ -657,174 +657,6 @@ def execute_forward_pass_with_multi_layer_head_ablation(model, tokenizer, prompt
     return result
-def execute_forward_pass_with_layer_ablation(model, tokenizer, prompt: str, config: Dict[str, Any],
-                                             ablate_layer_num: int, reference_activation_data: Dict[str, Any]) -> Dict[str, Any]:
-    """
-    Execute forward pass with mean ablation on a specific layer.
-    Args:
-        model: Loaded transformer model
-        tokenizer: Loaded tokenizer
-        prompt: Input text prompt
-        config: Dict with module lists like {"attention_modules": [...], "block_modules": [...], ...}
-        ablate_layer_num: Layer number to ablate
-        reference_activation_data: Original activation data containing the reference activations
-    Returns:
-        JSON-serializable dict with captured activations (with ablated layer)
-    """
-    # Extract module lists from config
-    attention_modules = config.get("attention_modules", [])
-    block_modules = config.get("block_modules", [])
-    norm_parameters = config.get("norm_parameters", [])
-    logit_lens_parameter = config.get("logit_lens_parameter")
-    all_modules = attention_modules + block_modules
-    if not all_modules:
-        return {"error": "No modules specified"}
-    # Find the target module for the layer to ablate
-    target_module_name = None
-    for mod_name in block_modules:
-        layer_match = re.search(r'\.(\d+)(?:\.|$)', mod_name)
-        if layer_match and int(layer_match.group(1)) == ablate_layer_num:
-            target_module_name = mod_name
-            break
-    if not target_module_name:
-        return {"error": f"Could not find module for layer {ablate_layer_num}"}
-    # Get reference activations from ALL layers for mean computation
-    block_outputs = reference_activation_data.get('block_outputs', {})
-    if not block_outputs:
-        return {"error": "No block outputs found in reference data"}
-    # Collect all layer activations to compute global mean
-    all_layer_tensors = []
-    for mod_name, output_data in block_outputs.items():
-        output = output_data['output']
-        if isinstance(output, list):
-            tensor = torch.tensor(output)
-        else:
-            tensor = output
-        all_layer_tensors.append(tensor)
-    # Stack all layers and compute mean across ALL layers and sequence positions
-    # This gives us a single mean vector that represents the average activation
-    stacked = torch.stack(all_layer_tensors, dim=0)  # [num_layers, batch, seq_len, hidden_dim]
-    # Compute mean across layers and sequence dimension
-    mean_activation = stacked.mean(dim=(0, 2), keepdim=True)  # [1, batch, 1, hidden_dim]
-    mean_activation = mean_activation.squeeze(0)  # [batch, 1, hidden_dim]
-    # Prepare inputs
-    inputs = tokenizer(prompt, return_tensors="pt")
-    seq_len = inputs['input_ids'].shape[1]
-    # Broadcast mean to match sequence length
-    ablation_value = mean_activation.expand(-1, seq_len, -1)  # [batch, seq_len, hidden_dim]
-    # Build IntervenableConfig from module names
-    intervenable_representations = []
-    for mod_name in all_modules:
-        layer_match = re.search(r'\.(\d+)(?:\.|$)', mod_name)
-        if not layer_match:
-            return {"error": f"Invalid module name format: {mod_name}"}
-        if 'attn' in mod_name or 'attention' in mod_name:
-            component = 'attention_output'
-        else:
-            component = 'block_output'
-        intervenable_representations.append(
-            RepresentationConfig(layer=int(layer_match.group(1)), component=component, unit="pos")
-        )
-    intervenable_config = IntervenableConfig(
-        intervenable_representations=intervenable_representations
-    )
-    intervenable_model = IntervenableModel(intervenable_config, model)
-    # Register hooks to capture activations
-    captured = {}
-    name_to_module = dict(intervenable_model.model.named_modules())
-    def make_hook(mod_name: str):
-        return lambda module, inputs, output: captured.update({mod_name: {"output": safe_to_serializable(output)}})
-    # Register ablation hook for target module
-    def ablation_hook(module, input, output):
-        # Replace output with mean activation
-        if isinstance(output, tuple):
-            # For modules that return tuples (hidden_states, ...), replace first element
-            ablated = (ablation_value,) + output[1:]
-            return ablated
-        else:
-            return ablation_value
-    hooks = []
-    for mod_name in all_modules:
-        if mod_name in name_to_module:
-            if mod_name == target_module_name:
-                # Apply ablation hook
-                hooks.append(name_to_module[mod_name].register_forward_hook(ablation_hook))
-            else:
-                # Regular capture hook
-                hooks.append(name_to_module[mod_name].register_forward_hook(make_hook(mod_name)))
-    # Execute forward pass
-    with torch.no_grad():
-        model_output = intervenable_model.model(**inputs, use_cache=False)
-    # Remove hooks
-    for hook in hooks:
-        hook.remove()
-    # Capture ablated layer output as well
-    captured[target_module_name] = {"output": safe_to_serializable(ablation_value)}
-    # Separate outputs by type
-    attention_outputs = {}
-    block_outputs = {}
-    for mod_name, output in captured.items():
-        if 'attn' in mod_name or 'attention' in mod_name:
-            attention_outputs[mod_name] = output
-        else:
-            block_outputs[mod_name] = output
-    # Capture normalization parameters
-    all_params = dict(model.named_parameters())
-    norm_data = [safe_to_serializable(all_params[p]) for p in norm_parameters if p in all_params]
-    # Extract predicted token from model output
-    actual_output = None
-    global_top5_tokens = []
-    try:
-        output_token, output_prob = get_actual_model_output(model_output, tokenizer)
-        actual_output = {"token": output_token, "probability": output_prob}
-        global_top5_tokens = compute_global_top5_tokens(model_output, tokenizer, top_k=5)
-    except Exception as e:
-        print(f"Warning: Could not extract model output: {e}")
-    # Build output dictionary
-    result = {
-        "model": getattr(model.config, "name_or_path", "unknown"),
-        "prompt": prompt,
-        "input_ids": safe_to_serializable(inputs["input_ids"]),
-        "attention_modules": list(attention_outputs.keys()),
-        "attention_outputs": attention_outputs,
-        "block_modules": list(block_outputs.keys()),
-        "block_outputs": block_outputs,
-        "norm_parameters": norm_parameters,
-        "norm_data": norm_data,
-        "actual_output": actual_output,
-        "global_top5_tokens": global_top5_tokens,
-        "ablated_layer": ablate_layer_num
-    }
-    return result
 def evaluate_sequence_ablation(model, tokenizer, sequence_text: str, config: Dict[str, Any],
                              ablation_type: str, ablation_target: Any) -> Dict[str, Any]:
     """
@@ -1159,85 +991,6 @@ def _get_top_tokens(activation_data: Dict[str, Any], module_name: str, model, to
         return None
def get_check_token_probabilities(activation_data: Dict[str, Any], model, tokenizer, check_token: str) -> Optional[Dict[str, Any]]:
    """
    Collect the probability of ``check_token`` after every captured block.

    Each block output is passed through the final norm (when one is resolved) and the
    model's output embedding; the softmax probability at the last position is read off.
    The token is encoded both with and without a leading space, and when the two
    spellings end in different token ids their probabilities are summed.

    Args:
        activation_data: Dict providing 'block_modules', 'block_outputs' and,
            optionally, 'norm_parameters'.
        model: Model exposing ``get_output_embeddings()``.
        tokenizer: Tokenizer exposing ``encode(text, add_special_tokens=False)``.
        check_token: Text of the token to track.

    Returns:
        Dict with 'token' (stripped text), 'layers' (sorted layer numbers) and
        'probabilities' (one merged probability per layer), or None when the token is
        blank / not a string, no block modules were captured, the token cannot be
        encoded, or any error occurs during the computation.
    """
    # A non-string value used to raise AttributeError on .strip() before the try block.
    if not isinstance(check_token, str):
        return None
    canonical = check_token.strip()
    if not canonical:
        return None

    try:
        layer_modules = activation_data.get('block_modules', [])
        if not layer_modules:
            return None

        # Order modules by the first integer found in their name.
        layer_info = []
        for name in layer_modules:
            first_number = re.search(r'\d+', name)
            if first_number:
                layer_info.append((int(first_number.group()), name))
        layer_info.sort()

        # Last sub-token id of each spelling, de-duplicated but order-preserving.
        target_token_ids = []
        for variant in (canonical, ' ' + canonical):
            token_ids = tokenizer.encode(variant, add_special_tokens=False)
            if token_ids and token_ids[-1] not in target_token_ids:
                target_token_ids.append(token_ids[-1])
        if not target_token_ids:
            return None

        norm_params = activation_data.get('norm_parameters', [])
        norm_parameter = norm_params[0] if norm_params else None
        final_norm = get_norm_layer_from_parameter(model, norm_parameter)
        lm_head = model.get_output_embeddings()

        # Tensors rebuilt from plain lists are CPU tensors in the default dtype; align
        # them with the output head so half-precision / accelerator models do not fail
        # inside the projection (which previously surfaced only as a None result).
        parameters = getattr(lm_head, 'parameters', None)
        head_param = next(parameters(), None) if callable(parameters) else None

        layers = []
        probabilities = []
        with torch.no_grad():
            for layer_num, module_name in layer_info:
                layer_output = activation_data['block_outputs'][module_name]['output']
                hidden = layer_output if isinstance(layer_output, torch.Tensor) else torch.tensor(layer_output)
                if head_param is not None:
                    if head_param.is_floating_point() and hidden.is_floating_point():
                        hidden = hidden.to(device=head_param.device, dtype=head_param.dtype)
                    else:
                        # Non-float (e.g. quantized) head weights: only move, never cast.
                        hidden = hidden.to(device=head_param.device)
                if hidden.dim() == 4:
                    hidden = hidden.squeeze(0)
                if final_norm is not None:
                    hidden = final_norm(hidden)
                logits = lm_head(hidden)
                probs = F.softmax(logits[0, -1, :], dim=-1)

                layers.append(layer_num)
                probabilities.append(sum(probs[tid].item() for tid in target_token_ids))

        return {
            'token': canonical,  # canonical form without leading space
            'layers': layers,
            'probabilities': probabilities
        }
    except Exception as e:
        # Best-effort helper: report the problem and let the caller skip the plot.
        print(f"Error computing check token probabilities: {e}")
        return None
 def detect_significant_probability_increases(layer_wise_probs: Dict[int, Dict[str, float]],
                                             layer_wise_deltas: Dict[int, Dict[str, float]],
                                             actual_output_token: str,
@@ -1281,246 +1034,13 @@ def detect_significant_probability_increases(layer_wise_probs: Dict[int, Dict[st
     return significant_layers
def _get_top_attended_tokens(activation_data: Dict[str, Any], layer_num: int, tokenizer, top_k: int = 3) -> Optional[List[Tuple[str, float]]]:
    """
    DEPRECATED: This function is deprecated and will be removed in a future version.
    Use head categorization from head_detection.py instead for more meaningful attention analysis.

    Get the top-K input tokens attended to by the last position in one layer,
    using attention weights averaged over all heads.

    Args:
        activation_data: Dict providing 'attention_outputs' and 'input_ids'
        layer_num: Layer number to analyze (matched against the first integer in each module name)
        tokenizer: Tokenizer exposing ``decode(ids, skip_special_tokens=...)``
        top_k: Number of top attended tokens to return

    Returns:
        List of (token_string, attention_weight) tuples, highest weight first, or None
        when the required data is missing, the layer is not found, or an error occurs.
    """
    import warnings

    warnings.warn(
        "_get_top_attended_tokens is deprecated. Use categorize_all_heads() from head_detection.py instead.",
        DeprecationWarning,
        stacklevel=2
    )

    try:
        attention_outputs = activation_data.get('attention_outputs', {})
        input_ids = activation_data.get('input_ids', [])
        if not attention_outputs or not input_ids:
            return None

        # First module whose leading integer equals the requested layer.
        target_module = None
        for module_name in attention_outputs:
            first_number = re.search(r'\d+', module_name)
            if first_number and int(first_number.group()) == layer_num:
                target_module = module_name
                break
        if target_module is None:
            return None

        # The attention weights are element 1 of the module output; accept a tuple as
        # well as a list so un-serialized hook outputs are handled too.
        attention_output = attention_outputs[target_module]['output']
        if not isinstance(attention_output, (list, tuple)) or len(attention_output) < 2:
            return None

        # as_tensor handles nested lists and existing tensors alike.
        attention_weights = torch.as_tensor(attention_output[1])  # [batch, heads, seq_len, seq_len]
        avg_attention = attention_weights[0].mean(dim=0)          # [seq_len, seq_len]
        last_pos_attention = avg_attention[-1, :]                 # [seq_len]

        k = min(top_k, len(last_pos_attention))
        top_values, top_indices = torch.topk(last_pos_attention, k)

        input_ids_tensor = torch.tensor(input_ids[0]) if isinstance(input_ids[0], list) else torch.tensor(input_ids)

        return [
            (tokenizer.decode([input_ids_tensor[idx].item()], skip_special_tokens=False), weight)
            for idx, weight in zip(top_indices.tolist(), top_values.tolist())
        ]
    except Exception as e:
        print(f"Warning: Could not compute attended tokens for layer {layer_num}: {e}")
        return None
def compute_position_layer_matrix(activation_data: Dict[str, Any], model, tokenizer) -> Dict[str, Any]:
    """
    Compute a 2D matrix of layer-to-layer deltas for each token position.

    For every position the activation data is trimmed so that position is the last
    one, ``extract_layer_data`` is run on the trimmed copy, and the delta of each
    layer's top token is stored at ``[layer, position]``.

    Args:
        activation_data: Activation data from forward pass
        model: Transformer model, forwarded to ``extract_layer_data``
        tokenizer: Tokenizer used to decode input ids and forwarded to ``extract_layer_data``

    Returns:
        Dict with:
            - 'matrix': 2D list [num_layers, seq_len] of delta values
            - 'tokens': List of token strings for X-axis labels
            - 'layer_nums': List of layer numbers for Y-axis labels
            - 'top_tokens': 2D list [num_layers, seq_len] of top token strings at each cell
        All four are empty when there are no input ids; 'matrix', 'layer_nums' and
        'top_tokens' are empty when no block modules were captured.
    """
    import copy

    input_ids = activation_data.get('input_ids', [[]])
    if not input_ids or not input_ids[0]:
        return {'matrix': [], 'tokens': [], 'layer_nums': [], 'top_tokens': []}

    sequence_ids = input_ids[0]
    seq_len = len(sequence_ids)

    # Token strings for X-axis labels
    tokens = [tokenizer.decode([tid]) for tid in sequence_ids]

    layer_modules = activation_data.get('block_modules', [])
    if not layer_modules:
        return {'matrix': [], 'tokens': tokens, 'layer_nums': [], 'top_tokens': []}

    # Layer number = first integer in the module name; names without one are ignored.
    number_matches = (re.search(r'\d+', name) for name in layer_modules)
    layer_nums = sorted(int(m.group()) for m in number_matches if m)
    num_layers = len(layer_nums)

    # Row lookup; setdefault keeps the first row when a layer number repeats,
    # matching what list.index() would return.
    row_of_layer = {}
    for row, layer_num in enumerate(layer_nums):
        row_of_layer.setdefault(layer_num, row)

    def slice_data(data, pos):
        """Return a deep copy of ``data`` trimmed so that ``pos`` is the final position."""
        if not data:
            return data
        # A full deep copy keeps the caller's activation data untouched.
        sliced = copy.deepcopy(data)

        # Block outputs: [batch, seq, hidden] -> [batch, 1, hidden]
        if 'block_outputs' in sliced:
            for entry in sliced['block_outputs'].values():
                out = entry['output']
                if isinstance(out, list) and len(out) > 0 and isinstance(out[0], list):
                    if pos < len(out[0]):
                        entry['output'] = [[out[0][pos]]]

        # Attention outputs: [batch, heads, seq, seq] -> [batch, heads, 1, seq]
        if 'attention_outputs' in sliced:
            for entry in sliced['attention_outputs'].values():
                out = entry['output']
                # Only list-shaped outputs can be re-assembled below.
                if not isinstance(out, list) or len(out) <= 1:
                    continue
                attns = out[1]
                if isinstance(attns, list) and len(attns) > 0:
                    rows = [[head[pos]] for head in attns[0] if pos < len(head)]
                    entry['output'] = [out[0], [rows]] + out[2:]

        # Input ids: keep everything up to and including ``pos``
        if 'input_ids' in sliced:
            ids = sliced['input_ids'][0]
            if pos < len(ids):
                sliced['input_ids'][0] = ids[:pos + 1]

        return sliced

    matrix = [[0.0] * seq_len for _ in range(num_layers)]
    top_tokens_matrix = [[''] * seq_len for _ in range(num_layers)]

    for pos in range(seq_len):
        layer_data = extract_layer_data(slice_data(activation_data, pos), model, tokenizer)
        if not layer_data:
            continue

        for layer_entry in layer_data:
            layer_num = layer_entry.get('layer_num')
            if layer_num is None or layer_num not in row_of_layer:
                continue
            row = row_of_layer[layer_num]

            # The delta of the top token is its change relative to the previous layer.
            top_token = layer_entry.get('top_token', '')
            deltas = layer_entry.get('deltas', {})
            matrix[row][pos] = deltas.get(top_token, 0.0) if top_token else 0.0
            top_tokens_matrix[row][pos] = top_token if top_token else ''

    return {
        'matrix': matrix,
        'tokens': tokens,
        'layer_nums': layer_nums,
        'top_tokens': top_tokens_matrix
    }
def compute_layer_wise_summaries(layer_data: List[Dict[str, Any]], activation_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Compute summary structures from layer data for easy access.

    Args:
        layer_data: List of layer data dicts; entries without a 'layer_num' are ignored
        activation_data: Activation data whose 'actual_output' may hold the output token

    Returns:
        Dict with:
            - 'layer_wise_top5_probs': layer_num -> {token: prob}
            - 'layer_wise_top5_deltas': layer_num -> {token: delta}
            - 'significant_layers': result of detect_significant_probability_increases()
              for the actual output token, or [] when no such token is available
    """
    layer_wise_top5_probs = {}
    layer_wise_top5_deltas = {}
    for layer_info in layer_data:
        layer_num = layer_info.get('layer_num')
        if layer_num is None:
            continue
        layer_wise_top5_probs[layer_num] = layer_info.get('global_top5_probs', {})
        layer_wise_top5_deltas[layer_num] = layer_info.get('global_top5_deltas', {})

    # 'actual_output' may be absent or None, and its 'token' may itself be None;
    # treat all of those as "no token" instead of failing on .strip().
    actual_output = activation_data.get('actual_output') or {}
    actual_output_token = (actual_output.get('token') or '').strip()

    significant_layers = []
    if actual_output_token:
        significant_layers = detect_significant_probability_increases(
            layer_wise_top5_probs,
            layer_wise_top5_deltas,
            actual_output_token,
            threshold=1.0
        )

    return {
        'layer_wise_top5_probs': layer_wise_top5_probs,
        'layer_wise_top5_deltas': layer_wise_top5_deltas,
        'significant_layers': significant_layers
    }
 def extract_layer_data(activation_data: Dict[str, Any], model, tokenizer) -> List[Dict[str, Any]]:
     """
     Extract layer-by-layer data for accordion display with top-5, deltas, and attention.
     Also tracks global top 5 tokens across all layers.
     Returns:
-        List of dicts with: layer_num, top_token, top_prob, top_5_tokens, deltas, top_attended_tokens,
         global_top5_probs, global_top5_deltas
     """
     layer_modules = activation_data.get('block_modules', [])
@@ -1561,11 +1081,6 @@ def extract_layer_data(activation_data: Dict[str, Any], model, tokenizer) -> Lis
     for layer_num, module_name in layer_info:
         top_tokens = _get_top_tokens(activation_data, module_name, model, tokenizer, top_k=5) if can_compute_predictions else None
-        # NOTE: top_attended_tokens is deprecated. Use categorize_all_heads() from
-        # head_detection.py instead for more meaningful attention analysis.
-        # Kept as None for backward compatibility with existing code.
-        top_attended = None
         # Get probabilities for global top 5 tokens at this layer
         global_top5_probs = {}
         global_top5_deltas = {}
@@ -1596,7 +1111,6 @@ def extract_layer_data(activation_data: Dict[str, Any], model, tokenizer) -> Lis
                 'top_3_tokens': top_tokens[:3],  # Keep for backward compatibility
                 'top_5_tokens': top_tokens[:5],  # New: top-5 for bar chart
                 'deltas': deltas,
-                'top_attended_tokens': top_attended,
                 'global_top5_probs': global_top5_probs,  # New: global top 5 probs at this layer
                 'global_top5_deltas': global_top5_deltas  # New: global top 5 deltas
             })
@@ -1613,7 +1127,6 @@ def extract_layer_data(activation_data: Dict[str, Any], model, tokenizer) -> Lis
                 'top_3_tokens': [],
                 'top_5_tokens': [],
                 'deltas': {},
-                'top_attended_tokens': top_attended,
                 'global_top5_probs': {},
                 'global_top5_deltas': {}
             })
@@ -1758,164 +1271,6 @@ def generate_bertviz_html(activation_data: Dict[str, Any], layer_index: int, vie
         return f"<p>Error generating visualization: {str(e)}</p>"
def generate_category_bertviz_html(activation_data: Dict[str, Any], category_heads: List[Dict[str, Any]]) -> str:
    """
    Generate BertViz attention visualization HTML for a specific category of heads.

    Only the attention patterns of the heads listed in ``category_heads`` are shown,
    preceded by a legend mapping the displayed head index to its layer/head label.

    Args:
        activation_data: Dict providing 'attention_outputs', 'input_ids' and 'model'
        category_heads: List of head info dicts, each with 'layer' and 'head' keys

    Returns:
        HTML string for the visualization, or a short ``<p>`` message when there is
        nothing to show or an error occurs (the traceback is printed in that case).
    """
    try:
        # Cheap checks first, so empty input never depends on the optional
        # visualization packages being importable.
        if not category_heads:
            return "<p>No heads in this category.</p>"

        attention_outputs = activation_data.get('attention_outputs', {})
        if not attention_outputs:
            return "<p>No attention data available</p>"

        from bertviz import head_view
        from transformers import AutoTokenizer

        # layer_num -> list of head indices belonging to this category
        category_map = {}
        for head_info in category_heads:
            category_map.setdefault(head_info['layer'], []).append(head_info['head'])

        # layer_num -> filtered attention tensor. Only the first usable module per
        # layer is kept, to avoid duplicate/mismatched tensors.
        layer_attention = {}
        for module_name, module_data in attention_outputs.items():
            first_number = re.search(r'\d+', module_name)
            if not first_number:
                continue
            layer_num = int(first_number.group())
            if layer_num not in category_map or layer_num in layer_attention:
                continue

            attention_output = module_data['output']
            if isinstance(attention_output, list) and len(attention_output) >= 2:
                # Element 1 of the output holds the weights: [batch, heads, seq, seq]
                full_attention = torch.tensor(attention_output[1])
                layer_attention[layer_num] = full_attention[:, category_map[layer_num], :, :]

        if not layer_attention:
            return "<p>No valid attention data found for this category.</p>"

        ordered_layers = sorted(layer_attention)
        attentions = tuple(layer_attention[layer_num] for layer_num in ordered_layers)

        # Recover token strings from the ids using the tokenizer named in the data.
        input_ids = torch.tensor(activation_data['input_ids'])
        model_name = activation_data.get('model', 'unknown')
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        raw_tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
        # Clean up tokens (remove special tokenizer artifacts like Ġ for GPT-2)
        tokens = [token.replace('Ġ', ' ') if token.startswith('Ġ') else token for token in raw_tokens]

        html_result = head_view(attentions, tokens, html_action='return')
        base_html = html_result.data if hasattr(html_result, 'data') else str(html_result)

        # Legend: running head index -> actual layer/head label
        legend_items = []
        for layer_num in ordered_layers:
            for head_idx in category_map[layer_num]:
                legend_items.append(f"Head {len(legend_items)}: L{layer_num}-H{head_idx}")

        legend_html = """
        <div style="background-color: #f8f9fa; padding: 10px; margin-bottom: 10px; border-radius: 5px; border: 1px solid #dee2e6;">
            <strong style="color: #495057;">Head Index Reference:</strong><br/>
            <div style="font-size: 12px; color: #6c757d; margin-top: 5px; display: grid; grid-template-columns: repeat(auto-fill, minmax(150px, 1fr)); gap: 5px;">
                {items}
            </div>
        </div>
        """.format(items=''.join(f'<span>{item}</span>' for item in legend_items))

        # Prepend legend to the visualization
        return legend_html + base_html

    except Exception as e:
        import traceback
        traceback.print_exc()
        return f"<p>Error generating category visualization: {str(e)}</p>"
def generate_head_view_with_categories(activation_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build the BertViz head view together with the head categorization.

    The two steps are independent: a failure in one is recorded in 'error' and does
    not prevent the other from running.

    Args:
        activation_data: Activation data passed to both generate_bertviz_html() and
            categorize_all_heads()

    Returns:
        Dict with:
            - 'html': HTML from generate_bertviz_html(), or an error paragraph if it raised
            - 'categories': Result of categorize_all_heads() ({} if it raised)
            - 'summary': Result of format_categorization_summary() ('' on failure)
            - 'error': Combined error message(s), or None when both steps succeeded
    """
    from .head_detection import categorize_all_heads, format_categorization_summary

    html = None
    error = None
    categories = {}
    summary = ''

    # Step 1: base head_view visualization
    try:
        html = generate_bertviz_html(activation_data, layer_index=0, view_type='full')
    except Exception as exc:
        error = f"Failed to generate head view: {str(exc)}"
        html = f"<p>Error generating visualization: {str(exc)}</p>"

    # Step 2: head categorization and its text summary
    try:
        categories = categorize_all_heads(activation_data)
        summary = format_categorization_summary(categories)
    except Exception as exc:
        failure = f"Categorization failed: {str(exc)}"
        error = f"{error}; {failure}" if error else failure

    return {
        'html': html,
        'categories': categories,
        'summary': summary,
        'error': error
    }
 def get_head_category_counts(activation_data: Dict[str, Any]) -> Dict[str, int]:
     """
     Get counts of attention heads in each category.

     return result
 def evaluate_sequence_ablation(model, tokenizer, sequence_text: str, config: Dict[str, Any],
                              ablation_type: str, ablation_target: Any) -> Dict[str, Any]:
     """
         return None
 def detect_significant_probability_increases(layer_wise_probs: Dict[int, Dict[str, float]],
                                             layer_wise_deltas: Dict[int, Dict[str, float]],
                                             actual_output_token: str,
     return significant_layers
 def extract_layer_data(activation_data: Dict[str, Any], model, tokenizer) -> List[Dict[str, Any]]:
     """
     Extract layer-by-layer data for accordion display with top-5, deltas, and attention.
     Also tracks global top 5 tokens across all layers.
     Returns:
+        List of dicts with: layer_num, top_token, top_prob, top_5_tokens, deltas,
         global_top5_probs, global_top5_deltas
     """
     layer_modules = activation_data.get('block_modules', [])
     for layer_num, module_name in layer_info:
         top_tokens = _get_top_tokens(activation_data, module_name, model, tokenizer, top_k=5) if can_compute_predictions else None
         # Get probabilities for global top 5 tokens at this layer
         global_top5_probs = {}
         global_top5_deltas = {}
                 'top_3_tokens': top_tokens[:3],  # Keep for backward compatibility
                 'top_5_tokens': top_tokens[:5],  # New: top-5 for bar chart
                 'deltas': deltas,
                 'global_top5_probs': global_top5_probs,  # New: global top 5 probs at this layer
                 'global_top5_deltas': global_top5_deltas  # New: global top 5 deltas
             })
                 'top_3_tokens': [],
                 'top_5_tokens': [],
                 'deltas': {},
                 'global_top5_probs': {},
                 'global_top5_deltas': {}
             })
         return f"<p>Error generating visualization: {str(e)}</p>"
 def get_head_category_counts(activation_data: Dict[str, Any]) -> Dict[str, int]:
     """
     Get counts of attention heads in each category.