cdpearlman (Cursor) committed
Commit d60cfe2 · 1 parent: 3f991b4

feat(output): Add token scrubber with per-position top-5 next-token probabilities

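The change exploits a basic property of causal language models: the logits at sequence index j are the distribution over the token at index j+1, so a single forward pass over prompt + generated text already contains the top-k candidates for every generated position. A minimal standalone sketch of that indexing (gpt2 is a placeholder model; this mirrors the new compute_per_position_top5 below, not the repo's exact code):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tok = AutoTokenizer.from_pretrained("gpt2")           # placeholder model
    model = AutoModelForCausalLM.from_pretrained("gpt2")
    model.eval()

    prompt = "The capital of France is"
    full_text = prompt + " Paris, a city"                 # prompt + generated tokens
    ids = tok(full_text, return_tensors="pt")["input_ids"]
    prompt_len = tok(prompt, return_tensors="pt")["input_ids"].shape[1]

    with torch.no_grad():
        logits = model(ids).logits[0]                     # [seq_len, vocab]

    for i in range(ids.shape[1] - prompt_len):
        # Logits at index prompt_len - 1 + i predict the token at prompt_len + i.
        probs = torch.softmax(logits[prompt_len - 1 + i], dim=-1)
        top_p, top_ix = torch.topk(probs, k=5)
        actual_id = ids[0, prompt_len + i].item()
        print(i, tok.decode([actual_id]), round(probs[actual_id].item(), 3),
              [tok.decode([t]) for t in top_ix.tolist()])

Note one caveat this shares with the commit's approach: tokenizing the prompt separately can, in rare cases, split differently at the prompt/generation boundary than tokenizing the full text does.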
app.py CHANGED
@@ -28,7 +28,8 @@ from components.model_selector import create_model_selector
 from components.glossary import create_glossary_modal
 from components.pipeline import (create_pipeline_container, create_tokenization_content,
                                  create_embedding_content, create_attention_content,
-                                 create_mlp_content, create_output_content)
+                                 create_mlp_content, create_output_content,
+                                 _build_token_display, _build_top5_chart)
 from components.investigation_panel import create_investigation_panel, create_attribution_results_display
 from components.ablation_panel import create_selected_heads_display, create_ablation_results_display
 from components.chatbot import create_chatbot_container, render_messages
@@ -375,9 +376,12 @@ def run_generation(n_clicks, model_name, prompt, max_new_tokens, beam_width, pat
     # the full-sequence analysis runs when the user selects a beam.
     if max_new_tokens == 1:
         full_text = results[0]['text']
+        # Pass original_prompt so per-position top-5 is computed for the scrubber
+        activation_data = execute_forward_pass(model, tokenizer, full_text, config,
+                                               original_prompt=prompt)
     else:
         full_text = prompt
-    activation_data = execute_forward_pass(model, tokenizer, full_text, config)
+        activation_data = execute_forward_pass(model, tokenizer, full_text, config)
 
     results_ui = []
     if max_new_tokens > 1:
@@ -418,10 +422,11 @@ def run_generation(n_clicks, model_name, prompt, max_new_tokens, beam_width, pat
      Output('session-activation-store', 'data', allow_duplicate=True)],
     Input({'type': 'result-item', 'index': ALL}, 'n_clicks'),
     [State('generation-results-store', 'data'),
-     State('session-activation-store', 'data')],
+     State('session-activation-store', 'data'),
+     State('session-original-prompt-store', 'data')],
     prevent_initial_call=True
 )
-def store_selected_beam(n_clicks_list, results_data, existing_activation_data):
+def store_selected_beam(n_clicks_list, results_data, existing_activation_data, original_prompt_data):
     """
     Store selected beam and re-run forward pass on the full sequence.
 
@@ -490,7 +495,12 @@ def store_selected_beam(n_clicks_list, results_data, existing_activation_data):
         model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation='eager')
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         model.eval()
-        new_activation_data = execute_forward_pass(model, tokenizer, result['text'], config)
+        # Pass original_prompt so per-position top-5 data is computed for the scrubber
+        orig_prompt = original_prompt_data.get('prompt', '') if original_prompt_data else ''
+        new_activation_data = execute_forward_pass(
+            model, tokenizer, result['text'], config,
+            original_prompt=orig_prompt
+        )
     except Exception as e:
         import traceback
         traceback.print_exc()
@@ -594,9 +604,19 @@ def update_pipeline_content(activation_data, model_name):
     # Stage 5: Output
     # Get original prompt for context display
     original_prompt = activation_data.get('prompt', '')
+    # Per-position data for the scrubber (populated when original_prompt was given)
+    per_position_data = activation_data.get('per_position_top5', [])
+    generated_tokens = activation_data.get('generated_tokens', [])
+    scrubber_prompt = activation_data.get('original_prompt', original_prompt)
+
     outputs.append(f"→ {predicted_token}")
-    outputs.append(create_output_content(top_tokens, predicted_token, predicted_prob,
-                                         original_prompt=original_prompt))
+    outputs.append(create_output_content(
+        top_tokens, predicted_token, predicted_prob,
+        original_prompt=original_prompt,
+        per_position_data=per_position_data,
+        generated_tokens=generated_tokens,
+        prompt_text=scrubber_prompt
+    ))
 
     return tuple(outputs)
 
@@ -606,6 +626,41 @@ def update_pipeline_content(activation_data, model_name):
         return tuple(empty_outputs)
 
 
+# ============================================================================
+# CALLBACKS: Output Scrubber
+# ============================================================================
+
+@app.callback(
+    [Output('output-token-display', 'children'),
+     Output('output-top5-chart', 'children')],
+    [Input('output-scrubber-slider', 'value')],
+    [State('session-activation-store', 'data')],
+    prevent_initial_call=True
+)
+def update_output_scrubber(position, activation_data):
+    """Update the token display and top-5 chart when the scrubber slider moves."""
+    if activation_data is None or position is None:
+        return no_update, no_update
+
+    per_position_data = activation_data.get('per_position_top5', [])
+    generated_tokens = activation_data.get('generated_tokens', [])
+    prompt_text = activation_data.get('original_prompt', activation_data.get('prompt', ''))
+
+    if not per_position_data or not generated_tokens:
+        return no_update, no_update
+
+    # Clamp position to valid range
+    position = max(0, min(position, len(per_position_data) - 1))
+    pos_data = per_position_data[position]
+
+    token_display = _build_token_display(
+        prompt_text, generated_tokens, position, pos_data['actual_prob']
+    )
+    top5_chart = _build_top5_chart(pos_data['top5'], pos_data.get('actual_token'))
+
+    return token_display, top5_chart
+
+
 # ============================================================================
 # CALLBACKS: Sidebar
 # ============================================================================
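For readers unfamiliar with Dash, the update_output_scrubber callback above is an instance of the standard Store + Slider pattern. A self-contained toy version (component ids and data here are invented, not the app's real components):

    from dash import Dash, dcc, html, Input, Output, State, no_update

    app = Dash(__name__)
    app.layout = html.Div([
        dcc.Store(id='demo-store', data={'tokens': ['Hello', ' world', '!']}),
        dcc.Slider(id='demo-slider', min=0, max=2, step=1, value=0),
        html.Div(id='demo-display'),
    ])

    @app.callback(Output('demo-display', 'children'),
                  Input('demo-slider', 'value'),
                  State('demo-store', 'data'))
    def show_token(position, data):
        if data is None or position is None:
            return no_update
        tokens = data['tokens']
        position = max(0, min(int(position), len(tokens) - 1))  # clamp, as the app does
        return f"Token {position + 1} of {len(tokens)}: {tokens[position]!r}"

    if __name__ == '__main__':
        app.run(debug=True)

Because the per-position data lives in a dcc.Store, moving the slider re-renders only the token display and chart; no forward pass is repeated.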
components/pipeline.py CHANGED
@@ -689,18 +689,218 @@ def create_mlp_content(layer_count=None, hidden_dim=None, intermediate_dim=None)
     ])
 
 
-def create_output_content(top_tokens=None, predicted_token=None, predicted_prob=None,
-                          top5_chart=None, original_prompt=None):
+def _build_token_display(prompt_text, generated_tokens, position, actual_prob):
+    """Build the token display for a given scrubber position.
+
+    Args:
+        prompt_text: Original prompt string.
+        generated_tokens: List of generated token strings.
+        position: Current slider position (0-indexed into generated_tokens).
+        actual_prob: Probability of the highlighted token at this position.
+    """
+    # Context = prompt + all generated tokens before the current position
+    context_parts = [
+        html.Span(prompt_text, style={
+            'color': '#6c757d',
+            'fontFamily': 'monospace',
+            'fontSize': '15px'
+        })
+    ]
+    for j in range(position):
+        context_parts.append(
+            html.Span(generated_tokens[j], style={
+                'color': '#6c757d',
+                'fontFamily': 'monospace',
+                'fontSize': '15px'
+            })
+        )
+
+    # Highlighted token at the current position
+    highlighted = html.Span(generated_tokens[position], style={
+        'padding': '4px 8px',
+        'backgroundColor': '#00f2fe',
+        'color': '#1a1a2e',
+        'borderRadius': '4px',
+        'fontFamily': 'monospace',
+        'fontWeight': '600',
+        'fontSize': '15px',
+        'marginLeft': '2px'
+    })
+
+    confidence = html.Div([
+        html.Span(
+            f"{actual_prob:.1%} confidence" if actual_prob else "",
+            style={'color': '#6c757d', 'fontSize': '13px', 'marginTop': '8px', 'display': 'block'}
+        )
+    ])
+
+    return html.Div([
+        html.Div([
+            html.Span(
+                f"Token {position + 1} of {len(generated_tokens)}:",
+                style={'color': '#495057', 'marginBottom': '12px', 'display': 'block', 'fontWeight': '500'}
+            ),
+            html.Div(context_parts + [highlighted], style={'display': 'inline'}),
+            confidence
+        ], style={'textAlign': 'center'})
+    ], style={
+        'padding': '20px', 'backgroundColor': 'white', 'borderRadius': '8px',
+        'border': '2px solid #00f2fe', 'marginBottom': '16px'
+    })
+
+
+def _build_top5_chart(top5_data, actual_token=None):
+    """Build the top-5 bar chart for a single scrubber position.
+
+    Args:
+        top5_data: List of {'token': str, 'probability': float}.
+        actual_token: The token that was actually generated (highlighted if present).
+    """
+    tokens = [entry['token'] for entry in top5_data]
+    probs = [entry['probability'] for entry in top5_data]
+
+    # Highlight the actual chosen token if it appears in the top 5
+    colors = []
+    actual_in_top5 = False
+    for t in tokens:
+        if actual_token and t.strip() == actual_token.strip():
+            colors.append('#00f2fe')
+            actual_in_top5 = True
+        else:
+            colors.append('#4facfe')
+
+    fig = go.Figure(go.Bar(
+        x=probs,
+        y=tokens,
+        orientation='h',
+        marker_color=colors,
+        text=[f"{p:.1%}" for p in probs],
+        textposition='outside',
+        hovertemplate='%{y} (%{x:.1%})<extra></extra>'
+    ))
+
+    fig.update_layout(
+        title="Top 5 Next-Token Predictions",
+        xaxis_title="Probability",
+        yaxis_title="Token",
+        height=250,
+        margin=dict(l=20, r=60, t=40, b=20),
+        paper_bgcolor='rgba(0,0,0,0)',
+        plot_bgcolor='rgba(0,0,0,0)',
+        yaxis=dict(autorange='reversed')
+    )
+
+    children = [dcc.Graph(figure=fig, config={'displayModeBar': False})]
+
+    # If the actual token is not in the top 5, add a note below
+    if actual_token and not actual_in_top5:
+        children.append(html.Div([
+            html.I(className='fas fa-info-circle', style={'color': '#6c757d', 'marginRight': '6px'}),
+            html.Span([
+                "The actual token \"", html.Strong(actual_token.strip()),
+                "\" was not in the top 5 predictions at this position."
+            ], style={'color': '#6c757d', 'fontSize': '13px'})
+        ], style={'padding': '8px 12px'}))
+
+    return html.Div(children, style={
+        'backgroundColor': 'white', 'borderRadius': '8px', 'border': '1px solid #e2e8f0'
+    })
+
+
+def create_output_content(top_tokens=None, predicted_token=None, predicted_prob=None,
+                          top5_chart=None, original_prompt=None,
+                          per_position_data=None, generated_tokens=None,
+                          prompt_text=None):
     """
     Create content for the output selection stage.
+
+    When per_position_data is available, the output is an interactive scrubber
+    that lets the user step through each generated-token position. Otherwise
+    it falls back to the previous static display.
 
     Args:
-        top_tokens: List of (token, probability) tuples for top predictions
-        predicted_token: The final predicted token
-        predicted_prob: Probability of the predicted token
-        top5_chart: Optional Plotly figure for top-5 visualization
-        original_prompt: Original input prompt to show context with prediction
+        top_tokens: List of (token, probability) tuples for top predictions (static mode).
+        predicted_token: The final predicted token (static mode).
+        predicted_prob: Probability of the predicted token (static mode).
+        top5_chart: Optional Plotly figure for top-5 visualization (static mode).
+        original_prompt: Original input prompt to show context with prediction (static mode).
+        per_position_data: List of per-position dicts from compute_per_position_top5 (scrubber mode).
+        generated_tokens: List of generated token strings (scrubber mode).
+        prompt_text: Original prompt text for context display (scrubber mode).
     """
+    # --- Scrubber mode ---
+    if per_position_data and generated_tokens:
+        num_positions = len(per_position_data)
+        prompt_display = prompt_text or original_prompt or ""
+
+        content_items = [
+            html.Div([
+                html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
+                html.P([
+                    "The model converts the final hidden state into a ",
+                    html.Strong("probability distribution"),
+                    " over all possible next tokens. Use the slider below to step through "
+                    "each generated token and see the model's top predictions at that point."
+                ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
+            ])
+        ]
+
+        # Slider / scrubber
+        slider_marks = {i: {'label': generated_tokens[i].strip() or repr(generated_tokens[i])}
+                        for i in range(num_positions)}
+        content_items.append(
+            html.Div([
+                html.Span("Step through generated tokens:",
+                          style={'color': '#495057', 'fontWeight': '500', 'display': 'block',
+                                 'marginBottom': '8px'}),
+                dcc.Slider(
+                    id='output-scrubber-slider',
+                    min=0,
+                    max=max(num_positions - 1, 0),
+                    step=1,
+                    value=0,
+                    marks=slider_marks,
+                    included=False,
+                )
+            ], style={'marginBottom': '20px', 'padding': '12px 16px',
+                      'backgroundColor': '#f8f9fa', 'borderRadius': '8px',
+                      'border': '1px solid #dee2e6'})
+        )
+
+        # Initial render at position 0
+        pos0 = per_position_data[0]
+        content_items.append(
+            html.Div(
+                _build_token_display(prompt_display, generated_tokens, 0, pos0['actual_prob']),
+                id='output-token-display'
+            )
+        )
+        content_items.append(
+            html.Div(
+                _build_top5_chart(pos0['top5'], pos0.get('actual_token')),
+                id='output-top5-chart'
+            )
+        )
+
+        # Disclaimer
+        content_items.append(
+            html.Div([
+                html.I(className='fas fa-info-circle', style={'color': '#6c757d', 'marginRight': '8px'}),
+                html.Span([
+                    html.Strong("Note on Token Selection: "),
+                    "While the probabilities above show the model's raw preference for the immediate next token, the final choice ",
+                    "can be influenced by other factors. Techniques like ", html.Strong("Beam Search"),
+                    " look ahead at multiple possible sequences to find the best overall result, rather than just the single most likely token at each step. ",
+                    "Additionally, architectures like ", html.Strong("Mixture of Experts (MoE)"),
+                    " might route processing through different specialized internal networks, which can affect the final output distribution."
+                ], style={'color': '#6c757d', 'fontSize': '13px'})
+            ], style={'marginTop': '16px', 'padding': '12px', 'backgroundColor': '#f8f9fa',
+                      'borderRadius': '6px', 'border': '1px solid #dee2e6'})
+        )
+
+        return html.Div(content_items)
+
+    # --- Static fallback (prompt-only analysis, no generated tokens yet) ---
     content_items = [
         html.Div([
             html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
@@ -711,75 +911,49 @@ def create_output_content(top_tokens=None, predicted_token=None, predicted_prob=
         ])
     ]
 
-    # Predicted token display with full prompt context
     if predicted_token:
-        # Build the full prompt + predicted token display
         prompt_display = original_prompt if original_prompt else ""
-
         content_items.append(
             html.Div([
                 html.Div([
                     html.Span("Model prediction:", style={'color': '#495057', 'marginBottom': '12px', 'display': 'block', 'fontWeight': '500'}),
                     html.Div([
-                        # Original prompt (dimmed)
                         html.Span(prompt_display, style={
-                            'color': '#6c757d',
-                            'fontFamily': 'monospace',
-                            'fontSize': '15px'
+                            'color': '#6c757d', 'fontFamily': 'monospace', 'fontSize': '15px'
                         }),
-                        # Predicted token (highlighted)
                         html.Span(predicted_token, style={
-                            'padding': '4px 8px',
-                            'backgroundColor': '#00f2fe',
-                            'color': '#1a1a2e',
-                            'borderRadius': '4px',
-                            'fontFamily': 'monospace',
-                            'fontWeight': '600',
-                            'fontSize': '15px',
-                            'marginLeft': '2px'
+                            'padding': '4px 8px', 'backgroundColor': '#00f2fe',
+                            'color': '#1a1a2e', 'borderRadius': '4px',
+                            'fontFamily': 'monospace', 'fontWeight': '600',
+                            'fontSize': '15px', 'marginLeft': '2px'
                        })
                     ], style={'display': 'inline'}),
-                    # Confidence indicator
                     html.Div([
                         html.Span(f"{predicted_prob:.1%} confidence" if predicted_prob else "", style={
-                            'color': '#6c757d',
-                            'fontSize': '13px',
-                            'marginTop': '8px',
-                            'display': 'block'
+                            'color': '#6c757d', 'fontSize': '13px', 'marginTop': '8px', 'display': 'block'
                        })
                    ])
                ], style={'textAlign': 'center'})
             ], style={'padding': '20px', 'backgroundColor': 'white', 'borderRadius': '8px',
                       'border': '2px solid #00f2fe', 'marginBottom': '16px'})
        )
 
-    # Top-5 bar chart with improved hover formatting
     if top_tokens:
        tokens = [t[0] for t in top_tokens[:5]]
        probs = [t[1] for t in top_tokens[:5]]
 
        fig = go.Figure(go.Bar(
-            x=probs,
-            y=tokens,
-            orientation='h',
+            x=probs, y=tokens, orientation='h',
            marker_color=['#00f2fe' if i == 0 else '#4facfe' for i in range(len(tokens))],
-            text=[f"{p:.1%}" for p in probs],
-            textposition='outside',
-            # Format hover to show "Token (X%)" instead of long decimals
+            text=[f"{p:.1%}" for p in probs], textposition='outside',
            hovertemplate='%{y} (%{x:.1%})<extra></extra>'
        ))
-
        fig.update_layout(
-            title="Top 5 Predictions",
-            xaxis_title="Probability",
-            yaxis_title="Token",
-            height=250,
-            margin=dict(l=20, r=60, t=40, b=20),
-            paper_bgcolor='rgba(0,0,0,0)',
-            plot_bgcolor='rgba(0,0,0,0)',
+            title="Top 5 Predictions", xaxis_title="Probability", yaxis_title="Token",
+            height=250, margin=dict(l=20, r=60, t=40, b=20),
+            paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)',
            yaxis=dict(autorange='reversed')
        )
-
        content_items.append(
            html.Div([
                dcc.Graph(figure=fig, config={'displayModeBar': False})
@@ -792,7 +966,6 @@ def create_output_content(top_tokens=None, predicted_token=None, predicted_prob=
         ], style={'backgroundColor': 'white', 'borderRadius': '8px', 'border': '1px solid #e2e8f0'})
     )
 
-    # Disclaimer about token selection drivers
     content_items.append(
         html.Div([
             html.I(className='fas fa-info-circle', style={'color': '#6c757d', 'marginRight': '8px'}),
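A toy invocation of the new scrubber mode (the data shapes follow compute_per_position_top5's documented return format; the token strings and probabilities are invented):

    from components.pipeline import create_output_content

    per_position_data = [
        {'position': 0,
         'top5': [{'token': ' Paris', 'probability': 0.62},
                  {'token': ' Lyon', 'probability': 0.11}],
         'actual_token': ' Paris', 'actual_prob': 0.62},
        {'position': 1,
         'top5': [{'token': ',', 'probability': 0.41},
                  {'token': '.', 'probability': 0.33}],
         'actual_token': ',', 'actual_prob': 0.41},
    ]
    layout = create_output_content(
        per_position_data=per_position_data,
        generated_tokens=[' Paris', ','],
        prompt_text='The capital of France is',
    )

Calling it with only the static-mode arguments (top_tokens, predicted_token, predicted_prob) still produces the previous static display, which is what keeps pre-beam-selection views working.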
plans.md CHANGED
@@ -1,11 +1,12 @@
 - specs on what each attention head does
-- change attention to entire generated sequence
-- output slider to look at each token
-  - put in a more obvious place?
 - experiment results side by side comparison
 - output streaming for chatbot
-- change width of chatbot window
 - shorter, concise responses in system prompt
 - add video links to glossary
 - three blue one brown
-- add output token generation to attention, tokenization, etc
+
+Done:
+- change attention to entire generated sequence
+- change width of chatbot window
+- add output token generation to attention, tokenization, etc
+- output slider to look at each token (scrubber with top-5 at each position)
tests/conftest.py CHANGED
@@ -5,6 +5,10 @@ Provides reusable mock data structures and synthetic tensors
 to test utility functions without loading actual ML models.
 """
 
+# Disable TensorFlow before any other imports (mirrors app.py)
+import os
+os.environ["USE_TF"] = "0"
+
 import pytest
 import torch
 import numpy as np
tests/test_model_patterns.py CHANGED
@@ -263,6 +263,136 @@ class TestMultiLayerHeadAblation:
         assert '99' in result['error']  # Should mention the invalid layer
 
 
+class TestComputePerPositionTop5:
+    """Tests for compute_per_position_top5 function."""
+
+    def _make_mock_output(self, seq_len, vocab_size=10):
+        """Create a mock model output with predictable logits.
+
+        At each position i, logits[0, i, i % vocab_size] = 10.0 (highest), so
+        the top-1 token at position i is always index i % vocab_size. All
+        other logits are 1.0.
+        """
+        logits = torch.ones(1, seq_len, vocab_size)
+        for i in range(seq_len):
+            # Make token (i % vocab_size) the top prediction at position i
+            logits[0, i, i % vocab_size] = 10.0
+
+        class MockOutput:
+            pass
+        out = MockOutput()
+        out.logits = logits
+        return out
+
+    def _make_mock_tokenizer(self, vocab_size=10):
+        """Create a mock tokenizer that decodes token IDs to 'tok_N'."""
+        from unittest.mock import MagicMock
+        tok = MagicMock()
+        def decode_fn(ids, skip_special_tokens=False):
+            if isinstance(ids, list) and len(ids) == 1:
+                return f"tok_{ids[0]}"
+            return "".join(f"tok_{i}" for i in ids)
+        tok.decode = decode_fn
+        return tok
+
+    def test_returns_correct_number_of_positions(self):
+        """With prompt_token_count=3 and seq_len=7, should return 4 positions (7-3)."""
+        from utils.model_patterns import compute_per_position_top5
+        model_output = self._make_mock_output(seq_len=7, vocab_size=10)
+        tokenizer = self._make_mock_tokenizer(vocab_size=10)
+        # Full sequence has 7 tokens, prompt has 3, so 4 generated tokens
+        result = compute_per_position_top5(model_output, tokenizer, prompt_token_count=3, top_k=5)
+        assert len(result) == 4  # positions 0, 1, 2, 3
+
+    def test_single_generated_token(self):
+        """With 1 generated token, should return exactly 1 position."""
+        from utils.model_patterns import compute_per_position_top5
+        model_output = self._make_mock_output(seq_len=4, vocab_size=10)
+        tokenizer = self._make_mock_tokenizer(vocab_size=10)
+        result = compute_per_position_top5(model_output, tokenizer, prompt_token_count=3, top_k=5)
+        assert len(result) == 1
+        assert result[0]['position'] == 0
+
+    def test_each_position_has_top_k_entries(self):
+        """Each position should have exactly top_k entries in its top5 list."""
+        from utils.model_patterns import compute_per_position_top5
+        model_output = self._make_mock_output(seq_len=8, vocab_size=10)
+        tokenizer = self._make_mock_tokenizer(vocab_size=10)
+        result = compute_per_position_top5(model_output, tokenizer, prompt_token_count=3, top_k=5)
+        for pos_data in result:
+            assert len(pos_data['top5']) == 5
+
+    def test_top_k_3(self):
+        """Should respect a custom top_k parameter."""
+        from utils.model_patterns import compute_per_position_top5
+        model_output = self._make_mock_output(seq_len=6, vocab_size=10)
+        tokenizer = self._make_mock_tokenizer(vocab_size=10)
+        result = compute_per_position_top5(model_output, tokenizer, prompt_token_count=3, top_k=3)
+        for pos_data in result:
+            assert len(pos_data['top5']) == 3
+
+    def test_probabilities_sorted_descending(self):
+        """Top-5 probabilities should be in descending order."""
+        from utils.model_patterns import compute_per_position_top5
+        model_output = self._make_mock_output(seq_len=6, vocab_size=10)
+        tokenizer = self._make_mock_tokenizer(vocab_size=10)
+        result = compute_per_position_top5(model_output, tokenizer, prompt_token_count=3, top_k=5)
+        for pos_data in result:
+            probs = [entry['probability'] for entry in pos_data['top5']]
+            assert probs == sorted(probs, reverse=True)
+
+    def test_probabilities_are_valid(self):
+        """All probabilities should be between 0 and 1."""
+        from utils.model_patterns import compute_per_position_top5
+        model_output = self._make_mock_output(seq_len=6, vocab_size=10)
+        tokenizer = self._make_mock_tokenizer(vocab_size=10)
+        result = compute_per_position_top5(model_output, tokenizer, prompt_token_count=3, top_k=5)
+        for pos_data in result:
+            for entry in pos_data['top5']:
+                assert 0.0 <= entry['probability'] <= 1.0
+            assert 0.0 <= pos_data['actual_prob'] <= 1.0
+
+    def test_actual_token_field_present(self):
+        """Each position should have actual_token and actual_prob fields."""
+        from utils.model_patterns import compute_per_position_top5
+        model_output = self._make_mock_output(seq_len=6, vocab_size=10)
+        tokenizer = self._make_mock_tokenizer(vocab_size=10)
+        result = compute_per_position_top5(model_output, tokenizer, prompt_token_count=3, top_k=5)
+        for pos_data in result:
+            assert 'actual_token' in pos_data
+            assert 'actual_prob' in pos_data
+            assert isinstance(pos_data['actual_token'], str)
+            assert isinstance(pos_data['actual_prob'], float)
+
+    def test_position_indices_sequential(self):
+        """Position indices should be 0, 1, 2, ... N-1."""
+        from utils.model_patterns import compute_per_position_top5
+        model_output = self._make_mock_output(seq_len=8, vocab_size=10)
+        tokenizer = self._make_mock_tokenizer(vocab_size=10)
+        result = compute_per_position_top5(model_output, tokenizer, prompt_token_count=3, top_k=5)
+        positions = [r['position'] for r in result]
+        assert positions == list(range(5))  # 8 - 3 = 5 positions
+
+    def test_does_not_include_position_beyond_sequence(self):
+        """Should NOT produce a position that predicts beyond the last token."""
+        from utils.model_patterns import compute_per_position_top5
+        model_output = self._make_mock_output(seq_len=5, vocab_size=10)
+        tokenizer = self._make_mock_tokenizer(vocab_size=10)
+        # prompt=3, seq=5, so 2 generated tokens -> positions 0 and 1
+        result = compute_per_position_top5(model_output, tokenizer, prompt_token_count=3, top_k=5)
+        assert len(result) == 2
+        # Position 0: logits at index 2 (prompt_len-1), predicts token at index 3
+        # Position 1: logits at index 3, predicts token at index 4
+        # No position for logits at index 4 (it would predict beyond the sequence)
+
+    def test_prompt_equals_sequence_returns_empty(self):
+        """When prompt_token_count == seq_len (no generated tokens), return empty."""
+        from utils.model_patterns import compute_per_position_top5
+        model_output = self._make_mock_output(seq_len=3, vocab_size=10)
+        tokenizer = self._make_mock_tokenizer(vocab_size=10)
+        result = compute_per_position_top5(model_output, tokenizer, prompt_token_count=3, top_k=5)
+        assert result == []
+
+
 class TestFullSequenceAttentionData:
     """
     Tests verifying that activation data for full sequences (prompt + generated output)
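The new class can be run on its own with pytest's -k filter, e.g.:

    pytest tests/test_model_patterns.py -k ComputePerPositionTop5 -q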
todo.md CHANGED
@@ -208,3 +208,20 @@
 - [x] Added 5 tests in `test_model_patterns.py` (`TestFullSequenceAttentionData`) verifying attention matrix dimensions match full sequence length
 - Attention visualization now covers the entire chosen output (input + generated tokens), not just the input prompt
 - No changes needed in `model_patterns.py`, `beam_search.py`, `pipeline.py`, or `head_detection.py`
+
+## Completed: Output Token Scrubber
+
+- [x] Add `compute_per_position_top5()` to `utils/model_patterns.py` — extracts top-5 next-token probabilities at each generated-token position from a single forward pass
+- [x] Add `original_prompt` parameter to `execute_forward_pass()` — when provided, computes per-position top-5 data and stores it in activation_data
+- [x] Export `compute_per_position_top5` in `utils/__init__.py`
+- [x] Update `run_generation()` in app.py — passes `original_prompt=prompt` for single-token generation
+- [x] Update `store_selected_beam()` in app.py — reads the original prompt from the session store and passes it to the forward pass
+- [x] Rewrite `create_output_content()` in `components/pipeline.py` — scrubber mode with `dcc.Slider`, token display, and top-5 chart; falls back to static mode when no per-position data
+- [x] Add `_build_token_display()` and `_build_top5_chart()` helpers in pipeline.py
+- [x] Add `update_output_scrubber()` callback in app.py — responds to slider changes, updates token highlight and chart
+- [x] Update `update_pipeline_content()` in app.py — extracts per-position data and passes it to the output content
+- [x] Add 10 tests for `compute_per_position_top5` in `test_model_patterns.py`
+- [x] Fix `conftest.py` to set `USE_TF=0` for test import compatibility
+- [x] All 100 tests pass
+- Scrubber shows prompt context (gray) + highlighted token (cyan) + top-5 bar chart at each slider position
+- Pre-beam-selection falls back to the static output display; the scrubber activates after beam selection or single-token generation
utils/__init__.py CHANGED
@@ -4,7 +4,8 @@ from .model_patterns import (load_model_and_get_patterns, execute_forward_pass,
                              execute_forward_pass_with_head_ablation,
                              execute_forward_pass_with_multi_layer_head_ablation,
                              merge_token_probabilities,
-                             compute_global_top5_tokens, detect_significant_probability_increases,
+                             compute_global_top5_tokens, compute_per_position_top5,
+                             detect_significant_probability_increases,
                              evaluate_sequence_ablation, generate_bertviz_model_view_html)
 from .model_config import get_model_family, get_family_config, get_auto_selections, MODEL_TO_FAMILY, MODEL_FAMILIES
 from .head_detection import categorize_all_heads, categorize_single_layer_heads, format_categorization_summary, HeadCategorizationConfig
@@ -25,6 +26,7 @@ __all__ = [
     'generate_bertviz_html',
     'merge_token_probabilities',
     'compute_global_top5_tokens',
+    'compute_per_position_top5',
     'detect_significant_probability_increases',
     'generate_bertviz_model_view_html',
utils/model_patterns.py CHANGED
@@ -125,6 +125,98 @@ def compute_global_top5_tokens(model_output, tokenizer, top_k: int = 5) -> List[
     return [{'token': t, 'probability': p} for t, p in merged[:top_k]]
 
 
+def compute_per_position_top5(model_output, tokenizer, prompt_token_count: int, top_k: int = 5) -> List[Dict[str, Any]]:
+    """
+    Compute top-K next-token probabilities at each generated-token position.
+
+    Uses logits already produced by the forward pass on the full sequence
+    (prompt + generated tokens). Position i in the returned list corresponds
+    to the prediction of generated token g_i given the prefix up to g_{i-1}.
+
+    Args:
+        model_output: Output from model(**inputs) containing logits [1, seq_len, vocab].
+        tokenizer: Tokenizer for decoding token IDs.
+        prompt_token_count: Number of tokens in the original prompt (P).
+        top_k: Number of top tokens per position (default 5).
+
+    Returns:
+        List of dicts, one per generated token position::
+
+            [
+                {
+                    "position": 0,
+                    "top5": [{"token": str, "probability": float}, ...],
+                    "actual_token": str,   # token actually generated at this position
+                    "actual_prob": float   # its probability at this position
+                },
+                ...
+            ]
+    """
+    seq_len = model_output.logits.shape[1]
+    num_generated = seq_len - prompt_token_count
+    if num_generated <= 0:
+        return []
+
+    results = []
+    with torch.no_grad():
+        # The actual token at generated position i sits at input index
+        # prompt_token_count + i. The caller attaches the full-sequence
+        # input_ids to model_output (see execute_forward_pass); only when
+        # those ids are missing do we fall back to the argmax prediction.
+        all_logits = model_output.logits[0]  # [seq_len, vocab]
+
+        for i in range(num_generated):
+            logit_idx = prompt_token_count - 1 + i   # index into logits
+            next_token_idx = prompt_token_count + i  # index of the actual next token
+
+            probs = F.softmax(all_logits[logit_idx], dim=-1)
+
+            # --- top-K with merge ---
+            top_probs, top_indices = torch.topk(probs, k=min(top_k * 2, len(probs)))
+            candidates = [
+                (tokenizer.decode([idx.item()], skip_special_tokens=False), prob.item())
+                for idx, prob in zip(top_indices, top_probs)
+            ]
+            merged = merge_token_probabilities(candidates)
+            top5 = [{'token': t, 'probability': p} for t, p in merged[:top_k]]
+
+            # --- actual token at this position ---
+            # The actual next token is whichever token the model was given at
+            # next_token_idx. The logits alone cannot tell us that reliably,
+            # so the caller stores the input_ids that produced these logits
+            # on model_output; if they are absent we fall back to the argmax,
+            # which is only correct for greedy decoding.
+            actual_token_id = None
+            if hasattr(model_output, 'input_ids') and model_output.input_ids is not None:
+                actual_token_id = model_output.input_ids[0, next_token_idx].item()
+            elif hasattr(model_output, '_input_ids'):
+                actual_token_id = model_output._input_ids[0, next_token_idx].item()
+
+            if actual_token_id is not None:
+                actual_token = tokenizer.decode([actual_token_id], skip_special_tokens=False)
+                actual_prob = probs[actual_token_id].item()
+            else:
+                # Fallback: use the argmax as "actual" (only correct for greedy)
+                top_prob, top_idx = probs.max(dim=-1)
+                actual_token = tokenizer.decode([top_idx.item()], skip_special_tokens=False)
+                actual_prob = top_prob.item()
+
+            results.append({
+                'position': i,
+                'top5': top5,
+                'actual_token': actual_token,
+                'actual_prob': float(actual_prob),
+            })
+
+    return results
+
+
 def get_actual_model_output(model_output, tokenizer) -> Tuple[str, float]:
     """
     Extract the predicted token from model's output.
@@ -148,16 +240,21 @@ def get_actual_model_output(model_output, tokenizer) -> Tuple[str, float]:
     return token_str, top_prob.item()
 
 
-def execute_forward_pass(model, tokenizer, prompt: str, config: Dict[str, Any], ablation_config: Optional[Dict[int, List[int]]] = None) -> Dict[str, Any]:
+def execute_forward_pass(model, tokenizer, prompt: str, config: Dict[str, Any],
+                         ablation_config: Optional[Dict[int, List[int]]] = None,
+                         original_prompt: Optional[str] = None) -> Dict[str, Any]:
     """
     Execute forward pass with PyVene IntervenableModel to capture activations from specified modules.
 
     Args:
         model: Loaded transformer model
         tokenizer: Loaded tokenizer
-        prompt: Input text prompt
+        prompt: Input text prompt (may be the full sequence: original prompt + generated tokens)
         config: Dict with module lists like {"attention_modules": [...], "block_modules": [...], ...}
         ablation_config: Optional dict mapping layer numbers to list of head indices to ablate.
+        original_prompt: When provided, enables per-position top-5 computation for
+            the output scrubber. If prompt contains generated tokens beyond
+            original_prompt, each generated-token position gets its own top-5 data.
 
     Returns:
         JSON-serializable dict with captured activations and metadata
@@ -255,6 +352,30 @@ def execute_forward_pass(model, tokenizer, prompt: str, config: Dict[str, Any],
     except Exception as e:
         print(f"Warning: Could not extract model output: {e}")
 
+    # --- Per-position top-5 for the output scrubber ---
+    per_position_top5 = []
+    prompt_token_count = None
+    generated_tokens = []
+    if original_prompt is not None:
+        prompt_ids = tokenizer(original_prompt, return_tensors="pt")["input_ids"]
+        prompt_token_count = prompt_ids.shape[1]
+        seq_len = inputs["input_ids"].shape[1]
+        num_generated = seq_len - prompt_token_count
+
+        if num_generated > 0:
+            # Attach input_ids to model_output so compute_per_position_top5
+            # can look up the actual token at each position.
+            model_output.input_ids = inputs["input_ids"]
+            per_position_top5 = compute_per_position_top5(
+                model_output, tokenizer, prompt_token_count, top_k=5
+            )
+            # Decode each generated token individually for slider marks
+            full_ids = inputs["input_ids"][0].tolist()
+            generated_tokens = [
+                tokenizer.decode([full_ids[prompt_token_count + i]], skip_special_tokens=False)
+                for i in range(num_generated)
+            ]
+
     # Build output dictionary
     result = {
         "model": getattr(model.config, "name_or_path", "unknown"),
@@ -267,7 +388,11 @@ def execute_forward_pass(model, tokenizer, prompt: str, config: Dict[str, Any],
         "norm_parameters": norm_parameters,
         "norm_data": norm_data,
         "actual_output": actual_output,
-        "global_top5_tokens": global_top5_tokens  # New: global top 5 from final output
+        "global_top5_tokens": global_top5_tokens,
+        "per_position_top5": per_position_top5,
+        "prompt_token_count": prompt_token_count,
+        "generated_tokens": generated_tokens,
+        "original_prompt": original_prompt,
     }
 
     print(f"Captured {len(captured)} module outputs using PyVene")