cdpearlman committed on
Commit
f33c95a
·
1 Parent(s): 1dd1822

Use final layer output instead of MLP output for simpler computation

Browse files
Files changed (4) hide show
  1. app.py +20 -18
  2. components/sidebar.py +5 -5
  3. utils/model_config.py +27 -25
  4. utils/model_patterns.py +77 -35
app.py CHANGED
@@ -67,11 +67,11 @@ app.layout = html.Div([
67
  @app.callback(
68
  [Output('session-patterns-store', 'data'),
69
  Output('attention-modules-dropdown', 'options'),
70
- Output('mlp-modules-dropdown', 'options'),
71
  Output('norm-params-dropdown', 'options'),
72
  Output('logit-lens-dropdown', 'options'),
73
  Output('attention-modules-dropdown', 'value', allow_duplicate=True),
74
- Output('mlp-modules-dropdown', 'value', allow_duplicate=True),
75
  Output('norm-params-dropdown', 'value', allow_duplicate=True),
76
  Output('logit-lens-dropdown', 'value', allow_duplicate=True),
77
  Output('loading-indicator', 'children')],
@@ -122,8 +122,9 @@ def load_model_patterns(selected_model):
122
  attention_options = create_grouped_options(
123
  module_patterns, ['attn', 'attention'], 'modules'
124
  )
125
- mlp_options = create_grouped_options(
126
- module_patterns, ['mlp'], 'modules'
 
127
  )
128
  norm_options = create_grouped_options(
129
  param_patterns, ['norm', 'layernorm', 'layer_norm'], 'params'
@@ -170,11 +171,11 @@ def load_model_patterns(selected_model):
170
  return (
171
  patterns_data,
172
  attention_options,
173
- mlp_options,
174
  norm_options,
175
  logit_lens_options,
176
  auto_selections.get('attention_selection', []),
177
- auto_selections.get('mlp_selection', []),
178
  auto_selections.get('norm_selection', []),
179
  auto_selections.get('logit_lens_selection'),
180
  loading_content
@@ -207,7 +208,7 @@ def show_loading_spinner(selected_model):
207
  # Callback to clear all selections when Clear button is pressed
208
  @app.callback(
209
  [Output('attention-modules-dropdown', 'value'),
210
- Output('mlp-modules-dropdown', 'value'),
211
  Output('norm-params-dropdown', 'value'),
212
  Output('logit-lens-dropdown', 'value'),
213
  Output('session-activation-store', 'data'),
@@ -228,7 +229,7 @@ def clear_all_selections(n_clicks):
228
 
229
  return (
230
  None, # attention-modules-dropdown value
231
- None, # mlp-modules-dropdown value
232
  None, # norm-params-dropdown value
233
  None, # logit-lens-dropdown value
234
  {}, # session-activation-store data
@@ -260,20 +261,20 @@ def show_analysis_loading_spinner(n_clicks):
260
  [State('model-dropdown', 'value'),
261
  State('prompt-input', 'value'),
262
  State('attention-modules-dropdown', 'value'),
263
- State('mlp-modules-dropdown', 'value'),
264
  State('norm-params-dropdown', 'value'),
265
  State('logit-lens-dropdown', 'value'),
266
  State('session-patterns-store', 'data')],
267
  prevent_initial_call=True
268
  )
269
- def run_analysis(n_clicks, model_name, prompt, attn_patterns, mlp_patterns, norm_patterns, logit_pattern, patterns_data):
270
  """Run forward pass and generate cytoscape visualization."""
271
  print(f"\n=== DEBUG: run_analysis START ===")
272
  print(f"DEBUG: n_clicks={n_clicks}, model_name={model_name}, prompt='{prompt}'")
273
- print(f"DEBUG: mlp_patterns={mlp_patterns}")
274
  print(f"DEBUG: logit_pattern={logit_pattern}")
275
 
276
- if not n_clicks or not model_name or not prompt or not mlp_patterns:
277
  print("DEBUG: Missing required inputs, returning empty")
278
  return [], {}, None
279
 
@@ -289,10 +290,11 @@ def run_analysis(n_clicks, model_name, prompt, attn_patterns, mlp_patterns, norm
289
  param_patterns = patterns_data.get('param_patterns', {})
290
  all_patterns = {**module_patterns, **param_patterns}
291
 
 
292
  config = {
293
  'attention_modules': [mod for pattern in (attn_patterns or []) for mod in module_patterns.get(pattern, [])],
294
- 'mlp_modules': [mod for pattern in mlp_patterns for mod in module_patterns.get(pattern, [])],
295
- 'norm_parameters': [param for pattern in (norm_patterns or []) for param in param_patterns.get(pattern, [])],
296
  'logit_lens_parameter': all_patterns.get(logit_pattern, [None])[0] if logit_pattern else None
297
  }
298
 
@@ -341,11 +343,11 @@ def run_analysis(n_clicks, model_name, prompt, attn_patterns, mlp_patterns, norm
341
  Output('run-analysis-btn', 'disabled'),
342
  [Input('model-dropdown', 'value'),
343
  Input('prompt-input', 'value'),
344
- Input('mlp-modules-dropdown', 'value')]
345
  )
346
- def enable_run_button(model, prompt, mlp_modules):
347
- """Enable Run Analysis button when model, prompt, and MLP modules are selected."""
348
- return not (model and prompt and mlp_modules)
349
 
350
  # Node click callback for analysis results
351
  @app.callback(
 
67
  @app.callback(
68
  [Output('session-patterns-store', 'data'),
69
  Output('attention-modules-dropdown', 'options'),
70
+ Output('block-modules-dropdown', 'options'),
71
  Output('norm-params-dropdown', 'options'),
72
  Output('logit-lens-dropdown', 'options'),
73
  Output('attention-modules-dropdown', 'value', allow_duplicate=True),
74
+ Output('block-modules-dropdown', 'value', allow_duplicate=True),
75
  Output('norm-params-dropdown', 'value', allow_duplicate=True),
76
  Output('logit-lens-dropdown', 'value', allow_duplicate=True),
77
  Output('loading-indicator', 'children')],
 
122
  attention_options = create_grouped_options(
123
  module_patterns, ['attn', 'attention'], 'modules'
124
  )
125
+ # Block options - layer/block modules (residual stream outputs)
126
+ block_options = create_grouped_options(
127
+ module_patterns, ['layers', 'h.', 'blocks', 'decoder.layers'], 'modules'
128
  )
129
  norm_options = create_grouped_options(
130
  param_patterns, ['norm', 'layernorm', 'layer_norm'], 'params'
 
171
  return (
172
  patterns_data,
173
  attention_options,
174
+ block_options,
175
  norm_options,
176
  logit_lens_options,
177
  auto_selections.get('attention_selection', []),
178
+ auto_selections.get('block_selection', []),
179
  auto_selections.get('norm_selection', []),
180
  auto_selections.get('logit_lens_selection'),
181
  loading_content
 
208
  # Callback to clear all selections when Clear button is pressed
209
  @app.callback(
210
  [Output('attention-modules-dropdown', 'value'),
211
+ Output('block-modules-dropdown', 'value'),
212
  Output('norm-params-dropdown', 'value'),
213
  Output('logit-lens-dropdown', 'value'),
214
  Output('session-activation-store', 'data'),
 
229
 
230
  return (
231
  None, # attention-modules-dropdown value
232
+ None, # block-modules-dropdown value
233
  None, # norm-params-dropdown value
234
  None, # logit-lens-dropdown value
235
  {}, # session-activation-store data
 
261
  [State('model-dropdown', 'value'),
262
  State('prompt-input', 'value'),
263
  State('attention-modules-dropdown', 'value'),
264
+ State('block-modules-dropdown', 'value'),
265
  State('norm-params-dropdown', 'value'),
266
  State('logit-lens-dropdown', 'value'),
267
  State('session-patterns-store', 'data')],
268
  prevent_initial_call=True
269
  )
270
+ def run_analysis(n_clicks, model_name, prompt, attn_patterns, block_patterns, norm_patterns, logit_pattern, patterns_data):
271
  """Run forward pass and generate cytoscape visualization."""
272
  print(f"\n=== DEBUG: run_analysis START ===")
273
  print(f"DEBUG: n_clicks={n_clicks}, model_name={model_name}, prompt='{prompt}'")
274
+ print(f"DEBUG: block_patterns={block_patterns}")
275
  print(f"DEBUG: logit_pattern={logit_pattern}")
276
 
277
+ if not n_clicks or not model_name or not prompt or not block_patterns:
278
  print("DEBUG: Missing required inputs, returning empty")
279
  return [], {}, None
280
 
 
290
  param_patterns = patterns_data.get('param_patterns', {})
291
  all_patterns = {**module_patterns, **param_patterns}
292
 
293
+ # Use block patterns (full layer outputs / residual stream) for logit lens
294
  config = {
295
  'attention_modules': [mod for pattern in (attn_patterns or []) for mod in module_patterns.get(pattern, [])],
296
+ 'block_modules': [mod for pattern in block_patterns for mod in module_patterns.get(pattern, [])],
297
+ 'norm_parameters': param_patterns.get(norm_patterns, []) if norm_patterns else [],
298
  'logit_lens_parameter': all_patterns.get(logit_pattern, [None])[0] if logit_pattern else None
299
  }
300
 
 
343
  Output('run-analysis-btn', 'disabled'),
344
  [Input('model-dropdown', 'value'),
345
  Input('prompt-input', 'value'),
346
+ Input('block-modules-dropdown', 'value')]
347
  )
348
+ def enable_run_button(model, prompt, block_modules):
349
+ """Enable Run Analysis button when model, prompt, and layer blocks are selected."""
350
+ return not (model and prompt and block_modules)
351
 
352
  # Node click callback for analysis results
353
  @app.callback(
components/sidebar.py CHANGED
@@ -3,7 +3,7 @@ Sidebar component with module and parameter selection dropdowns.
3
 
4
  This component provides the left sidebar interface for selecting:
5
  - Attention modules
6
- - MLP modules
7
  - Normalization parameters
8
  - Logit lens parameters
9
  """
@@ -31,14 +31,14 @@ def create_sidebar():
31
  )
32
  ], className="dropdown-container"),
33
 
34
- # MLP modules dropdown
35
  html.Div([
36
- html.Label("MLP Modules:", className="dropdown-label"),
37
  dcc.Dropdown(
38
- id='mlp-modules-dropdown',
39
  options=[],
40
  value=None,
41
- placeholder="Select MLP modules...",
42
  multi=True,
43
  className="module-dropdown"
44
  )
 
3
 
4
  This component provides the left sidebar interface for selecting:
5
  - Attention modules
6
+ - Layer blocks (residual stream outputs)
7
  - Normalization parameters
8
  - Logit lens parameters
9
  """
 
31
  )
32
  ], className="dropdown-container"),
33
 
34
+ # Layer blocks dropdown (residual stream outputs)
35
  html.Div([
36
+ html.Label("Layer Blocks:", className="dropdown-label"),
37
  dcc.Dropdown(
38
+ id='block-modules-dropdown',
39
  options=[],
40
  value=None,
41
+ placeholder="Select layer blocks...",
42
  multi=True,
43
  className="module-dropdown"
44
  )
utils/model_config.py CHANGED
@@ -17,7 +17,7 @@ MODEL_FAMILIES: Dict[str, Dict[str, Any]] = {
17
  "mlp_pattern": "model.layers.{N}.mlp",
18
  "block_pattern": "model.layers.{N}",
19
  },
20
- "norm_patterns": ["model.norm.weight"],
21
  "logit_lens_pattern": "lm_head.weight",
22
  "norm_type": "rmsnorm",
23
  },
@@ -30,7 +30,7 @@ MODEL_FAMILIES: Dict[str, Dict[str, Any]] = {
30
  "mlp_pattern": "transformer.h.{N}.mlp",
31
  "block_pattern": "transformer.h.{N}",
32
  },
33
- "norm_patterns": ["transformer.ln_f.weight", "transformer.ln_f.bias"],
34
  "logit_lens_pattern": "lm_head.weight",
35
  "norm_type": "layernorm",
36
  },
@@ -43,7 +43,7 @@ MODEL_FAMILIES: Dict[str, Dict[str, Any]] = {
43
  "mlp_pattern": "model.decoder.layers.{N}.fc2",
44
  "block_pattern": "model.decoder.layers.{N}",
45
  },
46
- "norm_patterns": ["model.decoder.final_layer_norm.weight", "model.decoder.final_layer_norm.bias"],
47
  "logit_lens_pattern": "lm_head.weight",
48
  "norm_type": "layernorm",
49
  },
@@ -56,7 +56,7 @@ MODEL_FAMILIES: Dict[str, Dict[str, Any]] = {
56
  "mlp_pattern": "gpt_neox.layers.{N}.mlp",
57
  "block_pattern": "gpt_neox.layers.{N}",
58
  },
59
- "norm_patterns": ["gpt_neox.final_layer_norm.weight", "gpt_neox.final_layer_norm.bias"],
60
  "logit_lens_pattern": "embed_out.weight",
61
  "norm_type": "layernorm",
62
  },
@@ -69,7 +69,7 @@ MODEL_FAMILIES: Dict[str, Dict[str, Any]] = {
69
  "mlp_pattern": "transformer.h.{N}.mlp",
70
  "block_pattern": "transformer.h.{N}",
71
  },
72
- "norm_patterns": ["transformer.ln_f.weight", "transformer.ln_f.bias"],
73
  "logit_lens_pattern": "lm_head.weight",
74
  "norm_type": "layernorm",
75
  },
@@ -82,7 +82,7 @@ MODEL_FAMILIES: Dict[str, Dict[str, Any]] = {
82
  "mlp_pattern": "transformer.h.{N}.mlp",
83
  "block_pattern": "transformer.h.{N}",
84
  },
85
- "norm_patterns": ["transformer.ln_f.weight", "transformer.ln_f.bias"],
86
  "logit_lens_pattern": "lm_head.weight",
87
  "norm_type": "layernorm",
88
  },
@@ -95,8 +95,8 @@ MODEL_FAMILIES: Dict[str, Dict[str, Any]] = {
95
  "mlp_pattern": "transformer.blocks.{N}.ffn",
96
  "block_pattern": "transformer.blocks.{N}",
97
  },
98
- "norm_patterns": ["transformer.norm_f.weight"],
99
- "logit_lens_parameter": "lm_head.weight",
100
  "norm_type": "layernorm",
101
  },
102
  }
@@ -214,15 +214,15 @@ def get_auto_selections(model_name: str, module_patterns: Dict[str, List[str]],
214
  param_patterns: Available parameter patterns from the model
215
 
216
  Returns:
217
- Dict with keys: attention_selection, mlp_selection, norm_selection, logit_lens_selection
218
  Each value is a list of pattern keys that should be pre-selected
219
  """
220
  family = get_model_family(model_name)
221
  if not family:
222
  return {
223
  'attention_selection': [],
224
- 'mlp_selection': [],
225
- 'norm_selection': [],
226
  'logit_lens_selection': None,
227
  'family_name': None
228
  }
@@ -231,16 +231,16 @@ def get_auto_selections(model_name: str, module_patterns: Dict[str, List[str]],
231
  if not config:
232
  return {
233
  'attention_selection': [],
234
- 'mlp_selection': [],
235
- 'norm_selection': [],
236
  'logit_lens_selection': None,
237
  'family_name': None
238
  }
239
 
240
  # Find matching patterns in the available patterns
241
  attention_matches = []
242
- mlp_matches = []
243
- norm_matches = []
244
  logit_lens_match = None
245
 
246
  # Match attention patterns
@@ -249,17 +249,19 @@ def get_auto_selections(model_name: str, module_patterns: Dict[str, List[str]],
249
  if _pattern_matches_template(pattern_key, attention_template):
250
  attention_matches.append(pattern_key)
251
 
252
- # Match MLP patterns
253
- mlp_template = config['templates'].get('mlp_pattern', '')
254
  for pattern_key in module_patterns.keys():
255
- if _pattern_matches_template(pattern_key, mlp_template):
256
- mlp_matches.append(pattern_key)
257
 
258
- # Match normalization patterns
259
- for norm_pattern in config.get('norm_patterns', []):
 
260
  for pattern_key in param_patterns.keys():
261
- if _pattern_matches_template(pattern_key, norm_pattern):
262
- norm_matches.append(pattern_key)
 
263
 
264
  # Match logit lens pattern - check both parameters AND modules
265
  logit_pattern = config.get('logit_lens_pattern', '')
@@ -277,8 +279,8 @@ def get_auto_selections(model_name: str, module_patterns: Dict[str, List[str]],
277
 
278
  return {
279
  'attention_selection': attention_matches,
280
- 'mlp_selection': mlp_matches,
281
- 'norm_selection': norm_matches,
282
  'logit_lens_selection': logit_lens_match,
283
  'family_name': family,
284
  'family_description': config.get('description', '')
 
17
  "mlp_pattern": "model.layers.{N}.mlp",
18
  "block_pattern": "model.layers.{N}",
19
  },
20
+ "norm_parameter": "model.norm.weight",
21
  "logit_lens_pattern": "lm_head.weight",
22
  "norm_type": "rmsnorm",
23
  },
 
30
  "mlp_pattern": "transformer.h.{N}.mlp",
31
  "block_pattern": "transformer.h.{N}",
32
  },
33
+ "norm_parameter": "transformer.ln_f.weight",
34
  "logit_lens_pattern": "lm_head.weight",
35
  "norm_type": "layernorm",
36
  },
 
43
  "mlp_pattern": "model.decoder.layers.{N}.fc2",
44
  "block_pattern": "model.decoder.layers.{N}",
45
  },
46
+ "norm_parameter": "model.decoder.final_layer_norm.weight",
47
  "logit_lens_pattern": "lm_head.weight",
48
  "norm_type": "layernorm",
49
  },
 
56
  "mlp_pattern": "gpt_neox.layers.{N}.mlp",
57
  "block_pattern": "gpt_neox.layers.{N}",
58
  },
59
+ "norm_parameter": "gpt_neox.final_layer_norm.weight",
60
  "logit_lens_pattern": "embed_out.weight",
61
  "norm_type": "layernorm",
62
  },
 
69
  "mlp_pattern": "transformer.h.{N}.mlp",
70
  "block_pattern": "transformer.h.{N}",
71
  },
72
+ "norm_parameter": "transformer.ln_f.weight",
73
  "logit_lens_pattern": "lm_head.weight",
74
  "norm_type": "layernorm",
75
  },
 
82
  "mlp_pattern": "transformer.h.{N}.mlp",
83
  "block_pattern": "transformer.h.{N}",
84
  },
85
+ "norm_parameter": "transformer.ln_f.weight",
86
  "logit_lens_pattern": "lm_head.weight",
87
  "norm_type": "layernorm",
88
  },
 
95
  "mlp_pattern": "transformer.blocks.{N}.ffn",
96
  "block_pattern": "transformer.blocks.{N}",
97
  },
98
+ "norm_parameter": "transformer.norm_f.weight",
99
+ "logit_lens_pattern": "lm_head.weight",
100
  "norm_type": "layernorm",
101
  },
102
  }
 
214
  param_patterns: Available parameter patterns from the model
215
 
216
  Returns:
217
+ Dict with keys: attention_selection, block_selection, norm_selection, logit_lens_selection
218
  Each value is a list of pattern keys that should be pre-selected
219
  """
220
  family = get_model_family(model_name)
221
  if not family:
222
  return {
223
  'attention_selection': [],
224
+ 'block_selection': [],
225
+ 'norm_selection': None,
226
  'logit_lens_selection': None,
227
  'family_name': None
228
  }
 
231
  if not config:
232
  return {
233
  'attention_selection': [],
234
+ 'block_selection': [],
235
+ 'norm_selection': None,
236
  'logit_lens_selection': None,
237
  'family_name': None
238
  }
239
 
240
  # Find matching patterns in the available patterns
241
  attention_matches = []
242
+ block_matches = []
243
+ norm_match = None
244
  logit_lens_match = None
245
 
246
  # Match attention patterns
 
249
  if _pattern_matches_template(pattern_key, attention_template):
250
  attention_matches.append(pattern_key)
251
 
252
+ # Match block patterns (full layer outputs - residual stream)
253
+ block_template = config['templates'].get('block_pattern', '')
254
  for pattern_key in module_patterns.keys():
255
+ if _pattern_matches_template(pattern_key, block_template):
256
+ block_matches.append(pattern_key)
257
 
258
+ # Match normalization parameter
259
+ norm_parameter = config.get('norm_parameter', '')
260
+ if norm_parameter:
261
  for pattern_key in param_patterns.keys():
262
+ if _pattern_matches_template(pattern_key, norm_parameter):
263
+ norm_match = pattern_key
264
+ break
265
 
266
  # Match logit lens pattern - check both parameters AND modules
267
  logit_pattern = config.get('logit_lens_pattern', '')
 
279
 
280
  return {
281
  'attention_selection': attention_matches,
282
+ 'block_selection': block_matches,
283
+ 'norm_selection': norm_match,
284
  'logit_lens_selection': logit_lens_match,
285
  'family_name': family,
286
  'family_description': config.get('description', '')
utils/model_patterns.py CHANGED
@@ -92,7 +92,7 @@ def execute_forward_pass(model, tokenizer, prompt: str, config: Dict[str, Any])
92
  model: Loaded transformer model
93
  tokenizer: Loaded tokenizer
94
  prompt: Input text prompt
95
- config: Dict with module lists like {"attention_modules": [...], "mlp_modules": [...], ...}
96
 
97
  Returns:
98
  JSON-serializable dict with captured activations and metadata
@@ -101,12 +101,11 @@ def execute_forward_pass(model, tokenizer, prompt: str, config: Dict[str, Any])
101
 
102
  # Extract module lists from config
103
  attention_modules = config.get("attention_modules", [])
104
- mlp_modules = config.get("mlp_modules", [])
105
- other_modules = config.get("other_modules", [])
106
  norm_parameters = config.get("norm_parameters", [])
107
  logit_lens_parameter = config.get("logit_lens_parameter")
108
 
109
- all_modules = attention_modules + mlp_modules + other_modules
110
  if not all_modules:
111
  print("No modules specified for capture")
112
  return {"error": "No modules specified"}
@@ -119,12 +118,12 @@ def execute_forward_pass(model, tokenizer, prompt: str, config: Dict[str, Any])
119
  if not layer_match:
120
  return {"error": f"Invalid module name format: {mod_name}"}
121
 
122
- # Determine component type
123
- if mod_name in mlp_modules:
124
- component = 'mlp_output'
125
- elif mod_name in attention_modules:
126
  component = 'attention_output'
127
  else:
 
 
128
  component = 'block_output'
129
 
130
  intervenable_representations.append(
@@ -162,10 +161,16 @@ def execute_forward_pass(model, tokenizer, prompt: str, config: Dict[str, Any])
162
  for hook in hooks:
163
  hook.remove()
164
 
165
- # Separate outputs by type
166
- attention_outputs = {k: v for k, v in captured.items() if k in attention_modules}
167
- mlp_outputs = {k: v for k, v in captured.items() if k in mlp_modules}
168
- other_outputs = {k: v for k, v in captured.items() if k in other_modules}
 
 
 
 
 
 
169
 
170
  # Capture normalization parameters (deprecated - kept for backward compatibility)
171
  all_params = dict(model.named_parameters())
@@ -184,47 +189,49 @@ def execute_forward_pass(model, tokenizer, prompt: str, config: Dict[str, Any])
184
  "model": getattr(model.config, "name_or_path", "unknown"),
185
  "prompt": prompt,
186
  "input_ids": safe_to_serializable(inputs["input_ids"]),
187
- "attention_modules": attention_modules,
188
  "attention_outputs": attention_outputs,
189
- "mlp_modules": mlp_modules,
190
- "mlp_outputs": mlp_outputs,
191
- "other_modules": other_modules,
192
- "other_outputs": other_outputs,
193
  "norm_parameters": norm_parameters,
194
  "norm_data": norm_data,
195
  "logit_lens_parameter": logit_lens_parameter,
196
- "actual_output": actual_output # Store only token and probability, not full output
197
  }
198
 
199
  print(f"Captured {len(captured)} module outputs using PyVene")
200
  return result
201
 
202
 
203
- def logit_lens_transformation(mlp_output: Any, norm_data: List[Any], model, logit_lens_parameter: str, tokenizer) -> List[Tuple[str, float]]:
204
  """
205
  Transform layer output to top 3 token probabilities using logit lens.
206
 
 
 
 
207
  Applies final layer normalization before projection (critical for correctness).
208
  Uses model's built-in functions to minimize computational errors.
209
 
210
  Args:
211
- mlp_output: Hidden state from any layer
212
  norm_data: Not used (deprecated - using model's norm layer directly)
213
  model: HuggingFace model
214
  logit_lens_parameter: Not used (deprecated)
215
  tokenizer: Tokenizer for decoding
 
216
 
217
  Returns:
218
  List of (token_string, probability) tuples for top 3 tokens
219
  """
220
  with torch.no_grad():
221
  # Convert to tensor and ensure proper shape [batch, seq_len, hidden_dim]
222
- hidden = torch.tensor(mlp_output) if not isinstance(mlp_output, torch.Tensor) else mlp_output
223
  if hidden.dim() == 4:
224
  hidden = hidden.squeeze(0)
225
 
226
  # Step 1: Apply final layer normalization (critical for intermediate layers)
227
- final_norm = get_final_norm_layer(model)
228
  if final_norm is not None:
229
  hidden = final_norm(hidden)
230
 
@@ -244,15 +251,31 @@ def logit_lens_transformation(mlp_output: Any, norm_data: List[Any], model, logi
244
  ]
245
 
246
 
247
- def get_final_norm_layer(model):
248
  """
249
- Get the final layer normalization module from the model.
250
- Returns None if not found.
251
 
252
- Supports GPT-2 (transformer.ln_f), LLaMA (model.norm), and similar architectures.
 
 
 
 
 
253
  """
254
- # Try common final norm layer names
255
- for attr_path in ['transformer.ln_f', 'model.norm', 'model.decoder.final_layer_norm',
 
 
 
 
 
 
 
 
 
 
 
 
256
  'gpt_neox.final_layer_norm', 'transformer.norm_f']:
257
  try:
258
  parts = attr_path.split('.')
@@ -274,10 +297,24 @@ def token_to_color(token: str) -> str:
274
 
275
 
276
  def _get_top_tokens(activation_data: Dict[str, Any], module_name: str, model, tokenizer) -> Optional[List[Tuple[str, float]]]:
277
- """Helper: Get top 3 tokens for a layer's output."""
 
 
 
 
 
278
  try:
279
- mlp_output = activation_data['mlp_outputs'][module_name]['output']
280
- return logit_lens_transformation(mlp_output, [], model, None, tokenizer)
 
 
 
 
 
 
 
 
 
281
  except Exception as e:
282
  print(f"Warning: Could not compute logit lens for {module_name}: {e}")
283
  return None
@@ -314,15 +351,20 @@ def _create_edge(src_layer: int, tgt_layer: int, token: str, prob: float, rank:
314
 
315
 
316
  def format_data_for_cytoscape(activation_data: Dict[str, Any], model, tokenizer) -> List[Dict[str, Any]]:
317
- """Convert activation data to Cytoscape format with nodes (layers) and edges (top-3 tokens)."""
318
- mlp_modules = activation_data.get('mlp_modules', [])
319
- if not mlp_modules:
 
 
 
 
 
320
  return []
321
 
322
  # Extract and sort layers by layer number
323
  layer_info = sorted(
324
  [(int(re.findall(r'\d+', name)[0]), name)
325
- for name in mlp_modules if re.findall(r'\d+', name)]
326
  )
327
 
328
  elements = []
 
92
  model: Loaded transformer model
93
  tokenizer: Loaded tokenizer
94
  prompt: Input text prompt
95
+ config: Dict with module lists like {"attention_modules": [...], "block_modules": [...], ...}
96
 
97
  Returns:
98
  JSON-serializable dict with captured activations and metadata
 
101
 
102
  # Extract module lists from config
103
  attention_modules = config.get("attention_modules", [])
104
+ block_modules = config.get("block_modules", [])
 
105
  norm_parameters = config.get("norm_parameters", [])
106
  logit_lens_parameter = config.get("logit_lens_parameter")
107
 
108
+ all_modules = attention_modules + block_modules
109
  if not all_modules:
110
  print("No modules specified for capture")
111
  return {"error": "No modules specified"}
 
118
  if not layer_match:
119
  return {"error": f"Invalid module name format: {mod_name}"}
120
 
121
+ # Determine component type based on module name
122
+ if 'attn' in mod_name or 'attention' in mod_name:
 
 
123
  component = 'attention_output'
124
  else:
125
+ # Layer/block modules (e.g., "model.layers.0", "transformer.h.0")
126
+ # These represent the residual stream (full layer output)
127
  component = 'block_output'
128
 
129
  intervenable_representations.append(
 
161
  for hook in hooks:
162
  hook.remove()
163
 
164
+ # Separate outputs by type based on module name pattern
165
+ attention_outputs = {}
166
+ block_outputs = {}
167
+
168
+ for mod_name, output in captured.items():
169
+ if 'attn' in mod_name or 'attention' in mod_name:
170
+ attention_outputs[mod_name] = output
171
+ else:
172
+ # Block/layer outputs (residual stream - full layer output)
173
+ block_outputs[mod_name] = output
174
 
175
  # Capture normalization parameters (deprecated - kept for backward compatibility)
176
  all_params = dict(model.named_parameters())
 
189
  "model": getattr(model.config, "name_or_path", "unknown"),
190
  "prompt": prompt,
191
  "input_ids": safe_to_serializable(inputs["input_ids"]),
192
+ "attention_modules": list(attention_outputs.keys()),
193
  "attention_outputs": attention_outputs,
194
+ "block_modules": list(block_outputs.keys()),
195
+ "block_outputs": block_outputs,
 
 
196
  "norm_parameters": norm_parameters,
197
  "norm_data": norm_data,
198
  "logit_lens_parameter": logit_lens_parameter,
199
+ "actual_output": actual_output
200
  }
201
 
202
  print(f"Captured {len(captured)} module outputs using PyVene")
203
  return result
204
 
205
 
206
+ def logit_lens_transformation(layer_output: Any, norm_data: List[Any], model, logit_lens_parameter: str, tokenizer, norm_parameter: Optional[str] = None) -> List[Tuple[str, float]]:
207
  """
208
  Transform layer output to top 3 token probabilities using logit lens.
209
 
210
+ For standard logit lens, use block/layer outputs (residual stream), not component outputs.
211
+ The residual stream contains the full hidden state with all accumulated information.
212
+
213
  Applies final layer normalization before projection (critical for correctness).
214
  Uses model's built-in functions to minimize computational errors.
215
 
216
  Args:
217
+ layer_output: Hidden state from any layer (preferably block output / residual stream)
218
  norm_data: Not used (deprecated - using model's norm layer directly)
219
  model: HuggingFace model
220
  logit_lens_parameter: Not used (deprecated)
221
  tokenizer: Tokenizer for decoding
222
+ norm_parameter: Parameter path for final norm layer (e.g., "model.norm.weight")
223
 
224
  Returns:
225
  List of (token_string, probability) tuples for top 3 tokens
226
  """
227
  with torch.no_grad():
228
  # Convert to tensor and ensure proper shape [batch, seq_len, hidden_dim]
229
+ hidden = torch.tensor(layer_output) if not isinstance(layer_output, torch.Tensor) else layer_output
230
  if hidden.dim() == 4:
231
  hidden = hidden.squeeze(0)
232
 
233
  # Step 1: Apply final layer normalization (critical for intermediate layers)
234
+ final_norm = get_norm_layer_from_parameter(model, norm_parameter)
235
  if final_norm is not None:
236
  hidden = final_norm(hidden)
237
 
 
251
  ]
252
 
253
 
254
+ def get_norm_layer_from_parameter(model, norm_parameter: Optional[str]) -> Optional[Any]:
255
  """
256
+ Get the final layer normalization module from the model using the norm parameter path.
 
257
 
258
+ Args:
259
+ model: The transformer model
260
+ norm_parameter: Parameter path (e.g., "model.norm.weight") or None
261
+
262
+ Returns:
263
+ The normalization layer module, or None if not found
264
  """
265
+ if norm_parameter:
266
+ # Convert parameter path to module path (remove .weight/.bias suffix)
267
+ module_path = norm_parameter.replace('.weight', '').replace('.bias', '')
268
+ try:
269
+ parts = module_path.split('.')
270
+ obj = model
271
+ for part in parts:
272
+ obj = getattr(obj, part)
273
+ return obj
274
+ except AttributeError:
275
+ print(f"Warning: Could not find norm layer at {module_path}")
276
+
277
+ # Fallback: Try common final norm layer names if no parameter specified
278
+ for attr_path in ['model.norm', 'transformer.ln_f', 'model.decoder.final_layer_norm',
279
  'gpt_neox.final_layer_norm', 'transformer.norm_f']:
280
  try:
281
  parts = attr_path.split('.')
 
297
 
298
 
299
  def _get_top_tokens(activation_data: Dict[str, Any], module_name: str, model, tokenizer) -> Optional[List[Tuple[str, float]]]:
300
+ """
301
+ Helper: Get top 3 tokens for a layer's block output.
302
+
303
+ Uses block outputs (residual stream) which represent the full hidden state
304
+ after all layer computations (attention + feedforward + residuals).
305
+ """
306
  try:
307
+ # Get block output (residual stream)
308
+ if module_name not in activation_data.get('block_outputs', {}):
309
+ return None
310
+
311
+ layer_output = activation_data['block_outputs'][module_name]['output']
312
+
313
+ # Get norm parameter from activation data (should be a single parameter or list with one item)
314
+ norm_params = activation_data.get('norm_parameters', [])
315
+ norm_parameter = norm_params[0] if norm_params else None
316
+
317
+ return logit_lens_transformation(layer_output, [], model, None, tokenizer, norm_parameter)
318
  except Exception as e:
319
  print(f"Warning: Could not compute logit lens for {module_name}: {e}")
320
  return None
 
351
 
352
 
353
  def format_data_for_cytoscape(activation_data: Dict[str, Any], model, tokenizer) -> List[Dict[str, Any]]:
354
+ """
355
+ Convert activation data to Cytoscape format with nodes (layers) and edges (top-3 tokens).
356
+
357
+ Uses block outputs (full layer outputs / residual stream) for logit lens visualization.
358
+ """
359
+ # Get block modules (full layer outputs)
360
+ layer_modules = activation_data.get('block_modules', [])
361
+ if not layer_modules:
362
  return []
363
 
364
  # Extract and sort layers by layer number
365
  layer_info = sorted(
366
  [(int(re.findall(r'\d+', name)[0]), name)
367
+ for name in layer_modules if re.findall(r'\d+', name)]
368
  )
369
 
370
  elements = []