cdpearlman committed on
Commit
72f82a4
·
1 Parent(s): 03a76e7

conductor(checkpoint): Checkpoint end of Phase 3: Visualization & Feedback Loop

Browse files
app.py CHANGED
@@ -312,6 +312,7 @@ def enable_run_button(model, prompt, block_modules, norm_params):
312
  Output('pipeline-container', 'style'),
313
  Output('investigation-panel', 'style'),
314
  Output('session-activation-store', 'data', allow_duplicate=True),
 
315
  Output('session-original-prompt-store', 'data'),
316
  Output('session-selected-beam-store', 'data')],
317
  [Input('generate-btn', 'n_clicks')],
@@ -360,7 +361,7 @@ def run_generation(n_clicks, model_name, prompt, max_new_tokens, beam_width, pat
360
 
361
  if not config['block_modules']:
362
  return (html.Div("Please select modules in the sidebar.", style={'color': 'red'}),
363
- results, {'display': 'none'}, {'display': 'none'}, {}, original_prompt_data, {})
364
 
365
  # AGENT F KEY CHANGE: Run analysis on ORIGINAL PROMPT only, not generated text
366
  # This ensures pipeline stages show how the model processes the user's input,
@@ -385,19 +386,19 @@ def run_generation(n_clicks, model_name, prompt, max_new_tokens, beam_width, pat
385
 
386
  # Show pipeline immediately (analyzing original prompt)
387
  return (results_ui, results, {'display': 'block'}, {'display': 'block'},
388
- activation_data, original_prompt_data, {})
389
 
390
  else:
391
  # Single token generation - store the result as selected beam
392
  selected_beam_data = {'text': results[0]['text'], 'score': results[0].get('score', 0)}
393
  return (results_ui, results, {'display': 'block'}, {'display': 'block'},
394
- activation_data, original_prompt_data, selected_beam_data)
395
 
396
  except Exception as e:
397
  import traceback
398
  traceback.print_exc()
399
  return (html.Div(f"Error: {e}", style={'color': 'red'}), [],
400
- {'display': 'none'}, {'display': 'none'}, {}, {}, {})
401
 
402
 
403
  @app.callback(
@@ -760,7 +761,8 @@ def manage_ablation_heads(add_clicks, clear_clicks, remove_clicks,
760
 
761
 
762
  @app.callback(
763
- Output('ablation-results-container', 'children'),
 
764
  [Input('run-ablation-btn', 'n_clicks')],
765
  [State('ablation-selected-heads', 'data'),
766
  State('session-activation-store', 'data'),
@@ -772,7 +774,7 @@ def manage_ablation_heads(add_clicks, clear_clicks, remove_clicks,
772
  def run_ablation_experiment(n_clicks, selected_heads, activation_data, model_name, prompt, selected_beam):
773
  """Run ablation on ORIGINAL PROMPT and compare results."""
774
  if not n_clicks or not selected_heads or not activation_data:
775
- return no_update
776
 
777
  try:
778
  from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -805,25 +807,31 @@ def run_ablation_experiment(n_clicks, selected_heads, activation_data, model_nam
805
  heads_by_layer[layer].append(head)
806
 
807
  if not heads_by_layer:
808
- return html.Div("No valid heads selected.", style={'color': '#dc3545'})
809
 
810
  # Run ablation
811
  ablated_data = execute_forward_pass_with_multi_layer_head_ablation(
812
  model, tokenizer, sequence_text, config, heads_by_layer
813
  )
 
 
 
 
814
  ablated_output = ablated_data.get('actual_output', {})
815
  ablated_token = ablated_output.get('token', '')
816
  ablated_prob = ablated_output.get('probability', 0)
817
 
818
- return create_ablation_results_display(
819
  original_token, ablated_token, original_prob, ablated_prob,
820
  selected_heads, selected_beam
821
  )
822
 
 
 
823
  except Exception as e:
824
  import traceback
825
  traceback.print_exc()
826
- return html.Div(f"Ablation error: {str(e)}", style={'color': '#dc3545'})
827
 
828
  except Exception as e:
829
  import traceback
 
312
  Output('pipeline-container', 'style'),
313
  Output('investigation-panel', 'style'),
314
  Output('session-activation-store', 'data', allow_duplicate=True),
315
+ Output('session-activation-store-original', 'data', allow_duplicate=True),
316
  Output('session-original-prompt-store', 'data'),
317
  Output('session-selected-beam-store', 'data')],
318
  [Input('generate-btn', 'n_clicks')],
 
361
 
362
  if not config['block_modules']:
363
  return (html.Div("Please select modules in the sidebar.", style={'color': 'red'}),
364
+ results, {'display': 'none'}, {'display': 'none'}, {}, {}, original_prompt_data, {})
365
 
366
  # AGENT F KEY CHANGE: Run analysis on ORIGINAL PROMPT only, not generated text
367
  # This ensures pipeline stages show how the model processes the user's input,
 
386
 
387
  # Show pipeline immediately (analyzing original prompt)
388
  return (results_ui, results, {'display': 'block'}, {'display': 'block'},
389
+ activation_data, activation_data, original_prompt_data, {})
390
 
391
  else:
392
  # Single token generation - store the result as selected beam
393
  selected_beam_data = {'text': results[0]['text'], 'score': results[0].get('score', 0)}
394
  return (results_ui, results, {'display': 'block'}, {'display': 'block'},
395
+ activation_data, activation_data, original_prompt_data, selected_beam_data)
396
 
397
  except Exception as e:
398
  import traceback
399
  traceback.print_exc()
400
  return (html.Div(f"Error: {e}", style={'color': 'red'}), [],
401
+ {'display': 'none'}, {'display': 'none'}, {}, {}, {}, {})
402
 
403
 
404
  @app.callback(
 
761
 
762
 
763
  @app.callback(
764
+ [Output('ablation-results-container', 'children'),
765
+ Output('session-activation-store', 'data', allow_duplicate=True)],
766
  [Input('run-ablation-btn', 'n_clicks')],
767
  [State('ablation-selected-heads', 'data'),
768
  State('session-activation-store', 'data'),
 
774
  def run_ablation_experiment(n_clicks, selected_heads, activation_data, model_name, prompt, selected_beam):
775
  """Run ablation on ORIGINAL PROMPT and compare results."""
776
  if not n_clicks or not selected_heads or not activation_data:
777
+ return no_update, no_update
778
 
779
  try:
780
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
807
  heads_by_layer[layer].append(head)
808
 
809
  if not heads_by_layer:
810
+ return html.Div("No valid heads selected.", style={'color': '#dc3545'}), no_update
811
 
812
  # Run ablation
813
  ablated_data = execute_forward_pass_with_multi_layer_head_ablation(
814
  model, tokenizer, sequence_text, config, heads_by_layer
815
  )
816
+
817
+ # Mark as ablated so UI knows
818
+ ablated_data['ablated'] = True
819
+
820
  ablated_output = ablated_data.get('actual_output', {})
821
  ablated_token = ablated_output.get('token', '')
822
  ablated_prob = ablated_output.get('probability', 0)
823
 
824
+ results_display = create_ablation_results_display(
825
  original_token, ablated_token, original_prob, ablated_prob,
826
  selected_heads, selected_beam
827
  )
828
 
829
+ return results_display, ablated_data
830
+
831
  except Exception as e:
832
  import traceback
833
  traceback.print_exc()
834
+ return html.Div(f"Ablation error: {str(e)}", style={'color': '#dc3545'}), no_update
835
 
836
  except Exception as e:
837
  import traceback
conductor/tracks/ablation_20260129/plan.md CHANGED
@@ -22,7 +22,7 @@
22
  - [ ] Task: Conductor - User Manual Verification 'Frontend Control Panel' (Protocol in workflow.md)
23
 
24
  ## Phase 3: Visualization & Feedback Loop
25
- - [ ] Task: Connect the Frontend Ablation State to the Backend Inference.
26
  - [ ] Sub-task: Update the main `app.py` callback to pass the `disabled_heads` list to the backend capture function.
27
  - [ ] Sub-task: Verify that toggling a head in the UI updates the Logit Lens/Output display.
28
  - [ ] Task: Visual Polish for Ablated State.
 
22
  - [ ] Task: Conductor - User Manual Verification 'Frontend Control Panel' (Protocol in workflow.md)
23
 
24
  ## Phase 3: Visualization & Feedback Loop
25
+ - [~] Task: Connect the Frontend Ablation State to the Backend Inference.
26
  - [ ] Sub-task: Update the main `app.py` callback to pass the `disabled_heads` list to the backend capture function.
27
  - [ ] Sub-task: Verify that toggling a head in the UI updates the Logit Lens/Output display.
28
  - [ ] Task: Visual Polish for Ablated State.
utils/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/utils/__pycache__/__init__.cpython-311.pyc and b/utils/__pycache__/__init__.cpython-311.pyc differ
 
utils/__pycache__/model_patterns.cpython-311.pyc CHANGED
Binary files a/utils/__pycache__/model_patterns.cpython-311.pyc and b/utils/__pycache__/model_patterns.cpython-311.pyc differ
 
utils/model_patterns.py CHANGED
@@ -566,7 +566,23 @@ def execute_forward_pass_with_multi_layer_head_ablation(model, tokenizer, prompt
566
 
567
  # Reconstruct output tuple
568
  if len(output) > 1:
569
- ablated_output = (ablated_hidden,) + output[1:]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
  else:
571
  ablated_output = (ablated_hidden,)
572
 
@@ -592,7 +608,7 @@ def execute_forward_pass_with_multi_layer_head_ablation(model, tokenizer, prompt
592
 
593
  # Execute forward pass
594
  with torch.no_grad():
595
- model_output = intervenable_model.model(**inputs, use_cache=False)
596
 
597
  # Remove hooks
598
  for hook in hooks:
 
566
 
567
  # Reconstruct output tuple
568
  if len(output) > 1:
569
+ # Check for attention weights (usually index 2 if output_attentions=True)
570
+ if len(output) > 2:
571
+ attn_weights = output[2] # [batch, heads, seq, seq]
572
+ if isinstance(attn_weights, torch.Tensor):
573
+ # Zero out specified heads in attention weights too
574
+ # Clone to avoid in-place modification errors if any
575
+ attn_weights_mod = attn_weights.clone()
576
+ for head_idx in ablate_head_indices:
577
+ if 0 <= head_idx < num_heads:
578
+ attn_weights_mod[:, head_idx, :, :] = 0.0
579
+
580
+ # Reconstruct tuple with modified weights
581
+ ablated_output = (ablated_hidden, output[1], attn_weights_mod) + output[3:]
582
+ else:
583
+ ablated_output = (ablated_hidden,) + output[1:]
584
+ else:
585
+ ablated_output = (ablated_hidden,) + output[1:]
586
  else:
587
  ablated_output = (ablated_hidden,)
588
 
 
608
 
609
  # Execute forward pass
610
  with torch.no_grad():
611
+ model_output = intervenable_model.model(**inputs, use_cache=False, output_attentions=True)
612
 
613
  # Remove hooks
614
  for hook in hooks: