cdpearlman committed on
Commit
72f82a4
·
1 Parent(s): 03a76e7

conductor(checkpoint): Checkpoint end of Phase 3: Visualization & Feedback Loop

Browse files
app.py CHANGED
@@ -312,6 +312,7 @@ def enable_run_button(model, prompt, block_modules, norm_params):
312
  Output('pipeline-container', 'style'),
313
  Output('investigation-panel', 'style'),
314
  Output('session-activation-store', 'data', allow_duplicate=True),
 
315
  Output('session-original-prompt-store', 'data'),
316
  Output('session-selected-beam-store', 'data')],
317
  [Input('generate-btn', 'n_clicks')],
@@ -360,7 +361,7 @@ def run_generation(n_clicks, model_name, prompt, max_new_tokens, beam_width, pat
360
 
361
  if not config['block_modules']:
362
  return (html.Div("Please select modules in the sidebar.", style={'color': 'red'}),
363
- results, {'display': 'none'}, {'display': 'none'}, {}, original_prompt_data, {})
364
 
365
  # AGENT F KEY CHANGE: Run analysis on ORIGINAL PROMPT only, not generated text
366
  # This ensures pipeline stages show how the model processes the user's input,
@@ -385,19 +386,19 @@ def run_generation(n_clicks, model_name, prompt, max_new_tokens, beam_width, pat
385
 
386
  # Show pipeline immediately (analyzing original prompt)
387
  return (results_ui, results, {'display': 'block'}, {'display': 'block'},
388
- activation_data, original_prompt_data, {})
389
 
390
  else:
391
  # Single token generation - store the result as selected beam
392
  selected_beam_data = {'text': results[0]['text'], 'score': results[0].get('score', 0)}
393
  return (results_ui, results, {'display': 'block'}, {'display': 'block'},
394
- activation_data, original_prompt_data, selected_beam_data)
395
 
396
  except Exception as e:
397
  import traceback
398
  traceback.print_exc()
399
  return (html.Div(f"Error: {e}", style={'color': 'red'}), [],
400
- {'display': 'none'}, {'display': 'none'}, {}, {}, {})
401
 
402
 
403
  @app.callback(
@@ -760,7 +761,8 @@ def manage_ablation_heads(add_clicks, clear_clicks, remove_clicks,
760
 
761
 
762
  @app.callback(
763
- Output('ablation-results-container', 'children'),
 
764
  [Input('run-ablation-btn', 'n_clicks')],
765
  [State('ablation-selected-heads', 'data'),
766
  State('session-activation-store', 'data'),
@@ -772,7 +774,7 @@ def manage_ablation_heads(add_clicks, clear_clicks, remove_clicks,
772
  def run_ablation_experiment(n_clicks, selected_heads, activation_data, model_name, prompt, selected_beam):
773
  """Run ablation on ORIGINAL PROMPT and compare results."""
774
  if not n_clicks or not selected_heads or not activation_data:
775
- return no_update
776
 
777
  try:
778
  from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -805,25 +807,31 @@ def run_ablation_experiment(n_clicks, selected_heads, activation_data, model_nam
805
  heads_by_layer[layer].append(head)
806
 
807
  if not heads_by_layer:
808
- return html.Div("No valid heads selected.", style={'color': '#dc3545'})
809
 
810
  # Run ablation
811
  ablated_data = execute_forward_pass_with_multi_layer_head_ablation(
812
  model, tokenizer, sequence_text, config, heads_by_layer
813
  )
 
 
 
 
814
  ablated_output = ablated_data.get('actual_output', {})
815
  ablated_token = ablated_output.get('token', '')
816
  ablated_prob = ablated_output.get('probability', 0)
817
 
818
- return create_ablation_results_display(
819
  original_token, ablated_token, original_prob, ablated_prob,
820
  selected_heads, selected_beam
821
  )
822
 
 
 
823
  except Exception as e:
824
  import traceback
825
  traceback.print_exc()
826
- return html.Div(f"Ablation error: {str(e)}", style={'color': '#dc3545'})
827
 
828
  except Exception as e:
829
  import traceback
 
312
  Output('pipeline-container', 'style'),
313
  Output('investigation-panel', 'style'),
314
  Output('session-activation-store', 'data', allow_duplicate=True),
315
+ Output('session-activation-store-original', 'data', allow_duplicate=True),
316
  Output('session-original-prompt-store', 'data'),
317
  Output('session-selected-beam-store', 'data')],
318
  [Input('generate-btn', 'n_clicks')],
 
361
 
362
  if not config['block_modules']:
363
  return (html.Div("Please select modules in the sidebar.", style={'color': 'red'}),
364
+ results, {'display': 'none'}, {'display': 'none'}, {}, {}, original_prompt_data, {})
365
 
366
  # AGENT F KEY CHANGE: Run analysis on ORIGINAL PROMPT only, not generated text
367
  # This ensures pipeline stages show how the model processes the user's input,
 
386
 
387
  # Show pipeline immediately (analyzing original prompt)
388
  return (results_ui, results, {'display': 'block'}, {'display': 'block'},
389
+ activation_data, activation_data, original_prompt_data, {})
390
 
391
  else:
392
  # Single token generation - store the result as selected beam
393
  selected_beam_data = {'text': results[0]['text'], 'score': results[0].get('score', 0)}
394
  return (results_ui, results, {'display': 'block'}, {'display': 'block'},
395
+ activation_data, activation_data, original_prompt_data, selected_beam_data)
396
 
397
  except Exception as e:
398
  import traceback
399
  traceback.print_exc()
400
  return (html.Div(f"Error: {e}", style={'color': 'red'}), [],
401
+ {'display': 'none'}, {'display': 'none'}, {}, {}, {}, {})
402
 
403
 
404
  @app.callback(
 
761
 
762
 
763
  @app.callback(
764
+ [Output('ablation-results-container', 'children'),
765
+ Output('session-activation-store', 'data', allow_duplicate=True)],
766
  [Input('run-ablation-btn', 'n_clicks')],
767
  [State('ablation-selected-heads', 'data'),
768
  State('session-activation-store', 'data'),
 
774
  def run_ablation_experiment(n_clicks, selected_heads, activation_data, model_name, prompt, selected_beam):
775
  """Run ablation on ORIGINAL PROMPT and compare results."""
776
  if not n_clicks or not selected_heads or not activation_data:
777
+ return no_update, no_update
778
 
779
  try:
780
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
807
  heads_by_layer[layer].append(head)
808
 
809
  if not heads_by_layer:
810
+ return html.Div("No valid heads selected.", style={'color': '#dc3545'}), no_update
811
 
812
  # Run ablation
813
  ablated_data = execute_forward_pass_with_multi_layer_head_ablation(
814
  model, tokenizer, sequence_text, config, heads_by_layer
815
  )
816
+
817
+ # Mark as ablated so UI knows
818
+ ablated_data['ablated'] = True
819
+
820
  ablated_output = ablated_data.get('actual_output', {})
821
  ablated_token = ablated_output.get('token', '')
822
  ablated_prob = ablated_output.get('probability', 0)
823
 
824
+ results_display = create_ablation_results_display(
825
  original_token, ablated_token, original_prob, ablated_prob,
826
  selected_heads, selected_beam
827
  )
828
 
829
+ return results_display, ablated_data
830
+
831
  except Exception as e:
832
  import traceback
833
  traceback.print_exc()
834
+ return html.Div(f"Ablation error: {str(e)}", style={'color': '#dc3545'}), no_update
835
 
836
  except Exception as e:
837
  import traceback
conductor/tracks/ablation_20260129/plan.md CHANGED
@@ -22,7 +22,7 @@
22
  - [ ] Task: Conductor - User Manual Verification 'Frontend Control Panel' (Protocol in workflow.md)
23
 
24
  ## Phase 3: Visualization & Feedback Loop
25
- - [ ] Task: Connect the Frontend Ablation State to the Backend Inference.
26
  - [ ] Sub-task: Update the main `app.py` callback to pass the `disabled_heads` list to the backend capture function.
27
  - [ ] Sub-task: Verify that toggling a head in the UI updates the Logit Lens/Output display.
28
  - [ ] Task: Visual Polish for Ablated State.
 
22
  - [ ] Task: Conductor - User Manual Verification 'Frontend Control Panel' (Protocol in workflow.md)
23
 
24
  ## Phase 3: Visualization & Feedback Loop
25
+ - [~] Task: Connect the Frontend Ablation State to the Backend Inference.
26
  - [ ] Sub-task: Update the main `app.py` callback to pass the `disabled_heads` list to the backend capture function.
27
  - [ ] Sub-task: Verify that toggling a head in the UI updates the Logit Lens/Output display.
28
  - [ ] Task: Visual Polish for Ablated State.
utils/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/utils/__pycache__/__init__.cpython-311.pyc and b/utils/__pycache__/__init__.cpython-311.pyc differ
 
utils/__pycache__/model_patterns.cpython-311.pyc CHANGED
Binary files a/utils/__pycache__/model_patterns.cpython-311.pyc and b/utils/__pycache__/model_patterns.cpython-311.pyc differ
 
utils/model_patterns.py CHANGED
@@ -566,7 +566,23 @@ def execute_forward_pass_with_multi_layer_head_ablation(model, tokenizer, prompt
566
 
567
  # Reconstruct output tuple
568
  if len(output) > 1:
569
- ablated_output = (ablated_hidden,) + output[1:]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
  else:
571
  ablated_output = (ablated_hidden,)
572
 
@@ -592,7 +608,7 @@ def execute_forward_pass_with_multi_layer_head_ablation(model, tokenizer, prompt
592
 
593
  # Execute forward pass
594
  with torch.no_grad():
595
- model_output = intervenable_model.model(**inputs, use_cache=False)
596
 
597
  # Remove hooks
598
  for hook in hooks:
 
566
 
567
  # Reconstruct output tuple
568
  if len(output) > 1:
569
+ # Check for attention weights (usually index 2 if output_attentions=True)
570
+ if len(output) > 2:
571
+ attn_weights = output[2] # [batch, heads, seq, seq]
572
+ if isinstance(attn_weights, torch.Tensor):
573
+ # Zero out specified heads in attention weights too
574
+ # Clone to avoid in-place modification errors if any
575
+ attn_weights_mod = attn_weights.clone()
576
+ for head_idx in ablate_head_indices:
577
+ if 0 <= head_idx < num_heads:
578
+ attn_weights_mod[:, head_idx, :, :] = 0.0
579
+
580
+ # Reconstruct tuple with modified weights
581
+ ablated_output = (ablated_hidden, output[1], attn_weights_mod) + output[3:]
582
+ else:
583
+ ablated_output = (ablated_hidden,) + output[1:]
584
+ else:
585
+ ablated_output = (ablated_hidden,) + output[1:]
586
  else:
587
  ablated_output = (ablated_hidden,)
588
 
 
608
 
609
  # Execute forward pass
610
  with torch.no_grad():
611
+ model_output = intervenable_model.model(**inputs, use_cache=False, output_attentions=True)
612
 
613
  # Remove hooks
614
  for hook in hooks: