Spaces:
Sleeping
Sleeping
Commit ·
72f82a4
1
Parent(s): 03a76e7
conductor(checkpoint): Checkpoint end of Phase 3: Visualization & Feedback Loop
Browse files
app.py
CHANGED
|
@@ -312,6 +312,7 @@ def enable_run_button(model, prompt, block_modules, norm_params):
|
|
| 312 |
Output('pipeline-container', 'style'),
|
| 313 |
Output('investigation-panel', 'style'),
|
| 314 |
Output('session-activation-store', 'data', allow_duplicate=True),
|
|
|
|
| 315 |
Output('session-original-prompt-store', 'data'),
|
| 316 |
Output('session-selected-beam-store', 'data')],
|
| 317 |
[Input('generate-btn', 'n_clicks')],
|
|
@@ -360,7 +361,7 @@ def run_generation(n_clicks, model_name, prompt, max_new_tokens, beam_width, pat
|
|
| 360 |
|
| 361 |
if not config['block_modules']:
|
| 362 |
return (html.Div("Please select modules in the sidebar.", style={'color': 'red'}),
|
| 363 |
-
results, {'display': 'none'}, {'display': 'none'}, {}, original_prompt_data, {})
|
| 364 |
|
| 365 |
# AGENT F KEY CHANGE: Run analysis on ORIGINAL PROMPT only, not generated text
|
| 366 |
# This ensures pipeline stages show how the model processes the user's input,
|
|
@@ -385,19 +386,19 @@ def run_generation(n_clicks, model_name, prompt, max_new_tokens, beam_width, pat
|
|
| 385 |
|
| 386 |
# Show pipeline immediately (analyzing original prompt)
|
| 387 |
return (results_ui, results, {'display': 'block'}, {'display': 'block'},
|
| 388 |
-
activation_data, original_prompt_data, {})
|
| 389 |
|
| 390 |
else:
|
| 391 |
# Single token generation - store the result as selected beam
|
| 392 |
selected_beam_data = {'text': results[0]['text'], 'score': results[0].get('score', 0)}
|
| 393 |
return (results_ui, results, {'display': 'block'}, {'display': 'block'},
|
| 394 |
-
activation_data, original_prompt_data, selected_beam_data)
|
| 395 |
|
| 396 |
except Exception as e:
|
| 397 |
import traceback
|
| 398 |
traceback.print_exc()
|
| 399 |
return (html.Div(f"Error: {e}", style={'color': 'red'}), [],
|
| 400 |
-
{'display': 'none'}, {'display': 'none'}, {}, {}, {})
|
| 401 |
|
| 402 |
|
| 403 |
@app.callback(
|
|
@@ -760,7 +761,8 @@ def manage_ablation_heads(add_clicks, clear_clicks, remove_clicks,
|
|
| 760 |
|
| 761 |
|
| 762 |
@app.callback(
|
| 763 |
-
Output('ablation-results-container', 'children'),
|
|
|
|
| 764 |
[Input('run-ablation-btn', 'n_clicks')],
|
| 765 |
[State('ablation-selected-heads', 'data'),
|
| 766 |
State('session-activation-store', 'data'),
|
|
@@ -772,7 +774,7 @@ def manage_ablation_heads(add_clicks, clear_clicks, remove_clicks,
|
|
| 772 |
def run_ablation_experiment(n_clicks, selected_heads, activation_data, model_name, prompt, selected_beam):
|
| 773 |
"""Run ablation on ORIGINAL PROMPT and compare results."""
|
| 774 |
if not n_clicks or not selected_heads or not activation_data:
|
| 775 |
-
return no_update
|
| 776 |
|
| 777 |
try:
|
| 778 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
@@ -805,25 +807,31 @@ def run_ablation_experiment(n_clicks, selected_heads, activation_data, model_nam
|
|
| 805 |
heads_by_layer[layer].append(head)
|
| 806 |
|
| 807 |
if not heads_by_layer:
|
| 808 |
-
return html.Div("No valid heads selected.", style={'color': '#dc3545'})
|
| 809 |
|
| 810 |
# Run ablation
|
| 811 |
ablated_data = execute_forward_pass_with_multi_layer_head_ablation(
|
| 812 |
model, tokenizer, sequence_text, config, heads_by_layer
|
| 813 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 814 |
ablated_output = ablated_data.get('actual_output', {})
|
| 815 |
ablated_token = ablated_output.get('token', '')
|
| 816 |
ablated_prob = ablated_output.get('probability', 0)
|
| 817 |
|
| 818 |
-
|
| 819 |
original_token, ablated_token, original_prob, ablated_prob,
|
| 820 |
selected_heads, selected_beam
|
| 821 |
)
|
| 822 |
|
|
|
|
|
|
|
| 823 |
except Exception as e:
|
| 824 |
import traceback
|
| 825 |
traceback.print_exc()
|
| 826 |
-
return html.Div(f"Ablation error: {str(e)}", style={'color': '#dc3545'})
|
| 827 |
|
| 828 |
except Exception as e:
|
| 829 |
import traceback
|
|
|
|
| 312 |
Output('pipeline-container', 'style'),
|
| 313 |
Output('investigation-panel', 'style'),
|
| 314 |
Output('session-activation-store', 'data', allow_duplicate=True),
|
| 315 |
+
Output('session-activation-store-original', 'data', allow_duplicate=True),
|
| 316 |
Output('session-original-prompt-store', 'data'),
|
| 317 |
Output('session-selected-beam-store', 'data')],
|
| 318 |
[Input('generate-btn', 'n_clicks')],
|
|
|
|
| 361 |
|
| 362 |
if not config['block_modules']:
|
| 363 |
return (html.Div("Please select modules in the sidebar.", style={'color': 'red'}),
|
| 364 |
+
results, {'display': 'none'}, {'display': 'none'}, {}, {}, original_prompt_data, {})
|
| 365 |
|
| 366 |
# AGENT F KEY CHANGE: Run analysis on ORIGINAL PROMPT only, not generated text
|
| 367 |
# This ensures pipeline stages show how the model processes the user's input,
|
|
|
|
| 386 |
|
| 387 |
# Show pipeline immediately (analyzing original prompt)
|
| 388 |
return (results_ui, results, {'display': 'block'}, {'display': 'block'},
|
| 389 |
+
activation_data, activation_data, original_prompt_data, {})
|
| 390 |
|
| 391 |
else:
|
| 392 |
# Single token generation - store the result as selected beam
|
| 393 |
selected_beam_data = {'text': results[0]['text'], 'score': results[0].get('score', 0)}
|
| 394 |
return (results_ui, results, {'display': 'block'}, {'display': 'block'},
|
| 395 |
+
activation_data, activation_data, original_prompt_data, selected_beam_data)
|
| 396 |
|
| 397 |
except Exception as e:
|
| 398 |
import traceback
|
| 399 |
traceback.print_exc()
|
| 400 |
return (html.Div(f"Error: {e}", style={'color': 'red'}), [],
|
| 401 |
+
{'display': 'none'}, {'display': 'none'}, {}, {}, {}, {})
|
| 402 |
|
| 403 |
|
| 404 |
@app.callback(
|
|
|
|
| 761 |
|
| 762 |
|
| 763 |
@app.callback(
|
| 764 |
+
[Output('ablation-results-container', 'children'),
|
| 765 |
+
Output('session-activation-store', 'data', allow_duplicate=True)],
|
| 766 |
[Input('run-ablation-btn', 'n_clicks')],
|
| 767 |
[State('ablation-selected-heads', 'data'),
|
| 768 |
State('session-activation-store', 'data'),
|
|
|
|
| 774 |
def run_ablation_experiment(n_clicks, selected_heads, activation_data, model_name, prompt, selected_beam):
|
| 775 |
"""Run ablation on ORIGINAL PROMPT and compare results."""
|
| 776 |
if not n_clicks or not selected_heads or not activation_data:
|
| 777 |
+
return no_update, no_update
|
| 778 |
|
| 779 |
try:
|
| 780 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
|
| 807 |
heads_by_layer[layer].append(head)
|
| 808 |
|
| 809 |
if not heads_by_layer:
|
| 810 |
+
return html.Div("No valid heads selected.", style={'color': '#dc3545'}), no_update
|
| 811 |
|
| 812 |
# Run ablation
|
| 813 |
ablated_data = execute_forward_pass_with_multi_layer_head_ablation(
|
| 814 |
model, tokenizer, sequence_text, config, heads_by_layer
|
| 815 |
)
|
| 816 |
+
|
| 817 |
+
# Mark as ablated so UI knows
|
| 818 |
+
ablated_data['ablated'] = True
|
| 819 |
+
|
| 820 |
ablated_output = ablated_data.get('actual_output', {})
|
| 821 |
ablated_token = ablated_output.get('token', '')
|
| 822 |
ablated_prob = ablated_output.get('probability', 0)
|
| 823 |
|
| 824 |
+
results_display = create_ablation_results_display(
|
| 825 |
original_token, ablated_token, original_prob, ablated_prob,
|
| 826 |
selected_heads, selected_beam
|
| 827 |
)
|
| 828 |
|
| 829 |
+
return results_display, ablated_data
|
| 830 |
+
|
| 831 |
except Exception as e:
|
| 832 |
import traceback
|
| 833 |
traceback.print_exc()
|
| 834 |
+
return html.Div(f"Ablation error: {str(e)}", style={'color': '#dc3545'}), no_update
|
| 835 |
|
| 836 |
except Exception as e:
|
| 837 |
import traceback
|
conductor/tracks/ablation_20260129/plan.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
| 22 |
- [ ] Task: Conductor - User Manual Verification 'Frontend Control Panel' (Protocol in workflow.md)
|
| 23 |
|
| 24 |
## Phase 3: Visualization & Feedback Loop
|
| 25 |
-
- [ ] Task: Connect the Frontend Ablation State to the Backend Inference.
|
| 26 |
- [ ] Sub-task: Update the main `app.py` callback to pass the `disabled_heads` list to the backend capture function.
|
| 27 |
- [ ] Sub-task: Verify that toggling a head in the UI updates the Logit Lens/Output display.
|
| 28 |
- [ ] Task: Visual Polish for Ablated State.
|
|
|
|
| 22 |
- [ ] Task: Conductor - User Manual Verification 'Frontend Control Panel' (Protocol in workflow.md)
|
| 23 |
|
| 24 |
## Phase 3: Visualization & Feedback Loop
|
| 25 |
+
- [~] Task: Connect the Frontend Ablation State to the Backend Inference.
|
| 26 |
- [ ] Sub-task: Update the main `app.py` callback to pass the `disabled_heads` list to the backend capture function.
|
| 27 |
- [ ] Sub-task: Verify that toggling a head in the UI updates the Logit Lens/Output display.
|
| 28 |
- [ ] Task: Visual Polish for Ablated State.
|
utils/__pycache__/__init__.cpython-311.pyc
CHANGED
|
Binary files a/utils/__pycache__/__init__.cpython-311.pyc and b/utils/__pycache__/__init__.cpython-311.pyc differ
|
|
|
utils/__pycache__/model_patterns.cpython-311.pyc
CHANGED
|
Binary files a/utils/__pycache__/model_patterns.cpython-311.pyc and b/utils/__pycache__/model_patterns.cpython-311.pyc differ
|
|
|
utils/model_patterns.py
CHANGED
|
@@ -566,7 +566,23 @@ def execute_forward_pass_with_multi_layer_head_ablation(model, tokenizer, prompt
|
|
| 566 |
|
| 567 |
# Reconstruct output tuple
|
| 568 |
if len(output) > 1:
|
| 569 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 570 |
else:
|
| 571 |
ablated_output = (ablated_hidden,)
|
| 572 |
|
|
@@ -592,7 +608,7 @@ def execute_forward_pass_with_multi_layer_head_ablation(model, tokenizer, prompt
|
|
| 592 |
|
| 593 |
# Execute forward pass
|
| 594 |
with torch.no_grad():
|
| 595 |
-
model_output = intervenable_model.model(**inputs, use_cache=False)
|
| 596 |
|
| 597 |
# Remove hooks
|
| 598 |
for hook in hooks:
|
|
|
|
| 566 |
|
| 567 |
# Reconstruct output tuple
|
| 568 |
if len(output) > 1:
|
| 569 |
+
# Check for attention weights (usually index 2 if output_attentions=True)
|
| 570 |
+
if len(output) > 2:
|
| 571 |
+
attn_weights = output[2] # [batch, heads, seq, seq]
|
| 572 |
+
if isinstance(attn_weights, torch.Tensor):
|
| 573 |
+
# Zero out specified heads in attention weights too
|
| 574 |
+
# Clone to avoid in-place modification errors if any
|
| 575 |
+
attn_weights_mod = attn_weights.clone()
|
| 576 |
+
for head_idx in ablate_head_indices:
|
| 577 |
+
if 0 <= head_idx < num_heads:
|
| 578 |
+
attn_weights_mod[:, head_idx, :, :] = 0.0
|
| 579 |
+
|
| 580 |
+
# Reconstruct tuple with modified weights
|
| 581 |
+
ablated_output = (ablated_hidden, output[1], attn_weights_mod) + output[3:]
|
| 582 |
+
else:
|
| 583 |
+
ablated_output = (ablated_hidden,) + output[1:]
|
| 584 |
+
else:
|
| 585 |
+
ablated_output = (ablated_hidden,) + output[1:]
|
| 586 |
else:
|
| 587 |
ablated_output = (ablated_hidden,)
|
| 588 |
|
|
|
|
| 608 |
|
| 609 |
# Execute forward pass
|
| 610 |
with torch.no_grad():
|
| 611 |
+
model_output = intervenable_model.model(**inputs, use_cache=False, output_attentions=True)
|
| 612 |
|
| 613 |
# Remove hooks
|
| 614 |
for hook in hooks:
|