cdpearlman Claude Opus 4.6 committed on
Commit
abf6a1c
·
1 Parent(s): 2f024ca

Reduce jargon across UI for younger/less-technical audiences

Browse files

Replace technical ML terminology with plain-English equivalents while
preserving technical terms as parentheticals for educational value:
- Tokenization → Text Splitting
- Embedding/vector → Meaning Encoding/list of numbers
- MLP (Feed-Forward) → Knowledge Retrieval
- Ablation → Test by Removing
- Token Attribution → Word Influence
- Attention heads → detectors (with "heads" as technical note)
- Activation score → activity level
- Probability shift → confidence change

Updated glossary entries to lead with friendly names. Chatbot
suggestions also updated for consistency.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

app.py CHANGED
@@ -94,7 +94,7 @@ app.layout = html.Div([
94
 
95
  html.Div([
96
  html.Div([
97
- html.Label("Number of New Tokens:", className="input-label"),
98
  dcc.Slider(
99
  id='max-new-tokens-slider',
100
  min=1, max=20, step=1, value=1,
@@ -668,11 +668,11 @@ def update_pipeline_content(activation_data, model_name):
668
  outputs.append(create_tokenization_content(tokens, input_ids))
669
 
670
  # Stage 2: Embedding
671
- outputs.append(f"{hidden_dim}-dim vectors")
672
  outputs.append(create_embedding_content(hidden_dim, len(tokens)))
673
 
674
  # Stage 3: Attention (Agent G: now includes head_categories)
675
- outputs.append(f"{num_heads} heads Γ— {num_layers} layers")
676
  outputs.append(create_attention_content(attention_html, None, head_categories=head_categories))
677
 
678
  # Stage 4: MLP
@@ -831,7 +831,7 @@ def update_ablation_selectors(activation_data, selected_layer, model_name):
831
  # Update head options based on selected layer
832
  head_options = []
833
  if selected_layer is not None:
834
- head_options = [{'label': f'Head {i}', 'value': i} for i in range(num_heads)]
835
 
836
  # If only layer changed, return no_update for layer options to avoid flickering
837
  if trigger_id == 'ablation-layer-select':
@@ -952,7 +952,7 @@ def run_ablation_experiment(n_clicks, selected_heads, activation_data, model_nam
952
  heads_by_layer[layer].append(head)
953
 
954
  if not heads_by_layer:
955
- return html.Div("No valid heads selected.", style={'color': '#dc3545'}), no_update, no_update
956
 
957
  # Run ablation for generation
958
  ablated_beam = None
@@ -1014,7 +1014,7 @@ def run_ablation_experiment(n_clicks, selected_heads, activation_data, model_nam
1014
  except Exception as e:
1015
  import traceback
1016
  traceback.print_exc()
1017
- return html.Div(f"Ablation error: {str(e)}", style={'color': '#dc3545'}), no_update, no_update
1018
 
1019
 
1020
  @app.callback(
 
94
 
95
  html.Div([
96
  html.Div([
97
+ html.Label("Words to Generate:", className="input-label"),
98
  dcc.Slider(
99
  id='max-new-tokens-slider',
100
  min=1, max=20, step=1, value=1,
 
668
  outputs.append(create_tokenization_content(tokens, input_ids))
669
 
670
  # Stage 2: Embedding
671
+ outputs.append(f"{hidden_dim} numbers per word")
672
  outputs.append(create_embedding_content(hidden_dim, len(tokens)))
673
 
674
  # Stage 3: Attention (Agent G: now includes head_categories)
675
+ outputs.append(f"{num_heads} detectors Γ— {num_layers} layers")
676
  outputs.append(create_attention_content(attention_html, None, head_categories=head_categories))
677
 
678
  # Stage 4: MLP
 
831
  # Update head options based on selected layer
832
  head_options = []
833
  if selected_layer is not None:
834
+ head_options = [{'label': f'Detector {i}', 'value': i} for i in range(num_heads)]
835
 
836
  # If only layer changed, return no_update for layer options to avoid flickering
837
  if trigger_id == 'ablation-layer-select':
 
952
  heads_by_layer[layer].append(head)
953
 
954
  if not heads_by_layer:
955
+ return html.Div("No valid detectors selected.", style={'color': '#dc3545'}), no_update, no_update
956
 
957
  # Run ablation for generation
958
  ablated_beam = None
 
1014
  except Exception as e:
1015
  import traceback
1016
  traceback.print_exc()
1017
+ return html.Div(f"Removal test error: {str(e)}", style={'color': '#dc3545'}), no_update, no_update
1018
 
1019
 
1020
  @app.callback(
components/ablation_panel.py CHANGED
@@ -111,17 +111,18 @@ def create_ablation_panel():
111
  return html.Div([
112
  # Explanation
113
  html.Div([
114
- html.H5("What is Ablation?", style={'color': '#495057', 'marginBottom': '8px'}),
115
  html.P([
116
- "Ablation lets you ", html.Strong("remove specific attention heads"),
117
- " to see how they affect the model's output. If removing a head changes the prediction significantly, ",
118
- "that head was important for this particular input."
 
119
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
120
  ]),
121
 
122
  # Head Selector Interface
123
  html.Div([
124
- html.Label("Add Head to Ablation List:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
125
  html.Div([
126
  # Layer Select
127
  html.Div([
@@ -137,7 +138,7 @@ def create_ablation_panel():
137
  html.Div([
138
  dcc.Dropdown(
139
  id='ablation-head-select',
140
- placeholder="Head",
141
  options=[], # Populated by callback
142
  style={'fontSize': '14px'}
143
  )
@@ -147,16 +148,16 @@ def create_ablation_panel():
147
  html.Button([
148
  html.I(className='fas fa-plus'),
149
  ], id='ablation-add-head-btn', className='action-button secondary-button',
150
- title="Add Head", style={'padding': '8px 12px'})
151
 
152
  ], style={'display': 'flex', 'alignItems': 'center'})
153
  ], style={'marginBottom': '16px', 'padding': '16px', 'backgroundColor': '#f8f9fa', 'borderRadius': '8px', 'border': '1px solid #e2e8f0'}),
154
 
155
  # Selected heads display (chips with remove buttons)
156
  html.Div([
157
- html.Label("Selected Heads:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
158
  html.Div(id='ablation-selected-display', children=[
159
- html.Span("No heads selected yet", style={'color': '#6c757d', 'fontSize': '13px', 'fontStyle': 'italic'})
160
  ], style={
161
  'padding': '12px',
162
  'backgroundColor': '#f8f9fa',
@@ -169,16 +170,16 @@ def create_ablation_panel():
169
  # Reset button
170
  html.Button([
171
  html.I(className='fas fa-trash-alt', style={'marginRight': '8px'}),
172
- "Clear Selected Heads"
173
  ], id='clear-ablation-btn', className='action-button secondary-button',
174
  style={'width': '100%', 'marginBottom': '8px'}),
175
 
176
  # Run ablation button
177
  html.Button([
178
  html.I(className='fas fa-play', style={'marginRight': '8px'}),
179
- "Run Ablation Experiment"
180
  ], id='run-ablation-btn', className='action-button primary-button',
181
- disabled=True, title="Add at least one head above to run the experiment",
182
  style={'width': '100%', 'marginBottom': '16px'}),
183
 
184
  # Results container
@@ -204,7 +205,7 @@ def create_selected_heads_display(selected_heads):
204
  """
205
  if not selected_heads:
206
  return html.Div(
207
- "No heads selected yet",
208
  style={'color': '#6c757d', 'fontSize': '13px', 'fontStyle': 'italic', 'padding': '8px 0'}
209
  )
210
 
@@ -281,9 +282,9 @@ def create_ablation_results_display(original_data, ablated_data, selected_heads,
281
 
282
  # Summary of what was ablated
283
  results.append(html.Div([
284
- html.H5("Ablation Results", style={'color': '#495057', 'marginBottom': '16px'}),
285
  html.Div([
286
- html.Span("Ablated heads: ", style={'color': '#6c757d'}),
287
  html.Span(', '.join(all_heads_formatted),
288
  style={'fontWeight': '500', 'color': '#667eea', 'fontFamily': 'monospace'})
289
  ], style={'marginBottom': '16px'})
@@ -418,7 +419,7 @@ def create_ablation_results_display(original_data, ablated_data, selected_heads,
418
 
419
  # Ablated Output Column (Red Theme)
420
  html.Div([
421
- html.Div("ABLATED OUTPUT", style={
422
  'backgroundColor': '#dc3545', 'color': 'white', 'padding': '4px 16px',
423
  'borderRadius': '16px', 'fontWeight': 'bold', 'fontSize': '12px',
424
  'display': 'inline-block', 'marginBottom': '15px'
@@ -453,14 +454,14 @@ def create_ablation_results_display(original_data, ablated_data, selected_heads,
453
  html.Div([
454
  # Tokens Changed
455
  html.Div([
456
- html.Div("TOKENS CHANGED:", style={'fontSize': '11px', 'fontWeight': 'bold', 'color': '#495057'}),
457
  html.Div(f"{tokens_changed}/{max_len}", style={'fontSize': '28px', 'fontWeight': 'bold', 'color': '#212529', 'lineHeight': '1.2'}),
458
  html.Div(f"{percent_changed:.1f}% of sequence modified", style={'fontSize': '11px', 'color': '#6c757d'})
459
  ], style={'flex': '1', 'borderRight': '1px solid #dee2e6', 'paddingRight': '15px'}),
460
 
461
  # Avg Prob Shift
462
  html.Div([
463
- html.Div("AVERAGE PROBABILITY SHIFT:", style={'fontSize': '11px', 'fontWeight': 'bold', 'color': '#495057'}),
464
  html.Div([
465
  html.Span(f"{avg_prob_shift*100:+.1f}%", style={'color': '#dc3545' if avg_prob_shift < 0 else '#28a745', 'marginRight': '5px'}),
466
  html.I(className=f"fas {'fa-arrow-down' if avg_prob_shift < 0 else 'fa-arrow-up'}", style={'color': '#dc3545' if avg_prob_shift < 0 else '#28a745'})
 
111
  return html.Div([
112
  # Explanation
113
  html.Div([
114
+ html.H5("What is Test by Removing?", style={'color': '#495057', 'marginBottom': '8px'}),
115
  html.P([
116
+ "This tool lets you ", html.Strong("remove specific attention detectors"),
117
+ " to see how they affect the model's output. If removing a detector changes the prediction significantly, ",
118
+ "that detector was important for this particular input.",
119
+ " (This technique is called ", html.Em("ablation"), " in research.)"
120
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
121
  ]),
122
 
123
  # Head Selector Interface
124
  html.Div([
125
+ html.Label("Add a Detector to Remove:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
126
  html.Div([
127
  # Layer Select
128
  html.Div([
 
138
  html.Div([
139
  dcc.Dropdown(
140
  id='ablation-head-select',
141
+ placeholder="Detector",
142
  options=[], # Populated by callback
143
  style={'fontSize': '14px'}
144
  )
 
148
  html.Button([
149
  html.I(className='fas fa-plus'),
150
  ], id='ablation-add-head-btn', className='action-button secondary-button',
151
+ title="Add Detector", style={'padding': '8px 12px'})
152
 
153
  ], style={'display': 'flex', 'alignItems': 'center'})
154
  ], style={'marginBottom': '16px', 'padding': '16px', 'backgroundColor': '#f8f9fa', 'borderRadius': '8px', 'border': '1px solid #e2e8f0'}),
155
 
156
  # Selected heads display (chips with remove buttons)
157
  html.Div([
158
+ html.Label("Selected Detectors:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
159
  html.Div(id='ablation-selected-display', children=[
160
+ html.Span("No detectors selected yet", style={'color': '#6c757d', 'fontSize': '13px', 'fontStyle': 'italic'})
161
  ], style={
162
  'padding': '12px',
163
  'backgroundColor': '#f8f9fa',
 
170
  # Reset button
171
  html.Button([
172
  html.I(className='fas fa-trash-alt', style={'marginRight': '8px'}),
173
+ "Clear Selected Detectors"
174
  ], id='clear-ablation-btn', className='action-button secondary-button',
175
  style={'width': '100%', 'marginBottom': '8px'}),
176
 
177
  # Run ablation button
178
  html.Button([
179
  html.I(className='fas fa-play', style={'marginRight': '8px'}),
180
+ "Run Removal Test"
181
  ], id='run-ablation-btn', className='action-button primary-button',
182
+ disabled=True, title="Add at least one detector above to run the test",
183
  style={'width': '100%', 'marginBottom': '16px'}),
184
 
185
  # Results container
 
205
  """
206
  if not selected_heads:
207
  return html.Div(
208
+ "No detectors selected yet",
209
  style={'color': '#6c757d', 'fontSize': '13px', 'fontStyle': 'italic', 'padding': '8px 0'}
210
  )
211
 
 
282
 
283
  # Summary of what was ablated
284
  results.append(html.Div([
285
+ html.H5("Removal Test Results", style={'color': '#495057', 'marginBottom': '16px'}),
286
  html.Div([
287
+ html.Span("Removed detectors: ", style={'color': '#6c757d'}),
288
  html.Span(', '.join(all_heads_formatted),
289
  style={'fontWeight': '500', 'color': '#667eea', 'fontFamily': 'monospace'})
290
  ], style={'marginBottom': '16px'})
 
419
 
420
  # Ablated Output Column (Red Theme)
421
  html.Div([
422
+ html.Div("MODIFIED OUTPUT", style={
423
  'backgroundColor': '#dc3545', 'color': 'white', 'padding': '4px 16px',
424
  'borderRadius': '16px', 'fontWeight': 'bold', 'fontSize': '12px',
425
  'display': 'inline-block', 'marginBottom': '15px'
 
454
  html.Div([
455
  # Tokens Changed
456
  html.Div([
457
+ html.Div("WORDS CHANGED:", style={'fontSize': '11px', 'fontWeight': 'bold', 'color': '#495057'}),
458
  html.Div(f"{tokens_changed}/{max_len}", style={'fontSize': '28px', 'fontWeight': 'bold', 'color': '#212529', 'lineHeight': '1.2'}),
459
  html.Div(f"{percent_changed:.1f}% of sequence modified", style={'fontSize': '11px', 'color': '#6c757d'})
460
  ], style={'flex': '1', 'borderRight': '1px solid #dee2e6', 'paddingRight': '15px'}),
461
 
462
  # Avg Prob Shift
463
  html.Div([
464
+ html.Div("AVERAGE CONFIDENCE CHANGE:", style={'fontSize': '11px', 'fontWeight': 'bold', 'color': '#495057'}),
465
  html.Div([
466
  html.Span(f"{avg_prob_shift*100:+.1f}%", style={'color': '#dc3545' if avg_prob_shift < 0 else '#28a745', 'marginRight': '5px'}),
467
  html.I(className=f"fas {'fa-arrow-down' if avg_prob_shift < 0 else 'fa-arrow-up'}", style={'color': '#dc3545' if avg_prob_shift < 0 else '#28a745'})
components/chatbot.py CHANGED
@@ -13,11 +13,11 @@ from typing import List, Dict, Optional
13
  GREETING_MESSAGE = """Hi there! I'm your AI assistant for exploring transformer models.
14
 
15
  I can help you understand:
16
- - How attention heads and layers process your input
17
  - What various experiments can reveal about model behavior
18
  - General transformer and ML concepts
19
 
20
- Try asking: "What does attention head 0 in layer 1 do?" or "Why did ablating this head change the output?"
21
  """
22
 
23
 
 
13
  GREETING_MESSAGE = """Hi there! I'm your AI assistant for exploring transformer models.
14
 
15
  I can help you understand:
16
+ - How attention detectors and layers process your input
17
  - What various experiments can reveal about model behavior
18
  - General transformer and ML concepts
19
 
20
+ Try asking: "What does attention detector 0 in layer 1 do?" or "Why did removing this detector change the output?"
21
  """
22
 
23
 
components/glossary.py CHANGED
@@ -20,50 +20,50 @@ def create_glossary_modal():
20
 
21
  html.Div([
22
  _create_term_entry(
23
- "Tokenization",
24
- "Breaking text into pieces",
25
  "Models don't read words like we do. They break text into small chunks called 'tokens'. A token can be a whole word (like 'apple'), part of a word (like 'ing' in 'playing'), or even a space.",
26
  "https://www.youtube.com/embed/wjZofJX0v4M?start=0"
27
  ),
28
  _create_term_entry(
29
- "Embedding",
30
- "Converting tokens to numbers",
31
- "Once text is tokenized, each token is converted into a list of numbers (a vector). This vector represents the meaning of the token. Words with similar meanings (like 'dog' and 'puppy') have similar vectors.",
32
  "https://www.youtube.com/embed/wjZofJX0v4M?start=195"
33
  ),
34
  _create_term_entry(
35
- "Attention",
36
  "Context Lookup",
37
  "This is how the model understands context. When processing a word (like 'it'), the model 'pays attention' to other words in the sentence (like 'the cat') to figure out what 'it' refers to. It's like a spotlight shining on relevant past information.",
38
  "https://www.youtube.com/embed/eMlx5fFNoYc?start=0"
39
  ),
40
  _create_term_entry(
41
- "Attention Heads",
42
  "Parallel Context Searchers",
43
- "Instead of having just one attention mechanism, models use multiple 'heads' in parallel. Each head can learn to look for different types of relationships (e.g., one head might look for adjectives, while another tracks pronouns).",
44
  "https://www.youtube.com/embed/eMlx5fFNoYc?start=420"
45
  ),
46
  _create_term_entry(
47
- "Residual Stream",
48
  "The Information Highway",
49
  "Think of this as a conveyor belt carrying the model's current understanding of the sentence. As it passes through each layer, the layer adds new information to it (via addition), refining the prediction step-by-step.",
50
  "https://www.youtube.com/embed/wjZofJX0v4M?start=1173"
51
  ),
52
  _create_term_entry(
53
- "Logits / Log-Probs",
54
  "Prediction Scores",
55
- "The raw scores the model assigns to every possible next token. Higher scores mean the model thinks that token is more likely to come next.",
56
  "https://www.youtube.com/embed/wjZofJX0v4M?start=850"
57
  ),
58
  _create_term_entry(
59
- "Beam Search",
60
  "Exploring Multiple Paths",
61
- "Instead of just picking the single best next word, Beam Search explores several likely future paths simultaneously (like parallel universes) and picks the one that makes the most sense overall. The 'Number of Generation Choices' setting controls how many paths are explored at once."
62
  ),
63
  _create_term_entry(
64
- "Ablation (Activation Patching)",
65
  "Digital Brain Surgery",
66
- "A technique used to understand which parts of a model are responsible for certain behaviors. By artificially modifying or 'turning off' specific attention heads or activations, we can measure how much the model's output changes, revealing the importance of those components."
67
  )
68
  ], className="glossary-content-area"),
69
 
 
20
 
21
  html.Div([
22
  _create_term_entry(
23
+ "Text Splitting (Tokenization)",
24
+ "Breaking text into pieces",
25
  "Models don't read words like we do. They break text into small chunks called 'tokens'. A token can be a whole word (like 'apple'), part of a word (like 'ing' in 'playing'), or even a space.",
26
  "https://www.youtube.com/embed/wjZofJX0v4M?start=0"
27
  ),
28
  _create_term_entry(
29
+ "Meaning Encoding (Embedding)",
30
+ "Converting words to numbers",
31
+ "Once text is split into pieces, each piece is converted into a list of numbers. This list represents the meaning of the piece. Words with similar meanings (like 'dog' and 'puppy') get similar numbers.",
32
  "https://www.youtube.com/embed/wjZofJX0v4M?start=195"
33
  ),
34
  _create_term_entry(
35
+ "Attention",
36
  "Context Lookup",
37
  "This is how the model understands context. When processing a word (like 'it'), the model 'pays attention' to other words in the sentence (like 'the cat') to figure out what 'it' refers to. It's like a spotlight shining on relevant past information.",
38
  "https://www.youtube.com/embed/eMlx5fFNoYc?start=0"
39
  ),
40
  _create_term_entry(
41
+ "Attention Detectors (Heads)",
42
  "Parallel Context Searchers",
43
+ "Instead of having just one attention mechanism, models use multiple 'detectors' (called 'heads') in parallel. Each detector can learn to look for different types of relationships (e.g., one might look for adjectives, while another tracks pronouns).",
44
  "https://www.youtube.com/embed/eMlx5fFNoYc?start=420"
45
  ),
46
  _create_term_entry(
47
+ "Residual Stream",
48
  "The Information Highway",
49
  "Think of this as a conveyor belt carrying the model's current understanding of the sentence. As it passes through each layer, the layer adds new information to it (via addition), refining the prediction step-by-step.",
50
  "https://www.youtube.com/embed/wjZofJX0v4M?start=1173"
51
  ),
52
  _create_term_entry(
53
+ "Confidence Scores (Logits)",
54
  "Prediction Scores",
55
+ "The raw scores the model assigns to every possible next word. Higher scores mean the model thinks that word is more likely to come next.",
56
  "https://www.youtube.com/embed/wjZofJX0v4M?start=850"
57
  ),
58
  _create_term_entry(
59
+ "Beam Search",
60
  "Exploring Multiple Paths",
61
+ "Instead of just picking the single best next word, Beam Search explores several likely future paths simultaneously (like parallel universes) and picks the one that makes the most sense overall. The 'Options to Generate' setting controls how many paths are explored at once."
62
  ),
63
  _create_term_entry(
64
+ "Test by Removing (Ablation)",
65
  "Digital Brain Surgery",
66
+ "A technique used to understand which parts of a model are responsible for certain behaviors. By artificially 'turning off' specific attention detectors, we can measure how much the model's output changes, revealing the importance of those components."
67
  )
68
  ], className="glossary-content-area"),
69
 
components/investigation_panel.py CHANGED
@@ -27,13 +27,13 @@ def create_investigation_panel():
27
  html.Div([
28
  html.Button([
29
  html.I(className='fas fa-cut', style={'marginRight': '8px'}),
30
- "Ablation"
31
  ], id='investigation-tab-ablation', className='investigation-tab active',
32
  n_clicks=0, style=get_tab_style(True)),
33
 
34
  html.Button([
35
  html.I(className='fas fa-highlighter', style={'marginRight': '8px'}),
36
- "Token Attribution"
37
  ], id='investigation-tab-attribution', className='investigation-tab',
38
  n_clicks=0, style=get_tab_style(False))
39
  ], className='investigation-tabs', style={
@@ -95,22 +95,23 @@ def create_attribution_content():
95
  return html.Div([
96
  # Explanation
97
  html.Div([
98
- html.H5("What is Token Attribution?", style={'color': '#495057', 'marginBottom': '8px'}),
99
  html.P([
100
- "Token attribution uses ", html.Strong("gradient analysis"),
101
- " to identify which input tokens had the most influence on the model's prediction. ",
102
- "Tokens with higher attribution contributed more to the final output."
 
103
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
104
  ]),
105
 
106
  # Method selector
107
  html.Div([
108
- html.Label("Attribution Method:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
109
  dcc.RadioItems(
110
  id='attribution-method-radio',
111
  options=[
112
- {'label': ' Integrated Gradients (more accurate, slower)', 'value': 'integrated'},
113
- {'label': ' Simple Gradient (faster, less accurate)', 'value': 'simple'}
114
  ],
115
  value='integrated',
116
  style={'display': 'flex', 'flexDirection': 'column', 'gap': '8px'}
@@ -119,7 +120,7 @@ def create_attribution_content():
119
 
120
  # Target token selector
121
  html.Div([
122
- html.Label("Target Token:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
123
  dcc.Dropdown(
124
  id='attribution-target-dropdown',
125
  options=[], # Populated by callback with top-5 predictions
@@ -132,7 +133,7 @@ def create_attribution_content():
132
  # Run attribution button
133
  html.Button([
134
  html.I(className='fas fa-highlighter', style={'marginRight': '8px'}),
135
- "Compute Attribution"
136
  ], id='run-attribution-btn', className='action-button primary-button',
137
  style={'width': '100%', 'marginBottom': '16px'}),
138
 
@@ -174,7 +175,7 @@ def create_attribution_results_display(attribution_data, target_token):
174
  'fontFamily': 'monospace',
175
  'fontSize': '13px',
176
  'fontWeight': '500' if norm > 0.3 else '400'
177
- }, title=f"Attribution: {norm:.2f}")
178
  )
179
 
180
  # Create bar chart
@@ -188,9 +189,9 @@ def create_attribution_results_display(attribution_data, target_token):
188
  ))
189
 
190
  fig.update_layout(
191
- title="Attribution Scores by Token",
192
- xaxis_title="Attribution (normalized)",
193
- yaxis_title="Input Token",
194
  height=max(200, len(tokens) * 30),
195
  margin=dict(l=20, r=60, t=40, b=20),
196
  paper_bgcolor='rgba(0,0,0,0)',
@@ -199,9 +200,9 @@ def create_attribution_results_display(attribution_data, target_token):
199
  )
200
 
201
  return html.Div([
202
- html.H5("Token Attribution Results", style={'color': '#495057', 'marginBottom': '8px'}),
203
  html.P([
204
- "Attribution for predicting: ",
205
  html.Span(target_token, style={
206
  'padding': '4px 10px',
207
  'backgroundColor': '#667eea',
@@ -214,7 +215,7 @@ def create_attribution_results_display(attribution_data, target_token):
214
 
215
  # Token chips visualization
216
  html.Div([
217
- html.H6("Input tokens (darker = more important):", style={'color': '#6c757d', 'marginBottom': '8px'}),
218
  html.Div(token_chips, style={'lineHeight': '2'})
219
  ], style={
220
  'padding': '16px',
@@ -237,7 +238,7 @@ def create_attribution_results_display(attribution_data, target_token):
237
  html.Div([
238
  html.I(className='fas fa-info-circle', style={'color': '#667eea', 'marginRight': '8px'}),
239
  html.Span(
240
- "Tokens with higher attribution scores contributed more to the model's prediction. "
241
  "This helps identify which parts of the input were most influential.",
242
  style={'color': '#6c757d', 'fontSize': '13px'}
243
  )
 
27
  html.Div([
28
  html.Button([
29
  html.I(className='fas fa-cut', style={'marginRight': '8px'}),
30
+ "Test by Removing"
31
  ], id='investigation-tab-ablation', className='investigation-tab active',
32
  n_clicks=0, style=get_tab_style(True)),
33
 
34
  html.Button([
35
  html.I(className='fas fa-highlighter', style={'marginRight': '8px'}),
36
+ "Word Influence"
37
  ], id='investigation-tab-attribution', className='investigation-tab',
38
  n_clicks=0, style=get_tab_style(False))
39
  ], className='investigation-tabs', style={
 
95
  return html.Div([
96
  # Explanation
97
  html.Div([
98
+ html.H5("What is Word Influence?", style={'color': '#495057', 'marginBottom': '8px'}),
99
  html.P([
100
+ "This tool uses mathematical analysis ",
101
+ "to identify which input words had the most influence on the model's prediction. ",
102
+ "Words with higher influence scores contributed more to the final output.",
103
+ " (This technique is called ", html.Em("token attribution"), " in research.)"
104
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
105
  ]),
106
 
107
  # Method selector
108
  html.Div([
109
+ html.Label("Analysis Method:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
110
  dcc.RadioItems(
111
  id='attribution-method-radio',
112
  options=[
113
+ {'label': ' Deep Analysis (more accurate, slower)', 'value': 'integrated'},
114
+ {'label': ' Quick Analysis (faster, less accurate)', 'value': 'simple'}
115
  ],
116
  value='integrated',
117
  style={'display': 'flex', 'flexDirection': 'column', 'gap': '8px'}
 
120
 
121
  # Target token selector
122
  html.Div([
123
+ html.Label("Target Word:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
124
  dcc.Dropdown(
125
  id='attribution-target-dropdown',
126
  options=[], # Populated by callback with top-5 predictions
 
133
  # Run attribution button
134
  html.Button([
135
  html.I(className='fas fa-highlighter', style={'marginRight': '8px'}),
136
+ "Find Word Influence"
137
  ], id='run-attribution-btn', className='action-button primary-button',
138
  style={'width': '100%', 'marginBottom': '16px'}),
139
 
 
175
  'fontFamily': 'monospace',
176
  'fontSize': '13px',
177
  'fontWeight': '500' if norm > 0.3 else '400'
178
+ }, title=f"Influence: {norm:.2f}")
179
  )
180
 
181
  # Create bar chart
 
189
  ))
190
 
191
  fig.update_layout(
192
+ title="Influence Scores by Input Word",
193
+ xaxis_title="Influence (normalized)",
194
+ yaxis_title="Input Word",
195
  height=max(200, len(tokens) * 30),
196
  margin=dict(l=20, r=60, t=40, b=20),
197
  paper_bgcolor='rgba(0,0,0,0)',
 
200
  )
201
 
202
  return html.Div([
203
+ html.H5("Word Influence Results", style={'color': '#495057', 'marginBottom': '8px'}),
204
  html.P([
205
+ "Influence on predicting: ",
206
  html.Span(target_token, style={
207
  'padding': '4px 10px',
208
  'backgroundColor': '#667eea',
 
215
 
216
  # Token chips visualization
217
  html.Div([
218
+ html.H6("Input words (darker = more important):", style={'color': '#6c757d', 'marginBottom': '8px'}),
219
  html.Div(token_chips, style={'lineHeight': '2'})
220
  ], style={
221
  'padding': '16px',
 
238
  html.Div([
239
  html.I(className='fas fa-info-circle', style={'color': '#667eea', 'marginRight': '8px'}),
240
  html.Span(
241
+ "Words with higher influence scores contributed more to the model's prediction. "
242
  "This helps identify which parts of the input were most influential.",
243
  style={'color': '#6c757d', 'fontSize': '13px'}
244
  )
components/pipeline.py CHANGED
@@ -39,7 +39,7 @@ def create_pipeline_container():
39
  create_stage_container(
40
  stage_id='tokenization',
41
  stage_num=1,
42
- title='Tokenization',
43
  icon='fa-puzzle-piece',
44
  color='#667eea',
45
  summary_id='stage-1-summary',
@@ -50,7 +50,7 @@ def create_pipeline_container():
50
  create_stage_container(
51
  stage_id='embedding',
52
  stage_num=2,
53
- title='Embedding',
54
  icon='fa-cube',
55
  color='#764ba2',
56
  summary_id='stage-2-summary',
@@ -72,7 +72,7 @@ def create_pipeline_container():
72
  create_stage_container(
73
  stage_id='mlp',
74
  stage_num=4,
75
- title='MLP (Feed-Forward)',
76
  icon='fa-network-wired',
77
  color='#4facfe',
78
  summary_id='stage-4-summary',
@@ -98,10 +98,10 @@ def create_flow_indicator():
98
  """Create the horizontal flow indicator showing all stages."""
99
  stages = [
100
  ('Input', '#6c757d'),
101
- ('Tokens', '#667eea'),
102
- ('Embed', '#764ba2'),
103
  ('Attention', '#f093fb'),
104
- ('MLP', '#4facfe'),
105
  ('Output', '#00f2fe'),
106
  ]
107
 
@@ -288,9 +288,9 @@ def create_tokenization_content(tokens, token_ids, model_name=None):
288
  html.Div([
289
  html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
290
  html.P([
291
- "Your text is split into ",
292
- html.Strong(f"{len(tokens)} tokens"),
293
- " - small pieces that the model can understand. Each token is assigned a unique ID from the model's vocabulary."
294
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
295
  ]),
296
 
@@ -339,19 +339,19 @@ def create_embedding_content(hidden_dim=None, num_tokens=None):
339
  hidden_dim: Embedding dimension (e.g., 768)
340
  num_tokens: Number of tokens being processed
341
  """
342
- dim_text = f"{hidden_dim}-dimensional" if hidden_dim else "high-dimensional"
343
-
344
  return html.Div([
345
  html.Div([
346
  html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
347
  html.P([
348
- "Each token ID is used to look up a ", html.Strong(dim_text), " vector from a ",
349
- html.Strong("pre-learned embedding table"), ". Think of it like a dictionary: the model has already ",
350
- "memorized a numeric representation for every word in its vocabulary during training."
351
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '12px'}),
352
  html.P([
353
- "These embeddings capture semantic meaning - words with similar meanings (like 'happy' and 'joyful') ",
354
- "have similar vectors, allowing the model to understand relationships between words."
355
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
356
  ]),
357
 
@@ -373,7 +373,7 @@ def create_embedding_content(hidden_dim=None, num_tokens=None):
373
  html.Span('β†’', style={'margin': '0 16px', 'fontSize': '24px', 'color': '#adb5bd'}),
374
  html.Div([
375
  html.Span('[', style={'fontSize': '20px', 'color': '#495057'}),
376
- html.Span(f' {dim_text} vector ', style={
377
  'padding': '4px 12px',
378
  'backgroundColor': '#e5d4ff',
379
  'borderRadius': '4px',
@@ -390,8 +390,8 @@ def create_embedding_content(hidden_dim=None, num_tokens=None):
390
  html.I(className='fas fa-lightbulb', style={'color': '#ffc107', 'marginRight': '8px'}),
391
  html.Span([
392
  html.Strong("How the lookup table was created: "),
393
- "During training on billions of text examples, the model learned which numbers best represent each token. ",
394
- "This table is frozen after training - every time you use the model, the same token always maps to the same vector."
395
  ], style={'color': '#6c757d', 'fontSize': '13px'})
396
  ], style={'marginTop': '16px', 'padding': '12px', 'backgroundColor': '#fff8e1', 'borderRadius': '6px'}),
397
 
@@ -400,12 +400,12 @@ def create_embedding_content(hidden_dim=None, num_tokens=None):
400
  html.I(className='fas fa-map-marker-alt', style={'color': '#5c6bc0', 'marginRight': '8px'}),
401
  html.Span([
402
  html.Strong("Position matters too: "),
403
- "Token embeddings alone don't capture word order — 'the cat chased the dog' and 'the dog chased the cat' ",
404
  "would look the same. To fix this, the model also encodes ", html.Strong("positional information"),
405
- ". Some models (like GPT-2) add a learned position vector to each token embedding. ",
406
  "Others (like Pythia) use a technique called Rotary Positional Encoding, which encodes ",
407
- "the relative distance between tokens directly in the attention step. ",
408
- "Either way, the model knows both ", html.Em("what"), " each token is and ",
409
  html.Em("where"), " it sits in the sequence."
410
  ], style={'color': '#6c757d', 'fontSize': '13px'})
411
  ], style={'marginTop': '12px', 'padding': '12px', 'backgroundColor': '#e8eaf6', 'borderRadius': '6px'})
@@ -431,13 +431,13 @@ def create_attention_content(attention_html=None, top_attended=None, layer_info=
431
  html.Div([
432
  html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
433
  html.P([
434
- "The model looks at ", html.Strong("all tokens at once"),
435
  " and figures out which ones are related to each other. This is called 'attention' — ",
436
- "each token 'attends to' other tokens to gather context for its prediction."
437
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '12px'}),
438
  html.P([
439
- "Attention has multiple ", html.Strong("heads"), " — each head learns to look for different types of relationships. ",
440
- "Below you can see what role each head plays and whether it's active on your current input."
441
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
442
  ])
443
  ]
@@ -476,9 +476,9 @@ def create_attention_content(attention_html=None, top_attended=None, layer_info=
476
  html.I(className='fas fa-lightbulb', style={'color': '#f39c12', 'marginRight': '8px', 'fontSize': '16px'}),
477
  html.Span([
478
  html.Strong("Try this: "),
479
- f"Select Layer {guided_head['layer']}, Head {guided_head['head']} in the visualization below — ",
480
- f"this is a {guided_cat} head ",
481
- f"(activation: {guided_head['activation_score']:.0%} on your input)."
482
  ], style={'color': '#495057', 'fontSize': '13px'})
483
  ], style={
484
  'padding': '12px 16px', 'backgroundColor': '#fef9e7', 'borderRadius': '8px',
@@ -554,7 +554,7 @@ def create_attention_content(attention_html=None, top_attended=None, layer_info=
554
  'fontFamily': 'monospace', 'fontSize': '12px', 'fontWeight': '500',
555
  'minWidth': '60px', 'color': '#495057' if is_active else '#aaa',
556
  'display': 'inline-flex', 'alignItems': 'center',
557
- }, title=f"See Layer {head_info['layer']}, Head {head_info['head']} in the visualization below"),
558
  # Activation bar
559
  html.Div([
560
  html.Div(style={
@@ -660,10 +660,10 @@ def create_attention_content(attention_html=None, top_attended=None, layer_info=
660
 
661
  content_items.append(
662
  html.Div([
663
- html.H5("Attention Head Roles:", style={'color': '#495057', 'marginBottom': '8px'}),
664
  html.P([
665
- "Each category represents a type of behavior we detected in this model's attention heads. ",
666
- "Click a category to see individual heads and how strongly they're activated on your input."
667
  ], style={'color': '#6c757d', 'fontSize': '12px', 'marginBottom': '12px'}),
668
  legend,
669
  html.Div(category_sections),
@@ -671,8 +671,8 @@ def create_attention_content(attention_html=None, top_attended=None, layer_info=
671
  html.Div([
672
  html.I(className='fas fa-info-circle', style={'color': '#6c757d', 'marginRight': '6px', 'fontSize': '11px'}),
673
  html.Span(
674
- "These categories are simplified labels based on each head's dominant behavior. "
675
- "In reality, heads can serve multiple roles and may behave differently on different inputs.",
676
  style={'color': '#999', 'fontSize': '11px'}
677
  )
678
  ], style={'marginTop': '12px', 'padding': '8px 12px', 'backgroundColor': '#f8f9fa', 'borderRadius': '6px'})
@@ -684,7 +684,7 @@ def create_attention_content(attention_html=None, top_attended=None, layer_info=
684
  html.Div([
685
  html.I(className='fas fa-info-circle', style={'color': '#6c757d', 'marginRight': '8px'}),
686
  html.Span(
687
- "Head categorization is not available for this model. "
688
  "The attention visualization below still shows the full attention patterns.",
689
  style={'color': '#6c757d', 'fontSize': '13px'}
690
  )
@@ -702,26 +702,26 @@ def create_attention_content(attention_html=None, top_attended=None, layer_info=
702
  html.Div([
703
  html.Div([
704
  html.I(className='fas fa-mouse-pointer', style={'color': '#f093fb', 'marginRight': '8px'}),
705
- html.Strong("Select heads: "),
706
- html.Span("Click on layer/head numbers at the top to view specific attention heads.",
707
  style={'color': '#6c757d'})
708
  ], style={'marginBottom': '4px'}),
709
  html.Div([
710
  html.Span("• ", style={'color': '#f093fb', 'fontWeight': 'bold'}),
711
  html.Strong("Single click ", style={'color': '#495057'}),
712
- html.Span("on a colored head square: selects or deselects that head",
713
  style={'color': '#6c757d'})
714
  ], style={'marginLeft': '28px', 'marginBottom': '4px', 'fontSize': '13px'}),
715
  html.Div([
716
  html.Span("• ", style={'color': '#f093fb', 'fontWeight': 'bold'}),
717
  html.Strong("Double click ", style={'color': '#495057'}),
718
- html.Span("on a colored head square: selects only that head (deselects all others)",
719
  style={'color': '#6c757d'})
720
  ], style={'marginLeft': '28px', 'marginBottom': '12px', 'fontSize': '13px'}),
721
  html.Div([
722
  html.I(className='fas fa-arrows-alt-h', style={'color': '#f093fb', 'marginRight': '8px'}),
723
  html.Strong("Lines show attention: "),
724
- html.Span("Each line connects a token (left) to tokens it attends to (right). ",
725
  style={'color': '#6c757d'})
726
  ], style={'marginBottom': '8px'}),
727
  html.Div([
@@ -774,14 +774,14 @@ def create_mlp_content(layer_count=None, hidden_dim=None, intermediate_dim=None)
774
  html.Div([
775
  html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
776
  html.P([
777
- "After attention gathers context, each token's representation passes through a ",
778
- html.Strong("Feed-Forward Network (MLP)"),
779
  ". This is where the model's ", html.Strong("factual knowledge"), " is stored."
780
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '12px'}),
781
  html.P([
782
- "During training, the MLP weights learned to encode facts and patterns from the training data. ",
783
- "For example, when processing 'The capital of France is', the MLP layers help recall that 'Paris' is the answer. ",
784
- "Researchers have found that specific facts are often stored in specific MLP neurons."
785
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
786
  ]),
787
 
@@ -851,7 +851,7 @@ def create_mlp_content(layer_count=None, hidden_dim=None, intermediate_dim=None)
851
  html.Span([
852
  f"This happens in each of the model's ",
853
  html.Strong(f"{layer_count} layers" if layer_count else "transformer layers"),
854
- ", with attention and MLP working together - attention gathers context, MLP retrieves knowledge."
855
  ], style={'color': '#6c757d', 'fontSize': '13px'})
856
  ], style={'marginTop': '12px', 'padding': '12px', 'backgroundColor': '#e3f2fd', 'borderRadius': '6px'}),
857
 
@@ -860,8 +860,8 @@ def create_mlp_content(layer_count=None, hidden_dim=None, intermediate_dim=None)
860
  html.I(className='fas fa-road', style={'color': '#26a69a', 'marginRight': '8px'}),
861
  html.Span([
862
  html.Strong("Adding, not replacing: "),
863
- "The MLP doesn't replace the token's representation — it ",
864
- html.Strong("adds"), " to the residual stream. Each layer contributes new information on top of ",
865
  "everything computed before it, so the model accumulates understanding across all layers."
866
  ], style={'color': '#6c757d', 'fontSize': '13px'})
867
  ], style={'marginTop': '12px', 'padding': '12px', 'backgroundColor': '#e0f2f1', 'borderRadius': '6px'})
 
39
  create_stage_container(
40
  stage_id='tokenization',
41
  stage_num=1,
42
+ title='Text Splitting',
43
  icon='fa-puzzle-piece',
44
  color='#667eea',
45
  summary_id='stage-1-summary',
 
50
  create_stage_container(
51
  stage_id='embedding',
52
  stage_num=2,
53
+ title='Meaning Encoding',
54
  icon='fa-cube',
55
  color='#764ba2',
56
  summary_id='stage-2-summary',
 
72
  create_stage_container(
73
  stage_id='mlp',
74
  stage_num=4,
75
+ title='Knowledge Retrieval',
76
  icon='fa-network-wired',
77
  color='#4facfe',
78
  summary_id='stage-4-summary',
 
98
  """Create the horizontal flow indicator showing all stages."""
99
  stages = [
100
  ('Input', '#6c757d'),
101
+ ('Split', '#667eea'),
102
+ ('Encode', '#764ba2'),
103
  ('Attention', '#f093fb'),
104
+ ('Knowledge', '#4facfe'),
105
  ('Output', '#00f2fe'),
106
  ]
107
 
 
288
  html.Div([
289
  html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
290
  html.P([
291
+ "Your text is split into ",
292
+ html.Strong(f"{len(tokens)} pieces"),
293
+ " (called ", html.Em("tokens"), ") — small chunks that the model can understand. Each piece is assigned a unique ID from the model's vocabulary."
294
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
295
  ]),
296
 
 
339
  hidden_dim: Embedding dimension (e.g., 768)
340
  num_tokens: Number of tokens being processed
341
  """
342
+ dim_text = f"a list of {hidden_dim} numbers" if hidden_dim else "a long list of numbers"
343
+
344
  return html.Div([
345
  html.Div([
346
  html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
347
  html.P([
348
+ "Each piece is converted into ", html.Strong(dim_text), " using a ",
349
+ html.Strong("pre-learned lookup table"), ". Think of it like a dictionary: the model has already ",
350
+ "memorized a numeric code for every word in its vocabulary during training."
351
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '12px'}),
352
  html.P([
353
+ "These numeric codes capture meaning — words with similar meanings (like 'happy' and 'joyful') ",
354
+ "get similar numbers, allowing the model to understand relationships between words."
355
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
356
  ]),
357
 
 
373
  html.Span('→', style={'margin': '0 16px', 'fontSize': '24px', 'color': '#adb5bd'}),
374
  html.Div([
375
  html.Span('[', style={'fontSize': '20px', 'color': '#495057'}),
376
+ html.Span(f' {dim_text} ', style={
377
  'padding': '4px 12px',
378
  'backgroundColor': '#e5d4ff',
379
  'borderRadius': '4px',
 
390
  html.I(className='fas fa-lightbulb', style={'color': '#ffc107', 'marginRight': '8px'}),
391
  html.Span([
392
  html.Strong("How the lookup table was created: "),
393
+ "During training on billions of text examples, the model learned which numbers best represent each word piece. ",
394
+ "This table is frozen after training — every time you use the model, the same piece always maps to the same list of numbers."
395
  ], style={'color': '#6c757d', 'fontSize': '13px'})
396
  ], style={'marginTop': '16px', 'padding': '12px', 'backgroundColor': '#fff8e1', 'borderRadius': '6px'}),
397
 
 
400
  html.I(className='fas fa-map-marker-alt', style={'color': '#5c6bc0', 'marginRight': '8px'}),
401
  html.Span([
402
  html.Strong("Position matters too: "),
403
+ "These numeric codes alone don't capture word order — 'the cat chased the dog' and 'the dog chased the cat' ",
404
  "would look the same. To fix this, the model also encodes ", html.Strong("positional information"),
405
+ ". Some models (like GPT-2) add a learned set of position numbers to each piece's code. ",
406
  "Others (like Pythia) use a technique called Rotary Positional Encoding, which encodes ",
407
+ "the relative distance between pieces directly in the attention step. ",
408
+ "Either way, the model knows both ", html.Em("what"), " each piece is and ",
409
  html.Em("where"), " it sits in the sequence."
410
  ], style={'color': '#6c757d', 'fontSize': '13px'})
411
  ], style={'marginTop': '12px', 'padding': '12px', 'backgroundColor': '#e8eaf6', 'borderRadius': '6px'})
 
431
  html.Div([
432
  html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
433
  html.P([
434
+ "The model looks at ", html.Strong("all pieces at once"),
435
  " and figures out which ones are related to each other. This is called 'attention' — ",
436
+ "each piece 'attends to' other pieces to gather context for its prediction."
437
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '12px'}),
438
  html.P([
439
+ "Attention uses multiple ", html.Strong("detectors"), " (technically called 'heads') — each one learns to look for different types of relationships. ",
440
+ "Below you can see what role each detector plays and whether it's active on your current input."
441
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
442
  ])
443
  ]
 
476
  html.I(className='fas fa-lightbulb', style={'color': '#f39c12', 'marginRight': '8px', 'fontSize': '16px'}),
477
  html.Span([
478
  html.Strong("Try this: "),
479
+ f"Select Layer {guided_head['layer']}, Detector {guided_head['head']} in the visualization below — ",
480
+ f"this is a {guided_cat} detector ",
481
+ f"(activity level: {guided_head['activation_score']:.0%} on your input)."
482
  ], style={'color': '#495057', 'fontSize': '13px'})
483
  ], style={
484
  'padding': '12px 16px', 'backgroundColor': '#fef9e7', 'borderRadius': '8px',
 
554
  'fontFamily': 'monospace', 'fontSize': '12px', 'fontWeight': '500',
555
  'minWidth': '60px', 'color': '#495057' if is_active else '#aaa',
556
  'display': 'inline-flex', 'alignItems': 'center',
557
+ }, title=f"See Layer {head_info['layer']}, Detector {head_info['head']} in the visualization below"),
558
  # Activation bar
559
  html.Div([
560
  html.Div(style={
 
660
 
661
  content_items.append(
662
  html.Div([
663
+ html.H5("Attention Detector Roles:", style={'color': '#495057', 'marginBottom': '8px'}),
664
  html.P([
665
+ "Each category represents a type of behavior we detected in this model's attention detectors. ",
666
+ "Click a category to see individual detectors and how strongly they're activated on your input."
667
  ], style={'color': '#6c757d', 'fontSize': '12px', 'marginBottom': '12px'}),
668
  legend,
669
  html.Div(category_sections),
 
671
  html.Div([
672
  html.I(className='fas fa-info-circle', style={'color': '#6c757d', 'marginRight': '6px', 'fontSize': '11px'}),
673
  html.Span(
674
+ "These categories are simplified labels based on each detector's dominant behavior. "
675
+ "In reality, detectors can serve multiple roles and may behave differently on different inputs.",
676
  style={'color': '#999', 'fontSize': '11px'}
677
  )
678
  ], style={'marginTop': '12px', 'padding': '8px 12px', 'backgroundColor': '#f8f9fa', 'borderRadius': '6px'})
 
684
  html.Div([
685
  html.I(className='fas fa-info-circle', style={'color': '#6c757d', 'marginRight': '8px'}),
686
  html.Span(
687
+ "Detector categorization is not available for this model. "
688
  "The attention visualization below still shows the full attention patterns.",
689
  style={'color': '#6c757d', 'fontSize': '13px'}
690
  )
 
702
  html.Div([
703
  html.Div([
704
  html.I(className='fas fa-mouse-pointer', style={'color': '#f093fb', 'marginRight': '8px'}),
705
+ html.Strong("Select detectors: "),
706
+ html.Span("Click on layer/detector numbers at the top to view specific attention detectors.",
707
  style={'color': '#6c757d'})
708
  ], style={'marginBottom': '4px'}),
709
  html.Div([
710
  html.Span("• ", style={'color': '#f093fb', 'fontWeight': 'bold'}),
711
  html.Strong("Single click ", style={'color': '#495057'}),
712
+ html.Span("on a colored square: selects or deselects that detector",
713
  style={'color': '#6c757d'})
714
  ], style={'marginLeft': '28px', 'marginBottom': '4px', 'fontSize': '13px'}),
715
  html.Div([
716
  html.Span("• ", style={'color': '#f093fb', 'fontWeight': 'bold'}),
717
  html.Strong("Double click ", style={'color': '#495057'}),
718
+ html.Span("on a colored square: selects only that detector (deselects all others)",
719
  style={'color': '#6c757d'})
720
  ], style={'marginLeft': '28px', 'marginBottom': '12px', 'fontSize': '13px'}),
721
  html.Div([
722
  html.I(className='fas fa-arrows-alt-h', style={'color': '#f093fb', 'marginRight': '8px'}),
723
  html.Strong("Lines show attention: "),
724
+ html.Span("Each line connects a word (left) to words it attends to (right). ",
725
  style={'color': '#6c757d'})
726
  ], style={'marginBottom': '8px'}),
727
  html.Div([
 
774
  html.Div([
775
  html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
776
  html.P([
777
+ "After attention gathers context, each piece's representation passes through a ",
778
+ html.Strong("knowledge retrieval layer"), " (technically called a Feed-Forward Network or MLP)",
779
  ". This is where the model's ", html.Strong("factual knowledge"), " is stored."
780
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '12px'}),
781
  html.P([
782
+ "During training, these layers learned to encode facts and patterns from the training data. ",
783
+ "For example, when processing 'The capital of France is', these knowledge layers help recall that 'Paris' is the answer. ",
784
+ "Researchers have found that specific facts are often stored in specific neurons within these layers."
785
  ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
786
  ]),
787
 
 
851
  html.Span([
852
  f"This happens in each of the model's ",
853
  html.Strong(f"{layer_count} layers" if layer_count else "transformer layers"),
854
+ ", with attention and knowledge retrieval working together — attention gathers context, and the knowledge layers retrieve facts."
855
  ], style={'color': '#6c757d', 'fontSize': '13px'})
856
  ], style={'marginTop': '12px', 'padding': '12px', 'backgroundColor': '#e3f2fd', 'borderRadius': '6px'}),
857
 
 
860
  html.I(className='fas fa-road', style={'color': '#26a69a', 'marginRight': '8px'}),
861
  html.Span([
862
  html.Strong("Adding, not replacing: "),
863
+ "The knowledge layer doesn't replace the piece's representation — it ",
864
+ html.Strong("adds"), " to it. Each layer contributes new information on top of ",
865
  "everything computed before it, so the model accumulates understanding across all layers."
866
  ], style={'color': '#6c757d', 'fontSize': '13px'})
867
  ], style={'marginTop': '12px', 'padding': '12px', 'backgroundColor': '#e0f2f1', 'borderRadius': '6px'})