Spaces:

cdpearlman
/

LLMVis

Running

cdpearlman Claude Opus 4.6 committed on Mar 16

Commit

abf6a1c

1 Parent(s): 2f024ca

Reduce jargon across UI for younger/less-technical audiences

Replace technical ML terminology with plain-English equivalents while
preserving technical terms as parentheticals for educational value:
- Tokenization → Text Splitting
- Embedding/vector → Meaning Encoding/list of numbers
- MLP (Feed-Forward) → Knowledge Retrieval
- Ablation → Test by Removing
- Token Attribution → Word Influence
- Attention heads → detectors (with "heads" kept as a technical note)
- Activation score → activity level
- Probability shift → confidence change

Updated glossary entries to lead with friendly names. Chatbot
suggestions also updated for consistency.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (6) hide show

app.py +6 -6
components/ablation_panel.py +19 -18
components/chatbot.py +2 -2
components/glossary.py +15 -15
components/investigation_panel.py +20 -19
components/pipeline.py +50 -50

app.py CHANGED Viewed

@@ -94,7 +94,7 @@ app.layout = html.Div([
                         html.Div([
                             html.Div([
-                                html.Label("Number of New Tokens:", className="input-label"),
                                 dcc.Slider(
                                     id='max-new-tokens-slider',
                                     min=1, max=20, step=1, value=1,
@@ -668,11 +668,11 @@ def update_pipeline_content(activation_data, model_name):
         outputs.append(create_tokenization_content(tokens, input_ids))
         # Stage 2: Embedding
-        outputs.append(f"{hidden_dim}-dim vectors")
         outputs.append(create_embedding_content(hidden_dim, len(tokens)))
         # Stage 3: Attention (Agent G: now includes head_categories)
-        outputs.append(f"{num_heads} heads × {num_layers} layers")
         outputs.append(create_attention_content(attention_html, None, head_categories=head_categories))
         # Stage 4: MLP
@@ -831,7 +831,7 @@ def update_ablation_selectors(activation_data, selected_layer, model_name):
     # Update head options based on selected layer
     head_options = []
     if selected_layer is not None:
-        head_options = [{'label': f'Head {i}', 'value': i} for i in range(num_heads)]
     # If only layer changed, return no_update for layer options to avoid flickering
     if trigger_id == 'ablation-layer-select':
@@ -952,7 +952,7 @@ def run_ablation_experiment(n_clicks, selected_heads, activation_data, model_nam
                     heads_by_layer[layer].append(head)
         if not heads_by_layer:
-            return html.Div("No valid heads selected.", style={'color': '#dc3545'}), no_update, no_update
         # Run ablation for generation
         ablated_beam = None
@@ -1014,7 +1014,7 @@ def run_ablation_experiment(n_clicks, selected_heads, activation_data, model_nam
     except Exception as e:
         import traceback
         traceback.print_exc()
-        return html.Div(f"Ablation error: {str(e)}", style={'color': '#dc3545'}), no_update, no_update
 @app.callback(

                         html.Div([
                             html.Div([
+                                html.Label("Words to Generate:", className="input-label"),
                                 dcc.Slider(
                                     id='max-new-tokens-slider',
                                     min=1, max=20, step=1, value=1,
         outputs.append(create_tokenization_content(tokens, input_ids))
         # Stage 2: Embedding
+        outputs.append(f"{hidden_dim} numbers per word")
         outputs.append(create_embedding_content(hidden_dim, len(tokens)))
         # Stage 3: Attention (Agent G: now includes head_categories)
+        outputs.append(f"{num_heads} detectors × {num_layers} layers")
         outputs.append(create_attention_content(attention_html, None, head_categories=head_categories))
         # Stage 4: MLP
     # Update head options based on selected layer
     head_options = []
     if selected_layer is not None:
+        head_options = [{'label': f'Detector {i}', 'value': i} for i in range(num_heads)]
     # If only layer changed, return no_update for layer options to avoid flickering
     if trigger_id == 'ablation-layer-select':
                     heads_by_layer[layer].append(head)
         if not heads_by_layer:
+            return html.Div("No valid detectors selected.", style={'color': '#dc3545'}), no_update, no_update
         # Run ablation for generation
         ablated_beam = None
     except Exception as e:
         import traceback
         traceback.print_exc()
+        return html.Div(f"Removal test error: {str(e)}", style={'color': '#dc3545'}), no_update, no_update
 @app.callback(

components/ablation_panel.py CHANGED Viewed

@@ -111,17 +111,18 @@ def create_ablation_panel():
     return html.Div([
         # Explanation
         html.Div([
-            html.H5("What is Ablation?", style={'color': '#495057', 'marginBottom': '8px'}),
             html.P([
-                "Ablation lets you ", html.Strong("remove specific attention heads"),
-                " to see how they affect the model's output. If removing a head changes the prediction significantly, ",
-                "that head was important for this particular input."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
         ]),
         # Head Selector Interface
         html.Div([
-            html.Label("Add Head to Ablation List:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
             html.Div([
                 # Layer Select
                 html.Div([
@@ -137,7 +138,7 @@ def create_ablation_panel():
                 html.Div([
                     dcc.Dropdown(
                         id='ablation-head-select',
-                        placeholder="Head",
                         options=[], # Populated by callback
                         style={'fontSize': '14px'}
                     )
@@ -147,16 +148,16 @@ def create_ablation_panel():
                 html.Button([
                     html.I(className='fas fa-plus'),
                 ], id='ablation-add-head-btn', className='action-button secondary-button',
-                   title="Add Head", style={'padding': '8px 12px'})
             ], style={'display': 'flex', 'alignItems': 'center'})
         ], style={'marginBottom': '16px', 'padding': '16px', 'backgroundColor': '#f8f9fa', 'borderRadius': '8px', 'border': '1px solid #e2e8f0'}),
         # Selected heads display (chips with remove buttons)
         html.Div([
-            html.Label("Selected Heads:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
             html.Div(id='ablation-selected-display', children=[
-                html.Span("No heads selected yet", style={'color': '#6c757d', 'fontSize': '13px', 'fontStyle': 'italic'})
             ], style={
                 'padding': '12px',
                 'backgroundColor': '#f8f9fa',
@@ -169,16 +170,16 @@ def create_ablation_panel():
         # Reset button
         html.Button([
             html.I(className='fas fa-trash-alt', style={'marginRight': '8px'}),
-            "Clear Selected Heads"
         ], id='clear-ablation-btn', className='action-button secondary-button',
            style={'width': '100%', 'marginBottom': '8px'}),
         # Run ablation button
         html.Button([
             html.I(className='fas fa-play', style={'marginRight': '8px'}),
-            "Run Ablation Experiment"
         ], id='run-ablation-btn', className='action-button primary-button',
-           disabled=True, title="Add at least one head above to run the experiment",
            style={'width': '100%', 'marginBottom': '16px'}),
         # Results container
@@ -204,7 +205,7 @@ def create_selected_heads_display(selected_heads):
     """
     if not selected_heads:
         return html.Div(
-            "No heads selected yet",
             style={'color': '#6c757d', 'fontSize': '13px', 'fontStyle': 'italic', 'padding': '8px 0'}
         )
@@ -281,9 +282,9 @@ def create_ablation_results_display(original_data, ablated_data, selected_heads,
     # Summary of what was ablated
     results.append(html.Div([
-        html.H5("Ablation Results", style={'color': '#495057', 'marginBottom': '16px'}),
         html.Div([
-            html.Span("Ablated heads: ", style={'color': '#6c757d'}),
             html.Span(', '.join(all_heads_formatted),
                      style={'fontWeight': '500', 'color': '#667eea', 'fontFamily': 'monospace'})
         ], style={'marginBottom': '16px'})
@@ -418,7 +419,7 @@ def create_ablation_results_display(original_data, ablated_data, selected_heads,
         # Ablated Output Column (Red Theme)
         html.Div([
-            html.Div("ABLATED OUTPUT", style={
                 'backgroundColor': '#dc3545', 'color': 'white', 'padding': '4px 16px',
                 'borderRadius': '16px', 'fontWeight': 'bold', 'fontSize': '12px',
                 'display': 'inline-block', 'marginBottom': '15px'
@@ -453,14 +454,14 @@ def create_ablation_results_display(original_data, ablated_data, selected_heads,
         html.Div([
             # Tokens Changed
             html.Div([
-                html.Div("TOKENS CHANGED:", style={'fontSize': '11px', 'fontWeight': 'bold', 'color': '#495057'}),
                 html.Div(f"{tokens_changed}/{max_len}", style={'fontSize': '28px', 'fontWeight': 'bold', 'color': '#212529', 'lineHeight': '1.2'}),
                 html.Div(f"{percent_changed:.1f}% of sequence modified", style={'fontSize': '11px', 'color': '#6c757d'})
             ], style={'flex': '1', 'borderRight': '1px solid #dee2e6', 'paddingRight': '15px'}),
             # Avg Prob Shift
             html.Div([
-                html.Div("AVERAGE PROBABILITY SHIFT:", style={'fontSize': '11px', 'fontWeight': 'bold', 'color': '#495057'}),
                 html.Div([
                     html.Span(f"{avg_prob_shift*100:+.1f}%", style={'color': '#dc3545' if avg_prob_shift < 0 else '#28a745', 'marginRight': '5px'}),
                     html.I(className=f"fas {'fa-arrow-down' if avg_prob_shift < 0 else 'fa-arrow-up'}", style={'color': '#dc3545' if avg_prob_shift < 0 else '#28a745'})

     return html.Div([
         # Explanation
         html.Div([
+            html.H5("What is Test by Removing?", style={'color': '#495057', 'marginBottom': '8px'}),
             html.P([
+                "This tool lets you ", html.Strong("remove specific attention detectors"),
+                " to see how they affect the model's output. If removing a detector changes the prediction significantly, ",
+                "that detector was important for this particular input.",
+                " (This technique is called ", html.Em("ablation"), " in research.)"
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
         ]),
         # Head Selector Interface
         html.Div([
+            html.Label("Add a Detector to Remove:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
             html.Div([
                 # Layer Select
                 html.Div([
                 html.Div([
                     dcc.Dropdown(
                         id='ablation-head-select',
+                        placeholder="Detector",
                         options=[], # Populated by callback
                         style={'fontSize': '14px'}
                     )
                 html.Button([
                     html.I(className='fas fa-plus'),
                 ], id='ablation-add-head-btn', className='action-button secondary-button',
+                   title="Add Detector", style={'padding': '8px 12px'})
             ], style={'display': 'flex', 'alignItems': 'center'})
         ], style={'marginBottom': '16px', 'padding': '16px', 'backgroundColor': '#f8f9fa', 'borderRadius': '8px', 'border': '1px solid #e2e8f0'}),
         # Selected heads display (chips with remove buttons)
         html.Div([
+            html.Label("Selected Detectors:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
             html.Div(id='ablation-selected-display', children=[
+                html.Span("No detectors selected yet", style={'color': '#6c757d', 'fontSize': '13px', 'fontStyle': 'italic'})
             ], style={
                 'padding': '12px',
                 'backgroundColor': '#f8f9fa',
         # Reset button
         html.Button([
             html.I(className='fas fa-trash-alt', style={'marginRight': '8px'}),
+            "Clear Selected Detectors"
         ], id='clear-ablation-btn', className='action-button secondary-button',
            style={'width': '100%', 'marginBottom': '8px'}),
         # Run ablation button
         html.Button([
             html.I(className='fas fa-play', style={'marginRight': '8px'}),
+            "Run Removal Test"
         ], id='run-ablation-btn', className='action-button primary-button',
+           disabled=True, title="Add at least one detector above to run the test",
            style={'width': '100%', 'marginBottom': '16px'}),
         # Results container
     """
     if not selected_heads:
         return html.Div(
+            "No detectors selected yet",
             style={'color': '#6c757d', 'fontSize': '13px', 'fontStyle': 'italic', 'padding': '8px 0'}
         )
     # Summary of what was ablated
     results.append(html.Div([
+        html.H5("Removal Test Results", style={'color': '#495057', 'marginBottom': '16px'}),
         html.Div([
+            html.Span("Removed detectors: ", style={'color': '#6c757d'}),
             html.Span(', '.join(all_heads_formatted),
                      style={'fontWeight': '500', 'color': '#667eea', 'fontFamily': 'monospace'})
         ], style={'marginBottom': '16px'})
         # Ablated Output Column (Red Theme)
         html.Div([
+            html.Div("MODIFIED OUTPUT", style={
                 'backgroundColor': '#dc3545', 'color': 'white', 'padding': '4px 16px',
                 'borderRadius': '16px', 'fontWeight': 'bold', 'fontSize': '12px',
                 'display': 'inline-block', 'marginBottom': '15px'
         html.Div([
             # Tokens Changed
             html.Div([
+                html.Div("WORDS CHANGED:", style={'fontSize': '11px', 'fontWeight': 'bold', 'color': '#495057'}),
                 html.Div(f"{tokens_changed}/{max_len}", style={'fontSize': '28px', 'fontWeight': 'bold', 'color': '#212529', 'lineHeight': '1.2'}),
                 html.Div(f"{percent_changed:.1f}% of sequence modified", style={'fontSize': '11px', 'color': '#6c757d'})
             ], style={'flex': '1', 'borderRight': '1px solid #dee2e6', 'paddingRight': '15px'}),
             # Avg Prob Shift
             html.Div([
+                html.Div("AVERAGE CONFIDENCE CHANGE:", style={'fontSize': '11px', 'fontWeight': 'bold', 'color': '#495057'}),
                 html.Div([
                     html.Span(f"{avg_prob_shift*100:+.1f}%", style={'color': '#dc3545' if avg_prob_shift < 0 else '#28a745', 'marginRight': '5px'}),
                     html.I(className=f"fas {'fa-arrow-down' if avg_prob_shift < 0 else 'fa-arrow-up'}", style={'color': '#dc3545' if avg_prob_shift < 0 else '#28a745'})

components/chatbot.py CHANGED Viewed

@@ -13,11 +13,11 @@ from typing import List, Dict, Optional
 GREETING_MESSAGE = """Hi there! I'm your AI assistant for exploring transformer models.
 I can help you understand:
-- How attention heads and layers process your input
 - What various experiments can reveal about model behavior
 - General transformer and ML concepts
-Try asking: "What does attention head 0 in layer 1 do?" or "Why did ablating this head change the output?"
 """

 GREETING_MESSAGE = """Hi there! I'm your AI assistant for exploring transformer models.
 I can help you understand:
+- How attention detectors and layers process your input
 - What various experiments can reveal about model behavior
 - General transformer and ML concepts
+Try asking: "What does attention detector 0 in layer 1 do?" or "Why did removing this detector change the output?"
 """

components/glossary.py CHANGED Viewed

@@ -20,50 +20,50 @@ def create_glossary_modal():
             html.Div([
                 _create_term_entry(
-                    "Tokenization",
-                    "Breaking text into pieces",
                     "Models don't read words like we do. They break text into small chunks called 'tokens'. A token can be a whole word (like 'apple'), part of a word (like 'ing' in 'playing'), or even a space.",
                     "https://www.youtube.com/embed/wjZofJX0v4M?start=0"
                 ),
                 _create_term_entry(
-                    "Embedding",
-                    "Converting tokens to numbers",
-                    "Once text is tokenized, each token is converted into a list of numbers (a vector). This vector represents the meaning of the token. Words with similar meanings (like 'dog' and 'puppy') have similar vectors.",
                     "https://www.youtube.com/embed/wjZofJX0v4M?start=195"
                 ),
                 _create_term_entry(
-                    "Attention",
                     "Context Lookup",
                     "This is how the model understands context. When processing a word (like 'it'), the model 'pays attention' to other words in the sentence (like 'the cat') to figure out what 'it' refers to. It's like a spotlight shining on relevant past information.",
                     "https://www.youtube.com/embed/eMlx5fFNoYc?start=0"
                 ),
                 _create_term_entry(
-                    "Attention Heads",
                     "Parallel Context Searchers",
-                    "Instead of having just one attention mechanism, models use multiple 'heads' in parallel. Each head can learn to look for different types of relationships (e.g., one head might look for adjectives, while another tracks pronouns).",
                     "https://www.youtube.com/embed/eMlx5fFNoYc?start=420"
                 ),
                 _create_term_entry(
-                    "Residual Stream",
                     "The Information Highway",
                     "Think of this as a conveyor belt carrying the model's current understanding of the sentence. As it passes through each layer, the layer adds new information to it (via addition), refining the prediction step-by-step.",
                     "https://www.youtube.com/embed/wjZofJX0v4M?start=1173"
                 ),
                 _create_term_entry(
-                    "Logits / Log-Probs",
                     "Prediction Scores",
-                    "The raw scores the model assigns to every possible next token. Higher scores mean the model thinks that token is more likely to come next.",
                     "https://www.youtube.com/embed/wjZofJX0v4M?start=850"
                 ),
                 _create_term_entry(
-                    "Beam Search",
                     "Exploring Multiple Paths",
-                    "Instead of just picking the single best next word, Beam Search explores several likely future paths simultaneously (like parallel universes) and picks the one that makes the most sense overall. The 'Number of Generation Choices' setting controls how many paths are explored at once."
                 ),
                 _create_term_entry(
-                    "Ablation (Activation Patching)",
                     "Digital Brain Surgery",
-                    "A technique used to understand which parts of a model are responsible for certain behaviors. By artificially modifying or 'turning off' specific attention heads or activations, we can measure how much the model's output changes, revealing the importance of those components."
                 )
             ], className="glossary-content-area"),

             html.Div([
                 _create_term_entry(
+                    "Text Splitting (Tokenization)",
+                    "Breaking text into pieces",
                     "Models don't read words like we do. They break text into small chunks called 'tokens'. A token can be a whole word (like 'apple'), part of a word (like 'ing' in 'playing'), or even a space.",
                     "https://www.youtube.com/embed/wjZofJX0v4M?start=0"
                 ),
                 _create_term_entry(
+                    "Meaning Encoding (Embedding)",
+                    "Converting words to numbers",
+                    "Once text is split into pieces, each piece is converted into a list of numbers. This list represents the meaning of the piece. Words with similar meanings (like 'dog' and 'puppy') get similar numbers.",
                     "https://www.youtube.com/embed/wjZofJX0v4M?start=195"
                 ),
                 _create_term_entry(
+                    "Attention",
                     "Context Lookup",
                     "This is how the model understands context. When processing a word (like 'it'), the model 'pays attention' to other words in the sentence (like 'the cat') to figure out what 'it' refers to. It's like a spotlight shining on relevant past information.",
                     "https://www.youtube.com/embed/eMlx5fFNoYc?start=0"
                 ),
                 _create_term_entry(
+                    "Attention Detectors (Heads)",
                     "Parallel Context Searchers",
+                    "Instead of having just one attention mechanism, models use multiple 'detectors' (called 'heads') in parallel. Each detector can learn to look for different types of relationships (e.g., one might look for adjectives, while another tracks pronouns).",
                     "https://www.youtube.com/embed/eMlx5fFNoYc?start=420"
                 ),
                 _create_term_entry(
+                    "Residual Stream",
                     "The Information Highway",
                     "Think of this as a conveyor belt carrying the model's current understanding of the sentence. As it passes through each layer, the layer adds new information to it (via addition), refining the prediction step-by-step.",
                     "https://www.youtube.com/embed/wjZofJX0v4M?start=1173"
                 ),
                 _create_term_entry(
+                    "Confidence Scores (Logits)",
                     "Prediction Scores",
+                    "The raw scores the model assigns to every possible next word. Higher scores mean the model thinks that word is more likely to come next.",
                     "https://www.youtube.com/embed/wjZofJX0v4M?start=850"
                 ),
                 _create_term_entry(
+                    "Beam Search",
                     "Exploring Multiple Paths",
+                    "Instead of just picking the single best next word, Beam Search explores several likely future paths simultaneously (like parallel universes) and picks the one that makes the most sense overall. The 'Options to Generate' setting controls how many paths are explored at once."
                 ),
                 _create_term_entry(
+                    "Test by Removing (Ablation)",
                     "Digital Brain Surgery",
+                    "A technique used to understand which parts of a model are responsible for certain behaviors. By artificially 'turning off' specific attention detectors, we can measure how much the model's output changes, revealing the importance of those components."
                 )
             ], className="glossary-content-area"),

components/investigation_panel.py CHANGED Viewed

@@ -27,13 +27,13 @@ def create_investigation_panel():
         html.Div([
             html.Button([
                 html.I(className='fas fa-cut', style={'marginRight': '8px'}),
-                "Ablation"
             ], id='investigation-tab-ablation', className='investigation-tab active',
                n_clicks=0, style=get_tab_style(True)),
             html.Button([
                 html.I(className='fas fa-highlighter', style={'marginRight': '8px'}),
-                "Token Attribution"
             ], id='investigation-tab-attribution', className='investigation-tab',
                n_clicks=0, style=get_tab_style(False))
         ], className='investigation-tabs', style={
@@ -95,22 +95,23 @@ def create_attribution_content():
     return html.Div([
         # Explanation
         html.Div([
-            html.H5("What is Token Attribution?", style={'color': '#495057', 'marginBottom': '8px'}),
             html.P([
-                "Token attribution uses ", html.Strong("gradient analysis"),
-                " to identify which input tokens had the most influence on the model's prediction. ",
-                "Tokens with higher attribution contributed more to the final output."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
         ]),
         # Method selector
         html.Div([
-            html.Label("Attribution Method:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
             dcc.RadioItems(
                 id='attribution-method-radio',
                 options=[
-                    {'label': ' Integrated Gradients (more accurate, slower)', 'value': 'integrated'},
-                    {'label': ' Simple Gradient (faster, less accurate)', 'value': 'simple'}
                 ],
                 value='integrated',
                 style={'display': 'flex', 'flexDirection': 'column', 'gap': '8px'}
@@ -119,7 +120,7 @@ def create_attribution_content():
         # Target token selector
         html.Div([
-            html.Label("Target Token:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
             dcc.Dropdown(
                 id='attribution-target-dropdown',
                 options=[],  # Populated by callback with top-5 predictions
@@ -132,7 +133,7 @@ def create_attribution_content():
         # Run attribution button
         html.Button([
             html.I(className='fas fa-highlighter', style={'marginRight': '8px'}),
-            "Compute Attribution"
         ], id='run-attribution-btn', className='action-button primary-button',
            style={'width': '100%', 'marginBottom': '16px'}),
@@ -174,7 +175,7 @@ def create_attribution_results_display(attribution_data, target_token):
                 'fontFamily': 'monospace',
                 'fontSize': '13px',
                 'fontWeight': '500' if norm > 0.3 else '400'
-            }, title=f"Attribution: {norm:.2f}")
         )
     # Create bar chart
@@ -188,9 +189,9 @@ def create_attribution_results_display(attribution_data, target_token):
     ))
     fig.update_layout(
-        title="Attribution Scores by Token",
-        xaxis_title="Attribution (normalized)",
-        yaxis_title="Input Token",
         height=max(200, len(tokens) * 30),
         margin=dict(l=20, r=60, t=40, b=20),
         paper_bgcolor='rgba(0,0,0,0)',
@@ -199,9 +200,9 @@ def create_attribution_results_display(attribution_data, target_token):
     )
     return html.Div([
-        html.H5("Token Attribution Results", style={'color': '#495057', 'marginBottom': '8px'}),
         html.P([
-            "Attribution for predicting: ",
             html.Span(target_token, style={
                 'padding': '4px 10px',
                 'backgroundColor': '#667eea',
@@ -214,7 +215,7 @@ def create_attribution_results_display(attribution_data, target_token):
         # Token chips visualization
         html.Div([
-            html.H6("Input tokens (darker = more important):", style={'color': '#6c757d', 'marginBottom': '8px'}),
             html.Div(token_chips, style={'lineHeight': '2'})
         ], style={
             'padding': '16px',
@@ -237,7 +238,7 @@ def create_attribution_results_display(attribution_data, target_token):
         html.Div([
             html.I(className='fas fa-info-circle', style={'color': '#667eea', 'marginRight': '8px'}),
             html.Span(
-                "Tokens with higher attribution scores contributed more to the model's prediction. "
                 "This helps identify which parts of the input were most influential.",
                 style={'color': '#6c757d', 'fontSize': '13px'}
             )

         html.Div([
             html.Button([
                 html.I(className='fas fa-cut', style={'marginRight': '8px'}),
+                "Test by Removing"
             ], id='investigation-tab-ablation', className='investigation-tab active',
                n_clicks=0, style=get_tab_style(True)),
             html.Button([
                 html.I(className='fas fa-highlighter', style={'marginRight': '8px'}),
+                "Word Influence"
             ], id='investigation-tab-attribution', className='investigation-tab',
                n_clicks=0, style=get_tab_style(False))
         ], className='investigation-tabs', style={
     return html.Div([
         # Explanation
         html.Div([
+            html.H5("What is Word Influence?", style={'color': '#495057', 'marginBottom': '8px'}),
             html.P([
+                "This tool uses mathematical analysis ",
+                "to identify which input words had the most influence on the model's prediction. ",
+                "Words with higher influence scores contributed more to the final output.",
+                " (This technique is called ", html.Em("token attribution"), " in research.)"
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
         ]),
         # Method selector
         html.Div([
+            html.Label("Analysis Method:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
             dcc.RadioItems(
                 id='attribution-method-radio',
                 options=[
+                    {'label': ' Deep Analysis (more accurate, slower)', 'value': 'integrated'},
+                    {'label': ' Quick Analysis (faster, less accurate)', 'value': 'simple'}
                 ],
                 value='integrated',
                 style={'display': 'flex', 'flexDirection': 'column', 'gap': '8px'}
         # Target token selector
         html.Div([
+            html.Label("Target Word:", className="input-label", style={'marginBottom': '8px', 'display': 'block'}),
             dcc.Dropdown(
                 id='attribution-target-dropdown',
                 options=[],  # Populated by callback with top-5 predictions
         # Run attribution button
         html.Button([
             html.I(className='fas fa-highlighter', style={'marginRight': '8px'}),
+            "Find Word Influence"
         ], id='run-attribution-btn', className='action-button primary-button',
            style={'width': '100%', 'marginBottom': '16px'}),
                 'fontFamily': 'monospace',
                 'fontSize': '13px',
                 'fontWeight': '500' if norm > 0.3 else '400'
+            }, title=f"Influence: {norm:.2f}")
         )
     # Create bar chart
     ))
     fig.update_layout(
+        title="Influence Scores by Input Word",
+        xaxis_title="Influence (normalized)",
+        yaxis_title="Input Word",
         height=max(200, len(tokens) * 30),
         margin=dict(l=20, r=60, t=40, b=20),
         paper_bgcolor='rgba(0,0,0,0)',
     )
     return html.Div([
+        html.H5("Word Influence Results", style={'color': '#495057', 'marginBottom': '8px'}),
         html.P([
+            "Influence on predicting: ",
             html.Span(target_token, style={
                 'padding': '4px 10px',
                 'backgroundColor': '#667eea',
         # Token chips visualization
         html.Div([
+            html.H6("Input words (darker = more important):", style={'color': '#6c757d', 'marginBottom': '8px'}),
             html.Div(token_chips, style={'lineHeight': '2'})
         ], style={
             'padding': '16px',
         html.Div([
             html.I(className='fas fa-info-circle', style={'color': '#667eea', 'marginRight': '8px'}),
             html.Span(
+                "Words with higher influence scores contributed more to the model's prediction. "
                 "This helps identify which parts of the input were most influential.",
                 style={'color': '#6c757d', 'fontSize': '13px'}
             )

components/pipeline.py CHANGED Viewed

@@ -39,7 +39,7 @@ def create_pipeline_container():
             create_stage_container(
                 stage_id='tokenization',
                 stage_num=1,
-                title='Tokenization',
                 icon='fa-puzzle-piece',
                 color='#667eea',
                 summary_id='stage-1-summary',
@@ -50,7 +50,7 @@ def create_pipeline_container():
             create_stage_container(
                 stage_id='embedding',
                 stage_num=2,
-                title='Embedding',
                 icon='fa-cube',
                 color='#764ba2',
                 summary_id='stage-2-summary',
@@ -72,7 +72,7 @@ def create_pipeline_container():
             create_stage_container(
                 stage_id='mlp',
                 stage_num=4,
-                title='MLP (Feed-Forward)',
                 icon='fa-network-wired',
                 color='#4facfe',
                 summary_id='stage-4-summary',
@@ -98,10 +98,10 @@ def create_flow_indicator():
     """Create the horizontal flow indicator showing all stages."""
     stages = [
         ('Input', '#6c757d'),
-        ('Tokens', '#667eea'),
-        ('Embed', '#764ba2'),
         ('Attention', '#f093fb'),
-        ('MLP', '#4facfe'),
         ('Output', '#00f2fe'),
     ]
@@ -288,9 +288,9 @@ def create_tokenization_content(tokens, token_ids, model_name=None):
         html.Div([
             html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
             html.P([
-                "Your text is split into ",
-                html.Strong(f"{len(tokens)} tokens"),
-                " - small pieces that the model can understand. Each token is assigned a unique ID from the model's vocabulary."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
         ]),
@@ -339,19 +339,19 @@ def create_embedding_content(hidden_dim=None, num_tokens=None):
         hidden_dim: Embedding dimension (e.g., 768)
         num_tokens: Number of tokens being processed
     """
-    dim_text = f"{hidden_dim}-dimensional" if hidden_dim else "high-dimensional"
     return html.Div([
         html.Div([
             html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
             html.P([
-                "Each token ID is used to look up a ", html.Strong(dim_text), " vector from a ",
-                html.Strong("pre-learned embedding table"), ". Think of it like a dictionary: the model has already ",
-                "memorized a numeric representation for every word in its vocabulary during training."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '12px'}),
             html.P([
-                "These embeddings capture semantic meaning - words with similar meanings (like 'happy' and 'joyful') ",
-                "have similar vectors, allowing the model to understand relationships between words."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
         ]),
@@ -373,7 +373,7 @@ def create_embedding_content(hidden_dim=None, num_tokens=None):
                 html.Span('→', style={'margin': '0 16px', 'fontSize': '24px', 'color': '#adb5bd'}),
                 html.Div([
                     html.Span('[', style={'fontSize': '20px', 'color': '#495057'}),
-                    html.Span(f' {dim_text} vector ', style={
                         'padding': '4px 12px',
                         'backgroundColor': '#e5d4ff',
                         'borderRadius': '4px',
@@ -390,8 +390,8 @@ def create_embedding_content(hidden_dim=None, num_tokens=None):
             html.I(className='fas fa-lightbulb', style={'color': '#ffc107', 'marginRight': '8px'}),
             html.Span([
                 html.Strong("How the lookup table was created: "),
-                "During training on billions of text examples, the model learned which numbers best represent each token. ",
-                "This table is frozen after training - every time you use the model, the same token always maps to the same vector."
             ], style={'color': '#6c757d', 'fontSize': '13px'})
         ], style={'marginTop': '16px', 'padding': '12px', 'backgroundColor': '#fff8e1', 'borderRadius': '6px'}),
@@ -400,12 +400,12 @@ def create_embedding_content(hidden_dim=None, num_tokens=None):
             html.I(className='fas fa-map-marker-alt', style={'color': '#5c6bc0', 'marginRight': '8px'}),
             html.Span([
                 html.Strong("Position matters too: "),
-                "Token embeddings alone don't capture word order — 'the cat chased the dog' and 'the dog chased the cat' ",
                 "would look the same. To fix this, the model also encodes ", html.Strong("positional information"),
-                ". Some models (like GPT-2) add a learned position vector to each token embedding. ",
                 "Others (like Pythia) use a technique called Rotary Positional Encoding, which encodes ",
-                "the relative distance between tokens directly in the attention step. ",
-                "Either way, the model knows both ", html.Em("what"), " each token is and ",
                 html.Em("where"), " it sits in the sequence."
             ], style={'color': '#6c757d', 'fontSize': '13px'})
         ], style={'marginTop': '12px', 'padding': '12px', 'backgroundColor': '#e8eaf6', 'borderRadius': '6px'})
@@ -431,13 +431,13 @@ def create_attention_content(attention_html=None, top_attended=None, layer_info=
         html.Div([
             html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
             html.P([
-                "The model looks at ", html.Strong("all tokens at once"),
                 " and figures out which ones are related to each other. This is called 'attention' — ",
-                "each token 'attends to' other tokens to gather context for its prediction."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '12px'}),
             html.P([
-                "Attention has multiple ", html.Strong("heads"), " — each head learns to look for different types of relationships. ",
-                "Below you can see what role each head plays and whether it's active on your current input."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
         ])
     ]
@@ -476,9 +476,9 @@ def create_attention_content(attention_html=None, top_attended=None, layer_info=
                     html.I(className='fas fa-lightbulb', style={'color': '#f39c12', 'marginRight': '8px', 'fontSize': '16px'}),
                     html.Span([
                         html.Strong("Try this: "),
-                        f"Select Layer {guided_head['layer']}, Head {guided_head['head']} in the visualization below — ",
-                        f"this is a {guided_cat} head ",
-                        f"(activation: {guided_head['activation_score']:.0%} on your input)."
                     ], style={'color': '#495057', 'fontSize': '13px'})
                 ], style={
                     'padding': '12px 16px', 'backgroundColor': '#fef9e7', 'borderRadius': '8px',
@@ -554,7 +554,7 @@ def create_attention_content(attention_html=None, top_attended=None, layer_info=
                                 'fontFamily': 'monospace', 'fontSize': '12px', 'fontWeight': '500',
                                 'minWidth': '60px', 'color': '#495057' if is_active else '#aaa',
                                 'display': 'inline-flex', 'alignItems': 'center',
-                            }, title=f"See Layer {head_info['layer']}, Head {head_info['head']} in the visualization below"),
                             # Activation bar
                             html.Div([
                                 html.Div(style={
@@ -660,10 +660,10 @@ def create_attention_content(attention_html=None, top_attended=None, layer_info=
             content_items.append(
                 html.Div([
-                    html.H5("Attention Head Roles:", style={'color': '#495057', 'marginBottom': '8px'}),
                     html.P([
-                        "Each category represents a type of behavior we detected in this model's attention heads. ",
-                        "Click a category to see individual heads and how strongly they're activated on your input."
                     ], style={'color': '#6c757d', 'fontSize': '12px', 'marginBottom': '12px'}),
                     legend,
                     html.Div(category_sections),
@@ -671,8 +671,8 @@ def create_attention_content(attention_html=None, top_attended=None, layer_info=
                     html.Div([
                         html.I(className='fas fa-info-circle', style={'color': '#6c757d', 'marginRight': '6px', 'fontSize': '11px'}),
                         html.Span(
-                            "These categories are simplified labels based on each head's dominant behavior. "
-                            "In reality, heads can serve multiple roles and may behave differently on different inputs.",
                             style={'color': '#999', 'fontSize': '11px'}
                         )
                     ], style={'marginTop': '12px', 'padding': '8px 12px', 'backgroundColor': '#f8f9fa', 'borderRadius': '6px'})
@@ -684,7 +684,7 @@ def create_attention_content(attention_html=None, top_attended=None, layer_info=
             html.Div([
                 html.I(className='fas fa-info-circle', style={'color': '#6c757d', 'marginRight': '8px'}),
                 html.Span(
-                    "Head categorization is not available for this model. "
                     "The attention visualization below still shows the full attention patterns.",
                     style={'color': '#6c757d', 'fontSize': '13px'}
                 )
@@ -702,26 +702,26 @@ def create_attention_content(attention_html=None, top_attended=None, layer_info=
                 html.Div([
                     html.Div([
                         html.I(className='fas fa-mouse-pointer', style={'color': '#f093fb', 'marginRight': '8px'}),
-                        html.Strong("Select heads: "),
-                        html.Span("Click on layer/head numbers at the top to view specific attention heads.",
                                  style={'color': '#6c757d'})
                     ], style={'marginBottom': '4px'}),
                     html.Div([
                         html.Span("• ", style={'color': '#f093fb', 'fontWeight': 'bold'}),
                         html.Strong("Single click ", style={'color': '#495057'}),
-                        html.Span("on a colored head square: selects or deselects that head",
                                  style={'color': '#6c757d'})
                     ], style={'marginLeft': '28px', 'marginBottom': '4px', 'fontSize': '13px'}),
                     html.Div([
                         html.Span("• ", style={'color': '#f093fb', 'fontWeight': 'bold'}),
                         html.Strong("Double click ", style={'color': '#495057'}),
-                        html.Span("on a colored head square: selects only that head (deselects all others)",
                                  style={'color': '#6c757d'})
                     ], style={'marginLeft': '28px', 'marginBottom': '12px', 'fontSize': '13px'}),
                     html.Div([
                         html.I(className='fas fa-arrows-alt-h', style={'color': '#f093fb', 'marginRight': '8px'}),
                         html.Strong("Lines show attention: "),
-                        html.Span("Each line connects a token (left) to tokens it attends to (right). ",
                                  style={'color': '#6c757d'})
                     ], style={'marginBottom': '8px'}),
                     html.Div([
@@ -774,14 +774,14 @@ def create_mlp_content(layer_count=None, hidden_dim=None, intermediate_dim=None)
         html.Div([
             html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
             html.P([
-                "After attention gathers context, each token's representation passes through a ",
-                html.Strong("Feed-Forward Network (MLP)"),
                 ". This is where the model's ", html.Strong("factual knowledge"), " is stored."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '12px'}),
             html.P([
-                "During training, the MLP weights learned to encode facts and patterns from the training data. ",
-                "For example, when processing 'The capital of France is', the MLP layers help recall that 'Paris' is the answer. ",
-                "Researchers have found that specific facts are often stored in specific MLP neurons."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
         ]),
@@ -851,7 +851,7 @@ def create_mlp_content(layer_count=None, hidden_dim=None, intermediate_dim=None)
             html.Span([
                 f"This happens in each of the model's ",
                 html.Strong(f"{layer_count} layers" if layer_count else "transformer layers"),
-                ", with attention and MLP working together - attention gathers context, MLP retrieves knowledge."
             ], style={'color': '#6c757d', 'fontSize': '13px'})
         ], style={'marginTop': '12px', 'padding': '12px', 'backgroundColor': '#e3f2fd', 'borderRadius': '6px'}),
@@ -860,8 +860,8 @@ def create_mlp_content(layer_count=None, hidden_dim=None, intermediate_dim=None)
             html.I(className='fas fa-road', style={'color': '#26a69a', 'marginRight': '8px'}),
             html.Span([
                 html.Strong("Adding, not replacing: "),
-                "The MLP doesn't replace the token's representation — it ",
-                html.Strong("adds"), " to the residual stream. Each layer contributes new information on top of ",
                 "everything computed before it, so the model accumulates understanding across all layers."
             ], style={'color': '#6c757d', 'fontSize': '13px'})
         ], style={'marginTop': '12px', 'padding': '12px', 'backgroundColor': '#e0f2f1', 'borderRadius': '6px'})

             create_stage_container(
                 stage_id='tokenization',
                 stage_num=1,
+                title='Text Splitting',
                 icon='fa-puzzle-piece',
                 color='#667eea',
                 summary_id='stage-1-summary',
             create_stage_container(
                 stage_id='embedding',
                 stage_num=2,
+                title='Meaning Encoding',
                 icon='fa-cube',
                 color='#764ba2',
                 summary_id='stage-2-summary',
             create_stage_container(
                 stage_id='mlp',
                 stage_num=4,
+                title='Knowledge Retrieval',
                 icon='fa-network-wired',
                 color='#4facfe',
                 summary_id='stage-4-summary',
     """Create the horizontal flow indicator showing all stages."""
     stages = [
         ('Input', '#6c757d'),
+        ('Split', '#667eea'),
+        ('Encode', '#764ba2'),
         ('Attention', '#f093fb'),
+        ('Knowledge', '#4facfe'),
         ('Output', '#00f2fe'),
     ]
         html.Div([
             html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
             html.P([
+                "Your text is split into ",
+                html.Strong(f"{len(tokens)} pieces"),
+                " (called ", html.Em("tokens"), ") — small chunks that the model can understand. Each piece is assigned a unique ID from the model's vocabulary."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
         ]),
         hidden_dim: Embedding dimension (e.g., 768)
         num_tokens: Number of tokens being processed
     """
+    dim_text = f"a list of {hidden_dim} numbers" if hidden_dim else "a long list of numbers"
     return html.Div([
         html.Div([
             html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
             html.P([
+                "Each piece is converted into ", html.Strong(dim_text), " using a ",
+                html.Strong("pre-learned lookup table"), ". Think of it like a dictionary: the model has already ",
+                "memorized a numeric code for every word in its vocabulary during training."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '12px'}),
             html.P([
+                "These numeric codes capture meaning — words with similar meanings (like 'happy' and 'joyful') ",
+                "get similar numbers, allowing the model to understand relationships between words."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
         ]),
                 html.Span('→', style={'margin': '0 16px', 'fontSize': '24px', 'color': '#adb5bd'}),
                 html.Div([
                     html.Span('[', style={'fontSize': '20px', 'color': '#495057'}),
+                    html.Span(f' {dim_text} ', style={
                         'padding': '4px 12px',
                         'backgroundColor': '#e5d4ff',
                         'borderRadius': '4px',
             html.I(className='fas fa-lightbulb', style={'color': '#ffc107', 'marginRight': '8px'}),
             html.Span([
                 html.Strong("How the lookup table was created: "),
+                "During training on billions of text examples, the model learned which numbers best represent each word piece. ",
+                "This table is frozen after training — every time you use the model, the same piece always maps to the same list of numbers."
             ], style={'color': '#6c757d', 'fontSize': '13px'})
         ], style={'marginTop': '16px', 'padding': '12px', 'backgroundColor': '#fff8e1', 'borderRadius': '6px'}),
             html.I(className='fas fa-map-marker-alt', style={'color': '#5c6bc0', 'marginRight': '8px'}),
             html.Span([
                 html.Strong("Position matters too: "),
+                "These numeric codes alone don't capture word order — 'the cat chased the dog' and 'the dog chased the cat' ",
                 "would look the same. To fix this, the model also encodes ", html.Strong("positional information"),
+                ". Some models (like GPT-2) add a learned set of position numbers to each piece's code. ",
                 "Others (like Pythia) use a technique called Rotary Positional Encoding, which encodes ",
+                "the relative distance between pieces directly in the attention step. ",
+                "Either way, the model knows both ", html.Em("what"), " each piece is and ",
                 html.Em("where"), " it sits in the sequence."
             ], style={'color': '#6c757d', 'fontSize': '13px'})
         ], style={'marginTop': '12px', 'padding': '12px', 'backgroundColor': '#e8eaf6', 'borderRadius': '6px'})
         html.Div([
             html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
             html.P([
+                "The model looks at ", html.Strong("all pieces at once"),
                 " and figures out which ones are related to each other. This is called 'attention' — ",
+                "each piece 'attends to' other pieces to gather context for its prediction."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '12px'}),
             html.P([
+                "Attention uses multiple ", html.Strong("detectors"), " (technically called 'heads') — each one learns to look for different types of relationships. ",
+                "Below you can see what role each detector plays and whether it's active on your current input."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
         ])
     ]
                     html.I(className='fas fa-lightbulb', style={'color': '#f39c12', 'marginRight': '8px', 'fontSize': '16px'}),
                     html.Span([
                         html.Strong("Try this: "),
+                        f"Select Layer {guided_head['layer']}, Detector {guided_head['head']} in the visualization below — ",
+                        f"this is a {guided_cat} detector ",
+                        f"(activity level: {guided_head['activation_score']:.0%} on your input)."
                     ], style={'color': '#495057', 'fontSize': '13px'})
                 ], style={
                     'padding': '12px 16px', 'backgroundColor': '#fef9e7', 'borderRadius': '8px',
                                 'fontFamily': 'monospace', 'fontSize': '12px', 'fontWeight': '500',
                                 'minWidth': '60px', 'color': '#495057' if is_active else '#aaa',
                                 'display': 'inline-flex', 'alignItems': 'center',
+                            }, title=f"See Layer {head_info['layer']}, Detector {head_info['head']} in the visualization below"),
                             # Activation bar
                             html.Div([
                                 html.Div(style={
             content_items.append(
                 html.Div([
+                    html.H5("Attention Detector Roles:", style={'color': '#495057', 'marginBottom': '8px'}),
                     html.P([
+                        "Each category represents a type of behavior we detected in this model's attention detectors. ",
+                        "Click a category to see individual detectors and how strongly they're activated on your input."
                     ], style={'color': '#6c757d', 'fontSize': '12px', 'marginBottom': '12px'}),
                     legend,
                     html.Div(category_sections),
                     html.Div([
                         html.I(className='fas fa-info-circle', style={'color': '#6c757d', 'marginRight': '6px', 'fontSize': '11px'}),
                         html.Span(
+                            "These categories are simplified labels based on each detector's dominant behavior. "
+                            "In reality, detectors can serve multiple roles and may behave differently on different inputs.",
                             style={'color': '#999', 'fontSize': '11px'}
                         )
                     ], style={'marginTop': '12px', 'padding': '8px 12px', 'backgroundColor': '#f8f9fa', 'borderRadius': '6px'})
             html.Div([
                 html.I(className='fas fa-info-circle', style={'color': '#6c757d', 'marginRight': '8px'}),
                 html.Span(
+                    "Detector categorization is not available for this model. "
                     "The attention visualization below still shows the full attention patterns.",
                     style={'color': '#6c757d', 'fontSize': '13px'}
                 )
                 html.Div([
                     html.Div([
                         html.I(className='fas fa-mouse-pointer', style={'color': '#f093fb', 'marginRight': '8px'}),
+                        html.Strong("Select detectors: "),
+                        html.Span("Click on layer/detector numbers at the top to view specific attention detectors.",
                                  style={'color': '#6c757d'})
                     ], style={'marginBottom': '4px'}),
                     html.Div([
                         html.Span("• ", style={'color': '#f093fb', 'fontWeight': 'bold'}),
                         html.Strong("Single click ", style={'color': '#495057'}),
+                        html.Span("on a colored square: selects or deselects that detector",
                                  style={'color': '#6c757d'})
                     ], style={'marginLeft': '28px', 'marginBottom': '4px', 'fontSize': '13px'}),
                     html.Div([
                         html.Span("• ", style={'color': '#f093fb', 'fontWeight': 'bold'}),
                         html.Strong("Double click ", style={'color': '#495057'}),
+                        html.Span("on a colored square: selects only that detector (deselects all others)",
                                  style={'color': '#6c757d'})
                     ], style={'marginLeft': '28px', 'marginBottom': '12px', 'fontSize': '13px'}),
                     html.Div([
                         html.I(className='fas fa-arrows-alt-h', style={'color': '#f093fb', 'marginRight': '8px'}),
                         html.Strong("Lines show attention: "),
+                        html.Span("Each line connects a word (left) to words it attends to (right). ",
                                  style={'color': '#6c757d'})
                     ], style={'marginBottom': '8px'}),
                     html.Div([
         html.Div([
             html.H5("What happens here:", style={'color': '#495057', 'marginBottom': '8px'}),
             html.P([
+                "After attention gathers context, each piece's representation passes through a ",
+                html.Strong("knowledge retrieval layer"), " (technically called a Feed-Forward Network or MLP)",
                 ". This is where the model's ", html.Strong("factual knowledge"), " is stored."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '12px'}),
             html.P([
+                "During training, these layers learned to encode facts and patterns from the training data. ",
+                "For example, when processing 'The capital of France is', these knowledge layers help recall that 'Paris' is the answer. ",
+                "Researchers have found that specific facts are often stored in specific neurons within these layers."
             ], style={'color': '#6c757d', 'fontSize': '14px', 'marginBottom': '16px'})
         ]),
             html.Span([
                 f"This happens in each of the model's ",
                 html.Strong(f"{layer_count} layers" if layer_count else "transformer layers"),
+                ", with attention and knowledge retrieval working together — attention gathers context, and the knowledge layers retrieve facts."
             ], style={'color': '#6c757d', 'fontSize': '13px'})
         ], style={'marginTop': '12px', 'padding': '12px', 'backgroundColor': '#e3f2fd', 'borderRadius': '6px'}),
             html.I(className='fas fa-road', style={'color': '#26a69a', 'marginRight': '8px'}),
             html.Span([
                 html.Strong("Adding, not replacing: "),
+                "The knowledge layer doesn't replace the piece's representation — it ",
+                html.Strong("adds"), " to it. Each layer contributes new information on top of ",
                 "everything computed before it, so the model accumulates understanding across all layers."
             ], style={'color': '#6c757d', 'fontSize': '13px'})
         ], style={'marginTop': '12px', 'padding': '12px', 'backgroundColor': '#e0f2f1', 'borderRadius': '6px'})