Alogotron committed on
Commit
09d20cb
·
verified ·
1 Parent(s): 3be8037

Upload viz_magnitude.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. viz_magnitude.py +165 -0
viz_magnitude.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ NeuroScope — Activation Magnitude Chart
3
+
4
+ Bar chart showing L2 norm of hidden states across all layers.
5
+ Highlights layers 9, 18, 27 (used by the Activation Avatars adapter).
6
+ Shows how activation magnitude evolves through the network depth.
7
+
8
+ All charts use Plotly with the project dark theme (#1a1a2e bg, #e6b800 accent).
9
+ """
10
+
11
+ import numpy as np
12
+ import plotly.graph_objects as go
13
+ from extraction import ExtractionResult
14
+
15
# ---------------------------------------------------------------------------
# Theme constants (project dark theme)
# ---------------------------------------------------------------------------
# Plot area and surrounding paper share one dark background.
BG_COLOR = "#1a1a2e"
PAPER_COLOR = BG_COLOR
TEXT_COLOR = "#e0e0e0"
GRID_COLOR = "#2a2a4e"

# Gold accent; highlighted layers reuse it.
ACCENT_COLOR = "#e6b800"
HIGHLIGHT_COLOR = ACCENT_COLOR

# Bar fills: default layers vs. the embedding layer.
BAR_COLOR = "#4a90d9"
EMBED_COLOR = "#7b68ee"

# Layers used by the Activation Avatars adapter.
AVATAR_LAYERS = {9, 18, 27}
29
+
30
+
31
def _layer_magnitudes(hs, metric):
    """Collapse every layer of ``hs`` into a single magnitude value.

    Args:
        hs: Stacked hidden states, indexed by layer along the first axis.
        metric: One of "mean_l2", "max_l2" or "mean_abs".

    Returns:
        A ``(magnitudes, y_label)`` pair: a 1-D NumPy array holding one value
        per layer, and the axis label that describes the chosen metric.

    Raises:
        ValueError: If ``metric`` is not one of the supported names.
    """
    if metric == "mean_l2":
        # L2 norm per token, then average across tokens.
        values = [np.linalg.norm(layer, axis=-1).mean() for layer in hs]
        y_label = "Mean L2 Norm"
    elif metric == "max_l2":
        # L2 norm per token, then keep the largest token norm.
        values = [np.linalg.norm(layer, axis=-1).max() for layer in hs]
        y_label = "Max L2 Norm"
    elif metric == "mean_abs":
        values = [np.abs(layer).mean() for layer in hs]
        y_label = "Mean |Activation|"
    else:
        raise ValueError(f"Unknown metric: {metric}")
    return np.array(values), y_label


def create_magnitude_chart(
    result: ExtractionResult,
    metric: str = "mean_l2",
) -> go.Figure:
    """Create a bar chart of activation magnitude across all layers.

    One bar is drawn per entry along the first axis of
    ``result.hidden_states``. Index 0 is labelled as the embedding layer and
    index ``i`` (i >= 1) as transformer layer ``i - 1``. Layers listed in
    ``AVATAR_LAYERS`` are drawn in the highlight color and additionally
    marked with a star.

    Args:
        result: Extraction output containing hidden states, expected in the
            layout (num_layers + 1, seq_len, hidden_dim).
        metric: Magnitude metric to compute:
            - "mean_l2": Mean L2 norm across tokens (default)
            - "max_l2": Max L2 norm across tokens
            - "mean_abs": Mean absolute activation value

    Returns:
        Plotly Figure with interactive bar chart.

    Raises:
        ValueError: If ``metric`` is not a supported metric name.
    """
    hs = result.hidden_states
    n_total = hs.shape[0]  # embedding layer + transformer layers

    magnitudes, y_label = _layer_magnitudes(hs, metric)

    # Labels, colors and hover text are all derived from n_total (the number
    # of bars actually plotted) instead of result.num_layers, so the x and y
    # series can never end up with different lengths.
    labels = []
    colors = []
    hover_text = []
    for i in range(n_total):
        is_embedding = i == 0
        is_avatar = (not is_embedding) and (i - 1) in AVATAR_LAYERS

        labels.append("Embed" if is_embedding else f"L{i - 1}")

        if is_embedding:
            colors.append(EMBED_COLOR)
        elif is_avatar:
            colors.append(HIGHLIGHT_COLOR)
        else:
            colors.append(BAR_COLOR)

        layer_name = "Embedding" if is_embedding else f"Layer {i - 1}"
        tag = " ⭐ Avatar Layer" if is_avatar else ""
        # The embedding layer has no predecessor; it is compared with itself,
        # which yields a delta of zero.
        delta = magnitudes[i] - magnitudes[max(0, i - 1)]
        hover_text.append(
            f"{layer_name}{tag}<br>"
            f"{y_label}: {magnitudes[i]:.2f}<br>"
            f"Δ from prev: {delta:+.2f}"
        )

    fig = go.Figure(
        data=go.Bar(
            x=labels,
            y=magnitudes,
            marker_color=colors,
            text=[f"{m:.1f}" for m in magnitudes],
            textposition="outside",
            textfont=dict(color=TEXT_COLOR, size=8),
            hovertext=hover_text,
            hoverinfo="text",
        )
    )

    # Add subtle markers for avatar layers that exist in this model.
    avatar_x = []
    avatar_y = []
    for layer in sorted(AVATAR_LAYERS):
        idx = layer + 1  # +1 for embedding offset
        if idx < n_total:
            avatar_x.append(labels[idx])
            avatar_y.append(magnitudes[idx])

    if avatar_x:
        fig.add_trace(
            go.Scatter(
                x=avatar_x,
                y=avatar_y,
                mode="markers",
                marker=dict(
                    symbol="star",
                    size=12,
                    color=HIGHLIGHT_COLOR,
                    line=dict(width=1, color="white"),
                ),
                name="Avatar Layers",
                hoverinfo="skip",
                showlegend=True,
            )
        )

    fig.update_layout(
        title=dict(
            text=f"Activation Magnitude by Layer ({y_label})",
            font=dict(color=ACCENT_COLOR, size=14),
        ),
        xaxis=dict(
            title=dict(text="Layer", font=dict(color=TEXT_COLOR, size=11)),
            tickfont=dict(color=TEXT_COLOR, size=8),
            gridcolor=GRID_COLOR,
            tickangle=45,
        ),
        yaxis=dict(
            title=dict(text=y_label, font=dict(color=TEXT_COLOR, size=11)),
            tickfont=dict(color=TEXT_COLOR, size=9),
            gridcolor=GRID_COLOR,
            zeroline=False,
        ),
        paper_bgcolor=PAPER_COLOR,
        plot_bgcolor=BG_COLOR,
        margin=dict(l=60, r=20, t=50, b=60),
        height=480,
        showlegend=True,
        legend=dict(
            font=dict(color=TEXT_COLOR, size=10),
            bgcolor="rgba(26, 26, 46, 0.8)",
            x=0.02,
            y=0.98,
        ),
        bargap=0.15,
    )

    return fig