AbstractPhil commited on
Commit
1a511f3
·
verified ·
1 Parent(s): 8f1ba76

Create freckles_observer_v40_freckles_noise.json

Browse files
freckles_observer_v40_freckles_noise.json ADDED
@@ -0,0 +1,1369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_stats": {
3
+ "enc_in": {
4
+ "mean": 0.11399824917316437,
5
+ "std": 0.32156893610954285,
6
+ "min": -4.077515602111816,
7
+ "max": 4.074148654937744,
8
+ "abs_mean": 0.25178295373916626,
9
+ "dead_frac": 2.543131586207892e-06,
10
+ "sparsity": 0.0,
11
+ "kurtosis": 4.910905838012695
12
+ },
13
+ "enc_block_0": {
14
+ "mean": -0.02227744832634926,
15
+ "std": 0.2875443994998932,
16
+ "min": -3.1000189781188965,
17
+ "max": 2.062558650970459,
18
+ "abs_mean": 0.17517945170402527,
19
+ "dead_frac": 4.450480446394067e-06,
20
+ "sparsity": 0.0,
21
+ "kurtosis": 19.296550750732422
22
+ },
23
+ "enc_block_1": {
24
+ "mean": -0.005942739546298981,
25
+ "std": 0.10117179155349731,
26
+ "min": -1.1117786169052124,
27
+ "max": 0.7992860078811646,
28
+ "abs_mean": 0.0745168998837471,
29
+ "dead_frac": 1.0172526344831567e-05,
30
+ "sparsity": 0.0,
31
+ "kurtosis": 2.5130233764648438
32
+ },
33
+ "enc_block_2": {
34
+ "mean": -0.021667584776878357,
35
+ "std": 0.11653400212526321,
36
+ "min": -0.7383712530136108,
37
+ "max": 0.6876223683357239,
38
+ "abs_mean": 0.09079861640930176,
39
+ "dead_frac": 8.106231689453125e-06,
40
+ "sparsity": 0.0,
41
+ "kurtosis": 1.041348934173584
42
+ },
43
+ "enc_block_3": {
44
+ "mean": -0.0528937391936779,
45
+ "std": 0.18374653160572052,
46
+ "min": -0.9734799861907959,
47
+ "max": 0.8834015130996704,
48
+ "abs_mean": 0.14933942258358002,
49
+ "dead_frac": 3.814697265625e-06,
50
+ "sparsity": 0.0,
51
+ "kurtosis": 0.4822535514831543
52
+ },
53
+ "enc_out_raw": {
54
+ "mean": -0.17712435126304626,
55
+ "std": 3.6461434364318848,
56
+ "min": -10.889140129089355,
57
+ "max": 11.768145561218262,
58
+ "abs_mean": 2.373218536376953,
59
+ "dead_frac": 1.9073486328125e-06,
60
+ "sparsity": 0.0,
61
+ "kurtosis": 1.0141587257385254
62
+ },
63
+ "cross_attn_0_qkv": {
64
+ "mean": 0.02815081924200058,
65
+ "std": 0.9149454832077026,
66
+ "min": -1.9138216972351074,
67
+ "max": 1.4539501667022705,
68
+ "abs_mean": 0.777122974395752,
69
+ "dead_frac": 0.0,
70
+ "sparsity": 0.0,
71
+ "kurtosis": -0.6490051746368408
72
+ },
73
+ "cross_attn_0_in": {
74
+ "mean": 3.307823657989502,
75
+ "std": 1.028746247291565,
76
+ "min": 1.6925034523010254,
77
+ "max": 4.709893703460693,
78
+ "abs_mean": 3.307823657989502,
79
+ "dead_frac": 0.0,
80
+ "sparsity": 0.0,
81
+ "kurtosis": -1.4075615406036377
82
+ },
83
+ "cross_attn_0_out": {
84
+ "mean": 3.362860679626465,
85
+ "std": 1.0373820066452026,
86
+ "min": 1.7293139696121216,
87
+ "max": 4.793014049530029,
88
+ "abs_mean": 3.362860679626465,
89
+ "dead_frac": 0.0,
90
+ "sparsity": 0.0,
91
+ "kurtosis": -1.3798936605453491
92
+ },
93
+ "cross_attn_1_qkv": {
94
+ "mean": -0.011559374630451202,
95
+ "std": 0.8941014409065247,
96
+ "min": -1.2823809385299683,
97
+ "max": 2.239341974258423,
98
+ "abs_mean": 0.6763687133789062,
99
+ "dead_frac": 0.0,
100
+ "sparsity": 0.0,
101
+ "kurtosis": 0.4769473075866699
102
+ },
103
+ "cross_attn_1_in": {
104
+ "mean": 3.362860679626465,
105
+ "std": 1.0373820066452026,
106
+ "min": 1.7293139696121216,
107
+ "max": 4.793014049530029,
108
+ "abs_mean": 3.362860679626465,
109
+ "dead_frac": 0.0,
110
+ "sparsity": 0.0,
111
+ "kurtosis": -1.3798936605453491
112
+ },
113
+ "cross_attn_1_out": {
114
+ "mean": 3.353649616241455,
115
+ "std": 0.9911406636238098,
116
+ "min": 1.7649903297424316,
117
+ "max": 4.691677093505859,
118
+ "abs_mean": 3.353649616241455,
119
+ "dead_frac": 0.0,
120
+ "sparsity": 0.0,
121
+ "kurtosis": -1.3825546503067017
122
+ },
123
+ "dec_in": {
124
+ "mean": 0.48801693320274353,
125
+ "std": 0.4462810754776001,
126
+ "min": -0.5784251689910889,
127
+ "max": 8.451927185058594,
128
+ "abs_mean": 0.49386128783226013,
129
+ "dead_frac": 0.0,
130
+ "sparsity": 0.0,
131
+ "kurtosis": 195.43594360351562
132
+ },
133
+ "dec_block_0": {
134
+ "mean": -0.1455148309469223,
135
+ "std": 0.26857054233551025,
136
+ "min": -3.289444923400879,
137
+ "max": 4.344697952270508,
138
+ "abs_mean": 0.19442293047904968,
139
+ "dead_frac": 3.1789145396032836e-06,
140
+ "sparsity": 0.0,
141
+ "kurtosis": 59.326595306396484
142
+ },
143
+ "dec_block_1": {
144
+ "mean": -0.017345234751701355,
145
+ "std": 0.1320338398218155,
146
+ "min": -1.80828058719635,
147
+ "max": 1.4308905601501465,
148
+ "abs_mean": 0.09153717756271362,
149
+ "dead_frac": 9.695689186628442e-06,
150
+ "sparsity": 0.0,
151
+ "kurtosis": 13.288253784179688
152
+ },
153
+ "dec_block_2": {
154
+ "mean": -0.030033249408006668,
155
+ "std": 0.13502919673919678,
156
+ "min": -0.9543643593788147,
157
+ "max": 1.3100506067276,
158
+ "abs_mean": 0.09647099673748016,
159
+ "dead_frac": 8.106231689453125e-06,
160
+ "sparsity": 0.0,
161
+ "kurtosis": 12.214755058288574
162
+ },
163
+ "dec_block_3": {
164
+ "mean": -0.07281665503978729,
165
+ "std": 0.165196493268013,
166
+ "min": -1.7998610734939575,
167
+ "max": 2.990985631942749,
168
+ "abs_mean": 0.1313476860523224,
169
+ "dead_frac": 3.973643288190942e-06,
170
+ "sparsity": 0.0,
171
+ "kurtosis": 32.06653594970703
172
+ },
173
+ "dec_out": {
174
+ "mean": 0.009957308880984783,
175
+ "std": 1.2505145072937012,
176
+ "min": -3.9976625442504883,
177
+ "max": 4.018248081207275,
178
+ "abs_mean": 1.0230882167816162,
179
+ "dead_frac": 0.0,
180
+ "sparsity": 0.0,
181
+ "kurtosis": -0.7579162120819092
182
+ },
183
+ "boundary_in": {
184
+ "mean": 0.009957308880984783,
185
+ "std": 1.2505145072937012,
186
+ "min": -3.9976625442504883,
187
+ "max": 4.018248081207275,
188
+ "abs_mean": 1.0230880975723267,
189
+ "dead_frac": 0.0,
190
+ "sparsity": 0.0,
191
+ "kurtosis": -0.7579159736633301
192
+ },
193
+ "boundary_out": {
194
+ "mean": 6.105394277255982e-05,
195
+ "std": 1.2510300874710083,
196
+ "min": -4.003395080566406,
197
+ "max": 4.002503395080566,
198
+ "abs_mean": 1.0234731435775757,
199
+ "dead_frac": 0.0,
200
+ "sparsity": 0.0,
201
+ "kurtosis": -0.75799560546875
202
+ },
203
+ "svd_U": {
204
+ "mean": -0.0015054205432534218,
205
+ "std": 0.1443297415971756,
206
+ "min": -0.5084033012390137,
207
+ "max": 0.5471221208572388,
208
+ "abs_mean": 0.11567305028438568,
209
+ "dead_frac": 6.039937488822034e-06,
210
+ "sparsity": 0.0,
211
+ "kurtosis": 0.46650028228759766
212
+ },
213
+ "svd_S_orig": {
214
+ "mean": 3.307823657989502,
215
+ "std": 1.028746247291565,
216
+ "min": 1.6925034523010254,
217
+ "max": 4.709893703460693,
218
+ "abs_mean": 3.307823657989502,
219
+ "dead_frac": 0.0,
220
+ "sparsity": 0.0,
221
+ "kurtosis": -1.4075615406036377
222
+ },
223
+ "svd_S": {
224
+ "mean": 3.353649616241455,
225
+ "std": 0.9911406636238098,
226
+ "min": 1.7649903297424316,
227
+ "max": 4.691677093505859,
228
+ "abs_mean": 3.353649616241455,
229
+ "dead_frac": 0.0,
230
+ "sparsity": 0.0,
231
+ "kurtosis": -1.3825546503067017
232
+ },
233
+ "svd_Vt": {
234
+ "mean": 0.037834037095308304,
235
+ "std": 0.49856749176979065,
236
+ "min": -0.9999575018882751,
237
+ "max": 0.9589366316795349,
238
+ "abs_mean": 0.37048596143722534,
239
+ "dead_frac": 0.0,
240
+ "sparsity": 0.0,
241
+ "kurtosis": -0.25922727584838867
242
+ },
243
+ "svd_M": {
244
+ "mean": -0.029393270611763,
245
+ "std": 0.4991353750228882,
246
+ "min": -0.9999995231628418,
247
+ "max": 0.9999973177909851,
248
+ "abs_mean": 0.33501312136650085,
249
+ "dead_frac": 5.7220458984375e-06,
250
+ "sparsity": 0.0,
251
+ "kurtosis": 0.24239110946655273
252
+ },
253
+ "recon": {
254
+ "mean": 6.105394277255982e-05,
255
+ "std": 1.2510300874710083,
256
+ "min": -4.003395080566406,
257
+ "max": 4.002503395080566,
258
+ "abs_mean": 1.0234731435775757,
259
+ "dead_frac": 0.0,
260
+ "sparsity": 0.0,
261
+ "kurtosis": -0.75799560546875
262
+ },
263
+ "input": {
264
+ "mean": -2.6453908503754064e-05,
265
+ "std": 1.250998616218567,
266
+ "min": -4.0,
267
+ "max": 4.0,
268
+ "abs_mean": 1.0234488248825073,
269
+ "dead_frac": 1.271565793103946e-06,
270
+ "sparsity": 0.0,
271
+ "kurtosis": -0.7580161094665527
272
+ }
273
+ },
274
+ "svd_bottleneck": {
275
+ "S_mean": [
276
+ 4.565850734710693,
277
+ 3.915027141571045,
278
+ 2.8913984298706055,
279
+ 1.859018325805664
280
+ ],
281
+ "S_std": [
282
+ 0.03792751207947731,
283
+ 0.0469575896859169,
284
+ 0.05345858260989189,
285
+ 0.05418039485812187
286
+ ],
287
+ "S_ratio": 2.4560546875,
288
+ "condition_number": 2.4560546875,
289
+ "effective_rank": 1.1468989849090576,
290
+ "recon_error": 1.4800130454409137e-15,
291
+ "U_orthogonality_error": 3.4124291408674278e-15,
292
+ "Vt_orthogonality_error": 6.465710594626638e-16,
293
+ "energy_per_mode": [
294
+ 0.43434232473373413,
295
+ 0.31936755776405334,
296
+ 0.17423005402088165,
297
+ 0.07206007838249207
298
+ ],
299
+ "sphere_radius_mean": 1.0,
300
+ "sphere_radius_std": 4.2930455634859754e-08
301
+ },
302
+ "cross_attention": {
303
+ "layer_0_delta": {
304
+ "delta_abs_mean": 0.05503721535205841,
305
+ "delta_abs_max": 0.08312034606933594,
306
+ "delta_std": 0.01944689266383648,
307
+ "delta_per_mode": [
308
+ 0.08059147000312805,
309
+ 0.03247781842947006,
310
+ 0.06663240492343903,
311
+ 0.040447160601615906
312
+ ],
313
+ "alpha_values": [
314
+ 0.022388869896531105,
315
+ 0.024696987122297287,
316
+ 0.02641209028661251,
317
+ 0.02643170952796936
318
+ ],
319
+ "alpha_mean": 0.02498241513967514,
320
+ "relative_change": 0.0166384968906641,
321
+ "sign_agreement": 1.0
322
+ },
323
+ "layer_0_attention": {
324
+ "q_norm_mean": 2.305610179901123,
325
+ "k_norm_mean": 1.3561913967132568,
326
+ "v_norm_mean": 1.7022989988327026,
327
+ "q_std": 1.1526931524276733,
328
+ "k_std": 0.672618567943573,
329
+ "v_std": 0.8512021899223328,
330
+ "qk_cosine": -0.29407045245170593,
331
+ "qv_cosine": -0.21351489424705505,
332
+ "kv_cosine": 0.7354511022567749
333
+ },
334
+ "layer_1_delta": {
335
+ "delta_abs_mean": 0.04028972610831261,
336
+ "delta_abs_max": 0.10133695602416992,
337
+ "delta_std": 0.05339942127466202,
338
+ "delta_per_mode": [
339
+ 0.09837014973163605,
340
+ 0.000631974427960813,
341
+ 0.02289256826043129,
342
+ 0.03926421329379082
343
+ ],
344
+ "alpha_values": [
345
+ 0.025104766711592674,
346
+ 0.02187989465892315,
347
+ 0.02244846150279045,
348
+ 0.027547497302293777
349
+ ],
350
+ "alpha_mean": 0.02424515411257744,
351
+ "relative_change": 0.011980789713561535,
352
+ "sign_agreement": 0.5
353
+ },
354
+ "layer_1_attention": {
355
+ "q_norm_mean": 2.4621071815490723,
356
+ "k_norm_mean": 1.233809471130371,
357
+ "v_norm_mean": 1.4172993898391724,
358
+ "q_std": 1.1312413215637207,
359
+ "k_std": 0.43080684542655945,
360
+ "v_std": 0.7043910026550293,
361
+ "qk_cosine": 0.2318161427974701,
362
+ "qv_cosine": 0.8531496524810791,
363
+ "kv_cosine": 0.6193335056304932
364
+ }
365
+ },
366
+ "enc_dec_symmetry": {
367
+ "block_0_spearman": 0.03686881909989656,
368
+ "block_0_cosine": 0.07803580909967422,
369
+ "block_0_procrustes_error": 8.642137458991783e-07,
370
+ "block_0_procrustes_alignment": 0.9170370101928711,
371
+ "block_1_spearman": 0.009047081100090216,
372
+ "block_1_cosine": 0.030039409175515175,
373
+ "block_1_procrustes_error": 1.0774815564218443e-06,
374
+ "block_1_procrustes_alignment": 0.896563708782196,
375
+ "block_2_spearman": 0.01232992455646543,
376
+ "block_2_cosine": 0.04046924412250519,
377
+ "block_2_procrustes_error": 8.364959853679466e-07,
378
+ "block_2_procrustes_alignment": 0.9196978211402893,
379
+ "block_3_spearman": -0.0008597959765885626,
380
+ "block_3_cosine": 0.16444019973278046,
381
+ "block_3_procrustes_error": 9.134143397204753e-07,
382
+ "block_3_procrustes_alignment": 0.912315309047699
383
+ },
384
+ "information_flow": {
385
+ "input_to_enc_spearman": -0.01943134982072711,
386
+ "M_to_S_compression": 1.0,
387
+ "M_variance": 0.24913613498210907,
388
+ "S_variance": 1.058318853378296,
389
+ "variance_retention": 4.247953878608404,
390
+ "cross_attn_total_delta": 0.05471521615982056,
391
+ "cross_attn_max_delta": 0.09769392013549805,
392
+ "cross_attn_relative_delta": 0.01654115319252014,
393
+ "dec_to_recon_spearman": 0.004384062372986261,
394
+ "end_to_end_spearman": 0.9999988479208689,
395
+ "end_to_end_mse": 7.40700807000394e-06,
396
+ "boundary_delta_mean": 0.00989625509828329,
397
+ "boundary_delta_max": 0.018932104110717773,
398
+ "boundary_relative": 0.009672925807535648
399
+ },
400
+ "cv_stages": {
401
+ "enc_in": {
402
+ "cv": 0.3957862952422528,
403
+ "name": "Encoder input projection",
404
+ "dim": 16
405
+ },
406
+ "enc_block_0": {
407
+ "cv": 0.3340476673895933,
408
+ "name": "Encoder block 0",
409
+ "dim": 16
410
+ },
411
+ "enc_block_1": {
412
+ "cv": 0.3956829632095093,
413
+ "name": "Encoder block 1",
414
+ "dim": 16
415
+ },
416
+ "enc_block_2": {
417
+ "cv": 0.3878920391524174,
418
+ "name": "Encoder block 2",
419
+ "dim": 16
420
+ },
421
+ "enc_block_3": {
422
+ "cv": 0.3079640097284264,
423
+ "name": "Encoder block 3",
424
+ "dim": 16
425
+ },
426
+ "svd_S_orig": {
427
+ "cv": 0.46774789196800154,
428
+ "name": "SVD S (pre cross-attn)",
429
+ "dim": 4
430
+ },
431
+ "svd_S": {
432
+ "cv": 0.46958339733251925,
433
+ "name": "SVD S (post cross-attn)",
434
+ "dim": 4
435
+ },
436
+ "dec_in": {
437
+ "cv": 0.31508316315948587,
438
+ "name": "Decoder input projection",
439
+ "dim": 16
440
+ },
441
+ "dec_block_0": {
442
+ "cv": 0.375718443993368,
443
+ "name": "Decoder block 0",
444
+ "dim": 16
445
+ },
446
+ "dec_block_3": {
447
+ "cv": 0.46716902590523746,
448
+ "name": "Decoder block 3",
449
+ "dim": 16
450
+ }
451
+ },
452
+ "noise_fingerprints": {
453
+ "gaussian": {
454
+ "S_mean": [
455
+ 4.551583290100098,
456
+ 3.9600980281829834,
457
+ 2.967662811279297,
458
+ 1.9223911762237549
459
+ ],
460
+ "S_std": [
461
+ 0.034768905490636826,
462
+ 0.035847216844558716,
463
+ 0.04485037177801132,
464
+ 0.04318368062376976
465
+ ],
466
+ "erank": 1.1229482889175415,
467
+ "enc_final_abs_mean": 0.1409558653831482,
468
+ "enc_final_dead_frac": 5.086263172415784e-06,
469
+ "cross_attn_delta": 0.054473843425512314,
470
+ "recon_mse": 2.188036887673661e-06
471
+ },
472
+ "uniform": {
473
+ "S_mean": [
474
+ 4.549080848693848,
475
+ 4.003808975219727,
476
+ 2.929938316345215,
477
+ 1.891095519065857
478
+ ],
479
+ "S_std": [
480
+ 0.026261691004037857,
481
+ 0.026727596297860146,
482
+ 0.03346379101276398,
483
+ 0.0348832905292511
484
+ ],
485
+ "erank": 1.0971537828445435,
486
+ "enc_final_abs_mean": 0.11894993484020233,
487
+ "enc_final_dead_frac": 5.086263172415784e-06,
488
+ "cross_attn_delta": 0.05392440780997276,
489
+ "recon_mse": 8.884973681233532e-07
490
+ },
491
+ "pink": {
492
+ "S_mean": [
493
+ 4.485807418823242,
494
+ 3.9797024726867676,
495
+ 2.974792003631592,
496
+ 2.0227270126342773
497
+ ],
498
+ "S_std": [
499
+ 0.060695406049489975,
500
+ 0.06107177957892418,
501
+ 0.06326989084482193,
502
+ 0.19789165258407593
503
+ ],
504
+ "erank": 1.2274385690689087,
505
+ "enc_final_abs_mean": 0.11895599216222763,
506
+ "enc_final_dead_frac": 0.0,
507
+ "cross_attn_delta": 0.05545634776353836,
508
+ "recon_mse": 3.906632173311664e-07
509
+ },
510
+ "salt_pepper": {
511
+ "S_mean": [
512
+ 4.531198024749756,
513
+ 3.8916804790496826,
514
+ 3.0370736122131348,
515
+ 2.0111701488494873
516
+ ],
517
+ "S_std": [
518
+ 0.04507261514663696,
519
+ 0.049419280141592026,
520
+ 0.060413066297769547,
521
+ 0.054792631417512894
522
+ ],
523
+ "erank": 1.156660556793213,
524
+ "enc_final_abs_mean": 0.18618455529212952,
525
+ "enc_final_dead_frac": 2.543131586207892e-06,
526
+ "cross_attn_delta": 0.055769048631191254,
527
+ "recon_mse": 2.9938106308691204e-05
528
+ },
529
+ "cauchy": {
530
+ "S_mean": [
531
+ 4.539364337921143,
532
+ 3.90649676322937,
533
+ 3.0197839736938477,
534
+ 1.9870045185089111
535
+ ],
536
+ "S_std": [
537
+ 0.04201872646808624,
538
+ 0.04622576758265495,
539
+ 0.055099062621593475,
540
+ 0.052384763956069946
541
+ ],
542
+ "erank": 1.1480085849761963,
543
+ "enc_final_abs_mean": 0.1741541475057602,
544
+ "enc_final_dead_frac": 3.1789145396032836e-06,
545
+ "cross_attn_delta": 0.055427126586437225,
546
+ "recon_mse": 1.4438321159104817e-05
547
+ }
548
+ },
549
+ "weights": {
550
+ "enc_in.weight": {
551
+ "shape": [
552
+ 384,
553
+ 48
554
+ ],
555
+ "norm": 4.736795425415039,
556
+ "mean": -3.101239417446777e-05,
557
+ "std": 0.03489072620868683,
558
+ "abs_mean": 0.02599160559475422,
559
+ "sparsity": 0.0,
560
+ "condition": 1.6303043365478516,
561
+ "erank": 47.647403717041016
562
+ },
563
+ "enc_in.bias": {
564
+ "shape": [
565
+ 384
566
+ ],
567
+ "norm": 3.09242582321167,
568
+ "mean": 0.11399959027767181,
569
+ "std": 0.10926610231399536,
570
+ "abs_mean": 0.12932059168815613,
571
+ "sparsity": 0.0
572
+ },
573
+ "enc_blocks.0.0.weight": {
574
+ "shape": [
575
+ 384
576
+ ],
577
+ "norm": 12.654324531555176,
578
+ "mean": 0.6359113454818726,
579
+ "std": 0.1125166043639183,
580
+ "abs_mean": 0.6359113454818726,
581
+ "sparsity": 0.0
582
+ },
583
+ "enc_blocks.0.0.bias": {
584
+ "shape": [
585
+ 384
586
+ ],
587
+ "norm": 1.056090235710144,
588
+ "mean": 0.0009981549810618162,
589
+ "std": 0.053954433649778366,
590
+ "abs_mean": 0.04389415308833122,
591
+ "sparsity": 0.0
592
+ },
593
+ "enc_blocks.0.1.weight": {
594
+ "shape": [
595
+ 384,
596
+ 384
597
+ ],
598
+ "norm": 7.409005165100098,
599
+ "mean": -0.0002259344037156552,
600
+ "std": 0.019293026998639107,
601
+ "abs_mean": 0.0151827996596694,
602
+ "sparsity": 5.425347262644209e-05,
603
+ "condition": 5300.60693359375,
604
+ "erank": 257.93896484375
605
+ },
606
+ "enc_blocks.0.1.bias": {
607
+ "shape": [
608
+ 384
609
+ ],
610
+ "norm": 0.9071546792984009,
611
+ "mean": 0.02974475547671318,
612
+ "std": 0.03551873937249184,
613
+ "abs_mean": 0.03694774955511093,
614
+ "sparsity": 0.0
615
+ },
616
+ "enc_blocks.0.3.weight": {
617
+ "shape": [
618
+ 384,
619
+ 384
620
+ ],
621
+ "norm": 13.219942092895508,
622
+ "mean": 0.0017413782188668847,
623
+ "std": 0.034382980316877365,
624
+ "abs_mean": 0.028007401153445244,
625
+ "sparsity": 2.7126736313221045e-05,
626
+ "condition": 9943.1591796875,
627
+ "erank": 285.1506042480469
628
+ },
629
+ "enc_blocks.0.3.bias": {
630
+ "shape": [
631
+ 384
632
+ ],
633
+ "norm": 0.8092156052589417,
634
+ "mean": -0.009931594133377075,
635
+ "std": 0.040135327726602554,
636
+ "abs_mean": 0.032900743186473846,
637
+ "sparsity": 0.0
638
+ },
639
+ "enc_blocks.1.0.weight": {
640
+ "shape": [
641
+ 384
642
+ ],
643
+ "norm": 16.65181541442871,
644
+ "mean": 0.8478428721427917,
645
+ "std": 0.057114146649837494,
646
+ "abs_mean": 0.8478428721427917,
647
+ "sparsity": 0.0
648
+ },
649
+ "enc_blocks.1.0.bias": {
650
+ "shape": [
651
+ 384
652
+ ],
653
+ "norm": 0.6272527575492859,
654
+ "mean": -5.119678098708391e-05,
655
+ "std": 0.03205107897520065,
656
+ "abs_mean": 0.024098535999655724,
657
+ "sparsity": 0.0
658
+ },
659
+ "enc_blocks.1.1.weight": {
660
+ "shape": [
661
+ 384,
662
+ 384
663
+ ],
664
+ "norm": 11.865377426147461,
665
+ "mean": 0.0005295192240737379,
666
+ "std": 0.030894985422492027,
667
+ "abs_mean": 0.02438214048743248,
668
+ "sparsity": 4.069010537932627e-05,
669
+ "condition": 3584.115478515625,
670
+ "erank": 272.6521301269531
671
+ },
672
+ "enc_blocks.1.1.bias": {
673
+ "shape": [
674
+ 384
675
+ ],
676
+ "norm": 0.6861883997917175,
677
+ "mean": 0.006015029735863209,
678
+ "std": 0.03454142436385155,
679
+ "abs_mean": 0.029296232387423515,
680
+ "sparsity": 0.0
681
+ },
682
+ "enc_blocks.1.3.weight": {
683
+ "shape": [
684
+ 384,
685
+ 384
686
+ ],
687
+ "norm": 13.247173309326172,
688
+ "mean": 0.0008856668719090521,
689
+ "std": 0.034486591815948486,
690
+ "abs_mean": 0.027842184528708458,
691
+ "sparsity": 2.7126736313221045e-05,
692
+ "condition": 7903.28125,
693
+ "erank": 281.22845458984375
694
+ },
695
+ "enc_blocks.1.3.bias": {
696
+ "shape": [
697
+ 384
698
+ ],
699
+ "norm": 0.7264137268066406,
700
+ "mean": -0.010470348410308361,
701
+ "std": 0.03560663014650345,
702
+ "abs_mean": 0.029817825183272362,
703
+ "sparsity": 0.0
704
+ },
705
+ "enc_blocks.2.0.weight": {
706
+ "shape": [
707
+ 384
708
+ ],
709
+ "norm": 17.554519653320312,
710
+ "mean": 0.8946146965026855,
711
+ "std": 0.046616703271865845,
712
+ "abs_mean": 0.8946146965026855,
713
+ "sparsity": 0.0
714
+ },
715
+ "enc_blocks.2.0.bias": {
716
+ "shape": [
717
+ 384
718
+ ],
719
+ "norm": 0.638079822063446,
720
+ "mean": -0.0017956134397536516,
721
+ "std": 0.032554741948843,
722
+ "abs_mean": 0.024456802755594254,
723
+ "sparsity": 0.0
724
+ },
725
+ "enc_blocks.2.1.weight": {
726
+ "shape": [
727
+ 384,
728
+ 384
729
+ ],
730
+ "norm": 12.878968238830566,
731
+ "mean": 0.0004422464990057051,
732
+ "std": 0.03353617712855339,
733
+ "abs_mean": 0.02666916884481907,
734
+ "sparsity": 6.781684078305261e-06,
735
+ "condition": 1223.2880859375,
736
+ "erank": 267.52203369140625
737
+ },
738
+ "enc_blocks.2.1.bias": {
739
+ "shape": [
740
+ 384
741
+ ],
742
+ "norm": 0.6446398496627808,
743
+ "mean": 0.0021967978682368994,
744
+ "std": 0.03286603093147278,
745
+ "abs_mean": 0.02753588557243347,
746
+ "sparsity": 0.0
747
+ },
748
+ "enc_blocks.2.3.weight": {
749
+ "shape": [
750
+ 384,
751
+ 384
752
+ ],
753
+ "norm": 12.749981880187988,
754
+ "mean": 0.0010546775301918387,
755
+ "std": 0.03318643569946289,
756
+ "abs_mean": 0.02681606449186802,
757
+ "sparsity": 4.069010537932627e-05,
758
+ "condition": 11286.2783203125,
759
+ "erank": 287.71466064453125
760
+ },
761
+ "enc_blocks.2.3.bias": {
762
+ "shape": [
763
+ 384
764
+ ],
765
+ "norm": 0.7675585746765137,
766
+ "mean": -0.011421765200793743,
767
+ "std": 0.03751590475440025,
768
+ "abs_mean": 0.03255150094628334,
769
+ "sparsity": 0.0
770
+ },
771
+ "enc_blocks.3.0.weight": {
772
+ "shape": [
773
+ 384
774
+ ],
775
+ "norm": 17.64955711364746,
776
+ "mean": 0.8998109698295593,
777
+ "std": 0.039498452097177505,
778
+ "abs_mean": 0.8998109698295593,
779
+ "sparsity": 0.0
780
+ },
781
+ "enc_blocks.3.0.bias": {
782
+ "shape": [
783
+ 384
784
+ ],
785
+ "norm": 0.632440984249115,
786
+ "mean": -0.001403431873768568,
787
+ "std": 0.0322856567800045,
788
+ "abs_mean": 0.02486751601099968,
789
+ "sparsity": 0.0
790
+ },
791
+ "enc_blocks.3.1.weight": {
792
+ "shape": [
793
+ 384,
794
+ 384
795
+ ],
796
+ "norm": 12.882952690124512,
797
+ "mean": 0.0005309268599376082,
798
+ "std": 0.03354526683688164,
799
+ "abs_mean": 0.026780350133776665,
800
+ "sparsity": 2.0345052689663135e-05,
801
+ "condition": 3306.011474609375,
802
+ "erank": 277.59588623046875
803
+ },
804
+ "enc_blocks.3.1.bias": {
805
+ "shape": [
806
+ 384
807
+ ],
808
+ "norm": 0.6859570741653442,
809
+ "mean": -9.370347470394336e-06,
810
+ "std": 0.03505076840519905,
811
+ "abs_mean": 0.0292180385440588,
812
+ "sparsity": 0.0
813
+ },
814
+ "enc_blocks.3.3.weight": {
815
+ "shape": [
816
+ 384,
817
+ 384
818
+ ],
819
+ "norm": 12.485105514526367,
820
+ "mean": 0.0012131760595366359,
821
+ "std": 0.03249076381325722,
822
+ "abs_mean": 0.02636081352829933,
823
+ "sparsity": 2.7126736313221045e-05,
824
+ "condition": 6738.20654296875,
825
+ "erank": 288.5359802246094
826
+ },
827
+ "enc_blocks.3.3.bias": {
828
+ "shape": [
829
+ 384
830
+ ],
831
+ "norm": 0.7744740843772888,
832
+ "mean": -0.014685760252177715,
833
+ "std": 0.036740291863679886,
834
+ "abs_mean": 0.03180073946714401,
835
+ "sparsity": 0.0
836
+ },
837
+ "enc_out.weight": {
838
+ "shape": [
839
+ 192,
840
+ 384
841
+ ],
842
+ "norm": 15.931266784667969,
843
+ "mean": 0.00025461785844527185,
844
+ "std": 0.05867227911949158,
845
+ "abs_mean": 0.04659826308488846,
846
+ "sparsity": 1.3563368156610522e-05,
847
+ "condition": 12.063321113586426,
848
+ "erank": 180.5324249267578
849
+ },
850
+ "enc_out.bias": {
851
+ "shape": [
852
+ 192
853
+ ],
854
+ "norm": 0.7678680419921875,
855
+ "mean": -0.0035826812963932753,
856
+ "std": 0.05544474720954895,
857
+ "abs_mean": 0.04248465597629547,
858
+ "sparsity": 0.0
859
+ },
860
+ "dec_in.weight": {
861
+ "shape": [
862
+ 384,
863
+ 192
864
+ ],
865
+ "norm": 17.036605834960938,
866
+ "mean": -0.00020357051107566804,
867
+ "std": 0.06274331361055374,
868
+ "abs_mean": 0.050236720591783524,
869
+ "sparsity": 0.0,
870
+ "condition": 11.275330543518066,
871
+ "erank": 160.26626586914062
872
+ },
873
+ "dec_in.bias": {
874
+ "shape": [
875
+ 384
876
+ ],
877
+ "norm": 0.838572084903717,
878
+ "mean": 0.006184441968798637,
879
+ "std": 0.04239920154213905,
880
+ "abs_mean": 0.03614982217550278,
881
+ "sparsity": 0.0
882
+ },
883
+ "dec_blocks.0.0.weight": {
884
+ "shape": [
885
+ 384
886
+ ],
887
+ "norm": 17.526269912719727,
888
+ "mean": 0.8938546180725098,
889
+ "std": 0.03080192767083645,
890
+ "abs_mean": 0.8938546180725098,
891
+ "sparsity": 0.0
892
+ },
893
+ "dec_blocks.0.0.bias": {
894
+ "shape": [
895
+ 384
896
+ ],
897
+ "norm": 0.6757148504257202,
898
+ "mean": 0.02418457716703415,
899
+ "std": 0.02461141161620617,
900
+ "abs_mean": 0.027816174551844597,
901
+ "sparsity": 0.0
902
+ },
903
+ "dec_blocks.0.1.weight": {
904
+ "shape": [
905
+ 384,
906
+ 384
907
+ ],
908
+ "norm": 11.316617012023926,
909
+ "mean": 0.001807661959901452,
910
+ "std": 0.029414964839816093,
911
+ "abs_mean": 0.023787135258316994,
912
+ "sparsity": 2.7126736313221045e-05,
913
+ "condition": 671915.0625,
914
+ "erank": 289.99749755859375
915
+ },
916
+ "dec_blocks.0.1.bias": {
917
+ "shape": [
918
+ 384
919
+ ],
920
+ "norm": 0.58186274766922,
921
+ "mean": 0.005265340209007263,
922
+ "std": 0.02926061488687992,
923
+ "abs_mean": 0.024605482816696167,
924
+ "sparsity": 0.0
925
+ },
926
+ "dec_blocks.0.3.weight": {
927
+ "shape": [
928
+ 384,
929
+ 384
930
+ ],
931
+ "norm": 15.586650848388672,
932
+ "mean": -0.0036952048540115356,
933
+ "std": 0.040421824902296066,
934
+ "abs_mean": 0.03299005329608917,
935
+ "sparsity": 0.0,
936
+ "condition": 2613.58642578125,
937
+ "erank": 273.7778625488281
938
+ },
939
+ "dec_blocks.0.3.bias": {
940
+ "shape": [
941
+ 384
942
+ ],
943
+ "norm": 0.60248863697052,
944
+ "mean": -0.005198275670409203,
945
+ "std": 0.030342521145939827,
946
+ "abs_mean": 0.02475673519074917,
947
+ "sparsity": 0.0
948
+ },
949
+ "dec_blocks.1.0.weight": {
950
+ "shape": [
951
+ 384
952
+ ],
953
+ "norm": 17.223556518554688,
954
+ "mean": 0.8784023523330688,
955
+ "std": 0.030661659315228462,
956
+ "abs_mean": 0.8784023523330688,
957
+ "sparsity": 0.0
958
+ },
959
+ "dec_blocks.1.0.bias": {
960
+ "shape": [
961
+ 384
962
+ ],
963
+ "norm": 0.6037180423736572,
964
+ "mean": 0.01788325607776642,
965
+ "std": 0.02511945739388466,
966
+ "abs_mean": 0.02397087588906288,
967
+ "sparsity": 0.0
968
+ },
969
+ "dec_blocks.1.1.weight": {
970
+ "shape": [
971
+ 384,
972
+ 384
973
+ ],
974
+ "norm": 12.594478607177734,
975
+ "mean": -0.00029621124849654734,
976
+ "std": 0.03279689699411392,
977
+ "abs_mean": 0.026519903913140297,
978
+ "sparsity": 2.0345052689663135e-05,
979
+ "condition": 1060.1834716796875,
980
+ "erank": 293.8656311035156
981
+ },
982
+ "dec_blocks.1.1.bias": {
983
+ "shape": [
984
+ 384
985
+ ],
986
+ "norm": 0.6123263239860535,
987
+ "mean": 0.0017201153095811605,
988
+ "std": 0.03124096989631653,
989
+ "abs_mean": 0.025975652039051056,
990
+ "sparsity": 0.0
991
+ },
992
+ "dec_blocks.1.3.weight": {
993
+ "shape": [
994
+ 384,
995
+ 384
996
+ ],
997
+ "norm": 12.478222846984863,
998
+ "mean": 0.00040824111783877015,
999
+ "std": 0.03249291703104973,
1000
+ "abs_mean": 0.02622552216053009,
1001
+ "sparsity": 6.781684078305261e-06,
1002
+ "condition": 4037.880126953125,
1003
+ "erank": 287.6106872558594
1004
+ },
1005
+ "dec_blocks.1.3.bias": {
1006
+ "shape": [
1007
+ 384
1008
+ ],
1009
+ "norm": 0.5644272565841675,
1010
+ "mean": -0.008861962705850601,
1011
+ "std": 0.02744188904762268,
1012
+ "abs_mean": 0.023942431434988976,
1013
+ "sparsity": 0.0
1014
+ },
1015
+ "dec_blocks.2.0.weight": {
1016
+ "shape": [
1017
+ 384
1018
+ ],
1019
+ "norm": 17.322954177856445,
1020
+ "mean": 0.8833889961242676,
1021
+ "std": 0.033128079026937485,
1022
+ "abs_mean": 0.8833889961242676,
1023
+ "sparsity": 0.0
1024
+ },
1025
+ "dec_blocks.2.0.bias": {
1026
+ "shape": [
1027
+ 384
1028
+ ],
1029
+ "norm": 0.6046607494354248,
1030
+ "mean": 0.01801062747836113,
1031
+ "std": 0.025087399408221245,
1032
+ "abs_mean": 0.02382083795964718,
1033
+ "sparsity": 0.0
1034
+ },
1035
+ "dec_blocks.2.1.weight": {
1036
+ "shape": [
1037
+ 384,
1038
+ 384
1039
+ ],
1040
+ "norm": 13.23556900024414,
1041
+ "mean": -0.00037671197787858546,
1042
+ "std": 0.03446568548679352,
1043
+ "abs_mean": 0.027716848999261856,
1044
+ "sparsity": 2.0345052689663135e-05,
1045
+ "condition": 23743.208984375,
1046
+ "erank": 286.2240905761719
1047
+ },
1048
+ "dec_blocks.2.1.bias": {
1049
+ "shape": [
1050
+ 384
1051
+ ],
1052
+ "norm": 0.6294949650764465,
1053
+ "mean": 0.00018974603153765202,
1054
+ "std": 0.03216513246297836,
1055
+ "abs_mean": 0.026205262169241905,
1056
+ "sparsity": 0.0
1057
+ },
1058
+ "dec_blocks.2.3.weight": {
1059
+ "shape": [
1060
+ 384,
1061
+ 384
1062
+ ],
1063
+ "norm": 12.366257667541504,
1064
+ "mean": 0.0003269152366556227,
1065
+ "std": 0.03220224380493164,
1066
+ "abs_mean": 0.026165293529629707,
1067
+ "sparsity": 4.747178900288418e-05,
1068
+ "condition": 9688.5419921875,
1069
+ "erank": 295.5987548828125
1070
+ },
1071
+ "dec_blocks.2.3.bias": {
1072
+ "shape": [
1073
+ 384
1074
+ ],
1075
+ "norm": 0.5544984340667725,
1076
+ "mean": -0.007604988291859627,
1077
+ "std": 0.027291078120470047,
1078
+ "abs_mean": 0.023679127916693687,
1079
+ "sparsity": 0.0
1080
+ },
1081
+ "dec_blocks.3.0.weight": {
1082
+ "shape": [
1083
+ 384
1084
+ ],
1085
+ "norm": 17.596338272094727,
1086
+ "mean": 0.896987795829773,
1087
+ "std": 0.0418156273663044,
1088
+ "abs_mean": 0.896987795829773,
1089
+ "sparsity": 0.0
1090
+ },
1091
+ "dec_blocks.3.0.bias": {
1092
+ "shape": [
1093
+ 384
1094
+ ],
1095
+ "norm": 0.576328456401825,
1096
+ "mean": 0.015175910666584969,
1097
+ "std": 0.025225672870874405,
1098
+ "abs_mean": 0.021809693425893784,
1099
+ "sparsity": 0.0
1100
+ },
1101
+ "dec_blocks.3.1.weight": {
1102
+ "shape": [
1103
+ 384,
1104
+ 384
1105
+ ],
1106
+ "norm": 13.386054039001465,
1107
+ "mean": 7.282180740730837e-05,
1108
+ "std": 0.0348595567047596,
1109
+ "abs_mean": 0.027967920526862144,
1110
+ "sparsity": 4.069010537932627e-05,
1111
+ "condition": 71744.9921875,
1112
+ "erank": 278.9849548339844
1113
+ },
1114
+ "dec_blocks.3.1.bias": {
1115
+ "shape": [
1116
+ 384
1117
+ ],
1118
+ "norm": 0.6271045804023743,
1119
+ "mean": 0.0018653826555237174,
1120
+ "std": 0.03198906406760216,
1121
+ "abs_mean": 0.02620757184922695,
1122
+ "sparsity": 0.0
1123
+ },
1124
+ "dec_blocks.3.3.weight": {
1125
+ "shape": [
1126
+ 384,
1127
+ 384
1128
+ ],
1129
+ "norm": 12.783568382263184,
1130
+ "mean": 0.0009830869967117906,
1131
+ "std": 0.033276136964559555,
1132
+ "abs_mean": 0.02756834402680397,
1133
+ "sparsity": 2.7126736313221045e-05,
1134
+ "condition": 817.364013671875,
1135
+ "erank": 301.7668762207031
1136
+ },
1137
+ "dec_blocks.3.3.bias": {
1138
+ "shape": [
1139
+ 384
1140
+ ],
1141
+ "norm": 0.6337453126907349,
1142
+ "mean": -0.013539277017116547,
1143
+ "std": 0.029408499598503113,
1144
+ "abs_mean": 0.026898186653852463,
1145
+ "sparsity": 0.0
1146
+ },
1147
+ "dec_out.weight": {
1148
+ "shape": [
1149
+ 48,
1150
+ 384
1151
+ ],
1152
+ "norm": 7.669042587280273,
1153
+ "mean": 0.0010714484378695488,
1154
+ "std": 0.05647920444607735,
1155
+ "abs_mean": 0.04642903059720993,
1156
+ "sparsity": 0.0,
1157
+ "condition": 1.529896855354309,
1158
+ "erank": 47.82491683959961
1159
+ },
1160
+ "dec_out.bias": {
1161
+ "shape": [
1162
+ 48
1163
+ ],
1164
+ "norm": 0.20354627072811127,
1165
+ "mean": -0.011547963134944439,
1166
+ "std": 0.027300545945763588,
1167
+ "abs_mean": 0.025418907403945923,
1168
+ "sparsity": 0.0
1169
+ },
1170
+ "cross_attn.0.alpha_logits": {
1171
+ "shape": [
1172
+ 4
1173
+ ],
1174
+ "norm": 3.9009289741516113,
1175
+ "mean": -1.9489340782165527,
1176
+ "std": 0.08920314908027649,
1177
+ "abs_mean": 1.9489340782165527,
1178
+ "sparsity": 0.0
1179
+ },
1180
+ "cross_attn.0.qkv.weight": {
1181
+ "shape": [
1182
+ 12,
1183
+ 4
1184
+ ],
1185
+ "norm": 2.395477771759033,
1186
+ "mean": 0.004947615321725607,
1187
+ "std": 0.3493805527687073,
1188
+ "abs_mean": 0.300910621881485,
1189
+ "sparsity": 0.0,
1190
+ "condition": 1.6833115816116333,
1191
+ "erank": 3.929398536682129
1192
+ },
1193
+ "cross_attn.0.qkv.bias": {
1194
+ "shape": [
1195
+ 12
1196
+ ],
1197
+ "norm": 1.140745997428894,
1198
+ "mean": -0.15099439024925232,
1199
+ "std": 0.305660218000412,
1200
+ "abs_mean": 0.2782840132713318,
1201
+ "sparsity": 0.0
1202
+ },
1203
+ "cross_attn.0.out_proj.weight": {
1204
+ "shape": [
1205
+ 4,
1206
+ 4
1207
+ ],
1208
+ "norm": 1.3285354375839233,
1209
+ "mean": 0.05088873207569122,
1210
+ "std": 0.338976114988327,
1211
+ "abs_mean": 0.27125734090805054,
1212
+ "sparsity": 0.0,
1213
+ "condition": 64.55738830566406,
1214
+ "erank": 2.6695845127105713
1215
+ },
1216
+ "cross_attn.0.out_proj.bias": {
1217
+ "shape": [
1218
+ 4
1219
+ ],
1220
+ "norm": 0.38879814743995667,
1221
+ "mean": 0.053185347467660904,
1222
+ "std": 0.21590836346149445,
1223
+ "abs_mean": 0.1815299093723297,
1224
+ "sparsity": 0.0
1225
+ },
1226
+ "cross_attn.0.norm.weight": {
1227
+ "shape": [
1228
+ 4
1229
+ ],
1230
+ "norm": 2.1059398651123047,
1231
+ "mean": 1.0422992706298828,
1232
+ "std": 0.1726568192243576,
1233
+ "abs_mean": 1.0422992706298828,
1234
+ "sparsity": 0.0
1235
+ },
1236
+ "cross_attn.0.norm.bias": {
1237
+ "shape": [
1238
+ 4
1239
+ ],
1240
+ "norm": 0.35794734954833984,
1241
+ "mean": -0.07177595794200897,
1242
+ "std": 0.18931381404399872,
1243
+ "abs_mean": 0.16136272251605988,
1244
+ "sparsity": 0.0
1245
+ },
1246
+ "cross_attn.1.alpha_logits": {
1247
+ "shape": [
1248
+ 4
1249
+ ],
1250
+ "norm": 3.9756813049316406,
1251
+ "mean": -1.9850671291351318,
1252
+ "std": 0.1212119609117508,
1253
+ "abs_mean": 1.9850671291351318,
1254
+ "sparsity": 0.0
1255
+ },
1256
+ "cross_attn.1.qkv.weight": {
1257
+ "shape": [
1258
+ 12,
1259
+ 4
1260
+ ],
1261
+ "norm": 2.1153385639190674,
1262
+ "mean": 0.03604736924171448,
1263
+ "std": 0.30639585852622986,
1264
+ "abs_mean": 0.23021213710308075,
1265
+ "sparsity": 0.0,
1266
+ "condition": 2.0000216960906982,
1267
+ "erank": 3.8778748512268066
1268
+ },
1269
+ "cross_attn.1.qkv.bias": {
1270
+ "shape": [
1271
+ 12
1272
+ ],
1273
+ "norm": 1.3504735231399536,
1274
+ "mean": 0.04980164021253586,
1275
+ "std": 0.40384697914123535,
1276
+ "abs_mean": 0.31919822096824646,
1277
+ "sparsity": 0.0
1278
+ },
1279
+ "cross_attn.1.out_proj.weight": {
1280
+ "shape": [
1281
+ 4,
1282
+ 4
1283
+ ],
1284
+ "norm": 1.544046401977539,
1285
+ "mean": -0.02773943543434143,
1286
+ "std": 0.3976403474807739,
1287
+ "abs_mean": 0.3338635563850403,
1288
+ "sparsity": 0.0,
1289
+ "condition": 6.554041385650635,
1290
+ "erank": 3.265773296356201
1291
+ },
1292
+ "cross_attn.1.out_proj.bias": {
1293
+ "shape": [
1294
+ 4
1295
+ ],
1296
+ "norm": 0.5558180809020996,
1297
+ "mean": -0.07216920703649521,
1298
+ "std": 0.30989256501197815,
1299
+ "abs_mean": 0.25283655524253845,
1300
+ "sparsity": 0.0
1301
+ },
1302
+ "cross_attn.1.norm.weight": {
1303
+ "shape": [
1304
+ 4
1305
+ ],
1306
+ "norm": 1.9561911821365356,
1307
+ "mean": 0.9712949395179749,
1308
+ "std": 0.13295160233974457,
1309
+ "abs_mean": 0.9712949395179749,
1310
+ "sparsity": 0.0
1311
+ },
1312
+ "cross_attn.1.norm.bias": {
1313
+ "shape": [
1314
+ 4
1315
+ ],
1316
+ "norm": 0.2207387089729309,
1317
+ "mean": -0.0339832603931427,
1318
+ "std": 0.12125197798013687,
1319
+ "abs_mean": 0.09095096588134766,
1320
+ "sparsity": 0.0
1321
+ },
1322
+ "boundary_smooth.net.0.weight": {
1323
+ "shape": [
1324
+ 8,
1325
+ 3,
1326
+ 3,
1327
+ 3
1328
+ ],
1329
+ "norm": 0.048899564892053604,
1330
+ "mean": -3.9799830119591206e-05,
1331
+ "std": 0.0033346840646117926,
1332
+ "abs_mean": 0.0023165682796388865,
1333
+ "sparsity": 0.0
1334
+ },
1335
+ "boundary_smooth.net.0.bias": {
1336
+ "shape": [
1337
+ 8
1338
+ ],
1339
+ "norm": 0.0003608279803302139,
1340
+ "mean": -3.4329681511735544e-05,
1341
+ "std": 0.0001313493849011138,
1342
+ "abs_mean": 0.0001100103254429996,
1343
+ "sparsity": 0.0
1344
+ },
1345
+ "boundary_smooth.net.2.weight": {
1346
+ "shape": [
1347
+ 3,
1348
+ 8,
1349
+ 3,
1350
+ 3
1351
+ ],
1352
+ "norm": 0.05223501846194267,
1353
+ "mean": -3.519888559821993e-05,
1354
+ "std": 0.0035622238647192717,
1355
+ "abs_mean": 0.002480539958924055,
1356
+ "sparsity": 0.0
1357
+ },
1358
+ "boundary_smooth.net.2.bias": {
1359
+ "shape": [
1360
+ 3
1361
+ ],
1362
+ "norm": 0.019741201773285866,
1363
+ "mean": -0.009897474199533463,
1364
+ "std": 0.006922249216586351,
1365
+ "abs_mean": 0.009897474199533463,
1366
+ "sparsity": 0.0
1367
+ }
1368
+ }
1369
+ }