| { |
| "transformer.wte.weight": { |
| "shape": [ |
| 50257, |
| 1024 |
| ], |
| "effective_rank": 925.6200561523438, |
| "stable_rank": 3.222175359725952, |
| "spectral_norm": 462.8680419921875, |
| "frobenius_norm": 830.8675537109375, |
| "mp_bound": 150.2218309787832, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.25901723751664113, |
| "alpha_r2": 0.9398083798430313, |
| "condition_number": 3932.63427734375, |
| "top_10_sv": [ |
| 462.8680419921875, |
| 68.01887512207031, |
| 59.577247619628906, |
| 55.14397430419922, |
| 54.30785369873047, |
| 49.892826080322266, |
| 49.320430755615234, |
| 48.26898956298828, |
| 47.46828842163086, |
| 46.06719207763672 |
| ] |
| }, |
| "transformer.wpe.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 151.1204833984375, |
| "stable_rank": 2.6537351608276367, |
| "spectral_norm": 23.605066299438477, |
| "frobenius_norm": 38.45333480834961, |
| "mp_bound": 0.14957082271575928, |
| "n_above_mp": 210, |
| "n_total": 1024, |
| "signal_ratio": 0.205078125, |
| "alpha": 0.718008780427603, |
| "alpha_r2": 0.8641226316938437, |
| "condition_number": 597220.375, |
| "top_10_sv": [ |
| 23.605066299438477, |
| 20.438417434692383, |
| 17.248029708862305, |
| 8.979466438293457, |
| 5.816164970397949, |
| 5.2002973556518555, |
| 4.532151699066162, |
| 3.2729744911193848, |
| 2.9242184162139893, |
| 2.0841357707977295 |
| ] |
| }, |
| "transformer.h.0.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 628.722412109375, |
| "stable_rank": 22.675931930541992, |
| "spectral_norm": 62.47161865234375, |
| "frobenius_norm": 297.48516845703125, |
| "mp_bound": 7.63114237099588, |
| "n_above_mp": 243, |
| "n_total": 1024, |
| "signal_ratio": 0.2373046875, |
| "alpha": 0.7995412978432835, |
| "alpha_r2": 0.9108228897637844, |
| "condition_number": 68.46097564697266, |
| "top_10_sv": [ |
| 62.47161865234375, |
| 59.061126708984375, |
| 52.00094985961914, |
| 47.60552215576172, |
| 45.418373107910156, |
| 45.035884857177734, |
| 44.06340026855469, |
| 41.139434814453125, |
| 39.21805953979492, |
| 38.14364242553711 |
| ] |
| }, |
| "transformer.h.0.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 737.2996826171875, |
| "stable_rank": 65.48827362060547, |
| "spectral_norm": 5.3187255859375, |
| "frobenius_norm": 43.041690826416016, |
| "mp_bound": 1.4740509986877441, |
| "n_above_mp": 248, |
| "n_total": 1024, |
| "signal_ratio": 0.2421875, |
| "alpha": 0.5869972613657769, |
| "alpha_r2": 0.8128838986239981, |
| "condition_number": 33621.77734375, |
| "top_10_sv": [ |
| 5.3187255859375, |
| 4.55604887008667, |
| 4.446892261505127, |
| 4.300610065460205, |
| 4.185939311981201, |
| 4.152495384216309, |
| 4.086340427398682, |
| 4.0273637771606445, |
| 3.9726521968841553, |
| 3.915243148803711 |
| ] |
| }, |
| "transformer.h.0.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 958.4553833007812, |
| "stable_rank": 35.955875396728516, |
| "spectral_norm": 34.83731460571289, |
| "frobenius_norm": 208.895751953125, |
| "mp_bound": 20.392281532287598, |
| "n_above_mp": 2, |
| "n_total": 1024, |
| "signal_ratio": 0.001953125, |
| "alpha": 0.24270056604789897, |
| "alpha_r2": 0.8867966791819655, |
| "condition_number": 47.199832916259766, |
| "top_10_sv": [ |
| 34.83731460571289, |
| 22.28569221496582, |
| 20.11617088317871, |
| 19.44432830810547, |
| 16.447628021240234, |
| 15.993996620178223, |
| 15.006787300109863, |
| 14.720680236816406, |
| 14.216849327087402, |
| 13.424826622009277 |
| ] |
| }, |
| "transformer.h.0.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 928.1140747070312, |
| "stable_rank": 34.63430404663086, |
| "spectral_norm": 28.995763778686523, |
| "frobenius_norm": 170.64273071289062, |
| "mp_bound": 14.264991760253906, |
| "n_above_mp": 5, |
| "n_total": 1024, |
| "signal_ratio": 0.0048828125, |
| "alpha": 0.288666051511119, |
| "alpha_r2": 0.8199686145081493, |
| "condition_number": 42.9376106262207, |
| "top_10_sv": [ |
| 28.995763778686523, |
| 17.847505569458008, |
| 17.05669403076172, |
| 15.134307861328125, |
| 14.533385276794434, |
| 12.990875244140625, |
| 11.749531745910645, |
| 11.249499320983887, |
| 10.62743091583252, |
| 10.352800369262695 |
| ] |
| }, |
| "transformer.h.1.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 820.3770751953125, |
| "stable_rank": 41.79490661621094, |
| "spectral_norm": 27.69493865966797, |
| "frobenius_norm": 179.04495239257812, |
| "mp_bound": 9.096073287364284, |
| "n_above_mp": 84, |
| "n_total": 1024, |
| "signal_ratio": 0.08203125, |
| "alpha": 0.5097652395048509, |
| "alpha_r2": 0.8774686180910731, |
| "condition_number": 27.94350814819336, |
| "top_10_sv": [ |
| 27.69493865966797, |
| 26.477949142456055, |
| 24.700719833374023, |
| 22.73528480529785, |
| 21.188859939575195, |
| 20.210037231445312, |
| 19.647275924682617, |
| 18.7845458984375, |
| 18.384246826171875, |
| 17.731191635131836 |
| ] |
| }, |
| "transformer.h.1.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 609.0930786132812, |
| "stable_rank": 6.79005241394043, |
| "spectral_norm": 35.383201599121094, |
| "frobenius_norm": 92.2005844116211, |
| "mp_bound": 1.7873764038085938, |
| "n_above_mp": 349, |
| "n_total": 1024, |
| "signal_ratio": 0.3408203125, |
| "alpha": 0.7416108698642246, |
| "alpha_r2": 0.8534609001933348, |
| "condition_number": 36300.8515625, |
| "top_10_sv": [ |
| 35.383201599121094, |
| 30.46486473083496, |
| 23.07978630065918, |
| 17.489381790161133, |
| 15.965164184570312, |
| 13.38463306427002, |
| 11.213841438293457, |
| 10.809903144836426, |
| 7.931944370269775, |
| 7.714986801147461 |
| ] |
| }, |
| "transformer.h.1.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 952.1654663085938, |
| "stable_rank": 15.651745796203613, |
| "spectral_norm": 54.71965026855469, |
| "frobenius_norm": 216.48345947265625, |
| "mp_bound": 20.210805416107178, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.28617357150294975, |
| "alpha_r2": 0.944003325737223, |
| "condition_number": 25.737693786621094, |
| "top_10_sv": [ |
| 54.71965026855469, |
| 20.120031356811523, |
| 18.258190155029297, |
| 17.045495986938477, |
| 16.402690887451172, |
| 16.293865203857422, |
| 15.51065731048584, |
| 15.293896675109863, |
| 14.751503944396973, |
| 14.567024230957031 |
| ] |
| }, |
| "transformer.h.1.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 951.001220703125, |
| "stable_rank": 15.77647876739502, |
| "spectral_norm": 40.80717086791992, |
| "frobenius_norm": 162.0845184326172, |
| "mp_bound": 15.43984866142273, |
| "n_above_mp": 5, |
| "n_total": 1024, |
| "signal_ratio": 0.0048828125, |
| "alpha": 0.27510892798744624, |
| "alpha_r2": 0.9524180259003465, |
| "condition_number": 47.46715545654297, |
| "top_10_sv": [ |
| 40.80717086791992, |
| 25.39755630493164, |
| 24.217174530029297, |
| 18.221494674682617, |
| 15.662391662597656, |
| 13.956523895263672, |
| 13.657442092895508, |
| 13.022187232971191, |
| 12.348648071289062, |
| 10.786282539367676 |
| ] |
| }, |
| "transformer.h.2.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 842.8485107421875, |
| "stable_rank": 39.29759216308594, |
| "spectral_norm": 32.41156768798828, |
| "frobenius_norm": 203.18096923828125, |
| "mp_bound": 11.375351081125348, |
| "n_above_mp": 52, |
| "n_total": 1024, |
| "signal_ratio": 0.05078125, |
| "alpha": 0.4740024292360243, |
| "alpha_r2": 0.8802370281377663, |
| "condition_number": 52.709171295166016, |
| "top_10_sv": [ |
| 32.41156768798828, |
| 30.25526237487793, |
| 25.755977630615234, |
| 24.045995712280273, |
| 22.17633819580078, |
| 21.890745162963867, |
| 21.64605712890625, |
| 21.57254981994629, |
| 19.244956970214844, |
| 18.357177734375 |
| ] |
| }, |
| "transformer.h.2.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 715.241943359375, |
| "stable_rank": 9.787028312683105, |
| "spectral_norm": 23.163925170898438, |
| "frobenius_norm": 72.466552734375, |
| "mp_bound": 2.168848752975464, |
| "n_above_mp": 315, |
| "n_total": 1024, |
| "signal_ratio": 0.3076171875, |
| "alpha": 0.6080837150928929, |
| "alpha_r2": 0.7822885769468158, |
| "condition_number": 187718.40625, |
| "top_10_sv": [ |
| 23.163925170898438, |
| 9.864102363586426, |
| 8.651338577270508, |
| 8.504467964172363, |
| 7.996184349060059, |
| 7.292422294616699, |
| 6.915347576141357, |
| 6.678709030151367, |
| 6.435945510864258, |
| 6.021938323974609 |
| ] |
| }, |
| "transformer.h.2.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 950.7151489257812, |
| "stable_rank": 23.699460983276367, |
| "spectral_norm": 44.83905029296875, |
| "frobenius_norm": 218.28587341308594, |
| "mp_bound": 20.38237237930298, |
| "n_above_mp": 3, |
| "n_total": 1024, |
| "signal_ratio": 0.0029296875, |
| "alpha": 0.2912721396169389, |
| "alpha_r2": 0.9537768474757292, |
| "condition_number": 36.274898529052734, |
| "top_10_sv": [ |
| 44.83905029296875, |
| 22.49462127685547, |
| 20.47141456604004, |
| 19.455570220947266, |
| 18.79952621459961, |
| 18.357946395874023, |
| 17.340517044067383, |
| 16.285560607910156, |
| 15.497901916503906, |
| 15.356476783752441 |
| ] |
| }, |
| "transformer.h.2.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 950.6380615234375, |
| "stable_rank": 20.027393341064453, |
| "spectral_norm": 37.39990997314453, |
| "frobenius_norm": 167.3719940185547, |
| "mp_bound": 15.765140533447266, |
| "n_above_mp": 3, |
| "n_total": 1024, |
| "signal_ratio": 0.0029296875, |
| "alpha": 0.2643769291286133, |
| "alpha_r2": 0.9029311778240089, |
| "condition_number": 100.74349212646484, |
| "top_10_sv": [ |
| 37.39990997314453, |
| 23.12250328063965, |
| 20.761938095092773, |
| 12.250714302062988, |
| 10.560304641723633, |
| 10.4915771484375, |
| 10.370850563049316, |
| 10.147944450378418, |
| 9.839871406555176, |
| 9.63220500946045 |
| ] |
| }, |
| "transformer.h.3.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 868.9850463867188, |
| "stable_rank": 41.50836181640625, |
| "spectral_norm": 33.2469596862793, |
| "frobenius_norm": 214.20013427734375, |
| "mp_bound": 13.617692360816509, |
| "n_above_mp": 30, |
| "n_total": 1024, |
| "signal_ratio": 0.029296875, |
| "alpha": 0.43598154200527217, |
| "alpha_r2": 0.9121806812022613, |
| "condition_number": 91.30290985107422, |
| "top_10_sv": [ |
| 33.2469596862793, |
| 26.513952255249023, |
| 25.003625869750977, |
| 23.534202575683594, |
| 22.926427841186523, |
| 21.938570022583008, |
| 21.85960578918457, |
| 20.899417877197266, |
| 20.27928924560547, |
| 19.71728515625 |
| ] |
| }, |
| "transformer.h.3.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 745.2302856445312, |
| "stable_rank": 16.78188133239746, |
| "spectral_norm": 17.08546257019043, |
| "frobenius_norm": 69.99178314208984, |
| "mp_bound": 2.3438045978546143, |
| "n_above_mp": 303, |
| "n_total": 1024, |
| "signal_ratio": 0.2958984375, |
| "alpha": 0.5602377771042083, |
| "alpha_r2": 0.7441943879202231, |
| "condition_number": 26872.6328125, |
| "top_10_sv": [ |
| 17.08546257019043, |
| 7.187192440032959, |
| 6.205941677093506, |
| 5.882521629333496, |
| 5.645092964172363, |
| 5.5240044593811035, |
| 5.3767547607421875, |
| 5.196966171264648, |
| 5.166182994842529, |
| 5.101844787597656 |
| ] |
| }, |
| "transformer.h.3.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 927.2161254882812, |
| "stable_rank": 26.023406982421875, |
| "spectral_norm": 43.73242950439453, |
| "frobenius_norm": 223.09286499023438, |
| "mp_bound": 19.039398908615112, |
| "n_above_mp": 6, |
| "n_total": 1024, |
| "signal_ratio": 0.005859375, |
| "alpha": 0.33058612233889434, |
| "alpha_r2": 0.9492689266361353, |
| "condition_number": 47.069427490234375, |
| "top_10_sv": [ |
| 43.73242950439453, |
| 37.90205764770508, |
| 20.579469680786133, |
| 19.914993286132812, |
| 19.66720199584961, |
| 19.149551391601562, |
| 18.675325393676758, |
| 18.34368896484375, |
| 17.572053909301758, |
| 17.139053344726562 |
| ] |
| }, |
| "transformer.h.3.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 938.9752197265625, |
| "stable_rank": 12.513044357299805, |
| "spectral_norm": 49.337677001953125, |
| "frobenius_norm": 174.52601623535156, |
| "mp_bound": 15.4556565284729, |
| "n_above_mp": 2, |
| "n_total": 1024, |
| "signal_ratio": 0.001953125, |
| "alpha": 0.2906680436450817, |
| "alpha_r2": 0.9012577872907297, |
| "condition_number": 150.67166137695312, |
| "top_10_sv": [ |
| 49.337677001953125, |
| 16.639305114746094, |
| 12.377195358276367, |
| 12.035073280334473, |
| 11.454721450805664, |
| 11.364490509033203, |
| 10.909409523010254, |
| 10.634885787963867, |
| 10.58263111114502, |
| 10.402506828308105 |
| ] |
| }, |
| "transformer.h.4.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 883.5626831054688, |
| "stable_rank": 66.41778564453125, |
| "spectral_norm": 26.355398178100586, |
| "frobenius_norm": 214.7888641357422, |
| "mp_bound": 14.6059706629891, |
| "n_above_mp": 28, |
| "n_total": 1024, |
| "signal_ratio": 0.02734375, |
| "alpha": 0.41387379570841165, |
| "alpha_r2": 0.928107216532441, |
| "condition_number": 54.06818771362305, |
| "top_10_sv": [ |
| 26.355398178100586, |
| 24.099023818969727, |
| 23.613140106201172, |
| 22.856849670410156, |
| 22.657690048217773, |
| 22.315385818481445, |
| 21.944866180419922, |
| 20.979703903198242, |
| 20.732112884521484, |
| 20.403736114501953 |
| ] |
| }, |
| "transformer.h.4.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 744.8408203125, |
| "stable_rank": 14.994495391845703, |
| "spectral_norm": 18.674596786499023, |
| "frobenius_norm": 72.3131332397461, |
| "mp_bound": 2.412200927734375, |
| "n_above_mp": 301, |
| "n_total": 1024, |
| "signal_ratio": 0.2939453125, |
| "alpha": 0.557779475735886, |
| "alpha_r2": 0.7426996015149363, |
| "condition_number": 38687.66015625, |
| "top_10_sv": [ |
| 18.674596786499023, |
| 6.152257442474365, |
| 6.06334924697876, |
| 5.751345634460449, |
| 5.698556423187256, |
| 5.618075847625732, |
| 5.548093795776367, |
| 5.433638572692871, |
| 5.330124855041504, |
| 5.277598857879639 |
| ] |
| }, |
| "transformer.h.4.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 928.9017333984375, |
| "stable_rank": 27.40129280090332, |
| "spectral_norm": 42.51769256591797, |
| "frobenius_norm": 222.56414794921875, |
| "mp_bound": 19.081247806549072, |
| "n_above_mp": 3, |
| "n_total": 1024, |
| "signal_ratio": 0.0029296875, |
| "alpha": 0.33154839758470445, |
| "alpha_r2": 0.9484103079592642, |
| "condition_number": 44.41904067993164, |
| "top_10_sv": [ |
| 42.51769256591797, |
| 33.62040710449219, |
| 20.00185775756836, |
| 19.004039764404297, |
| 18.752685546875, |
| 18.06087875366211, |
| 17.665428161621094, |
| 17.001155853271484, |
| 16.629446029663086, |
| 16.390981674194336 |
| ] |
| }, |
| "transformer.h.4.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 937.2068481445312, |
| "stable_rank": 12.849010467529297, |
| "spectral_norm": 48.77532958984375, |
| "frobenius_norm": 174.8376922607422, |
| "mp_bound": 15.396579265594482, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.3018330337436229, |
| "alpha_r2": 0.9230008315412146, |
| "condition_number": 90.40140533447266, |
| "top_10_sv": [ |
| 48.77532958984375, |
| 14.544754981994629, |
| 14.092583656311035, |
| 13.27783489227295, |
| 12.841653823852539, |
| 12.547450065612793, |
| 12.23483657836914, |
| 11.652227401733398, |
| 11.583452224731445, |
| 11.510062217712402 |
| ] |
| }, |
| "transformer.h.5.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 896.8251953125, |
| "stable_rank": 88.8460693359375, |
| "spectral_norm": 22.261667251586914, |
| "frobenius_norm": 209.83444213867188, |
| "mp_bound": 14.939659960070664, |
| "n_above_mp": 23, |
| "n_total": 1024, |
| "signal_ratio": 0.0224609375, |
| "alpha": 0.39352495861912024, |
| "alpha_r2": 0.9197388047187437, |
| "condition_number": 35.448787689208984, |
| "top_10_sv": [ |
| 22.261667251586914, |
| 20.855236053466797, |
| 20.020145416259766, |
| 19.449405670166016, |
| 19.232900619506836, |
| 18.622623443603516, |
| 18.29434585571289, |
| 17.920202255249023, |
| 17.658803939819336, |
| 17.550424575805664 |
| ] |
| }, |
| "transformer.h.5.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 746.4803466796875, |
| "stable_rank": 16.239879608154297, |
| "spectral_norm": 19.48035430908203, |
| "frobenius_norm": 78.50335693359375, |
| "mp_bound": 2.632415771484375, |
| "n_above_mp": 307, |
| "n_total": 1024, |
| "signal_ratio": 0.2998046875, |
| "alpha": 0.5490623315523016, |
| "alpha_r2": 0.7276577467035443, |
| "condition_number": 42145.03515625, |
| "top_10_sv": [ |
| 19.48035430908203, |
| 6.621999740600586, |
| 6.383045196533203, |
| 6.287996292114258, |
| 6.164387226104736, |
| 5.886140823364258, |
| 5.757631778717041, |
| 5.754157066345215, |
| 5.641662120819092, |
| 5.541800498962402 |
| ] |
| }, |
| "transformer.h.5.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 936.9888916015625, |
| "stable_rank": 31.094371795654297, |
| "spectral_norm": 39.32979202270508, |
| "frobenius_norm": 219.31207275390625, |
| "mp_bound": 19.247617721557617, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.3193148230456383, |
| "alpha_r2": 0.9361649517765219, |
| "condition_number": 41.62086868286133, |
| "top_10_sv": [ |
| 39.32979202270508, |
| 19.010740280151367, |
| 18.782209396362305, |
| 17.947439193725586, |
| 17.337230682373047, |
| 16.85923194885254, |
| 16.667734146118164, |
| 15.952093124389648, |
| 15.685457229614258, |
| 15.613792419433594 |
| ] |
| }, |
| "transformer.h.5.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 936.8583984375, |
| "stable_rank": 15.319039344787598, |
| "spectral_norm": 44.49594497680664, |
| "frobenius_norm": 174.1551055908203, |
| "mp_bound": 15.25501549243927, |
| "n_above_mp": 3, |
| "n_total": 1024, |
| "signal_ratio": 0.0029296875, |
| "alpha": 0.30940596098305206, |
| "alpha_r2": 0.9306552491126057, |
| "condition_number": 69.38197326660156, |
| "top_10_sv": [ |
| 44.49594497680664, |
| 16.51666831970215, |
| 16.053695678710938, |
| 14.54151725769043, |
| 13.180120468139648, |
| 12.70102596282959, |
| 12.365108489990234, |
| 12.008030891418457, |
| 11.90465259552002, |
| 11.715628623962402 |
| ] |
| }, |
| "transformer.h.6.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 897.4365844726562, |
| "stable_rank": 115.99281311035156, |
| "spectral_norm": 18.230764389038086, |
| "frobenius_norm": 196.3452606201172, |
| "mp_bound": 13.862052830560662, |
| "n_above_mp": 17, |
| "n_total": 1024, |
| "signal_ratio": 0.0166015625, |
| "alpha": 0.3909539490685798, |
| "alpha_r2": 0.8932138219176853, |
| "condition_number": 32.950740814208984, |
| "top_10_sv": [ |
| 18.230764389038086, |
| 17.658872604370117, |
| 16.965496063232422, |
| 16.603540420532227, |
| 16.098522186279297, |
| 15.950654983520508, |
| 15.425930976867676, |
| 15.198224067687988, |
| 14.950895309448242, |
| 14.900473594665527 |
| ] |
| }, |
| "transformer.h.6.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 741.775634765625, |
| "stable_rank": 23.221019744873047, |
| "spectral_norm": 17.442646026611328, |
| "frobenius_norm": 84.05296325683594, |
| "mp_bound": 2.810410976409912, |
| "n_above_mp": 295, |
| "n_total": 1024, |
| "signal_ratio": 0.2880859375, |
| "alpha": 0.5652261405393998, |
| "alpha_r2": 0.7572962195682047, |
| "condition_number": 14903.8291015625, |
| "top_10_sv": [ |
| 17.442646026611328, |
| 8.159473419189453, |
| 7.555866241455078, |
| 7.282261848449707, |
| 7.234253883361816, |
| 7.103213787078857, |
| 6.917233943939209, |
| 6.809220790863037, |
| 6.620255470275879, |
| 6.525652885437012 |
| ] |
| }, |
| "transformer.h.6.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 934.8484497070312, |
| "stable_rank": 30.034225463867188, |
| "spectral_norm": 39.7269172668457, |
| "frobenius_norm": 217.7173614501953, |
| "mp_bound": 19.036645889282227, |
| "n_above_mp": 3, |
| "n_total": 1024, |
| "signal_ratio": 0.0029296875, |
| "alpha": 0.32115342994248447, |
| "alpha_r2": 0.9391101648886675, |
| "condition_number": 59.284549713134766, |
| "top_10_sv": [ |
| 39.7269172668457, |
| 20.441776275634766, |
| 19.18965721130371, |
| 18.47493553161621, |
| 17.703449249267578, |
| 16.833940505981445, |
| 16.46803855895996, |
| 15.968595504760742, |
| 15.606867790222168, |
| 15.231405258178711 |
| ] |
| }, |
| "transformer.h.6.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 939.2515258789062, |
| "stable_rank": 24.95199203491211, |
| "spectral_norm": 34.625038146972656, |
| "frobenius_norm": 172.95889282226562, |
| "mp_bound": 15.321847558021545, |
| "n_above_mp": 2, |
| "n_total": 1024, |
| "signal_ratio": 0.001953125, |
| "alpha": 0.30995776991553114, |
| "alpha_r2": 0.9308628488187047, |
| "condition_number": 56.80348587036133, |
| "top_10_sv": [ |
| 34.625038146972656, |
| 16.99053382873535, |
| 15.084966659545898, |
| 13.3975191116333, |
| 13.283339500427246, |
| 12.721936225891113, |
| 12.279509544372559, |
| 12.057058334350586, |
| 11.876668930053711, |
| 11.697145462036133 |
| ] |
| }, |
| "transformer.h.7.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 910.6389770507812, |
| "stable_rank": 113.14727783203125, |
| "spectral_norm": 17.927513122558594, |
| "frobenius_norm": 190.69622802734375, |
| "mp_bound": 14.044904500546567, |
| "n_above_mp": 8, |
| "n_total": 1024, |
| "signal_ratio": 0.0078125, |
| "alpha": 0.36309653033535555, |
| "alpha_r2": 0.87929726729129, |
| "condition_number": 31.099102020263672, |
| "top_10_sv": [ |
| 17.927513122558594, |
| 17.431472778320312, |
| 16.597808837890625, |
| 15.969321250915527, |
| 15.868179321289062, |
| 14.81343936920166, |
| 14.50455379486084, |
| 14.174510955810547, |
| 13.997900009155273, |
| 13.888890266418457 |
| ] |
| }, |
| "transformer.h.7.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 743.44921875, |
| "stable_rank": 27.847932815551758, |
| "spectral_norm": 16.62673568725586, |
| "frobenius_norm": 87.74118041992188, |
| "mp_bound": 2.8953471183776855, |
| "n_above_mp": 311, |
| "n_total": 1024, |
| "signal_ratio": 0.3037109375, |
| "alpha": 0.5651577772345532, |
| "alpha_r2": 0.7372368922623682, |
| "condition_number": 9111.541015625, |
| "top_10_sv": [ |
| 16.62673568725586, |
| 7.667027473449707, |
| 7.169197082519531, |
| 6.978420257568359, |
| 6.833680152893066, |
| 6.656124591827393, |
| 6.566739082336426, |
| 6.528911590576172, |
| 6.47317361831665, |
| 6.427671432495117 |
| ] |
| }, |
| "transformer.h.7.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 939.2886352539062, |
| "stable_rank": 31.832117080688477, |
| "spectral_norm": 38.28334045410156, |
| "frobenius_norm": 215.99444580078125, |
| "mp_bound": 19.122761964797974, |
| "n_above_mp": 2, |
| "n_total": 1024, |
| "signal_ratio": 0.001953125, |
| "alpha": 0.31228344301482186, |
| "alpha_r2": 0.9328367415414192, |
| "condition_number": 34.72146224975586, |
| "top_10_sv": [ |
| 38.28334045410156, |
| 19.85346031188965, |
| 18.42683219909668, |
| 17.141801834106445, |
| 16.43387222290039, |
| 16.275760650634766, |
| 16.053062438964844, |
| 15.724404335021973, |
| 15.451314926147461, |
| 15.216456413269043 |
| ] |
| }, |
| "transformer.h.7.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 940.928955078125, |
| "stable_rank": 33.200374603271484, |
| "spectral_norm": 31.440378189086914, |
| "frobenius_norm": 181.15872192382812, |
| "mp_bound": 16.195263862609863, |
| "n_above_mp": 3, |
| "n_total": 1024, |
| "signal_ratio": 0.0029296875, |
| "alpha": 0.30703067706661946, |
| "alpha_r2": 0.9333060712253897, |
| "condition_number": 61.141605377197266, |
| "top_10_sv": [ |
| 31.440378189086914, |
| 18.50613784790039, |
| 16.681230545043945, |
| 15.740647315979004, |
| 13.607156753540039, |
| 13.430535316467285, |
| 13.093123435974121, |
| 12.78380298614502, |
| 12.500567436218262, |
| 12.175090789794922 |
| ] |
| }, |
| "transformer.h.8.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 905.0512084960938, |
| "stable_rank": 99.04619598388672, |
| "spectral_norm": 19.30182647705078, |
| "frobenius_norm": 192.09555053710938, |
| "mp_bound": 13.908594893171308, |
| "n_above_mp": 12, |
| "n_total": 1024, |
| "signal_ratio": 0.01171875, |
| "alpha": 0.37568865395886447, |
| "alpha_r2": 0.8932899012135317, |
| "condition_number": 26.482255935668945, |
| "top_10_sv": [ |
| 19.30182647705078, |
| 18.0625, |
| 17.029926300048828, |
| 16.608489990234375, |
| 15.877984046936035, |
| 15.367476463317871, |
| 15.020094871520996, |
| 14.934671401977539, |
| 14.854327201843262, |
| 14.298294067382812 |
| ] |
| }, |
| "transformer.h.8.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 730.39599609375, |
| "stable_rank": 23.92629051208496, |
| "spectral_norm": 18.780719757080078, |
| "frobenius_norm": 91.8649673461914, |
| "mp_bound": 2.854386568069458, |
| "n_above_mp": 321, |
| "n_total": 1024, |
| "signal_ratio": 0.3134765625, |
| "alpha": 0.590965763955716, |
| "alpha_r2": 0.7524001510220397, |
| "condition_number": 10199.13671875, |
| "top_10_sv": [ |
| 18.780719757080078, |
| 9.103902816772461, |
| 8.50258731842041, |
| 7.891252517700195, |
| 7.823528289794922, |
| 7.506181716918945, |
| 7.408065319061279, |
| 7.162362575531006, |
| 7.139276504516602, |
| 7.067962646484375 |
| ] |
| }, |
| "transformer.h.8.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 936.2421875, |
| "stable_rank": 38.09269714355469, |
| "spectral_norm": 34.884918212890625, |
| "frobenius_norm": 215.3072052001953, |
| "mp_bound": 18.746233463287354, |
| "n_above_mp": 2, |
| "n_total": 1024, |
| "signal_ratio": 0.001953125, |
| "alpha": 0.32267201724432737, |
| "alpha_r2": 0.9352114281367789, |
| "condition_number": 38.3657341003418, |
| "top_10_sv": [ |
| 34.884918212890625, |
| 19.184831619262695, |
| 18.076223373413086, |
| 17.63890266418457, |
| 16.88092613220215, |
| 16.76192855834961, |
| 16.45589828491211, |
| 16.098548889160156, |
| 15.723489761352539, |
| 15.483587265014648 |
| ] |
| }, |
| "transformer.h.8.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 950.4535522460938, |
| "stable_rank": 39.308773040771484, |
| "spectral_norm": 27.4515380859375, |
| "frobenius_norm": 172.1121063232422, |
| "mp_bound": 15.766410827636719, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.28704826650262233, |
| "alpha_r2": 0.905894394932516, |
| "condition_number": 47.98366165161133, |
| "top_10_sv": [ |
| 27.4515380859375, |
| 14.21110725402832, |
| 12.706835746765137, |
| 12.113818168640137, |
| 11.690884590148926, |
| 11.509939193725586, |
| 11.169968605041504, |
| 11.008914947509766, |
| 10.82995319366455, |
| 10.744305610656738 |
| ] |
| }, |
| "transformer.h.9.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 893.1614990234375, |
| "stable_rank": 115.87055206298828, |
| "spectral_norm": 17.444477081298828, |
| "frobenius_norm": 187.7779083251953, |
| "mp_bound": 12.937507738520768, |
| "n_above_mp": 18, |
| "n_total": 1024, |
| "signal_ratio": 0.017578125, |
| "alpha": 0.39390157964822387, |
| "alpha_r2": 0.8784313742375426, |
| "condition_number": 25.246517181396484, |
| "top_10_sv": [ |
| 17.444477081298828, |
| 16.29485511779785, |
| 15.905627250671387, |
| 15.577847480773926, |
| 15.356382369995117, |
| 15.149186134338379, |
| 14.64958381652832, |
| 14.456648826599121, |
| 14.237245559692383, |
| 14.110804557800293 |
| ] |
| }, |
| "transformer.h.9.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 723.2728271484375, |
| "stable_rank": 13.839754104614258, |
| "spectral_norm": 26.413339614868164, |
| "frobenius_norm": 98.2624282836914, |
| "mp_bound": 3.006326913833618, |
| "n_above_mp": 307, |
| "n_total": 1024, |
| "signal_ratio": 0.2998046875, |
| "alpha": 0.5910035608611524, |
| "alpha_r2": 0.7777498557933287, |
| "condition_number": 84904.125, |
| "top_10_sv": [ |
| 26.413339614868164, |
| 24.238210678100586, |
| 13.290877342224121, |
| 11.937247276306152, |
| 9.426248550415039, |
| 7.983802795410156, |
| 7.864888668060303, |
| 7.773645877838135, |
| 7.429806232452393, |
| 7.390601634979248 |
| ] |
| }, |
| "transformer.h.9.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 938.6810913085938, |
| "stable_rank": 35.63467025756836, |
| "spectral_norm": 35.846248626708984, |
| "frobenius_norm": 213.9833984375, |
| "mp_bound": 18.81469202041626, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.3128217781401333, |
| "alpha_r2": 0.9229840018865637, |
| "condition_number": 41.859554290771484, |
| "top_10_sv": [ |
| 35.846248626708984, |
| 17.866443634033203, |
| 17.141401290893555, |
| 16.734554290771484, |
| 16.55880355834961, |
| 16.355411529541016, |
| 15.886651039123535, |
| 15.564071655273438, |
| 15.157404899597168, |
| 14.905134201049805 |
| ] |
| }, |
| "transformer.h.9.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 944.1468505859375, |
| "stable_rank": 77.85943603515625, |
| "spectral_norm": 21.045055389404297, |
| "frobenius_norm": 185.6973419189453, |
| "mp_bound": 16.68483567237854, |
| "n_above_mp": 4, |
| "n_total": 1024, |
| "signal_ratio": 0.00390625, |
| "alpha": 0.30334476439857244, |
| "alpha_r2": 0.9238160976321365, |
| "condition_number": 36.544837951660156, |
| "top_10_sv": [ |
| 21.045055389404297, |
| 18.942150115966797, |
| 18.65625762939453, |
| 17.60894203186035, |
| 14.828644752502441, |
| 13.582180976867676, |
| 13.063478469848633, |
| 12.69278335571289, |
| 12.27115535736084, |
| 12.16966438293457 |
| ] |
| }, |
| "transformer.h.10.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 889.07373046875, |
| "stable_rank": 110.98599243164062, |
| "spectral_norm": 18.18061065673828, |
| "frobenius_norm": 191.53253173828125, |
| "mp_bound": 12.933648669067457, |
| "n_above_mp": 23, |
| "n_total": 1024, |
| "signal_ratio": 0.0224609375, |
| "alpha": 0.400968608999224, |
| "alpha_r2": 0.8759391614511947, |
| "condition_number": 22.077619552612305, |
| "top_10_sv": [ |
| 18.18061065673828, |
| 17.37954330444336, |
| 16.723297119140625, |
| 16.557512283325195, |
| 15.972862243652344, |
| 15.706113815307617, |
| 15.19486141204834, |
| 14.927367210388184, |
| 14.738473892211914, |
| 14.608979225158691 |
| ] |
| }, |
| "transformer.h.10.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 712.9119262695312, |
| "stable_rank": 15.614805221557617, |
| "spectral_norm": 24.71717071533203, |
| "frobenius_norm": 97.67131805419922, |
| "mp_bound": 2.9057722091674805, |
| "n_above_mp": 303, |
| "n_total": 1024, |
| "signal_ratio": 0.2958984375, |
| "alpha": 0.6156612751434485, |
| "alpha_r2": 0.7966407254881575, |
| "condition_number": 26397.486328125, |
| "top_10_sv": [ |
| 24.71717071533203, |
| 22.72795867919922, |
| 11.450672149658203, |
| 9.635822296142578, |
| 9.428568840026855, |
| 8.725939750671387, |
| 8.629619598388672, |
| 8.33855152130127, |
| 8.053350448608398, |
| 8.0328950881958 |
| ] |
| }, |
| "transformer.h.10.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 936.62158203125, |
| "stable_rank": 36.54176712036133, |
| "spectral_norm": 35.16852951049805, |
| "frobenius_norm": 212.593017578125, |
| "mp_bound": 18.578571796417236, |
| "n_above_mp": 2, |
| "n_total": 1024, |
| "signal_ratio": 0.001953125, |
| "alpha": 0.31837997506899607, |
| "alpha_r2": 0.9268627447940765, |
| "condition_number": 44.306236267089844, |
| "top_10_sv": [ |
| 35.16852951049805, |
| 18.78311538696289, |
| 17.198081970214844, |
| 16.65778350830078, |
| 16.474864959716797, |
| 16.360870361328125, |
| 15.851672172546387, |
| 15.545867919921875, |
| 15.253518104553223, |
| 15.09296703338623 |
| ] |
| }, |
| "transformer.h.10.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 945.8907470703125, |
| "stable_rank": 78.20089721679688, |
| "spectral_norm": 21.427583694458008, |
| "frobenius_norm": 189.48684692382812, |
| "mp_bound": 17.147469520568848, |
| "n_above_mp": 3, |
| "n_total": 1024, |
| "signal_ratio": 0.0029296875, |
| "alpha": 0.30030447990075465, |
| "alpha_r2": 0.9246116625990694, |
| "condition_number": 53.54841232299805, |
| "top_10_sv": [ |
| 21.427583694458008, |
| 20.07192611694336, |
| 17.730005264282227, |
| 15.819792747497559, |
| 14.968161582946777, |
| 13.369080543518066, |
| 13.173439025878906, |
| 13.08027458190918, |
| 12.7400541305542, |
| 12.428592681884766 |
| ] |
| }, |
| "transformer.h.11.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 901.9686279296875, |
| "stable_rank": 114.79285430908203, |
| "spectral_norm": 17.392602920532227, |
| "frobenius_norm": 186.34683227539062, |
| "mp_bound": 13.299353036480547, |
| "n_above_mp": 13, |
| "n_total": 1024, |
| "signal_ratio": 0.0126953125, |
| "alpha": 0.38050221069018897, |
| "alpha_r2": 0.8828344695891076, |
| "condition_number": 18.282512664794922, |
| "top_10_sv": [ |
| 17.392602920532227, |
| 16.71450424194336, |
| 15.829066276550293, |
| 15.560785293579102, |
| 15.137024879455566, |
| 14.838809967041016, |
| 14.30210018157959, |
| 13.980681419372559, |
| 13.925599098205566, |
| 13.726691246032715 |
| ] |
| }, |
| "transformer.h.11.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 733.375, |
| "stable_rank": 42.236331939697266, |
| "spectral_norm": 15.064600944519043, |
| "frobenius_norm": 97.90406799316406, |
| "mp_bound": 3.1596755981445312, |
| "n_above_mp": 294, |
| "n_total": 1024, |
| "signal_ratio": 0.287109375, |
| "alpha": 0.5869072812022946, |
| "alpha_r2": 0.7743078879808162, |
| "condition_number": 10999.072265625, |
| "top_10_sv": [ |
| 15.064600944519043, |
| 13.98853874206543, |
| 11.859731674194336, |
| 11.481588363647461, |
| 8.733123779296875, |
| 8.476235389709473, |
| 8.284677505493164, |
| 8.177391052246094, |
| 7.880287170410156, |
| 7.761397361755371 |
| ] |
| }, |
| "transformer.h.11.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 936.5640869140625, |
| "stable_rank": 36.39714050292969, |
| "spectral_norm": 35.354408264160156, |
| "frobenius_norm": 213.29330444335938, |
| "mp_bound": 18.617774963378906, |
| "n_above_mp": 2, |
| "n_total": 1024, |
| "signal_ratio": 0.001953125, |
| "alpha": 0.3180465298760983, |
| "alpha_r2": 0.9268863407776537, |
| "condition_number": 36.84086990356445, |
| "top_10_sv": [ |
| 35.354408264160156, |
| 19.106882095336914, |
| 17.004091262817383, |
| 16.809144973754883, |
| 16.472070693969727, |
| 16.30332374572754, |
| 15.732508659362793, |
| 15.557268142700195, |
| 15.403963088989258, |
| 14.984481811523438 |
| ] |
| }, |
| "transformer.h.11.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 950.7989501953125, |
| "stable_rank": 109.07081604003906, |
| "spectral_norm": 18.535531997680664, |
| "frobenius_norm": 193.57949829101562, |
| "mp_bound": 17.80658483505249, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.2904225097637395, |
| "alpha_r2": 0.9180203572981726, |
| "condition_number": 31.789386749267578, |
| "top_10_sv": [ |
| 18.535531997680664, |
| 17.2476749420166, |
| 15.5623140335083, |
| 15.329514503479004, |
| 14.429676055908203, |
| 13.832291603088379, |
| 13.37085247039795, |
| 13.245711326599121, |
| 13.16356372833252, |
| 13.034541130065918 |
| ] |
| }, |
| "transformer.h.12.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 900.2671508789062, |
| "stable_rank": 111.13557434082031, |
| "spectral_norm": 17.990798950195312, |
| "frobenius_norm": 189.66055297851562, |
| "mp_bound": 13.46277887415714, |
| "n_above_mp": 15, |
| "n_total": 1024, |
| "signal_ratio": 0.0146484375, |
| "alpha": 0.3849465327520483, |
| "alpha_r2": 0.8884130947195702, |
| "condition_number": 19.290571212768555, |
| "top_10_sv": [ |
| 17.990798950195312, |
| 17.627941131591797, |
| 16.81555938720703, |
| 16.37408447265625, |
| 15.790731430053711, |
| 15.523019790649414, |
| 14.99732494354248, |
| 14.942817687988281, |
| 14.576937675476074, |
| 14.378122329711914 |
| ] |
| }, |
| "transformer.h.12.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 726.1136474609375, |
| "stable_rank": 67.91838836669922, |
| "spectral_norm": 11.730238914489746, |
| "frobenius_norm": 96.67196655273438, |
| "mp_bound": 2.9830989837646484, |
| "n_above_mp": 317, |
| "n_total": 1024, |
| "signal_ratio": 0.3095703125, |
| "alpha": 0.6079611377940939, |
| "alpha_r2": 0.7713425607206535, |
| "condition_number": 74660.875, |
| "top_10_sv": [ |
| 11.730238914489746, |
| 11.002860069274902, |
| 9.450277328491211, |
| 9.339866638183594, |
| 9.130253791809082, |
| 8.70073413848877, |
| 8.407955169677734, |
| 8.365035057067871, |
| 8.216530799865723, |
| 8.057732582092285 |
| ] |
| }, |
| "transformer.h.12.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 937.1617431640625, |
| "stable_rank": 35.29595184326172, |
| "spectral_norm": 35.728485107421875, |
| "frobenius_norm": 212.26434326171875, |
| "mp_bound": 18.63219451904297, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.31249088591006335, |
| "alpha_r2": 0.9217926851101266, |
| "condition_number": 59.26630401611328, |
| "top_10_sv": [ |
| 35.728485107421875, |
| 18.58656883239746, |
| 16.78707504272461, |
| 16.50104331970215, |
| 16.170015335083008, |
| 15.932202339172363, |
| 15.695175170898438, |
| 15.225218772888184, |
| 14.931440353393555, |
| 14.655704498291016 |
| ] |
| }, |
| "transformer.h.12.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 951.9144897460938, |
| "stable_rank": 125.68574523925781, |
| "spectral_norm": 17.590118408203125, |
| "frobenius_norm": 197.20220947265625, |
| "mp_bound": 18.2495698928833, |
| "n_above_mp": 0, |
| "n_total": 1024, |
| "signal_ratio": 0.0, |
| "alpha": 0.28659027982486085, |
| "alpha_r2": 0.9157493562146201, |
| "condition_number": 63.03939437866211, |
| "top_10_sv": [ |
| 17.590118408203125, |
| 16.458555221557617, |
| 15.0971040725708, |
| 14.703747749328613, |
| 14.40058708190918, |
| 14.16800308227539, |
| 13.817294120788574, |
| 13.755056381225586, |
| 13.447993278503418, |
| 13.400959968566895 |
| ] |
| }, |
| "transformer.h.13.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 921.8739013671875, |
| "stable_rank": 122.69657897949219, |
| "spectral_norm": 17.3557071685791, |
| "frobenius_norm": 192.24655151367188, |
| "mp_bound": 14.840765654402492, |
| "n_above_mp": 5, |
| "n_total": 1024, |
| "signal_ratio": 0.0048828125, |
| "alpha": 0.34449500140802347, |
| "alpha_r2": 0.8890960763544924, |
| "condition_number": 12.950295448303223, |
| "top_10_sv": [ |
| 17.3557071685791, |
| 16.527515411376953, |
| 16.102203369140625, |
| 15.533954620361328, |
| 15.408553123474121, |
| 14.486262321472168, |
| 14.440102577209473, |
| 14.294885635375977, |
| 13.857573509216309, |
| 13.616387367248535 |
| ] |
| }, |
| "transformer.h.13.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 745.8662109375, |
| "stable_rank": 69.44125366210938, |
| "spectral_norm": 11.393943786621094, |
| "frobenius_norm": 94.94735717773438, |
| "mp_bound": 3.1595468521118164, |
| "n_above_mp": 305, |
| "n_total": 1024, |
| "signal_ratio": 0.2978515625, |
| "alpha": 0.5692666342371, |
| "alpha_r2": 0.7468213049281058, |
| "condition_number": 11072.6982421875, |
| "top_10_sv": [ |
| 11.393943786621094, |
| 10.033926010131836, |
| 9.37811279296875, |
| 8.2539701461792, |
| 7.887532711029053, |
| 7.69539213180542, |
| 7.5529046058654785, |
| 7.425933837890625, |
| 7.274467468261719, |
| 7.151112079620361 |
| ] |
| }, |
| "transformer.h.13.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 937.599365234375, |
| "stable_rank": 37.05495071411133, |
| "spectral_norm": 35.005916595458984, |
| "frobenius_norm": 213.0907440185547, |
| "mp_bound": 18.706077575683594, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.3122716285047514, |
| "alpha_r2": 0.9183302410504182, |
| "condition_number": 36.091087341308594, |
| "top_10_sv": [ |
| 35.005916595458984, |
| 18.3486385345459, |
| 16.548778533935547, |
| 16.184619903564453, |
| 15.928171157836914, |
| 15.709515571594238, |
| 15.531082153320312, |
| 14.870841026306152, |
| 14.759794235229492, |
| 14.733899116516113 |
| ] |
| }, |
| "transformer.h.13.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 953.9815063476562, |
| "stable_rank": 93.02642059326172, |
| "spectral_norm": 21.067401885986328, |
| "frobenius_norm": 203.19552612304688, |
| "mp_bound": 18.922306537628174, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.2803336865969046, |
| "alpha_r2": 0.9048293558413308, |
| "condition_number": 39.748817443847656, |
| "top_10_sv": [ |
| 21.067401885986328, |
| 17.05304718017578, |
| 15.297992706298828, |
| 14.769113540649414, |
| 14.03395938873291, |
| 13.835834503173828, |
| 13.424732208251953, |
| 13.155864715576172, |
| 13.011979103088379, |
| 12.918780326843262 |
| ] |
| }, |
| "transformer.h.14.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 927.090576171875, |
| "stable_rank": 125.47744750976562, |
| "spectral_norm": 16.747602462768555, |
| "frobenius_norm": 187.60113525390625, |
| "mp_bound": 14.835510292247621, |
| "n_above_mp": 4, |
| "n_total": 1024, |
| "signal_ratio": 0.00390625, |
| "alpha": 0.33493011773835984, |
| "alpha_r2": 0.893001234472107, |
| "condition_number": 14.26633358001709, |
| "top_10_sv": [ |
| 16.747602462768555, |
| 15.885435104370117, |
| 15.1227388381958, |
| 14.847970008850098, |
| 14.377248764038086, |
| 14.23108959197998, |
| 13.869467735290527, |
| 13.69625186920166, |
| 13.520434379577637, |
| 13.335817337036133 |
| ] |
| }, |
| "transformer.h.14.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 771.0768432617188, |
| "stable_rank": 45.977813720703125, |
| "spectral_norm": 14.095232963562012, |
| "frobenius_norm": 95.57546997070312, |
| "mp_bound": 3.5943875312805176, |
| "n_above_mp": 260, |
| "n_total": 1024, |
| "signal_ratio": 0.25390625, |
| "alpha": 0.5172198054236867, |
| "alpha_r2": 0.7357139263927202, |
| "condition_number": 4705.93505859375, |
| "top_10_sv": [ |
| 14.095232963562012, |
| 10.67358684539795, |
| 9.503582000732422, |
| 7.890437126159668, |
| 7.194736003875732, |
| 7.045958042144775, |
| 7.003268718719482, |
| 6.855443000793457, |
| 6.832728862762451, |
| 6.729796886444092 |
| ] |
| }, |
| "transformer.h.14.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 938.0935668945312, |
| "stable_rank": 32.70786666870117, |
| "spectral_norm": 37.25117492675781, |
| "frobenius_norm": 213.04241943359375, |
| "mp_bound": 18.850232362747192, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.3063623329883038, |
| "alpha_r2": 0.9141184802270717, |
| "condition_number": 55.86502456665039, |
| "top_10_sv": [ |
| 37.25117492675781, |
| 17.538530349731445, |
| 16.43047332763672, |
| 15.928857803344727, |
| 15.448025703430176, |
| 15.416549682617188, |
| 15.191793441772461, |
| 14.8709135055542, |
| 14.701964378356934, |
| 14.292315483093262 |
| ] |
| }, |
| "transformer.h.14.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 954.6704711914062, |
| "stable_rank": 85.12628173828125, |
| "spectral_norm": 23.248565673828125, |
| "frobenius_norm": 214.50035095214844, |
| "mp_bound": 19.985750913619995, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.2766327750388291, |
| "alpha_r2": 0.9013307804087409, |
| "condition_number": 80.55670166015625, |
| "top_10_sv": [ |
| 23.248565673828125, |
| 18.41067886352539, |
| 15.474272727966309, |
| 14.885811805725098, |
| 14.50477409362793, |
| 14.26229476928711, |
| 14.121097564697266, |
| 13.90327262878418, |
| 13.509455680847168, |
| 13.493183135986328 |
| ] |
| }, |
| "transformer.h.15.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 927.6941528320312, |
| "stable_rank": 123.29837799072266, |
| "spectral_norm": 16.74991798400879, |
| "frobenius_norm": 185.99075317382812, |
| "mp_bound": 14.70581637928394, |
| "n_above_mp": 3, |
| "n_total": 1024, |
| "signal_ratio": 0.0029296875, |
| "alpha": 0.33338608542644904, |
| "alpha_r2": 0.889507619370194, |
| "condition_number": 13.823485374450684, |
| "top_10_sv": [ |
| 16.74991798400879, |
| 15.065845489501953, |
| 14.728111267089844, |
| 14.221285820007324, |
| 14.000044822692871, |
| 13.83415412902832, |
| 13.696669578552246, |
| 13.493361473083496, |
| 13.296980857849121, |
| 13.158197402954102 |
| ] |
| }, |
| "transformer.h.15.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 734.9755859375, |
| "stable_rank": 39.57041549682617, |
| "spectral_norm": 15.907938957214355, |
| "frobenius_norm": 100.06892395019531, |
| "mp_bound": 3.168710231781006, |
| "n_above_mp": 315, |
| "n_total": 1024, |
| "signal_ratio": 0.3076171875, |
| "alpha": 0.5890122729886013, |
| "alpha_r2": 0.7620616339931927, |
| "condition_number": 118929.2421875, |
| "top_10_sv": [ |
| 15.907938957214355, |
| 9.037424087524414, |
| 8.875422477722168, |
| 8.679743766784668, |
| 8.60929012298584, |
| 8.428465843200684, |
| 8.394245147705078, |
| 8.310733795166016, |
| 8.301813125610352, |
| 8.17810344696045 |
| ] |
| }, |
| "transformer.h.15.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 938.0027465820312, |
| "stable_rank": 33.95222854614258, |
| "spectral_norm": 36.603248596191406, |
| "frobenius_norm": 213.28179931640625, |
| "mp_bound": 18.90978169441223, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.30609205088241465, |
| "alpha_r2": 0.9144551505747684, |
| "condition_number": 63.18374252319336, |
| "top_10_sv": [ |
| 36.603248596191406, |
| 17.053295135498047, |
| 16.544368743896484, |
| 16.148056030273438, |
| 16.080337524414062, |
| 15.509836196899414, |
| 15.409363746643066, |
| 15.122523307800293, |
| 14.887523651123047, |
| 14.568466186523438 |
| ] |
| }, |
| "transformer.h.15.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 954.9154663085938, |
| "stable_rank": 67.59476470947266, |
| "spectral_norm": 27.46138572692871, |
| "frobenius_norm": 225.77662658691406, |
| "mp_bound": 20.990967750549316, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.2748501163371319, |
| "alpha_r2": 0.8922496782749698, |
| "condition_number": 75.69937896728516, |
| "top_10_sv": [ |
| 27.46138572692871, |
| 20.168291091918945, |
| 16.48451805114746, |
| 14.961456298828125, |
| 14.724319458007812, |
| 14.283745765686035, |
| 14.013591766357422, |
| 13.970895767211914, |
| 13.828929901123047, |
| 13.712251663208008 |
| ] |
| }, |
| "transformer.h.16.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 946.042724609375, |
| "stable_rank": 144.74566650390625, |
| "spectral_norm": 15.667328834533691, |
| "frobenius_norm": 188.4940948486328, |
| "mp_bound": 15.990341963357688, |
| "n_above_mp": 0, |
| "n_total": 1024, |
| "signal_ratio": 0.0, |
| "alpha": 0.29235290161143895, |
| "alpha_r2": 0.8708847197371617, |
| "condition_number": 10.925378799438477, |
| "top_10_sv": [ |
| 15.667328834533691, |
| 13.777777671813965, |
| 13.580466270446777, |
| 13.480743408203125, |
| 13.149480819702148, |
| 12.728473663330078, |
| 12.464872360229492, |
| 12.249978065490723, |
| 12.104480743408203, |
| 12.058478355407715 |
| ] |
| }, |
| "transformer.h.16.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 771.8905639648438, |
| "stable_rank": 34.916778564453125, |
| "spectral_norm": 17.508285522460938, |
| "frobenius_norm": 103.45719909667969, |
| "mp_bound": 3.9519896507263184, |
| "n_above_mp": 251, |
| "n_total": 1024, |
| "signal_ratio": 0.2451171875, |
| "alpha": 0.5150372729977702, |
| "alpha_r2": 0.7343136754490875, |
| "condition_number": 7742.5263671875, |
| "top_10_sv": [ |
| 17.508285522460938, |
| 8.428668022155762, |
| 8.318235397338867, |
| 7.7397379875183105, |
| 7.588472843170166, |
| 7.562111854553223, |
| 7.487995624542236, |
| 7.291107177734375, |
| 7.21236515045166, |
| 7.17236852645874 |
| ] |
| }, |
| "transformer.h.16.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 940.1898193359375, |
| "stable_rank": 35.53369140625, |
| "spectral_norm": 35.824527740478516, |
| "frobenius_norm": 213.55052185058594, |
| "mp_bound": 19.066128730773926, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.29995952857301905, |
| "alpha_r2": 0.9104848364911763, |
| "condition_number": 64.50019836425781, |
| "top_10_sv": [ |
| 35.824527740478516, |
| 16.836957931518555, |
| 16.33154296875, |
| 15.77452564239502, |
| 15.625938415527344, |
| 15.192853927612305, |
| 14.959126472473145, |
| 14.744555473327637, |
| 14.429903030395508, |
| 14.108305931091309 |
| ] |
| }, |
| "transformer.h.16.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 953.3158569335938, |
| "stable_rank": 65.17062377929688, |
| "spectral_norm": 29.556535720825195, |
| "frobenius_norm": 238.60496520996094, |
| "mp_bound": 22.07623028755188, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.27669229111220206, |
| "alpha_r2": 0.8878361607414883, |
| "condition_number": 59.69606018066406, |
| "top_10_sv": [ |
| 29.556535720825195, |
| 20.332595825195312, |
| 17.3956356048584, |
| 15.779086112976074, |
| 15.581547737121582, |
| 15.288317680358887, |
| 15.058049201965332, |
| 14.879526138305664, |
| 14.627667427062988, |
| 14.318242073059082 |
| ] |
| }, |
| "transformer.h.17.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 947.1865234375, |
| "stable_rank": 142.42913818359375, |
| "spectral_norm": 15.703680992126465, |
| "frobenius_norm": 187.4134979248047, |
| "mp_bound": 15.91192969963621, |
| "n_above_mp": 0, |
| "n_total": 1024, |
| "signal_ratio": 0.0, |
| "alpha": 0.28839905133263977, |
| "alpha_r2": 0.8639671147195912, |
| "condition_number": 9.761466026306152, |
| "top_10_sv": [ |
| 15.703680992126465, |
| 14.009387016296387, |
| 13.541427612304688, |
| 13.323938369750977, |
| 12.849181175231934, |
| 12.739950180053711, |
| 12.229618072509766, |
| 12.002062797546387, |
| 11.895586013793945, |
| 11.779114723205566 |
| ] |
| }, |
| "transformer.h.17.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 761.0531005859375, |
| "stable_rank": 24.522497177124023, |
| "spectral_norm": 21.281356811523438, |
| "frobenius_norm": 105.38569641113281, |
| "mp_bound": 3.7680649757385254, |
| "n_above_mp": 283, |
| "n_total": 1024, |
| "signal_ratio": 0.2763671875, |
| "alpha": 0.5307534843077817, |
| "alpha_r2": 0.7242129172164211, |
| "condition_number": 3630.01904296875, |
| "top_10_sv": [ |
| 21.281356811523438, |
| 8.807740211486816, |
| 8.406106948852539, |
| 8.249408721923828, |
| 7.748202323913574, |
| 7.372308254241943, |
| 7.20876407623291, |
| 7.15388298034668, |
| 7.152019500732422, |
| 7.0919060707092285 |
| ] |
| }, |
| "transformer.h.17.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 941.4163208007812, |
| "stable_rank": 32.30669403076172, |
| "spectral_norm": 37.66846466064453, |
| "frobenius_norm": 214.10369873046875, |
| "mp_bound": 19.23757553100586, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.29567148822186234, |
| "alpha_r2": 0.9092601420648001, |
| "condition_number": 106.81797790527344, |
| "top_10_sv": [ |
| 37.66846466064453, |
| 16.419097900390625, |
| 15.927478790283203, |
| 15.601706504821777, |
| 15.464677810668945, |
| 15.215024948120117, |
| 14.866231918334961, |
| 14.730674743652344, |
| 14.174066543579102, |
| 14.107979774475098 |
| ] |
| }, |
| "transformer.h.17.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 955.3389892578125, |
| "stable_rank": 56.71522521972656, |
| "spectral_norm": 32.85487747192383, |
| "frobenius_norm": 247.42848205566406, |
| "mp_bound": 23.075778007507324, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.26997116277589567, |
| "alpha_r2": 0.8814747074927219, |
| "condition_number": 65.88758850097656, |
| "top_10_sv": [ |
| 32.85487747192383, |
| 20.14518165588379, |
| 17.330669403076172, |
| 16.186241149902344, |
| 15.963332176208496, |
| 15.739975929260254, |
| 15.285326957702637, |
| 14.80821418762207, |
| 14.750692367553711, |
| 14.634135246276855 |
| ] |
| }, |
| "transformer.h.18.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 946.826171875, |
| "stable_rank": 142.5345458984375, |
| "spectral_norm": 15.78872013092041, |
| "frobenius_norm": 188.49810791015625, |
| "mp_bound": 15.980405578724305, |
| "n_above_mp": 0, |
| "n_total": 1024, |
| "signal_ratio": 0.0, |
| "alpha": 0.29059627057789356, |
| "alpha_r2": 0.8667678175025767, |
| "condition_number": 8.09025764465332, |
| "top_10_sv": [ |
| 15.78872013092041, |
| 13.945246696472168, |
| 13.766387939453125, |
| 13.405774116516113, |
| 13.049812316894531, |
| 12.528767585754395, |
| 12.479240417480469, |
| 12.379585266113281, |
| 12.237228393554688, |
| 12.036273956298828 |
| ] |
| }, |
| "transformer.h.18.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 786.9091796875, |
| "stable_rank": 25.3469181060791, |
| "spectral_norm": 21.475971221923828, |
| "frobenius_norm": 108.12232971191406, |
| "mp_bound": 4.413455486297607, |
| "n_above_mp": 222, |
| "n_total": 1024, |
| "signal_ratio": 0.216796875, |
| "alpha": 0.478483490126483, |
| "alpha_r2": 0.7161237347700887, |
| "condition_number": 69973.6328125, |
| "top_10_sv": [ |
| 21.475971221923828, |
| 8.631346702575684, |
| 8.211710929870605, |
| 8.147095680236816, |
| 7.6618499755859375, |
| 7.513932228088379, |
| 7.377252101898193, |
| 7.279123783111572, |
| 7.242250919342041, |
| 7.182393550872803 |
| ] |
| }, |
| "transformer.h.18.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 944.5552978515625, |
| "stable_rank": 33.118221282958984, |
| "spectral_norm": 37.25223159790039, |
| "frobenius_norm": 214.38075256347656, |
| "mp_bound": 19.486257076263428, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.289183381623581, |
| "alpha_r2": 0.9070325638595973, |
| "condition_number": 98.16948699951172, |
| "top_10_sv": [ |
| 37.25223159790039, |
| 15.790482521057129, |
| 15.473872184753418, |
| 15.28934097290039, |
| 15.085716247558594, |
| 14.840494155883789, |
| 14.610716819763184, |
| 14.552131652832031, |
| 14.460594177246094, |
| 14.226823806762695 |
| ] |
| }, |
| "transformer.h.18.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 955.5623168945312, |
| "stable_rank": 62.169673919677734, |
| "spectral_norm": 32.66622543334961, |
| "frobenius_norm": 257.5658264160156, |
| "mp_bound": 24.027284145355225, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.2709203055980388, |
| "alpha_r2": 0.8864060504096509, |
| "condition_number": 42.17201232910156, |
| "top_10_sv": [ |
| 32.66622543334961, |
| 21.241456985473633, |
| 18.316198348999023, |
| 18.020597457885742, |
| 17.09003257751465, |
| 16.6181583404541, |
| 16.53766632080078, |
| 16.19798469543457, |
| 15.890321731567383, |
| 15.60676383972168 |
| ] |
| }, |
| "transformer.h.19.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 942.326171875, |
| "stable_rank": 155.50115966796875, |
| "spectral_norm": 14.992883682250977, |
| "frobenius_norm": 186.96141052246094, |
| "mp_bound": 15.57602523915173, |
| "n_above_mp": 0, |
| "n_total": 1024, |
| "signal_ratio": 0.0, |
| "alpha": 0.2982009204884547, |
| "alpha_r2": 0.8572427405933445, |
| "condition_number": 7.912736892700195, |
| "top_10_sv": [ |
| 14.992883682250977, |
| 14.037845611572266, |
| 13.398187637329102, |
| 13.230586051940918, |
| 12.737166404724121, |
| 12.300458908081055, |
| 12.292224884033203, |
| 12.19207763671875, |
| 12.00888729095459, |
| 11.759111404418945 |
| ] |
| }, |
| "transformer.h.19.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 807.1912231445312, |
| "stable_rank": 17.76304054260254, |
| "spectral_norm": 27.394882202148438, |
| "frobenius_norm": 115.45907592773438, |
| "mp_bound": 5.350113868713379, |
| "n_above_mp": 135, |
| "n_total": 1024, |
| "signal_ratio": 0.1318359375, |
| "alpha": 0.4361821075722079, |
| "alpha_r2": 0.7264706510936031, |
| "condition_number": 15827.857421875, |
| "top_10_sv": [ |
| 27.394882202148438, |
| 9.131587028503418, |
| 8.159581184387207, |
| 7.967867374420166, |
| 7.304231643676758, |
| 7.250730037689209, |
| 7.2139668464660645, |
| 7.170718669891357, |
| 7.162339687347412, |
| 7.095859050750732 |
| ] |
| }, |
| "transformer.h.19.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 946.293212890625, |
| "stable_rank": 33.909671783447266, |
| "spectral_norm": 36.853206634521484, |
| "frobenius_norm": 214.6036376953125, |
| "mp_bound": 19.577980041503906, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.2855408853304053, |
| "alpha_r2": 0.9022405306041533, |
| "condition_number": 91.69256591796875, |
| "top_10_sv": [ |
| 36.853206634521484, |
| 15.805397033691406, |
| 15.353994369506836, |
| 15.227054595947266, |
| 15.066706657409668, |
| 14.814555168151855, |
| 14.390864372253418, |
| 14.297786712646484, |
| 14.181514739990234, |
| 13.928579330444336 |
| ] |
| }, |
| "transformer.h.19.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 956.4804077148438, |
| "stable_rank": 52.073970794677734, |
| "spectral_norm": 37.2791862487793, |
| "frobenius_norm": 269.0151672363281, |
| "mp_bound": 25.231703281402588, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.26739284885206765, |
| "alpha_r2": 0.8835308501225437, |
| "condition_number": 45.95317077636719, |
| "top_10_sv": [ |
| 37.2791862487793, |
| 20.44887351989746, |
| 19.732032775878906, |
| 18.161226272583008, |
| 17.561620712280273, |
| 17.188261032104492, |
| 17.060258865356445, |
| 16.776437759399414, |
| 16.5764217376709, |
| 16.08405303955078 |
| ] |
| }, |
| "transformer.h.20.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 936.0965576171875, |
| "stable_rank": 156.9998779296875, |
| "spectral_norm": 14.947098731994629, |
| "frobenius_norm": 187.28652954101562, |
| "mp_bound": 15.20929343045714, |
| "n_above_mp": 0, |
| "n_total": 1024, |
| "signal_ratio": 0.0, |
| "alpha": 0.3120911362337675, |
| "alpha_r2": 0.858786145933246, |
| "condition_number": 8.875840187072754, |
| "top_10_sv": [ |
| 14.947098731994629, |
| 14.385692596435547, |
| 13.843692779541016, |
| 13.672863960266113, |
| 13.224756240844727, |
| 12.674160957336426, |
| 12.55836296081543, |
| 12.521924018859863, |
| 12.441327095031738, |
| 11.893940925598145 |
| ] |
| }, |
| "transformer.h.20.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 801.7127685546875, |
| "stable_rank": 16.856487274169922, |
| "spectral_norm": 28.67116355895996, |
| "frobenius_norm": 117.71420288085938, |
| "mp_bound": 5.2284698486328125, |
| "n_above_mp": 163, |
| "n_total": 1024, |
| "signal_ratio": 0.1591796875, |
| "alpha": 0.444035142929919, |
| "alpha_r2": 0.7121527583409499, |
| "condition_number": 31572.4609375, |
| "top_10_sv": [ |
| 28.67116355895996, |
| 9.656560897827148, |
| 8.204039573669434, |
| 7.896355152130127, |
| 7.5075554847717285, |
| 7.466111660003662, |
| 7.3787007331848145, |
| 7.314346790313721, |
| 7.256669044494629, |
| 7.230027675628662 |
| ] |
| }, |
| "transformer.h.20.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 948.0650024414062, |
| "stable_rank": 35.34494400024414, |
| "spectral_norm": 36.2116813659668, |
| "frobenius_norm": 215.2843017578125, |
| "mp_bound": 19.70071792602539, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.2835115515690549, |
| "alpha_r2": 0.8999757829277988, |
| "condition_number": 68.69733428955078, |
| "top_10_sv": [ |
| 36.2116813659668, |
| 15.233745574951172, |
| 15.040852546691895, |
| 15.021078109741211, |
| 14.903677940368652, |
| 14.672463417053223, |
| 14.484726905822754, |
| 14.234298706054688, |
| 13.947386741638184, |
| 13.761749267578125 |
| ] |
| }, |
| "transformer.h.20.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 957.45361328125, |
| "stable_rank": 53.200496673583984, |
| "spectral_norm": 38.38079071044922, |
| "frobenius_norm": 279.94439697265625, |
| "mp_bound": 26.34588861465454, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.26655693080921455, |
| "alpha_r2": 0.8858868317060345, |
| "condition_number": 45.9372444152832, |
| "top_10_sv": [ |
| 38.38079071044922, |
| 20.71181869506836, |
| 19.3667049407959, |
| 19.097551345825195, |
| 18.877281188964844, |
| 18.161039352416992, |
| 17.817678451538086, |
| 17.619935989379883, |
| 17.085142135620117, |
| 17.002307891845703 |
| ] |
| }, |
| "transformer.h.21.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 941.8446044921875, |
| "stable_rank": 150.69029235839844, |
| "spectral_norm": 15.391748428344727, |
| "frobenius_norm": 188.94290161132812, |
| "mp_bound": 15.802790776950205, |
| "n_above_mp": 0, |
| "n_total": 1024, |
| "signal_ratio": 0.0, |
| "alpha": 0.3000971543853719, |
| "alpha_r2": 0.8719087171149849, |
| "condition_number": 11.076602935791016, |
| "top_10_sv": [ |
| 15.391748428344727, |
| 14.966634750366211, |
| 14.548294067382812, |
| 13.83862018585205, |
| 13.741484642028809, |
| 13.272273063659668, |
| 13.161110877990723, |
| 12.848255157470703, |
| 12.513276100158691, |
| 12.309864044189453 |
| ] |
| }, |
| "transformer.h.21.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 802.9078979492188, |
| "stable_rank": 10.218631744384766, |
| "spectral_norm": 38.61031723022461, |
| "frobenius_norm": 123.42403411865234, |
| "mp_bound": 5.600226402282715, |
| "n_above_mp": 132, |
| "n_total": 1024, |
| "signal_ratio": 0.12890625, |
| "alpha": 0.43585709299085673, |
| "alpha_r2": 0.7264214720046629, |
| "condition_number": 16619.98828125, |
| "top_10_sv": [ |
| 38.61031723022461, |
| 12.657886505126953, |
| 9.889753341674805, |
| 9.055880546569824, |
| 8.717723846435547, |
| 7.8169941902160645, |
| 7.405618190765381, |
| 7.38328742980957, |
| 7.281287670135498, |
| 7.214484691619873 |
| ] |
| }, |
| "transformer.h.21.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 947.9262084960938, |
| "stable_rank": 36.02553176879883, |
| "spectral_norm": 35.89358139038086, |
| "frobenius_norm": 215.43785095214844, |
| "mp_bound": 19.718321800231934, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.2838725389249319, |
| "alpha_r2": 0.8980818942031933, |
| "condition_number": 72.88961029052734, |
| "top_10_sv": [ |
| 35.89358139038086, |
| 15.480804443359375, |
| 14.935028076171875, |
| 14.85132122039795, |
| 14.700712203979492, |
| 14.630121231079102, |
| 14.243003845214844, |
| 14.099127769470215, |
| 13.748274803161621, |
| 13.597418785095215 |
| ] |
| }, |
| "transformer.h.21.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 957.150146484375, |
| "stable_rank": 40.031089782714844, |
| "spectral_norm": 45.864524841308594, |
| "frobenius_norm": 290.1854248046875, |
| "mp_bound": 27.241085529327393, |
| "n_above_mp": 2, |
| "n_total": 1024, |
| "signal_ratio": 0.001953125, |
| "alpha": 0.26491965004813967, |
| "alpha_r2": 0.8792307905237831, |
| "condition_number": 55.03404998779297, |
| "top_10_sv": [ |
| 45.864524841308594, |
| 28.97075080871582, |
| 19.798044204711914, |
| 18.97209358215332, |
| 18.70978355407715, |
| 18.018537521362305, |
| 17.47182273864746, |
| 17.274417877197266, |
| 16.987682342529297, |
| 16.63946533203125 |
| ] |
| }, |
| "transformer.h.22.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 910.2899780273438, |
| "stable_rank": 125.6297836303711, |
| "spectral_norm": 16.882688522338867, |
| "frobenius_norm": 189.22909545898438, |
| "mp_bound": 14.07356805741784, |
| "n_above_mp": 5, |
| "n_total": 1024, |
| "signal_ratio": 0.0048828125, |
| "alpha": 0.3631109400306433, |
| "alpha_r2": 0.8785417413420795, |
| "condition_number": 18.38054656982422, |
| "top_10_sv": [ |
| 16.882688522338867, |
| 15.70670223236084, |
| 15.6824312210083, |
| 14.943181037902832, |
| 14.69243335723877, |
| 13.932169914245605, |
| 13.615700721740723, |
| 13.460670471191406, |
| 13.248589515686035, |
| 12.95559024810791 |
| ] |
| }, |
| "transformer.h.22.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 791.9282836914062, |
| "stable_rank": 7.863592624664307, |
| "spectral_norm": 47.40917205810547, |
| "frobenius_norm": 132.9452667236328, |
| "mp_bound": 5.678843975067139, |
| "n_above_mp": 149, |
| "n_total": 1024, |
| "signal_ratio": 0.1455078125, |
| "alpha": 0.4420612606508839, |
| "alpha_r2": 0.7218298086729467, |
| "condition_number": 11620.599609375, |
| "top_10_sv": [ |
| 47.40917205810547, |
| 18.899751663208008, |
| 13.706535339355469, |
| 12.33392333984375, |
| 10.662578582763672, |
| 8.17455768585205, |
| 7.832640171051025, |
| 7.784000396728516, |
| 7.671928882598877, |
| 7.559737682342529 |
| ] |
| }, |
| "transformer.h.22.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 947.9542846679688, |
| "stable_rank": 35.02265548706055, |
| "spectral_norm": 36.702762603759766, |
| "frobenius_norm": 217.2067413330078, |
| "mp_bound": 19.8194797039032, |
| "n_above_mp": 1, |
| "n_total": 1024, |
| "signal_ratio": 0.0009765625, |
| "alpha": 0.28619079431362643, |
| "alpha_r2": 0.9002755587642126, |
| "condition_number": 57.37492752075195, |
| "top_10_sv": [ |
| 36.702762603759766, |
| 15.816225051879883, |
| 15.100847244262695, |
| 14.771247863769531, |
| 14.534582138061523, |
| 14.363577842712402, |
| 14.114562034606934, |
| 14.111546516418457, |
| 13.981919288635254, |
| 13.779252052307129 |
| ] |
| }, |
| "transformer.h.22.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 947.5176391601562, |
| "stable_rank": 17.500995635986328, |
| "spectral_norm": 73.44800567626953, |
| "frobenius_norm": 307.2637939453125, |
| "mp_bound": 27.96702003479004, |
| "n_above_mp": 3, |
| "n_total": 1024, |
| "signal_ratio": 0.0029296875, |
| "alpha": 0.27600893237986174, |
| "alpha_r2": 0.8907517199504488, |
| "condition_number": 77.1149673461914, |
| "top_10_sv": [ |
| 73.44800567626953, |
| 46.17403030395508, |
| 41.175079345703125, |
| 19.932249069213867, |
| 19.340335845947266, |
| 18.94622039794922, |
| 18.65909767150879, |
| 17.994863510131836, |
| 17.704139709472656, |
| 17.6275577545166 |
| ] |
| }, |
| "transformer.h.23.attn.c_attn.weight": { |
| "shape": [ |
| 1024, |
| 3072 |
| ], |
| "effective_rank": 920.36767578125, |
| "stable_rank": 83.51404571533203, |
| "spectral_norm": 20.953210830688477, |
| "frobenius_norm": 191.48306274414062, |
| "mp_bound": 14.838865915472999, |
| "n_above_mp": 6, |
| "n_total": 1024, |
| "signal_ratio": 0.005859375, |
| "alpha": 0.347956924436965, |
| "alpha_r2": 0.9030461026469062, |
| "condition_number": 30.88874626159668, |
| "top_10_sv": [ |
| 20.953210830688477, |
| 18.181076049804688, |
| 16.9401798248291, |
| 16.4212703704834, |
| 16.156402587890625, |
| 15.156576156616211, |
| 14.803506851196289, |
| 14.501842498779297, |
| 14.308381080627441, |
| 14.216387748718262 |
| ] |
| }, |
| "transformer.h.23.attn.c_proj.weight": { |
| "shape": [ |
| 1024, |
| 1024 |
| ], |
| "effective_rank": 725.8294067382812, |
| "stable_rank": 7.097742080688477, |
| "spectral_norm": 54.51199722290039, |
| "frobenius_norm": 145.22862243652344, |
| "mp_bound": 4.808560848236084, |
| "n_above_mp": 205, |
| "n_total": 1024, |
| "signal_ratio": 0.2001953125, |
| "alpha": 0.5165891829463887, |
| "alpha_r2": 0.7805439618628243, |
| "condition_number": 81928.5625, |
| "top_10_sv": [ |
| 54.51199722290039, |
| 51.59781265258789, |
| 31.993318557739258, |
| 28.781885147094727, |
| 20.638469696044922, |
| 19.69414520263672, |
| 17.257286071777344, |
| 11.122438430786133, |
| 9.82822036743164, |
| 8.54098129272461 |
| ] |
| }, |
| "transformer.h.23.mlp.c_fc.weight": { |
| "shape": [ |
| 1024, |
| 4096 |
| ], |
| "effective_rank": 949.3385009765625, |
| "stable_rank": 32.35031509399414, |
| "spectral_norm": 39.07307052612305, |
| "frobenius_norm": 222.2372283935547, |
| "mp_bound": 20.35976243019104, |
| "n_above_mp": 2, |
| "n_total": 1024, |
| "signal_ratio": 0.001953125, |
| "alpha": 0.2880342497839811, |
| "alpha_r2": 0.9166713464789396, |
| "condition_number": 156.26080322265625, |
| "top_10_sv": [ |
| 39.07307052612305, |
| 21.5715389251709, |
| 18.421354293823242, |
| 18.168561935424805, |
| 16.698549270629883, |
| 16.427534103393555, |
| 15.287932395935059, |
| 14.903624534606934, |
| 14.39078140258789, |
| 14.156229019165039 |
| ] |
| }, |
| "transformer.h.23.mlp.c_proj.weight": { |
| "shape": [ |
| 4096, |
| 1024 |
| ], |
| "effective_rank": 952.0796508789062, |
| "stable_rank": 23.627531051635742, |
| "spectral_norm": 65.28954315185547, |
| "frobenius_norm": 317.3604431152344, |
| "mp_bound": 29.342853784561157, |
| "n_above_mp": 3, |
| "n_total": 1024, |
| "signal_ratio": 0.0029296875, |
| "alpha": 0.2737939337158801, |
| "alpha_r2": 0.9043915028204278, |
| "condition_number": 46.49357223510742, |
| "top_10_sv": [ |
| 65.28954315185547, |
| 45.984188079833984, |
| 36.61665725708008, |
| 27.076080322265625, |
| 23.387649536132812, |
| 21.129486083984375, |
| 20.738719940185547, |
| 20.499759674072266, |
| 20.096046447753906, |
| 19.36311912536621 |
| ] |
| } |
| } |