| layer,module,loss,samples,damp,time | |
| 0,self_attn.o_proj,failsafe(rtn): 0.0018692,0.00000,0.191 | |
| 0,self_attn.q_proj,0.0000026754,0.05000,2.073 | |
| 0,self_attn.v_proj,0.0000002852,0.05000,2.125 | |
| 0,self_attn.k_proj,0.0000001276,0.05000,2.127 | |
| 0,self_attn.o_gate,0.0000056994,0.05000,0.670 | |
| 0,mlp.up_proj,0.0000029969,0.05000,2.432 | |
| 0,mlp.gate_proj,0.0000021178,0.05000,2.590 | |
| 0,mlp.down_proj,0.0000014851,0.05000,4.498 | |
| 1,self_attn.o_proj,0.0000006074,0.05000,3.033 | |
| 1,self_attn.k_proj,0.0000217632,0.05000,3.057 | |
| 1,self_attn.q_proj,0.0000214327,0.05000,3.062 | |
| 1,self_attn.v_proj,0.0000276329,0.05000,3.066 | |
| 1,mlp.up_proj,0.0000020071,0.05000,2.278 | |
| 1,mlp.gate_proj,0.0000019046,0.05000,2.293 | |
| 1,mlp.down_proj,0.0000005985,0.05000,4.240 | |
| 2,self_attn.v_proj,0.0000127656,0.05000,3.075 | |
| 2,self_attn.k_proj,0.0000107873,0.05000,3.085 | |
| 2,self_attn.o_proj,0.0000002624,0.05000,3.097 | |
| 2,self_attn.q_proj,0.0000108437,0.05000,3.099 | |
| 2,mlp.up_proj,0.0000023888,0.05000,2.368 | |
| 2,mlp.gate_proj,0.0000023777,0.05000,2.405 | |
| 2,mlp.down_proj,0.0000010156,0.05000,4.324 | |
| 3,self_attn.o_proj,0.0000003675,0.05000,2.948 | |
| 3,self_attn.q_proj,0.0000134180,0.05000,2.948 | |
| 3,self_attn.v_proj,0.0000144534,0.05000,2.965 | |
| 3,self_attn.k_proj,0.0000137554,0.05000,2.968 | |
| 3,mlp.gate_proj,0.0000048571,0.05000,2.246 | |
| 3,mlp.up_proj,0.0000042930,0.05000,2.267 | |
| 3,mlp.down_proj,0.0000015000,0.05000,4.228 | |
| 4,self_attn.k_proj,0.0000108806,0.05000,2.927 | |
| 4,self_attn.v_proj,0.0000128837,0.05000,2.952 | |
| 4,self_attn.q_proj,0.0000106413,0.05000,2.965 | |
| 4,self_attn.o_proj,0.0000002981,0.05000,2.969 | |
| 4,mlp.up_proj,0.0000057718,0.05000,2.376 | |
| 4,mlp.gate_proj,0.0000063566,0.05000,2.425 | |
| 4,mlp.down_proj,0.0000016298,0.05000,4.328 | |
| 5,self_attn.q_proj,0.0000115468,0.05000,2.960 | |
| 5,self_attn.o_proj,0.0000004831,0.05000,2.972 | |
| 5,self_attn.k_proj,0.0000121797,0.05000,2.985 | |
| 5,self_attn.v_proj,0.0000130083,0.05000,2.986 | |
| 5,mlp.gate_proj,0.0000078714,0.05000,2.229 | |
| 5,mlp.up_proj,0.0000071630,0.05000,2.238 | |
| 5,mlp.down_proj,0.0000017172,0.05000,4.199 | |
| 6,self_attn.v_proj,0.0000116934,0.05000,2.973 | |
| 6,self_attn.k_proj,0.0000133436,0.05000,2.995 | |
| 6,self_attn.o_proj,0.0000004840,0.05000,3.018 | |
| 6,self_attn.q_proj,0.0000111725,0.05000,3.020 | |
| 6,mlp.up_proj,0.0000088430,0.05000,2.270 | |
| 6,mlp.gate_proj,0.0000095918,0.05000,2.285 | |
| 6,mlp.down_proj,0.0000019229,0.05000,4.204 | |
| 7,self_attn.o_proj,0.0000005164,0.05000,2.996 | |
| 7,self_attn.v_proj,0.0000109389,0.05000,3.003 | |
| 7,self_attn.k_proj,0.0000121498,0.05000,3.006 | |
| 7,self_attn.q_proj,0.0000099772,0.05000,3.012 | |
| 7,mlp.up_proj,0.0000092494,0.05000,2.391 | |
| 7,mlp.gate_proj,0.0000098175,0.05000,2.396 | |
| 7,mlp.down_proj,0.0000022285,0.05000,4.357 | |
| 8,self_attn.v_proj,0.0000075184,0.05000,2.976 | |
| 8,self_attn.k_proj,0.0000085118,0.05000,2.983 | |
| 8,self_attn.o_proj,0.0000008018,0.05000,2.994 | |
| 8,self_attn.q_proj,0.0000069240,0.05000,2.997 | |
| 8,mlp.up_proj,0.0000095222,0.05000,2.494 | |
| 8,mlp.gate_proj,0.0000104520,0.05000,2.512 | |
| 8,mlp.down_proj,0.0000021819,0.05000,4.426 | |
| 9,self_attn.o_proj,failsafe(rtn): 0.0020905,0.00000,0.100 | |
| 9,self_attn.q_proj,0.0000084631,0.05000,2.006 | |
| 9,self_attn.v_proj,0.0000005231,0.05000,2.039 | |
| 9,self_attn.k_proj,0.0000008172,0.05000,2.044 | |
| 9,self_attn.o_gate,0.0000059200,0.05000,0.671 | |
| 9,mlp.up_proj,0.0000109114,0.05000,2.436 | |
| 9,mlp.gate_proj,0.0000114498,0.05000,2.474 | |
| 9,mlp.down_proj,0.0000027955,0.05000,4.400 | |
| 10,self_attn.o_proj,0.0000007525,0.05000,3.000 | |
| 10,self_attn.q_proj,0.0000101399,0.05000,3.006 | |
| 10,self_attn.k_proj,0.0000126859,0.05000,3.008 | |
| 10,self_attn.v_proj,0.0000108107,0.05000,3.016 | |
| 10,mlp.up_proj,0.0000109930,0.05000,2.540 | |
| 10,mlp.gate_proj,0.0000113431,0.05000,2.546 | |
| 10,mlp.down_proj,0.0000026212,0.05000,4.586 | |
| 11,self_attn.v_proj,0.0000078508,0.05000,3.009 | |
| 11,self_attn.o_proj,0.0000008535,0.05000,3.016 | |
| 11,self_attn.k_proj,0.0000090067,0.05000,3.021 | |
| 11,self_attn.q_proj,0.0000073924,0.05000,3.026 | |
| 11,mlp.up_proj,0.0000111078,0.05000,2.454 | |
| 11,mlp.gate_proj,0.0000110846,0.05000,2.462 | |
| 11,mlp.down_proj,0.0000028039,0.05000,4.392 | |
| 12,self_attn.o_proj,0.0000010843,0.05000,2.991 | |
| 12,self_attn.q_proj,0.0000093472,0.05000,2.999 | |
| 12,self_attn.k_proj,0.0000102964,0.05000,3.016 | |
| 12,self_attn.v_proj,0.0000108772,0.05000,3.027 | |
| 12,mlp.gate_proj,0.0000107207,0.05000,3.187 | |
| 12,mlp.up_proj,0.0000109570,0.05000,3.195 | |
| 12,mlp.down_proj,0.0000028664,0.05000,5.429 | |
| 13,self_attn.o_proj,0.0000011266,0.05000,3.005 | |
| 13,self_attn.k_proj,0.0000091001,0.05000,3.011 | |
| 13,self_attn.v_proj,0.0000072897,0.05000,3.022 | |
| 13,self_attn.q_proj,0.0000071197,0.05000,3.026 | |
| 13,mlp.up_proj,0.0000113227,0.05000,2.730 | |
| 13,mlp.gate_proj,0.0000108762,0.05000,2.742 | |
| 13,mlp.down_proj,0.0000030420,0.05000,4.675 | |
| 14,self_attn.q_proj,0.0000077435,0.05000,2.935 | |
| 14,self_attn.o_proj,0.0000014335,0.05000,2.956 | |
| 14,self_attn.v_proj,0.0000076484,0.05000,2.956 | |
| 14,self_attn.k_proj,0.0000088960,0.05000,2.965 | |
| 14,mlp.gate_proj,0.0000103122,0.05000,3.145 | |
| 14,mlp.up_proj,0.0000113208,0.05000,3.171 | |
| 14,mlp.down_proj,0.0000034068,0.05000,5.263 | |
| 15,self_attn.v_proj,0.0000053345,0.05000,3.992 | |
| 15,self_attn.k_proj,0.0000064964,0.05000,4.008 | |
| 15,self_attn.o_proj,0.0000019109,0.05000,4.021 | |
| 15,self_attn.q_proj,0.0000050335,0.05000,4.064 | |
| 15,mlp.gate_proj,0.0000107005,0.05000,2.665 | |
| 15,mlp.up_proj,0.0000109866,0.05000,2.683 | |
| 15,mlp.down_proj,0.0000029978,0.05000,4.616 | |
| 16,self_attn.o_proj,failsafe(rtn): 0.0021057,0.00000,0.098 | |
| 16,self_attn.k_proj,0.0000007737,0.05000,1.938 | |
| 16,self_attn.q_proj,0.0000079238,0.05000,1.948 | |
| 16,self_attn.v_proj,0.0000004417,0.05000,1.950 | |
| 16,self_attn.o_gate,0.0000046259,0.05000,0.657 | |
| 16,mlp.gate_proj,0.0000140706,0.05000,2.433 | |
| 16,mlp.up_proj,0.0000137589,0.05000,2.452 | |
| 16,mlp.down_proj,0.0000043833,0.05000,4.380 | |
| 17,self_attn.o_proj,failsafe(rtn): 0.0021667,0.00000,0.099 | |
| 17,self_attn.q_proj,0.0000081710,0.05000,1.895 | |
| 17,self_attn.k_proj,0.0000007150,0.05000,1.922 | |
| 17,self_attn.v_proj,0.0000006462,0.05000,1.936 | |
| 17,self_attn.o_gate,0.0000054756,0.05000,0.764 | |
| 17,mlp.up_proj,0.0000150262,0.05000,2.489 | |
| 17,mlp.gate_proj,0.0000164731,0.05000,2.571 | |
| 17,mlp.down_proj,0.0000042990,0.05000,4.477 | |
| 18,self_attn.q_proj,0.0000070301,0.05000,2.990 | |
| 18,self_attn.o_proj,0.0000013950,0.05000,3.027 | |
| 18,self_attn.k_proj,0.0000099344,0.05000,3.036 | |
| 18,self_attn.v_proj,0.0000070985,0.05000,3.040 | |
| 18,mlp.up_proj,0.0000136936,0.05000,2.253 | |
| 18,mlp.gate_proj,0.0000142274,0.05000,2.267 | |
| 18,mlp.down_proj,0.0000037070,0.05000,4.206 | |
| 19,self_attn.v_proj,0.0000092693,0.05000,2.947 | |
| 19,self_attn.k_proj,0.0000115510,0.05000,2.960 | |
| 19,self_attn.q_proj,0.0000088237,0.05000,2.965 | |
| 19,self_attn.o_proj,0.0000013337,0.05000,2.967 | |
| 19,mlp.gate_proj,0.0000137574,0.05000,2.278 | |
| 19,mlp.up_proj,0.0000140591,0.05000,2.286 | |
| 19,mlp.down_proj,0.0000041625,0.05000,4.212 | |
| 20,self_attn.o_proj,0.0000021536,0.05000,2.909 | |
| 20,self_attn.q_proj,0.0000069513,0.05000,2.916 | |
| 20,self_attn.v_proj,0.0000069475,0.05000,2.927 | |
| 20,self_attn.k_proj,0.0000093887,0.05000,2.932 | |
| 20,mlp.gate_proj,0.0000136980,0.05000,2.209 | |
| 20,mlp.up_proj,0.0000144703,0.05000,2.222 | |
| 20,mlp.down_proj,0.0000054780,0.05000,4.168 | |
| 21,self_attn.k_proj,0.0000082680,0.05000,2.952 | |
| 21,self_attn.v_proj,0.0000077593,0.05000,2.964 | |
| 21,self_attn.q_proj,0.0000072644,0.05000,2.969 | |
| 21,self_attn.o_proj,0.0000032405,0.05000,2.975 | |
| 21,mlp.up_proj,0.0000137799,0.05000,2.308 | |
| 21,mlp.gate_proj,0.0000128238,0.05000,2.328 | |
| 21,mlp.down_proj,0.0000064990,0.05000,4.246 | |
| 22,self_attn.o_proj,failsafe(rtn): 0.0022430,0.00000,0.101 | |
| 22,self_attn.q_proj,0.0000099220,0.05000,1.950 | |
| 22,self_attn.k_proj,0.0000006394,0.05000,1.984 | |
| 22,self_attn.v_proj,0.0000011052,0.05000,2.031 | |
| 22,self_attn.o_gate,0.0000052709,0.05000,0.694 | |
| 22,mlp.gate_proj,0.0000165171,0.05000,2.286 | |
| 22,mlp.up_proj,0.0000177268,0.05000,2.291 | |
| 22,mlp.down_proj,0.0000096671,0.05000,4.204 | |
| 23,self_attn.o_proj,0.0000029835,0.05000,2.911 | |
| 23,self_attn.q_proj,0.0000096832,0.05000,2.918 | |
| 23,self_attn.v_proj,0.0000093455,0.05000,2.929 | |
| 23,self_attn.k_proj,0.0000124099,0.05000,2.932 | |
| 23,mlp.gate_proj,0.0000193158,0.05000,2.231 | |
| 23,mlp.up_proj,0.0000207531,0.05000,2.240 | |
| 23,mlp.down_proj,0.0000115674,0.05000,4.174 | |
| 24,self_attn.k_proj,0.0000102661,0.05000,2.938 | |
| 24,self_attn.v_proj,0.0000083745,0.05000,2.960 | |
| 24,self_attn.o_proj,0.0000050032,0.05000,2.966 | |
| 24,self_attn.q_proj,0.0000082454,0.05000,2.969 | |
| 24,mlp.up_proj,0.0000245099,0.05000,2.249 | |
| 24,mlp.gate_proj,0.0000226359,0.05000,2.264 | |
| 24,mlp.down_proj,0.0000187627,0.05000,4.206 | |
| 25,self_attn.q_proj,0.0000086351,0.05000,2.928 | |
| 25,self_attn.k_proj,0.0000109922,0.05000,2.955 | |
| 25,self_attn.o_proj,0.0000065444,0.05000,2.956 | |
| 25,self_attn.v_proj,0.0000085155,0.05000,2.961 | |
| 25,mlp.gate_proj,0.0000262606,0.05000,2.240 | |
| 25,mlp.up_proj,0.0000288957,0.05000,2.263 | |
| 25,mlp.down_proj,0.0000330173,0.05000,4.186 | |
| 26,self_attn.q_proj,0.0000138319,0.05000,2.959 | |
| 26,self_attn.o_proj,0.0000122393,0.05000,2.967 | |
| 26,self_attn.v_proj,0.0000135245,0.05000,2.970 | |
| 26,self_attn.k_proj,0.0000197168,0.05000,2.987 | |
| 26,mlp.gate_proj,0.0000293236,0.05000,2.263 | |
| 26,mlp.up_proj,0.0000331990,0.05000,2.278 | |
| 26,mlp.down_proj,0.0000287111,0.05000,4.210 | |
| 27,self_attn.v_proj,0.0000121277,0.05000,2.904 | |
| 27,self_attn.o_proj,0.0000077357,0.05000,2.916 | |
| 27,self_attn.q_proj,0.0000129707,0.05000,2.921 | |
| 27,self_attn.k_proj,0.0000142470,0.05000,2.921 | |
| 27,mlp.gate_proj,0.0000332846,0.05000,2.250 | |
| 27,mlp.up_proj,0.0000383662,0.05000,2.295 | |
| 27,mlp.down_proj,0.0000375026,0.05000,4.208 | |
| 28,self_attn.k_proj,0.0000229948,0.05000,2.890 | |
| 28,self_attn.q_proj,0.0000166297,0.05000,2.907 | |
| 28,self_attn.v_proj,0.0000171070,0.05000,2.913 | |
| 28,self_attn.o_proj,0.0000238302,0.05000,2.915 | |
| 28,mlp.up_proj,0.0000445883,0.05000,2.208 | |
| 28,mlp.gate_proj,0.0000377646,0.05000,2.231 | |
| 28,mlp.down_proj,0.0000522973,0.05000,4.164 | |
| 29,self_attn.o_proj,failsafe(rtn): 0.0025024,0.00000,0.095 | |
| 29,self_attn.v_proj,0.0000108235,0.05000,1.851 | |
| 29,self_attn.k_proj,0.0000011112,0.05000,1.863 | |
| 29,self_attn.q_proj,0.0000212847,0.05000,1.870 | |
| 29,self_attn.o_gate,0.0000235498,0.05000,0.656 | |
| 29,mlp.up_proj,0.0000567799,0.05000,2.281 | |
| 29,mlp.gate_proj,0.0000467146,0.05000,2.297 | |
| 29,mlp.down_proj,0.0000851708,0.05000,4.246 | |
| 30,self_attn.o_proj,failsafe(rtn): 0.0025940,0.00000,0.099 | |
| 30,self_attn.v_proj,0.0000335646,0.05000,1.833 | |
| 30,self_attn.k_proj,0.0000012887,0.05000,1.861 | |
| 30,self_attn.q_proj,0.0000326057,0.05000,1.869 | |
| 30,self_attn.o_gate,0.0000374721,0.05000,0.815 | |
| 30,mlp.gate_proj,0.0000602557,0.05000,2.257 | |
| 30,mlp.up_proj,0.0000727343,0.05000,2.264 | |
| 30,mlp.down_proj,0.0001834925,0.05000,4.187 | |
| 31,self_attn.o_proj,failsafe(rtn): 0.0024261,0.00000,0.099 | |
| 31,self_attn.v_proj,0.0000052294,0.05000,1.898 | |
| 31,self_attn.k_proj,0.0000008337,0.05000,1.914 | |
| 31,self_attn.q_proj,0.0000223652,0.05000,1.931 | |
| 31,self_attn.o_gate,0.0000234013,0.05000,0.657 | |
| 31,mlp.up_proj,0.0001052787,0.05000,2.221 | |
| 31,mlp.gate_proj,0.0000943520,0.05000,2.242 | |
| 31,mlp.down_proj,0.0007571873,0.05000,4.177 | |