| layer,module,loss,samples,damp,time | |
| 0,self_attn.o_proj,failsafe(rtn): 0.0018692,0.00000,0.365 | |
| 0,self_attn.v_proj,0.0000002852,0.05000,2.313 | |
| 0,self_attn.k_proj,0.0000001276,0.05000,2.319 | |
| 0,self_attn.q_proj,0.0000026754,0.05000,2.338 | |
| 0,self_attn.o_gate,0.0000056994,0.05000,0.734 | |
| 0,mlp.up_proj,0.0000029969,0.05000,2.312 | |
| 0,mlp.gate_proj,0.0000021178,0.05000,2.376 | |
| 0,mlp.down_proj,0.0000014851,0.05000,4.314 | |
| 1,self_attn.q_proj,0.0000214327,0.05000,3.077 | |
| 1,self_attn.o_proj,0.0000006074,0.05000,3.104 | |
| 1,self_attn.k_proj,0.0000217632,0.05000,3.117 | |
| 1,self_attn.v_proj,0.0000276329,0.05000,3.120 | |
| 1,mlp.gate_proj,0.0000019046,0.05000,2.313 | |
| 1,mlp.up_proj,0.0000020071,0.05000,2.323 | |
| 1,mlp.down_proj,0.0000005985,0.05000,4.283 | |
| 2,self_attn.v_proj,0.0000127656,0.05000,3.089 | |
| 2,self_attn.q_proj,0.0000108437,0.05000,3.128 | |
| 2,self_attn.k_proj,0.0000107873,0.05000,3.132 | |
| 2,self_attn.o_proj,0.0000002624,0.05000,3.137 | |
| 2,mlp.gate_proj,0.0000023777,0.05000,2.511 | |
| 2,mlp.up_proj,0.0000023888,0.05000,2.534 | |
| 2,mlp.down_proj,0.0000010156,0.05000,4.452 | |
| 3,self_attn.q_proj,0.0000134180,0.05000,3.093 | |
| 3,self_attn.k_proj,0.0000137554,0.05000,3.125 | |
| 3,self_attn.v_proj,0.0000144534,0.05000,3.134 | |
| 3,self_attn.o_proj,0.0000003675,0.05000,3.141 | |
| 3,mlp.gate_proj,0.0000048571,0.05000,2.296 | |
| 3,mlp.up_proj,0.0000042930,0.05000,2.320 | |
| 3,mlp.down_proj,0.0000015000,0.05000,4.277 | |
| 4,self_attn.v_proj,0.0000128837,0.05000,3.061 | |
| 4,self_attn.k_proj,0.0000108806,0.05000,3.094 | |
| 4,self_attn.q_proj,0.0000106413,0.05000,3.105 | |
| 4,self_attn.o_proj,0.0000002981,0.05000,3.112 | |
| 4,mlp.up_proj,0.0000057718,0.05000,2.497 | |
| 4,mlp.gate_proj,0.0000063566,0.05000,2.513 | |
| 4,mlp.down_proj,0.0000016298,0.05000,4.424 | |
| 5,self_attn.q_proj,0.0000115468,0.05000,3.022 | |
| 5,self_attn.o_proj,0.0000004831,0.05000,3.064 | |
| 5,self_attn.v_proj,0.0000130083,0.05000,3.067 | |
| 5,self_attn.k_proj,0.0000121797,0.05000,3.070 | |
| 5,mlp.gate_proj,0.0000078714,0.05000,2.288 | |
| 5,mlp.up_proj,0.0000071630,0.05000,2.318 | |
| 5,mlp.down_proj,0.0000017172,0.05000,4.243 | |
| 6,self_attn.v_proj,0.0000116934,0.05000,3.043 | |
| 6,self_attn.k_proj,0.0000133436,0.05000,3.057 | |
| 6,self_attn.q_proj,0.0000111725,0.05000,3.073 | |
| 6,self_attn.o_proj,0.0000004840,0.05000,3.078 | |
| 6,mlp.gate_proj,0.0000095918,0.05000,2.280 | |
| 6,mlp.up_proj,0.0000088430,0.05000,2.283 | |
| 6,mlp.down_proj,0.0000019229,0.05000,4.203 | |
| 7,self_attn.q_proj,0.0000099772,0.05000,3.036 | |
| 7,self_attn.o_proj,0.0000005164,0.05000,3.066 | |
| 7,self_attn.v_proj,0.0000109389,0.05000,3.073 | |
| 7,self_attn.k_proj,0.0000121498,0.05000,3.076 | |
| 7,mlp.gate_proj,0.0000098175,0.05000,2.284 | |
| 7,mlp.up_proj,0.0000092494,0.05000,2.305 | |
| 7,mlp.down_proj,0.0000022285,0.05000,4.234 | |
| 8,self_attn.v_proj,0.0000075188,0.05000,3.149 | |
| 8,self_attn.o_proj,0.0000008020,0.05000,3.157 | |
| 8,self_attn.q_proj,0.0000069240,0.05000,3.163 | |
| 8,self_attn.k_proj,0.0000085086,0.05000,3.165 | |
| 8,mlp.up_proj,0.0000095218,0.05000,2.345 | |
| 8,mlp.gate_proj,0.0000104520,0.05000,2.375 | |
| 8,mlp.down_proj,0.0000021824,0.05000,4.301 | |
| 9,self_attn.o_proj,failsafe(rtn): 0.0020905,0.00000,0.097 | |
| 9,self_attn.v_proj,0.0000005230,0.05000,1.976 | |
| 9,self_attn.k_proj,0.0000008173,0.05000,1.999 | |
| 9,self_attn.q_proj,0.0000084612,0.05000,2.002 | |
| 9,self_attn.o_gate,0.0000059209,0.05000,0.657 | |
| 9,mlp.up_proj,0.0000109480,0.05000,2.284 | |
| 9,mlp.gate_proj,0.0000114849,0.05000,2.299 | |
| 9,mlp.down_proj,0.0000028129,0.05000,4.222 | |
| 10,self_attn.q_proj,0.0000101656,0.05000,3.081 | |
| 10,self_attn.k_proj,0.0000127152,0.05000,3.111 | |
| 10,self_attn.v_proj,0.0000108378,0.05000,3.122 | |
| 10,self_attn.o_proj,0.0000007537,0.05000,3.125 | |
| 10,mlp.up_proj,0.0000110120,0.05000,2.622 | |
| 10,mlp.gate_proj,0.0000113623,0.05000,2.642 | |
| 10,mlp.down_proj,0.0000026269,0.05000,4.580 | |
| 11,self_attn.v_proj,0.0000078631,0.05000,3.096 | |
| 11,self_attn.o_proj,0.0000008522,0.05000,3.131 | |
| 11,self_attn.q_proj,0.0000073996,0.05000,3.135 | |
| 11,self_attn.k_proj,0.0000090127,0.05000,3.138 | |
| 11,mlp.up_proj,0.0000111150,0.05000,2.283 | |
| 11,mlp.gate_proj,0.0000110942,0.05000,2.297 | |
| 11,mlp.down_proj,0.0000028067,0.05000,4.212 | |
| 12,self_attn.q_proj,0.0000093563,0.05000,2.901 | |
| 12,self_attn.o_proj,0.0000010849,0.05000,2.929 | |
| 12,self_attn.v_proj,0.0000108863,0.05000,2.937 | |
| 12,self_attn.k_proj,0.0000103044,0.05000,2.945 | |
| 12,mlp.gate_proj,0.0000107282,0.05000,2.498 | |
| 12,mlp.up_proj,0.0000109626,0.05000,2.522 | |
| 12,mlp.down_proj,0.0000028672,0.05000,4.469 | |
| 13,self_attn.v_proj,0.0000072914,0.05000,2.986 | |
| 13,self_attn.k_proj,0.0000091078,0.05000,3.025 | |
| 13,self_attn.o_proj,0.0000011265,0.05000,3.029 | |
| 13,self_attn.q_proj,0.0000071241,0.05000,3.033 | |
| 13,mlp.up_proj,0.0000113235,0.05000,2.353 | |
| 13,mlp.gate_proj,0.0000108775,0.05000,2.361 | |
| 13,mlp.down_proj,0.0000030396,0.05000,4.278 | |
| 14,self_attn.q_proj,0.0000077357,0.05000,2.863 | |
| 14,self_attn.o_proj,0.0000014332,0.05000,2.888 | |
| 14,self_attn.k_proj,0.0000088931,0.05000,2.890 | |
| 14,self_attn.v_proj,0.0000076432,0.05000,2.892 | |
| 14,mlp.gate_proj,0.0000103080,0.05000,2.610 | |
| 14,mlp.up_proj,0.0000113153,0.05000,2.619 | |
| 14,mlp.down_proj,0.0000034036,0.05000,4.571 | |
| 15,self_attn.v_proj,0.0000053299,0.05000,2.866 | |
| 15,self_attn.o_proj,0.0000019097,0.05000,2.893 | |
| 15,self_attn.k_proj,0.0000064892,0.05000,2.896 | |
| 15,self_attn.q_proj,0.0000050327,0.05000,2.903 | |
| 15,mlp.up_proj,0.0000109781,0.05000,2.230 | |
| 15,mlp.gate_proj,0.0000106926,0.05000,2.242 | |
| 15,mlp.down_proj,0.0000029907,0.05000,4.144 | |
| 16,self_attn.o_proj,failsafe(rtn): 0.0021057,0.00000,0.097 | |
| 16,self_attn.k_proj,0.0000007730,0.05000,1.877 | |
| 16,self_attn.v_proj,0.0000004408,0.05000,1.895 | |
| 16,self_attn.q_proj,0.0000079157,0.05000,1.906 | |
| 16,self_attn.o_gate,0.0000046221,0.05000,0.658 | |
| 16,mlp.up_proj,0.0000137838,0.05000,2.611 | |
| 16,mlp.gate_proj,0.0000140992,0.05000,2.626 | |
| 16,mlp.down_proj,0.0000044268,0.05000,4.544 | |
| 17,self_attn.o_proj,failsafe(rtn): 0.0021667,0.00000,0.101 | |
| 17,self_attn.v_proj,0.0000006478,0.05000,1.965 | |
| 17,self_attn.k_proj,0.0000007183,0.05000,1.973 | |
| 17,self_attn.q_proj,0.0000081989,0.05000,1.978 | |
| 17,self_attn.o_gate,0.0000054954,0.05000,0.656 | |
| 17,mlp.up_proj,0.0000150867,0.05000,2.610 | |
| 17,mlp.gate_proj,0.0000165420,0.05000,2.632 | |
| 17,mlp.down_proj,0.0000043397,0.05000,4.550 | |
| 18,self_attn.q_proj,0.0000070497,0.05000,3.084 | |
| 18,self_attn.o_proj,0.0000013994,0.05000,3.107 | |
| 18,self_attn.v_proj,0.0000071203,0.05000,3.117 | |
| 18,self_attn.k_proj,0.0000099659,0.05000,3.124 | |
| 18,mlp.gate_proj,0.0000142639,0.05000,2.629 | |
| 18,mlp.up_proj,0.0000137292,0.05000,2.666 | |
| 18,mlp.down_proj,0.0000037223,0.05000,4.590 | |
| 19,self_attn.v_proj,0.0000092909,0.05000,2.909 | |
| 19,self_attn.k_proj,0.0000115645,0.05000,2.937 | |
| 19,self_attn.q_proj,0.0000088469,0.05000,2.947 | |
| 19,self_attn.o_proj,0.0000013371,0.05000,2.955 | |
| 19,mlp.up_proj,0.0000140925,0.05000,2.559 | |
| 19,mlp.gate_proj,0.0000137925,0.05000,2.579 | |
| 19,mlp.down_proj,0.0000041741,0.05000,4.491 | |
| 20,self_attn.q_proj,0.0000069671,0.05000,2.973 | |
| 20,self_attn.o_proj,0.0000021632,0.05000,3.001 | |
| 20,self_attn.k_proj,0.0000094110,0.05000,3.024 | |
| 20,self_attn.v_proj,0.0000069630,0.05000,3.029 | |
| 20,mlp.gate_proj,0.0000137304,0.05000,2.482 | |
| 20,mlp.up_proj,0.0000145074,0.05000,2.509 | |
| 20,mlp.down_proj,0.0000054941,0.05000,4.432 | |
| 21,self_attn.v_proj,0.0000077745,0.05000,2.941 | |
| 21,self_attn.k_proj,0.0000082770,0.05000,2.965 | |
| 21,self_attn.q_proj,0.0000072783,0.05000,2.979 | |
| 21,self_attn.o_proj,0.0000032530,0.05000,2.983 | |
| 21,mlp.up_proj,0.0000138135,0.05000,2.564 | |
| 21,mlp.gate_proj,0.0000128529,0.05000,2.599 | |
| 21,mlp.down_proj,0.0000065183,0.05000,4.596 | |
| 22,self_attn.o_proj,failsafe(rtn): 0.0022430,0.00000,0.095 | |
| 22,self_attn.q_proj,0.0000099461,0.05000,1.938 | |
| 22,self_attn.k_proj,0.0000006408,0.05000,1.975 | |
| 22,self_attn.v_proj,0.0000011084,0.05000,1.992 | |
| 22,self_attn.o_gate,0.0000052852,0.05000,0.681 | |
| 22,mlp.gate_proj,0.0000165691,0.05000,2.439 | |
| 22,mlp.up_proj,0.0000177798,0.05000,2.445 | |
| 22,mlp.down_proj,0.0000097134,0.05000,4.381 | |
| 23,self_attn.o_proj,0.0000029966,0.05000,3.032 | |
| 23,self_attn.q_proj,0.0000097131,0.05000,3.045 | |
| 23,self_attn.k_proj,0.0000124430,0.05000,3.058 | |
| 23,self_attn.v_proj,0.0000093706,0.05000,3.072 | |
| 23,mlp.gate_proj,0.0000193721,0.05000,2.553 | |
| 23,mlp.up_proj,0.0000208169,0.05000,2.603 | |
| 23,mlp.down_proj,0.0000116066,0.05000,4.526 | |
| 24,self_attn.v_proj,0.0000083977,0.05000,3.006 | |
| 24,self_attn.q_proj,0.0000082685,0.05000,3.034 | |
| 24,self_attn.k_proj,0.0000102885,0.05000,3.048 | |
| 24,self_attn.o_proj,0.0000050330,0.05000,3.051 | |
| 24,mlp.up_proj,0.0000245762,0.05000,2.473 | |
| 24,mlp.gate_proj,0.0000227001,0.05000,2.480 | |
| 24,mlp.down_proj,0.0000188121,0.05000,4.402 | |
| 25,self_attn.q_proj,0.0000086529,0.05000,2.938 | |
| 25,self_attn.v_proj,0.0000085304,0.05000,2.963 | |
| 25,self_attn.o_proj,0.0000065483,0.05000,2.971 | |
| 25,self_attn.k_proj,0.0000110142,0.05000,2.979 | |
| 25,mlp.gate_proj,0.0000263007,0.05000,2.514 | |
| 25,mlp.up_proj,0.0000289325,0.05000,2.557 | |
| 25,mlp.down_proj,0.0000330309,0.05000,4.460 | |
| 26,self_attn.v_proj,0.0000135194,0.05000,2.966 | |
| 26,self_attn.k_proj,0.0000197288,0.05000,2.985 | |
| 26,self_attn.q_proj,0.0000138416,0.05000,2.996 | |
| 26,self_attn.o_proj,0.0000122633,0.05000,3.003 | |
| 26,mlp.gate_proj,0.0000293281,0.05000,2.578 | |
| 26,mlp.up_proj,0.0000331938,0.05000,2.618 | |
| 26,mlp.down_proj,0.0000286714,0.05000,4.516 | |
| 27,self_attn.q_proj,0.0000129731,0.05000,3.048 | |
| 27,self_attn.o_proj,0.0000077469,0.05000,3.072 | |
| 27,self_attn.v_proj,0.0000121207,0.05000,3.081 | |
| 27,self_attn.k_proj,0.0000142424,0.05000,3.085 | |
| 27,mlp.up_proj,0.0000383397,0.05000,2.579 | |
| 27,mlp.gate_proj,0.0000332575,0.05000,2.599 | |
| 27,mlp.down_proj,0.0000373985,0.05000,4.558 | |
| 28,self_attn.k_proj,0.0000229849,0.05000,2.961 | |
| 28,self_attn.o_proj,0.0000238381,0.05000,2.987 | |
| 28,self_attn.v_proj,0.0000170875,0.05000,2.990 | |
| 28,self_attn.q_proj,0.0000166143,0.05000,2.999 | |
| 28,mlp.up_proj,0.0000445356,0.05000,2.436 | |
| 28,mlp.gate_proj,0.0000377255,0.05000,2.483 | |
| 28,mlp.down_proj,0.0000521327,0.05000,4.401 | |
| 29,self_attn.o_proj,failsafe(rtn): 0.0025024,0.00000,0.103 | |
| 29,self_attn.q_proj,0.0000212811,0.05000,1.898 | |
| 29,self_attn.v_proj,0.0000108000,0.05000,1.928 | |
| 29,self_attn.k_proj,0.0000011077,0.05000,1.941 | |
| 29,self_attn.o_gate,0.0000235504,0.05000,0.674 | |
| 29,mlp.up_proj,0.0000567625,0.05000,2.402 | |
| 29,mlp.gate_proj,0.0000467000,0.05000,2.418 | |
| 29,mlp.down_proj,0.0000849603,0.05000,4.344 | |
| 30,self_attn.o_proj,failsafe(rtn): 0.0025940,0.00000,0.097 | |
| 30,self_attn.q_proj,0.0000326366,0.05000,1.932 | |
| 30,self_attn.v_proj,0.0000335974,0.05000,1.933 | |
| 30,self_attn.k_proj,0.0000012899,0.05000,1.966 | |
| 30,self_attn.o_gate,0.0000374967,0.05000,0.668 | |
| 30,mlp.up_proj,0.0000729923,0.05000,3.208 | |
| 30,mlp.gate_proj,0.0000604775,0.05000,3.370 | |
| 30,mlp.down_proj,0.0001841053,0.05000,5.231 | |
| 31,self_attn.o_proj,failsafe(rtn): 0.0024261,0.00000,0.112 | |
| 31,self_attn.q_proj,0.0000223886,0.05000,2.671 | |
| 31,self_attn.v_proj,0.0000052386,0.05000,2.788 | |
| 31,self_attn.k_proj,0.0000008344,0.05000,2.925 | |
| 31,self_attn.o_gate,0.0000234286,0.05000,0.822 | |
| 31,mlp.up_proj,0.0001053831,0.05000,3.133 | |
| 31,mlp.gate_proj,0.0000944571,0.05000,3.336 | |
| 31,mlp.down_proj,0.0007573602,0.05000,5.207 | |