| layer,module,loss,samples,damp,time | |
| 0,linear_attn.in_proj_qkv,0.0000002075,0.05000,1.370 | |
| 0,linear_attn.in_proj_z,0.0000001358,0.05000,1.004 | |
| 0,linear_attn.out_proj,0.0000000001,0.05000,1.244 | |
| 0,mlp.up_proj,0.0000000012,0.05000,1.379 | |
| 0,mlp.gate_proj,0.0000000013,0.05000,1.396 | |
| 0,mlp.down_proj,0.0000000000,0.05000,3.793 | |
| 1,linear_attn.in_proj_qkv,0.0000000111,0.05000,1.035 | |
| 1,linear_attn.in_proj_z,0.0000000068,0.05000,1.032 | |
| 1,linear_attn.out_proj,0.0000000000,0.05000,1.236 | |
| 1,mlp.up_proj,0.0000000026,0.05000,1.413 | |
| 1,mlp.gate_proj,0.0000000028,0.05000,1.457 | |
| 1,mlp.down_proj,0.0000000000,0.05000,3.695 | |
| 2,linear_attn.in_proj_qkv,0.0000000156,0.05000,1.028 | |
| 2,linear_attn.in_proj_z,0.0000000094,0.05000,1.029 | |
| 2,linear_attn.out_proj,0.0000000000,0.05000,1.230 | |
| 2,mlp.up_proj,0.0000000042,0.05000,1.426 | |
| 2,mlp.gate_proj,0.0000000045,0.05000,1.470 | |
| 2,mlp.down_proj,0.0000000001,0.05000,3.677 | |
| 3,self_attn.q_proj,0.0000001636,0.05000,1.746 | |
| 3,self_attn.v_proj,0.0000000129,0.05000,1.758 | |
| 3,self_attn.k_proj,0.0000000128,0.05000,1.780 | |
| 3,self_attn.o_proj,0.0000000000,0.05000,1.231 | |
| 3,mlp.gate_proj,0.0000000052,0.05000,1.425 | |
| 3,mlp.up_proj,0.0000000050,0.05000,1.430 | |
| 3,mlp.down_proj,0.0000000000,0.05000,3.709 | |
| 4,linear_attn.in_proj_qkv,0.0000000191,0.05000,1.034 | |
| 4,linear_attn.in_proj_z,0.0000000122,0.05000,1.032 | |
| 4,linear_attn.out_proj,0.0000000001,0.05000,1.229 | |
| 4,mlp.up_proj,0.0000000066,0.05000,1.453 | |
| 4,mlp.gate_proj,0.0000000069,0.05000,1.485 | |
| 4,mlp.down_proj,0.0000000000,0.05000,3.699 | |
| 5,linear_attn.in_proj_qkv,0.0000000245,0.05000,1.044 | |
| 5,linear_attn.in_proj_z,0.0000000158,0.05000,1.019 | |
| 5,linear_attn.out_proj,0.0000000001,0.05000,1.222 | |
| 5,mlp.gate_proj,0.0000000087,0.05000,1.436 | |
| 5,mlp.up_proj,0.0000000082,0.05000,1.456 | |
| 5,mlp.down_proj,0.0000000001,0.05000,3.728 | |
| 6,linear_attn.in_proj_qkv,0.0000000396,0.05000,1.041 | |
| 6,linear_attn.in_proj_z,0.0000000235,0.05000,1.026 | |
| 6,linear_attn.out_proj,0.0000000001,0.05000,1.227 | |
| 6,mlp.gate_proj,0.0000000126,0.05000,1.474 | |
| 6,mlp.up_proj,0.0000000116,0.05000,1.478 | |
| 6,mlp.down_proj,0.0000000001,0.05000,3.676 | |
| 7,self_attn.q_proj,0.0000001413,0.05000,1.711 | |
| 7,self_attn.v_proj,0.0000000113,0.05000,1.726 | |
| 7,self_attn.k_proj,0.0000000121,0.05000,1.752 | |
| 7,self_attn.o_proj,0.0000000001,0.05000,1.225 | |
| 7,mlp.up_proj,0.0000000127,0.05000,1.418 | |
| 7,mlp.gate_proj,0.0000000137,0.05000,1.421 | |
| 7,mlp.down_proj,0.0000000002,0.05000,3.712 | |
| 8,linear_attn.in_proj_qkv,0.0000000387,0.05000,1.035 | |
| 8,linear_attn.in_proj_z,0.0000000243,0.05000,1.015 | |
| 8,linear_attn.out_proj,0.0000000002,0.05000,1.225 | |
| 8,mlp.up_proj,0.0000000147,0.05000,1.425 | |
| 8,mlp.gate_proj,0.0000000157,0.05000,1.457 | |
| 8,mlp.down_proj,0.0000000002,0.05000,3.715 | |
| 9,linear_attn.in_proj_qkv,0.0000000406,0.05000,1.034 | |
| 9,linear_attn.in_proj_z,0.0000000246,0.05000,1.008 | |
| 9,linear_attn.out_proj,0.0000000002,0.05000,1.228 | |
| 9,mlp.gate_proj,0.0000000178,0.05000,1.450 | |
| 9,mlp.up_proj,0.0000000167,0.05000,1.469 | |
| 9,mlp.down_proj,0.0000000003,0.05000,3.706 | |
| 10,linear_attn.in_proj_qkv,0.0000000437,0.05000,1.030 | |
| 10,linear_attn.in_proj_z,0.0000000256,0.05000,1.004 | |
| 10,linear_attn.out_proj,0.0000000002,0.05000,1.237 | |
| 10,mlp.up_proj,0.0000000178,0.05000,1.444 | |
| 10,mlp.gate_proj,0.0000000189,0.05000,1.458 | |
| 10,mlp.down_proj,0.0000000003,0.05000,3.636 | |
| 11,self_attn.k_proj,0.0000000117,0.05000,1.733 | |
| 11,self_attn.q_proj,0.0000001285,0.05000,1.733 | |
| 11,self_attn.v_proj,0.0000000104,0.05000,1.742 | |
| 11,self_attn.o_proj,0.0000000002,0.05000,1.227 | |
| 11,mlp.gate_proj,0.0000000206,0.05000,1.423 | |
| 11,mlp.up_proj,0.0000000196,0.05000,1.430 | |
| 11,mlp.down_proj,0.0000000004,0.05000,3.723 | |
| 12,linear_attn.in_proj_qkv,0.0000000517,0.05000,1.036 | |
| 12,linear_attn.in_proj_z,0.0000000303,0.05000,1.009 | |
| 12,linear_attn.out_proj,0.0000000003,0.05000,1.227 | |
| 12,mlp.gate_proj,0.0000000217,0.05000,1.387 | |
| 12,mlp.up_proj,0.0000000206,0.05000,1.396 | |
| 12,mlp.down_proj,0.0000000004,0.05000,3.688 | |
| 13,linear_attn.in_proj_qkv,0.0000000543,0.05000,1.022 | |
| 13,linear_attn.in_proj_z,0.0000000303,0.05000,1.012 | |
| 13,linear_attn.out_proj,0.0000000004,0.05000,1.231 | |
| 13,mlp.gate_proj,0.0000000234,0.05000,1.392 | |
| 13,mlp.up_proj,0.0000000223,0.05000,1.414 | |
| 13,mlp.down_proj,0.0000000005,0.05000,3.705 | |
| 14,linear_attn.in_proj_qkv,0.0000000611,0.05000,1.010 | |
| 14,linear_attn.in_proj_z,0.0000000349,0.05000,1.027 | |
| 14,linear_attn.out_proj,0.0000000005,0.05000,1.224 | |
| 14,mlp.gate_proj,0.0000000249,0.05000,1.391 | |
| 14,mlp.up_proj,0.0000000238,0.05000,1.413 | |
| 14,mlp.down_proj,0.0000000005,0.05000,3.677 | |
| 15,self_attn.v_proj,0.0000000096,0.05000,1.707 | |
| 15,self_attn.q_proj,0.0000001083,0.05000,1.713 | |
| 15,self_attn.k_proj,0.0000000100,0.05000,1.761 | |
| 15,self_attn.o_proj,0.0000000003,0.05000,1.232 | |
| 15,mlp.up_proj,0.0000000278,0.05000,1.418 | |
| 15,mlp.gate_proj,0.0000000289,0.05000,1.426 | |
| 15,mlp.down_proj,0.0000000007,0.05000,3.684 | |
| 16,linear_attn.in_proj_qkv,0.0000000651,0.05000,1.008 | |
| 16,linear_attn.in_proj_z,0.0000000368,0.05000,1.011 | |
| 16,linear_attn.out_proj,0.0000000005,0.05000,1.229 | |
| 16,mlp.gate_proj,0.0000000321,0.05000,1.423 | |
| 16,mlp.up_proj,0.0000000307,0.05000,1.444 | |
| 16,mlp.down_proj,0.0000000008,0.05000,3.675 | |
| 17,linear_attn.in_proj_qkv,0.0000000748,0.05000,1.021 | |
| 17,linear_attn.in_proj_z,0.0000000373,0.05000,1.012 | |
| 17,linear_attn.out_proj,0.0000000007,0.05000,1.231 | |
| 17,mlp.gate_proj,0.0000000371,0.05000,1.425 | |
| 17,mlp.up_proj,0.0000000355,0.05000,1.433 | |
| 17,mlp.down_proj,0.0000000011,0.05000,3.682 | |
| 18,linear_attn.in_proj_qkv,0.0000000799,0.05000,1.029 | |
| 18,linear_attn.in_proj_z,0.0000000435,0.05000,1.011 | |
| 18,linear_attn.out_proj,0.0000000009,0.05000,1.237 | |
| 18,mlp.up_proj,0.0000000432,0.05000,1.412 | |
| 18,mlp.gate_proj,0.0000000469,0.05000,1.430 | |
| 18,mlp.down_proj,0.0000000017,0.05000,3.702 | |
| 19,self_attn.q_proj,0.0000001313,0.05000,1.704 | |
| 19,self_attn.k_proj,0.0000000129,0.05000,1.721 | |
| 19,self_attn.v_proj,0.0000000148,0.05000,1.732 | |
| 19,self_attn.o_proj,0.0000000014,0.05000,1.228 | |
| 19,mlp.gate_proj,0.0000000578,0.05000,1.389 | |
| 19,mlp.up_proj,0.0000000545,0.05000,1.400 | |
| 19,mlp.down_proj,0.0000000023,0.05000,3.726 | |
| 20,linear_attn.in_proj_qkv,0.0000001444,0.05000,1.033 | |
| 20,linear_attn.in_proj_z,0.0000000797,0.05000,1.030 | |
| 20,linear_attn.out_proj,0.0000000014,0.05000,1.230 | |
| 20,mlp.up_proj,0.0000000645,0.05000,1.407 | |
| 20,mlp.gate_proj,0.0000000699,0.05000,1.447 | |
| 20,mlp.down_proj,0.0000000026,0.05000,3.746 | |
| 21,linear_attn.in_proj_qkv,0.0000001775,0.05000,1.028 | |
| 21,linear_attn.in_proj_z,0.0000001111,0.05000,1.027 | |
| 21,linear_attn.out_proj,0.0000000019,0.05000,1.229 | |
| 21,mlp.gate_proj,0.0000000760,0.05000,1.414 | |
| 21,mlp.up_proj,0.0000000707,0.05000,1.419 | |
| 21,mlp.down_proj,0.0000000028,0.05000,3.753 | |
| 22,linear_attn.in_proj_qkv,0.0000001534,0.05000,1.023 | |
| 22,linear_attn.in_proj_z,0.0000000923,0.05000,1.035 | |
| 22,linear_attn.out_proj,0.0000000017,0.05000,1.229 | |
| 22,mlp.up_proj,0.0000000799,0.05000,1.397 | |
| 22,mlp.gate_proj,0.0000000898,0.05000,1.398 | |
| 22,mlp.down_proj,0.0000000033,0.05000,3.691 | |
| 23,self_attn.k_proj,0.0000000186,0.05000,1.713 | |
| 23,self_attn.v_proj,0.0000000191,0.05000,1.733 | |
| 23,self_attn.q_proj,0.0000001725,0.05000,1.747 | |
| 23,self_attn.o_proj,0.0000000017,0.05000,1.243 | |
| 23,mlp.gate_proj,0.0000000931,0.05000,1.417 | |
| 23,mlp.up_proj,0.0000000854,0.05000,1.428 | |
| 23,mlp.down_proj,0.0000000034,0.05000,3.725 | |
| 24,linear_attn.in_proj_qkv,0.0000001517,0.05000,1.022 | |
| 24,linear_attn.in_proj_z,0.0000000844,0.05000,1.036 | |
| 24,linear_attn.out_proj,0.0000000021,0.05000,1.229 | |
| 24,mlp.gate_proj,0.0000001015,0.05000,1.445 | |
| 24,mlp.up_proj,0.0000000928,0.05000,1.457 | |
| 24,mlp.down_proj,0.0000000040,0.05000,3.743 | |
| 25,linear_attn.in_proj_qkv,0.0000001549,0.05000,1.033 | |
| 25,linear_attn.in_proj_z,0.0000000803,0.05000,1.032 | |
| 25,linear_attn.out_proj,0.0000000022,0.05000,1.216 | |
| 25,mlp.gate_proj,0.0000000983,0.05000,1.438 | |
| 25,mlp.up_proj,0.0000000952,0.05000,1.442 | |
| 25,mlp.down_proj,0.0000000041,0.05000,3.747 | |
| 26,linear_attn.in_proj_qkv,0.0000001424,0.05000,1.024 | |
| 26,linear_attn.in_proj_z,0.0000000707,0.05000,1.034 | |
| 26,linear_attn.out_proj,0.0000000023,0.05000,1.207 | |
| 26,mlp.gate_proj,0.0000000940,0.05000,1.456 | |
| 26,mlp.up_proj,0.0000000966,0.05000,1.468 | |
| 26,mlp.down_proj,0.0000000043,0.05000,3.704 | |
| 27,self_attn.k_proj,0.0000000177,0.05000,1.741 | |
| 27,self_attn.q_proj,0.0000001626,0.05000,1.761 | |
| 27,self_attn.v_proj,0.0000000154,0.05000,1.770 | |
| 27,self_attn.o_proj,0.0000000032,0.05000,1.254 | |
| 27,mlp.gate_proj,0.0000000972,0.05000,1.431 | |
| 27,mlp.up_proj,0.0000001030,0.05000,1.440 | |
| 27,mlp.down_proj,0.0000000047,0.05000,3.722 | |
| 28,linear_attn.in_proj_qkv,0.0000001435,0.05000,1.030 | |
| 28,linear_attn.in_proj_z,0.0000000666,0.05000,1.040 | |
| 28,linear_attn.out_proj,0.0000000029,0.05000,1.216 | |
| 28,mlp.gate_proj,0.0000000946,0.05000,1.425 | |
| 28,mlp.up_proj,0.0000001020,0.05000,1.432 | |
| 28,mlp.down_proj,0.0000000050,0.05000,3.754 | |
| 29,linear_attn.in_proj_qkv,0.0000001544,0.05000,1.028 | |
| 29,linear_attn.in_proj_z,0.0000000705,0.05000,1.027 | |
| 29,linear_attn.out_proj,0.0000000025,0.05000,1.214 | |
| 29,mlp.gate_proj,0.0000000945,0.05000,1.439 | |
| 29,mlp.up_proj,0.0000001026,0.05000,1.465 | |
| 29,mlp.down_proj,0.0000000050,0.05000,3.763 | |
| 30,linear_attn.in_proj_qkv,0.0000001554,0.05000,1.043 | |
| 30,linear_attn.in_proj_z,0.0000000724,0.05000,1.068 | |
| 30,linear_attn.out_proj,0.0000000033,0.05000,1.230 | |
| 30,mlp.gate_proj,0.0000000917,0.05000,1.436 | |
| 30,mlp.up_proj,0.0000001018,0.05000,1.448 | |
| 30,mlp.down_proj,0.0000000049,0.05000,3.706 | |
| 31,self_attn.v_proj,0.0000000185,0.05000,1.734 | |
| 31,self_attn.q_proj,0.0000001505,0.05000,1.766 | |
| 31,self_attn.k_proj,0.0000000166,0.05000,1.770 | |
| 31,self_attn.o_proj,0.0000000040,0.05000,1.211 | |
| 31,mlp.up_proj,0.0000001088,0.05000,1.442 | |
| 31,mlp.gate_proj,0.0000000964,0.05000,1.472 | |
| 31,mlp.down_proj,0.0000000053,0.05000,3.693 | |
| 32,linear_attn.in_proj_qkv,0.0000001616,0.05000,1.040 | |
| 32,linear_attn.in_proj_z,0.0000000702,0.05000,1.029 | |
| 32,linear_attn.out_proj,0.0000000033,0.05000,1.232 | |
| 32,mlp.gate_proj,0.0000001028,0.05000,1.418 | |
| 32,mlp.up_proj,0.0000001162,0.05000,1.447 | |
| 32,mlp.down_proj,0.0000000058,0.05000,3.762 | |
| 33,linear_attn.in_proj_qkv,0.0000001914,0.05000,1.022 | |
| 33,linear_attn.in_proj_z,0.0000000784,0.05000,1.034 | |
| 33,linear_attn.out_proj,0.0000000039,0.05000,1.222 | |
| 33,mlp.up_proj,0.0000001207,0.05000,1.393 | |
| 33,mlp.gate_proj,0.0000001067,0.05000,1.399 | |
| 33,mlp.down_proj,0.0000000059,0.05000,3.754 | |
| 34,linear_attn.in_proj_qkv,0.0000001861,0.05000,1.026 | |
| 34,linear_attn.in_proj_z,0.0000000843,0.05000,1.032 | |
| 34,linear_attn.out_proj,0.0000000041,0.05000,1.230 | |
| 34,mlp.gate_proj,0.0000001186,0.05000,1.425 | |
| 34,mlp.up_proj,0.0000001293,0.05000,1.436 | |
| 34,mlp.down_proj,0.0000000078,0.05000,3.695 | |
| 35,self_attn.v_proj,0.0000000283,0.05000,1.770 | |
| 35,self_attn.q_proj,0.0000001840,0.05000,1.786 | |
| 35,self_attn.k_proj,0.0000000199,0.05000,1.801 | |
| 35,self_attn.o_proj,0.0000000071,0.05000,1.228 | |
| 35,mlp.up_proj,0.0000001330,0.05000,1.440 | |
| 35,mlp.gate_proj,0.0000001239,0.05000,1.460 | |
| 35,mlp.down_proj,0.0000000088,0.05000,3.733 | |
| 36,linear_attn.in_proj_qkv,0.0000002602,0.05000,1.038 | |
| 36,linear_attn.in_proj_z,0.0000001298,0.05000,1.035 | |
| 36,linear_attn.out_proj,0.0000000037,0.05000,1.233 | |
| 36,mlp.up_proj,0.0000001317,0.05000,1.429 | |
| 36,mlp.gate_proj,0.0000001379,0.05000,1.470 | |
| 36,mlp.down_proj,0.0000000081,0.05000,3.742 | |
| 37,linear_attn.in_proj_qkv,0.0000002863,0.05000,1.028 | |
| 37,linear_attn.in_proj_z,0.0000001613,0.05000,1.045 | |
| 37,linear_attn.out_proj,0.0000000043,0.05000,1.226 | |
| 37,mlp.up_proj,0.0000001320,0.05000,1.414 | |
| 37,mlp.gate_proj,0.0000001398,0.05000,1.429 | |
| 37,mlp.down_proj,0.0000000078,0.05000,3.719 | |
| 38,linear_attn.in_proj_qkv,0.0000002546,0.05000,1.024 | |
| 38,linear_attn.in_proj_z,0.0000001477,0.05000,1.026 | |
| 38,linear_attn.out_proj,0.0000000032,0.05000,1.228 | |
| 38,mlp.gate_proj,0.0000001848,0.05000,1.467 | |
| 38,mlp.up_proj,0.0000001489,0.05000,1.517 | |
| 38,mlp.down_proj,0.0000000082,0.05000,3.692 | |
| 39,self_attn.k_proj,0.0000000249,0.05000,1.712 | |
| 39,self_attn.v_proj,0.0000000300,0.05000,1.727 | |
| 39,self_attn.q_proj,0.0000002229,0.05000,1.746 | |
| 39,self_attn.o_proj,0.0000000049,0.05000,1.231 | |
| 39,mlp.gate_proj,0.0000001662,0.05000,1.488 | |
| 39,mlp.up_proj,0.0000001445,0.05000,1.489 | |
| 39,mlp.down_proj,0.0000000080,0.05000,3.701 | |
| 40,linear_attn.in_proj_qkv,0.0000002447,0.05000,1.026 | |
| 40,linear_attn.in_proj_z,0.0000001349,0.05000,1.036 | |
| 40,linear_attn.out_proj,0.0000000040,0.05000,1.232 | |
| 40,mlp.up_proj,0.0000001539,0.05000,1.413 | |
| 40,mlp.gate_proj,0.0000001819,0.05000,1.448 | |
| 40,mlp.down_proj,0.0000000083,0.05000,3.806 | |
| 41,linear_attn.in_proj_qkv,0.0000002201,0.05000,1.022 | |
| 41,linear_attn.in_proj_z,0.0000001207,0.05000,1.030 | |
| 41,linear_attn.out_proj,0.0000000035,0.05000,1.221 | |
| 41,mlp.up_proj,0.0000001497,0.05000,1.471 | |
| 41,mlp.gate_proj,0.0000001583,0.05000,1.473 | |
| 41,mlp.down_proj,0.0000000083,0.05000,3.754 | |
| 42,linear_attn.in_proj_qkv,0.0000002117,0.05000,1.028 | |
| 42,linear_attn.in_proj_z,0.0000001109,0.05000,1.028 | |
| 42,linear_attn.out_proj,0.0000000042,0.05000,1.236 | |
| 42,mlp.gate_proj,0.0000001501,0.05000,1.430 | |
| 42,mlp.up_proj,0.0000001533,0.05000,1.449 | |
| 42,mlp.down_proj,0.0000000093,0.05000,3.761 | |
| 43,self_attn.k_proj,0.0000000260,0.05000,1.730 | |
| 43,self_attn.q_proj,0.0000002219,0.05000,1.731 | |
| 43,self_attn.v_proj,0.0000000303,0.05000,1.771 | |
| 43,self_attn.o_proj,0.0000000069,0.05000,1.220 | |
| 43,mlp.up_proj,0.0000001592,0.05000,1.462 | |
| 43,mlp.gate_proj,0.0000001490,0.05000,1.475 | |
| 43,mlp.down_proj,0.0000000100,0.05000,3.767 | |
| 44,linear_attn.in_proj_qkv,0.0000002080,0.05000,1.036 | |
| 44,linear_attn.in_proj_z,0.0000001016,0.05000,1.032 | |
| 44,linear_attn.out_proj,0.0000000061,0.05000,1.241 | |
| 44,mlp.up_proj,0.0000001637,0.05000,1.431 | |
| 44,mlp.gate_proj,0.0000001513,0.05000,1.432 | |
| 44,mlp.down_proj,0.0000000115,0.05000,3.686 | |
| 45,linear_attn.in_proj_qkv,0.0000002124,0.05000,1.032 | |
| 45,linear_attn.in_proj_z,0.0000001036,0.05000,1.036 | |
| 45,linear_attn.out_proj,0.0000000054,0.05000,1.239 | |
| 45,mlp.gate_proj,0.0000001512,0.05000,1.415 | |
| 45,mlp.up_proj,0.0000001650,0.05000,1.427 | |
| 45,mlp.down_proj,0.0000000120,0.05000,3.729 | |
| 46,linear_attn.in_proj_qkv,0.0000002224,0.05000,1.027 | |
| 46,linear_attn.in_proj_z,0.0000001100,0.05000,1.042 | |
| 46,linear_attn.out_proj,0.0000000082,0.05000,1.233 | |
| 46,mlp.gate_proj,0.0000001491,0.05000,1.447 | |
| 46,mlp.up_proj,0.0000001676,0.05000,1.473 | |
| 46,mlp.down_proj,0.0000000132,0.05000,3.803 | |
| 47,self_attn.q_proj,0.0000002389,0.05000,1.770 | |
| 47,self_attn.k_proj,0.0000000255,0.05000,1.771 | |
| 47,self_attn.v_proj,0.0000000391,0.05000,1.806 | |
| 47,self_attn.o_proj,0.0000000089,0.05000,1.225 | |
| 47,mlp.up_proj,0.0000001848,0.05000,1.439 | |
| 47,mlp.gate_proj,0.0000001653,0.05000,1.443 | |
| 47,mlp.down_proj,0.0000000162,0.05000,3.724 | |
| 48,linear_attn.in_proj_qkv,0.0000002445,0.05000,1.042 | |
| 48,linear_attn.in_proj_z,0.0000001161,0.05000,1.022 | |
| 48,linear_attn.out_proj,0.0000000094,0.05000,1.247 | |
| 48,mlp.gate_proj,0.0000001822,0.05000,1.439 | |
| 48,mlp.up_proj,0.0000002028,0.05000,1.475 | |
| 48,mlp.down_proj,0.0000000198,0.05000,3.687 | |
| 49,linear_attn.in_proj_qkv,0.0000002972,0.05000,1.048 | |
| 49,linear_attn.in_proj_z,0.0000001243,0.05000,1.020 | |
| 49,linear_attn.out_proj,0.0000000127,0.05000,1.247 | |
| 49,mlp.gate_proj,0.0000002012,0.05000,1.429 | |
| 49,mlp.up_proj,0.0000002182,0.05000,1.436 | |
| 49,mlp.down_proj,0.0000000258,0.05000,3.678 | |
| 50,linear_attn.in_proj_qkv,0.0000003036,0.05000,1.038 | |
| 50,linear_attn.in_proj_z,0.0000001385,0.05000,1.036 | |
| 50,linear_attn.out_proj,0.0000000192,0.05000,1.246 | |
| 50,mlp.gate_proj,0.0000002451,0.05000,1.427 | |
| 50,mlp.up_proj,0.0000002521,0.05000,1.467 | |
| 50,mlp.down_proj,0.0000000435,0.05000,3.690 | |
| 51,self_attn.v_proj,0.0000000820,0.05000,1.735 | |
| 51,self_attn.k_proj,0.0000000351,0.05000,1.748 | |
| 51,self_attn.q_proj,0.0000003200,0.05000,1.751 | |
| 51,self_attn.o_proj,0.0000000278,0.05000,1.241 | |
| 51,mlp.up_proj,0.0000002922,0.05000,1.427 | |
| 51,mlp.gate_proj,0.0000002748,0.05000,1.440 | |
| 51,mlp.down_proj,0.0000000544,0.05000,3.721 | |
| 52,linear_attn.in_proj_qkv,0.0000004735,0.05000,1.035 | |
| 52,linear_attn.in_proj_z,0.0000002124,0.05000,1.033 | |
| 52,linear_attn.out_proj,0.0000000231,0.05000,1.243 | |
| 52,mlp.gate_proj,0.0000003281,0.05000,1.440 | |
| 52,mlp.up_proj,0.0000003091,0.05000,1.457 | |
| 52,mlp.down_proj,0.0000000524,0.05000,3.701 | |
| 53,linear_attn.in_proj_qkv,0.0000004603,0.05000,1.067 | |
| 53,linear_attn.in_proj_z,0.0000002217,0.05000,1.029 | |
| 53,linear_attn.out_proj,0.0000000243,0.05000,1.285 | |
| 53,mlp.gate_proj,0.0000003586,0.05000,1.436 | |
| 53,mlp.up_proj,0.0000003214,0.05000,1.453 | |
| 53,mlp.down_proj,0.0000000558,0.05000,3.702 | |
| 54,linear_attn.in_proj_qkv,0.0000004444,0.05000,1.035 | |
| 54,linear_attn.in_proj_z,0.0000002355,0.05000,1.034 | |
| 54,linear_attn.out_proj,0.0000000238,0.05000,1.284 | |
| 54,mlp.gate_proj,0.0000004498,0.05000,1.457 | |
| 54,mlp.up_proj,0.0000003799,0.05000,1.459 | |
| 54,mlp.down_proj,0.0000000769,0.05000,3.709 | |
| 55,self_attn.q_proj,0.0000004171,0.05000,1.702 | |
| 55,self_attn.v_proj,0.0000001058,0.05000,1.715 | |
| 55,self_attn.k_proj,0.0000000506,0.05000,1.764 | |
| 55,self_attn.o_proj,0.0000000284,0.05000,1.233 | |
| 55,mlp.up_proj,0.0000003784,0.05000,1.428 | |
| 55,mlp.gate_proj,0.0000004249,0.05000,1.451 | |
| 55,mlp.down_proj,0.0000000625,0.05000,3.718 | |
| 56,linear_attn.in_proj_qkv,0.0000004994,0.05000,1.030 | |
| 56,linear_attn.in_proj_z,0.0000002452,0.05000,1.016 | |
| 56,linear_attn.out_proj,0.0000000310,0.05000,1.245 | |
| 56,mlp.gate_proj,0.0000004580,0.05000,1.428 | |
| 56,mlp.up_proj,0.0000003986,0.05000,1.458 | |
| 56,mlp.down_proj,0.0000000565,0.05000,3.724 | |
| 57,linear_attn.in_proj_qkv,0.0000005406,0.05000,1.065 | |
| 57,linear_attn.in_proj_z,0.0000002603,0.05000,1.041 | |
| 57,linear_attn.out_proj,0.0000000297,0.05000,1.294 | |
| 57,mlp.gate_proj,0.0000005248,0.05000,1.438 | |
| 57,mlp.up_proj,0.0000004510,0.05000,1.470 | |
| 57,mlp.down_proj,0.0000000598,0.05000,3.737 | |
| 58,linear_attn.in_proj_qkv,0.0000005110,0.05000,1.052 | |
| 58,linear_attn.in_proj_z,0.0000002702,0.05000,1.040 | |
| 58,linear_attn.out_proj,0.0000000303,0.05000,1.231 | |
| 58,mlp.up_proj,0.0000005175,0.05000,1.446 | |
| 58,mlp.gate_proj,0.0000006045,0.05000,1.452 | |
| 58,mlp.down_proj,0.0000000817,0.05000,3.716 | |
| 59,self_attn.v_proj,0.0000002233,0.05000,1.734 | |
| 59,self_attn.q_proj,0.0000004970,0.05000,1.753 | |
| 59,self_attn.k_proj,0.0000000637,0.05000,1.772 | |
| 59,self_attn.o_proj,0.0000000789,0.05000,1.238 | |
| 59,mlp.gate_proj,0.0000005972,0.05000,1.396 | |
| 59,mlp.up_proj,0.0000005255,0.05000,1.418 | |
| 59,mlp.down_proj,0.0000000991,0.05000,3.727 | |
| 60,linear_attn.in_proj_qkv,0.0000006119,0.05000,1.024 | |
| 60,linear_attn.in_proj_z,0.0000002857,0.05000,1.027 | |
| 60,linear_attn.out_proj,0.0000000744,0.05000,1.249 | |
| 60,mlp.up_proj,0.0000005230,0.05000,1.451 | |
| 60,mlp.gate_proj,0.0000005900,0.05000,1.456 | |
| 60,mlp.down_proj,0.0000001247,0.05000,3.711 | |
| 61,linear_attn.in_proj_qkv,0.0000004431,0.05000,1.027 | |
| 61,linear_attn.in_proj_z,0.0000002485,0.05000,1.025 | |
| 61,linear_attn.out_proj,0.0000000689,0.05000,1.219 | |
| 61,mlp.gate_proj,0.0000006208,0.05000,1.409 | |
| 61,mlp.up_proj,0.0000005521,0.05000,1.425 | |
| 61,mlp.down_proj,0.0000001618,0.05000,3.708 | |
| 62,linear_attn.in_proj_qkv,0.0000004958,0.05000,1.027 | |
| 62,linear_attn.in_proj_z,0.0000002576,0.05000,1.038 | |
| 62,linear_attn.out_proj,0.0000001895,0.05000,1.220 | |
| 62,mlp.gate_proj,0.0000005716,0.05000,1.410 | |
| 62,mlp.up_proj,0.0000005109,0.05000,1.443 | |
| 62,mlp.down_proj,0.0000002368,0.05000,3.673 | |
| 63,self_attn.q_proj,0.0000004438,0.05000,1.753 | |
| 63,self_attn.k_proj,0.0000000597,0.05000,1.773 | |
| 63,self_attn.v_proj,0.0000001823,0.05000,1.774 | |
| 63,self_attn.o_proj,0.0000002019,0.05000,1.217 | |
| 63,mlp.gate_proj,0.0000004743,0.05000,1.410 | |
| 63,mlp.up_proj,0.0000004088,0.05000,1.411 | |
| 63,mlp.down_proj,0.0000004359,0.05000,3.645 | |