File size: 9,067 Bytes
f235135 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | layer,module,loss,samples,damp,time
0,self_attn.k_proj,0.0000074843,0.05000,5.272
0,self_attn.q_proj,0.0000147214,0.05000,5.281
0,self_attn.v_proj,0.0000004168,0.05000,5.330
0,self_attn.o_proj,0.0000000187,0.05000,0.970
0,mlp.gate_proj,0.0000084507,0.05000,2.314
0,mlp.up_proj,0.0000073953,0.05000,2.321
0,mlp.down_proj,0.0000001043,0.05000,2.760
1,self_attn.q_proj,0.0000176587,0.05000,5.488
1,self_attn.v_proj,0.0000011781,0.05000,5.531
1,self_attn.k_proj,0.0000103479,0.05000,5.556
1,self_attn.o_proj,0.0000000659,0.05000,1.008
1,mlp.up_proj,0.0000101163,0.05000,2.793
1,mlp.gate_proj,0.0000115220,0.05000,2.857
1,mlp.down_proj,0.0000023786,0.05000,2.978
2,self_attn.v_proj,0.0000045326,0.05000,5.520
2,self_attn.k_proj,0.0000444663,0.05000,5.568
2,self_attn.q_proj,0.0000746845,0.05000,5.603
2,self_attn.o_proj,0.0000000708,0.05000,0.977
2,mlp.up_proj,0.0000157823,0.05000,2.370
2,mlp.gate_proj,0.0000186525,0.05000,2.381
2,mlp.down_proj,0.0000003235,0.05000,2.880
3,self_attn.q_proj,0.0000490428,0.05000,4.770
3,self_attn.v_proj,0.0000046019,0.05000,4.821
3,self_attn.k_proj,0.0000271964,0.05000,4.853
3,self_attn.o_proj,0.0000001303,0.05000,0.991
3,mlp.gate_proj,0.0000271254,0.05000,2.287
3,mlp.up_proj,0.0000203218,0.05000,2.309
3,mlp.down_proj,0.0000004725,0.05000,2.923
4,self_attn.v_proj,0.0000049324,0.05000,4.775
4,self_attn.q_proj,0.0000503491,0.05000,4.856
4,self_attn.k_proj,0.0000263398,0.05000,4.882
4,self_attn.o_proj,0.0000001964,0.05000,1.008
4,mlp.gate_proj,0.0000360828,0.05000,2.330
4,mlp.up_proj,0.0000242689,0.05000,2.349
4,mlp.down_proj,0.0000006482,0.05000,2.859
5,self_attn.q_proj,0.0000723580,0.05000,4.878
5,self_attn.k_proj,0.0000435308,0.05000,4.908
5,self_attn.v_proj,0.0000050953,0.05000,4.928
5,self_attn.o_proj,0.0000002506,0.05000,0.988
5,mlp.up_proj,0.0000276863,0.05000,2.221
5,mlp.gate_proj,0.0000389511,0.05000,2.249
5,mlp.down_proj,0.0000008920,0.05000,2.865
6,self_attn.v_proj,0.0000047490,0.05000,4.730
6,self_attn.k_proj,0.0000314569,0.05000,4.806
6,self_attn.q_proj,0.0000593003,0.05000,4.831
6,self_attn.o_proj,0.0000003914,0.05000,0.994
6,mlp.gate_proj,0.0000416038,0.05000,2.261
6,mlp.up_proj,0.0000296857,0.05000,2.274
6,mlp.down_proj,0.0000010432,0.05000,2.867
7,self_attn.v_proj,0.0000041414,0.05000,4.777
7,self_attn.k_proj,0.0000294888,0.05000,4.819
7,self_attn.q_proj,0.0000499796,0.05000,4.840
7,self_attn.o_proj,0.0000005414,0.05000,0.996
7,mlp.gate_proj,0.0000407815,0.05000,2.382
7,mlp.up_proj,0.0000316740,0.05000,2.395
7,mlp.down_proj,0.0000011954,0.05000,2.921
8,self_attn.k_proj,0.0000382184,0.05000,4.773
8,self_attn.q_proj,0.0000634405,0.05000,4.840
8,self_attn.v_proj,0.0000051689,0.05000,4.857
8,self_attn.o_proj,0.0000007112,0.05000,0.994
8,mlp.gate_proj,0.0000432118,0.05000,2.214
8,mlp.up_proj,0.0000328935,0.05000,2.231
8,mlp.down_proj,0.0000012622,0.05000,2.910
9,self_attn.v_proj,0.0000062167,0.05000,4.765
9,self_attn.k_proj,0.0000348470,0.05000,4.821
9,self_attn.q_proj,0.0000589752,0.05000,4.857
9,self_attn.o_proj,0.0000007339,0.05000,0.995
9,mlp.gate_proj,0.0000421141,0.05000,2.229
9,mlp.up_proj,0.0000330395,0.05000,2.241
9,mlp.down_proj,0.0000012594,0.05000,2.852
10,self_attn.k_proj,0.0000369369,0.05000,4.772
10,self_attn.v_proj,0.0000048285,0.05000,4.813
10,self_attn.q_proj,0.0000594491,0.05000,4.850
10,self_attn.o_proj,0.0000007190,0.05000,0.987
10,mlp.up_proj,0.0000361118,0.05000,2.281
10,mlp.gate_proj,0.0000430197,0.05000,2.303
10,mlp.down_proj,0.0000014489,0.05000,2.891
11,self_attn.v_proj,0.0000057577,0.05000,4.785
11,self_attn.k_proj,0.0000277935,0.05000,4.850
11,self_attn.q_proj,0.0000493928,0.05000,4.877
11,self_attn.o_proj,0.0000009189,0.05000,1.004
11,mlp.gate_proj,0.0000448819,0.05000,2.218
11,mlp.up_proj,0.0000388907,0.05000,2.235
11,mlp.down_proj,0.0000016271,0.05000,2.859
12,self_attn.q_proj,0.0000703840,0.05000,4.766
12,self_attn.k_proj,0.0000419931,0.05000,4.806
12,self_attn.v_proj,0.0000062611,0.05000,4.833
12,self_attn.o_proj,0.0000010599,0.05000,1.008
12,mlp.up_proj,0.0000416281,0.05000,2.392
12,mlp.gate_proj,0.0000476432,0.05000,2.410
12,mlp.down_proj,0.0000018099,0.05000,2.883
13,self_attn.v_proj,0.0000073646,0.05000,4.787
13,self_attn.k_proj,0.0000480065,0.05000,4.836
13,self_attn.q_proj,0.0000746108,0.05000,4.859
13,self_attn.o_proj,0.0000012881,0.05000,1.002
13,mlp.gate_proj,0.0000567143,0.05000,2.312
13,mlp.up_proj,0.0000464424,0.05000,2.333
13,mlp.down_proj,0.0000024285,0.05000,2.855
14,self_attn.v_proj,0.0000085554,0.05000,4.747
14,self_attn.k_proj,0.0000377909,0.05000,4.815
14,self_attn.q_proj,0.0000818829,0.05000,4.839
14,self_attn.o_proj,0.0000014739,0.05000,0.988
14,mlp.gate_proj,0.0000618665,0.05000,2.244
14,mlp.up_proj,0.0000501615,0.05000,2.275
14,mlp.down_proj,0.0000028961,0.05000,2.943
15,self_attn.k_proj,0.0000434869,0.05000,4.765
15,self_attn.q_proj,0.0000836033,0.05000,4.814
15,self_attn.v_proj,0.0000085927,0.05000,4.843
15,self_attn.o_proj,0.0000009176,0.05000,0.978
15,mlp.up_proj,0.0000501215,0.05000,2.199
15,mlp.gate_proj,0.0000664111,0.05000,2.224
15,mlp.down_proj,0.0000029919,0.05000,2.856
16,self_attn.v_proj,0.0000096999,0.05000,4.660
16,self_attn.k_proj,0.0000491510,0.05000,4.808
16,self_attn.q_proj,0.0000873152,0.05000,4.831
16,self_attn.o_proj,0.0000006891,0.05000,0.990
16,mlp.gate_proj,0.0000677998,0.05000,2.255
16,mlp.up_proj,0.0000498530,0.05000,2.270
16,mlp.down_proj,0.0000028770,0.05000,2.887
17,self_attn.k_proj,0.0000451024,0.05000,4.814
17,self_attn.q_proj,0.0000833279,0.05000,4.854
17,self_attn.v_proj,0.0000093786,0.05000,4.881
17,self_attn.o_proj,0.0000006592,0.05000,0.993
17,mlp.gate_proj,0.0000711968,0.05000,2.303
17,mlp.up_proj,0.0000515198,0.05000,2.313
17,mlp.down_proj,0.0000031699,0.05000,2.877
18,self_attn.v_proj,0.0000117244,0.05000,4.799
18,self_attn.q_proj,0.0000941548,0.05000,4.843
18,self_attn.k_proj,0.0000521436,0.05000,4.855
18,self_attn.o_proj,0.0000007543,0.05000,1.060
18,mlp.gate_proj,0.0000748079,0.05000,2.265
18,mlp.up_proj,0.0000554492,0.05000,2.288
18,mlp.down_proj,0.0000033983,0.05000,2.867
19,self_attn.k_proj,0.0000492243,0.05000,4.762
19,self_attn.v_proj,0.0000116459,0.05000,4.852
19,self_attn.q_proj,0.0000846121,0.05000,4.881
19,self_attn.o_proj,0.0000010064,0.05000,0.996
19,mlp.gate_proj,0.0000798135,0.05000,2.413
19,mlp.up_proj,0.0000598702,0.05000,2.435
19,mlp.down_proj,0.0000041946,0.05000,2.896
20,self_attn.v_proj,0.0000138354,0.05000,4.743
20,self_attn.k_proj,0.0000525729,0.05000,4.779
20,self_attn.q_proj,0.0000871055,0.05000,4.804
20,self_attn.o_proj,0.0000007463,0.05000,1.062
20,mlp.gate_proj,0.0000772857,0.05000,2.329
20,mlp.up_proj,0.0000608534,0.05000,2.344
20,mlp.down_proj,0.0000040513,0.05000,2.916
21,self_attn.k_proj,0.0000516450,0.05000,4.799
21,self_attn.q_proj,0.0000864292,0.05000,4.834
21,self_attn.v_proj,0.0000178772,0.05000,4.858
21,self_attn.o_proj,0.0000009117,0.05000,1.007
21,mlp.up_proj,0.0000644529,0.05000,2.339
21,mlp.gate_proj,0.0000826544,0.05000,2.358
21,mlp.down_proj,0.0000043396,0.05000,2.849
22,self_attn.k_proj,0.0000493620,0.05000,4.685
22,self_attn.q_proj,0.0000846314,0.05000,4.768
22,self_attn.v_proj,0.0000178073,0.05000,4.804
22,self_attn.o_proj,0.0000007427,0.05000,0.985
22,mlp.up_proj,0.0000691359,0.05000,2.223
22,mlp.gate_proj,0.0000892200,0.05000,2.242
22,mlp.down_proj,0.0000049096,0.05000,2.858
23,self_attn.k_proj,0.0000528449,0.05000,4.804
23,self_attn.q_proj,0.0000848596,0.05000,4.884
23,self_attn.v_proj,0.0000174951,0.05000,4.895
23,self_attn.o_proj,0.0000011734,0.05000,0.995
23,mlp.up_proj,0.0000761134,0.05000,2.367
23,mlp.gate_proj,0.0001037115,0.05000,2.376
23,mlp.down_proj,0.0000060200,0.05000,2.890
24,self_attn.v_proj,0.0000255699,0.05000,4.749
24,self_attn.k_proj,0.0000586351,0.05000,4.825
24,self_attn.q_proj,0.0000929255,0.05000,4.852
24,self_attn.o_proj,0.0000017205,0.05000,1.011
24,mlp.gate_proj,0.0001158564,0.05000,2.337
24,mlp.up_proj,0.0000833232,0.05000,2.350
24,mlp.down_proj,0.0000070803,0.05000,2.862
25,self_attn.v_proj,0.0000219677,0.05000,4.786
25,self_attn.k_proj,0.0000455065,0.05000,4.819
25,self_attn.q_proj,0.0000875274,0.05000,4.842
25,self_attn.o_proj,0.0000018091,0.05000,0.995
25,mlp.gate_proj,0.0001257230,0.05000,2.238
25,mlp.up_proj,0.0000896462,0.05000,2.241
25,mlp.down_proj,0.0000089616,0.05000,2.927
26,self_attn.v_proj,0.0000301434,0.05000,4.714
26,self_attn.q_proj,0.0000821644,0.05000,4.799
26,self_attn.k_proj,0.0000533734,0.05000,4.827
26,self_attn.o_proj,0.0000028081,0.05000,1.033
26,mlp.gate_proj,0.0001326795,0.05000,2.426
26,mlp.up_proj,0.0000931492,0.05000,2.448
26,mlp.down_proj,0.0000130549,0.05000,2.898
27,self_attn.q_proj,0.0000648963,0.05000,4.827
27,self_attn.v_proj,0.0000198264,0.05000,4.863
27,self_attn.k_proj,0.0000367513,0.05000,4.890
27,self_attn.o_proj,0.0000078769,0.05000,1.015
27,mlp.gate_proj,0.0001287943,0.05000,2.227
27,mlp.up_proj,0.0001021746,0.05000,2.234
27,mlp.down_proj,0.0000386645,0.05000,2.917
|