File size: 6,025 Bytes
7b8152b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 | layer,module,loss,samples,damp,time
0,self_attn.k_proj,0.00409379,0.01000,0.998
0,self_attn.v_proj,0.00072114,0.01000,0.988
0,self_attn.q_proj,0.00414090,0.01000,0.992
0,self_attn.out_proj,0.00001473,0.01000,1.021
0,fc1,0.00906052,0.01000,1.003
0,fc2,0.00011689,0.01000,4.243
1,self_attn.k_proj,0.00241560,0.01000,1.022
1,self_attn.v_proj,0.00030304,0.01000,0.993
1,self_attn.q_proj,0.00263582,0.01000,0.988
1,self_attn.out_proj,0.00001348,0.01000,1.031
1,fc1,0.00911925,0.01000,1.005
1,fc2,0.00113884,0.01000,4.290
2,self_attn.k_proj,0.00382860,0.01000,0.983
2,self_attn.v_proj,0.00054063,0.01000,0.985
2,self_attn.q_proj,0.00378607,0.01000,0.988
2,self_attn.out_proj,0.00004634,0.01000,0.984
2,fc1,0.00967952,0.01000,0.996
2,fc2,0.00550816,0.01000,4.239
3,self_attn.k_proj,0.00358004,0.01000,0.985
3,self_attn.v_proj,0.00060755,0.01000,0.998
3,self_attn.q_proj,0.00388739,0.01000,0.986
3,self_attn.out_proj,0.00007405,0.01000,1.050
3,fc1,0.00773898,0.01000,0.991
3,fc2,0.02920380,0.01000,4.234
4,self_attn.k_proj,0.00413768,0.01000,0.986
4,self_attn.v_proj,0.00054294,0.01000,0.989
4,self_attn.q_proj,0.00418821,0.01000,0.983
4,self_attn.out_proj,0.00005169,0.01000,0.986
4,fc1,0.00736058,0.01000,1.007
4,fc2,0.39130572,0.01250,4.257
5,self_attn.k_proj,0.00486233,0.01000,1.011
5,self_attn.v_proj,0.00066676,0.01000,0.993
5,self_attn.q_proj,0.00465768,0.01000,0.992
5,self_attn.out_proj,0.00006351,0.01000,1.005
5,fc1,0.00789846,0.01000,1.005
5,fc2,0.00017804,0.01000,4.146
6,self_attn.k_proj,0.00761369,0.01000,0.989
6,self_attn.v_proj,0.00081846,0.01000,0.974
6,self_attn.q_proj,0.00610037,0.01000,0.981
6,self_attn.out_proj,0.00008329,0.01000,1.009
6,fc1,0.00947184,0.01000,0.977
6,fc2,0.00032562,0.01000,4.177
7,self_attn.k_proj,0.00821398,0.01000,0.989
7,self_attn.v_proj,0.00094582,0.01000,0.974
7,self_attn.q_proj,0.00643242,0.01000,0.995
7,self_attn.out_proj,0.00007262,0.01000,0.986
7,fc1,0.01092366,0.01000,1.010
7,fc2,0.00015775,0.01000,4.204
8,self_attn.k_proj,0.00864247,0.01000,0.986
8,self_attn.v_proj,0.00102178,0.01000,0.991
8,self_attn.q_proj,0.00637043,0.01000,0.974
8,self_attn.out_proj,0.00007326,0.01000,1.000
8,fc1,0.01218541,0.01000,1.032
8,fc2,0.00021664,0.01000,4.152
9,self_attn.k_proj,0.00874096,0.01000,0.996
9,self_attn.v_proj,0.00111076,0.01000,0.996
9,self_attn.q_proj,0.00673135,0.01000,0.996
9,self_attn.out_proj,0.00007552,0.01000,0.990
9,fc1,0.01299828,0.01000,1.013
9,fc2,0.00025837,0.01000,4.185
10,self_attn.k_proj,0.00844553,0.01000,0.972
10,self_attn.v_proj,0.00138582,0.01000,1.037
10,self_attn.q_proj,0.00662948,0.01000,1.033
10,self_attn.out_proj,0.00007813,0.01000,1.032
10,fc1,0.01408036,0.01000,1.002
10,fc2,0.00027341,0.01000,4.212
11,self_attn.k_proj,0.00810582,0.01000,1.027
11,self_attn.v_proj,0.00164685,0.01000,0.979
11,self_attn.q_proj,0.00655979,0.01000,1.001
11,self_attn.out_proj,0.00006866,0.01000,0.983
11,fc1,0.01486147,0.01000,1.003
11,fc2,0.00035751,0.01000,4.201
12,self_attn.k_proj,0.00892932,0.01000,0.974
12,self_attn.v_proj,0.00180916,0.01000,0.993
12,self_attn.q_proj,0.00685191,0.01000,0.987
12,self_attn.out_proj,0.00007998,0.01000,0.995
12,fc1,0.01568660,0.01000,0.986
12,fc2,0.00033367,0.01000,4.177
13,self_attn.k_proj,0.00930009,0.01000,0.995
13,self_attn.v_proj,0.00193084,0.01000,0.999
13,self_attn.q_proj,0.00691767,0.01000,0.989
13,self_attn.out_proj,0.00008400,0.01000,0.992
13,fc1,0.01652814,0.01000,1.009
13,fc2,0.00043007,0.01000,4.208
14,self_attn.k_proj,0.00859589,0.01000,0.986
14,self_attn.v_proj,0.00245260,0.01000,0.985
14,self_attn.q_proj,0.00656543,0.01000,0.978
14,self_attn.out_proj,0.00009421,0.01000,0.983
14,fc1,0.01786581,0.01000,1.007
14,fc2,0.00054112,0.01000,4.169
15,self_attn.k_proj,0.00779343,0.01000,0.995
15,self_attn.v_proj,0.00313876,0.01000,0.995
15,self_attn.q_proj,0.00688220,0.01000,1.002
15,self_attn.out_proj,0.00009604,0.01000,0.999
15,fc1,0.01899958,0.01000,0.984
15,fc2,0.00067443,0.01000,4.171
16,self_attn.k_proj,0.00774822,0.01000,0.970
16,self_attn.v_proj,0.00353068,0.01000,0.983
16,self_attn.q_proj,0.00607082,0.01000,0.979
16,self_attn.out_proj,0.00014269,0.01000,0.987
16,fc1,0.02137345,0.01000,1.042
16,fc2,0.00070578,0.01000,4.236
17,self_attn.k_proj,0.00760870,0.01000,1.032
17,self_attn.v_proj,0.00402974,0.01000,1.009
17,self_attn.q_proj,0.00628217,0.01000,1.044
17,self_attn.out_proj,0.00016927,0.01000,1.025
17,fc1,0.02438176,0.01000,1.000
17,fc2,0.00098657,0.01000,4.209
18,self_attn.k_proj,0.00715731,0.01000,0.977
18,self_attn.v_proj,0.00468908,0.01000,0.989
18,self_attn.q_proj,0.00624552,0.01000,0.978
18,self_attn.out_proj,0.00030485,0.01000,0.991
18,fc1,0.02636197,0.01000,0.999
18,fc2,0.00203174,0.01000,4.254
19,self_attn.k_proj,0.00734770,0.01000,0.990
19,self_attn.v_proj,0.00605974,0.01000,0.989
19,self_attn.q_proj,0.00633731,0.01000,0.990
19,self_attn.out_proj,0.00041913,0.01000,0.997
19,fc1,0.02817888,0.01000,0.998
19,fc2,0.00118158,0.01000,4.213
20,self_attn.k_proj,0.00741961,0.01000,0.991
20,self_attn.v_proj,0.00686072,0.01000,0.991
20,self_attn.q_proj,0.00603745,0.01000,0.996
20,self_attn.out_proj,0.00052878,0.01000,0.992
20,fc1,0.02827213,0.01000,0.996
20,fc2,0.00133380,0.01000,4.248
21,self_attn.k_proj,0.00697015,0.01000,0.991
21,self_attn.v_proj,0.00690806,0.01000,0.995
21,self_attn.q_proj,0.00655608,0.01000,1.053
21,self_attn.out_proj,0.00054652,0.01000,0.991
21,fc1,0.02712614,0.01000,1.007
21,fc2,0.00177069,0.01000,4.216
22,self_attn.k_proj,0.00651944,0.01000,0.982
22,self_attn.v_proj,0.00744933,0.01000,0.990
22,self_attn.q_proj,0.00703869,0.01000,0.977
22,self_attn.out_proj,0.00068570,0.01000,0.989
22,fc1,0.02461607,0.01000,1.004
22,fc2,0.00199070,0.01000,4.303
23,self_attn.k_proj,0.00893416,0.01000,0.989
23,self_attn.v_proj,0.00480653,0.01000,0.994
23,self_attn.q_proj,0.01262821,0.01000,0.995
23,self_attn.out_proj,0.00080622,0.01000,1.000
23,fc1,0.02185187,0.01000,0.985
23,fc2,0.00234254,0.01000,4.236
|