File size: 7,203 Bytes
d46ba0c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | layer,module,loss,samples,damp,time
0,mlp.shared_expert_gate,0.0000000001,0.05000,0.499
0,linear_attn.conv1d,fallback(rtn): 0.0025482,0.00000,0.027
0,linear_attn.in_proj_b,0.0000000155,0.05000,0.618
0,linear_attn.in_proj_a,0.0000000626,0.05000,0.624
1,mlp.shared_expert_gate,0.0000000002,0.05000,0.226
1,linear_attn.conv1d,fallback(rtn): 0.0029144,0.00000,0.003
1,linear_attn.in_proj_a,0.0000000509,0.05000,0.404
1,linear_attn.in_proj_b,0.0000000246,0.05000,0.410
2,mlp.shared_expert_gate,0.0000000002,0.05000,0.226
2,linear_attn.conv1d,fallback(rtn): 0.0023651,0.00000,0.002
2,linear_attn.in_proj_a,0.0000000371,0.05000,0.392
2,linear_attn.in_proj_b,0.0000000259,0.05000,0.393
3,mlp.shared_expert_gate,0.0000000002,0.05000,0.228
4,mlp.shared_expert_gate,0.0000000003,0.05000,0.228
4,linear_attn.conv1d,fallback(rtn): 0.0034332,0.00000,0.002
4,linear_attn.in_proj_b,0.0000000275,0.05000,0.589
4,linear_attn.in_proj_a,0.0000000457,0.05000,0.592
5,mlp.shared_expert_gate,0.0000000004,0.05000,0.230
5,linear_attn.conv1d,fallback(rtn): 0.0032959,0.00000,0.002
5,linear_attn.in_proj_a,0.0000000540,0.05000,0.592
5,linear_attn.in_proj_b,0.0000000330,0.05000,0.597
6,mlp.shared_expert_gate,0.0000000004,0.05000,0.228
6,linear_attn.conv1d,fallback(rtn): 0.0032501,0.00000,0.006
6,linear_attn.in_proj_b,0.0000000372,0.05000,0.587
6,linear_attn.in_proj_a,0.0000000537,0.05000,0.592
7,mlp.shared_expert_gate,0.0000000004,0.05000,0.226
8,mlp.shared_expert_gate,0.0000000004,0.05000,0.226
8,linear_attn.conv1d,fallback(rtn): 0.0051270,0.00000,0.003
8,linear_attn.in_proj_b,0.0000000398,0.05000,0.592
8,linear_attn.in_proj_a,0.0000000837,0.05000,0.596
9,mlp.shared_expert_gate,0.0000000003,0.05000,0.226
9,linear_attn.conv1d,fallback(rtn): 0.0042419,0.00000,0.003
9,linear_attn.in_proj_a,0.0000000661,0.05000,0.592
9,linear_attn.in_proj_b,0.0000000469,0.05000,0.598
10,mlp.shared_expert_gate,0.0000000002,0.05000,0.227
10,linear_attn.conv1d,fallback(rtn): 0.0034790,0.00000,0.007
10,linear_attn.in_proj_a,0.0000000523,0.05000,0.392
10,linear_attn.in_proj_b,0.0000000358,0.05000,0.395
11,mlp.shared_expert_gate,0.0000000003,0.05000,0.226
12,mlp.shared_expert_gate,0.0000000004,0.05000,0.227
12,linear_attn.conv1d,fallback(rtn): 0.0037537,0.00000,0.004
12,linear_attn.in_proj_a,0.0000000525,0.05000,0.467
12,linear_attn.in_proj_b,0.0000000359,0.05000,0.473
13,mlp.shared_expert_gate,0.0000000004,0.05000,0.227
13,linear_attn.conv1d,fallback(rtn): 0.0045166,0.00000,0.003
13,linear_attn.in_proj_a,0.0000000676,0.05000,0.429
13,linear_attn.in_proj_b,0.0000000426,0.05000,0.433
14,mlp.shared_expert_gate,0.0000000004,0.05000,0.227
14,linear_attn.conv1d,fallback(rtn): 0.0046387,0.00000,0.003
14,linear_attn.in_proj_a,0.0000000660,0.05000,0.428
14,linear_attn.in_proj_b,0.0000000472,0.05000,0.438
15,mlp.shared_expert_gate,0.0000000005,0.05000,0.226
16,mlp.shared_expert_gate,0.0000000005,0.05000,0.227
16,linear_attn.conv1d,fallback(rtn): 0.0041809,0.00000,0.011
16,linear_attn.in_proj_a,0.0000000582,0.05000,0.631
16,linear_attn.in_proj_b,0.0000000415,0.05000,0.630
17,mlp.shared_expert_gate,0.0000000005,0.05000,0.228
17,linear_attn.conv1d,fallback(rtn): 0.0041809,0.00000,0.002
17,linear_attn.in_proj_b,0.0000000446,0.05000,0.462
17,linear_attn.in_proj_a,0.0000000589,0.05000,0.468
18,mlp.shared_expert_gate,0.0000000004,0.05000,0.229
18,linear_attn.conv1d,fallback(rtn): 0.0042725,0.00000,0.005
18,linear_attn.in_proj_b,0.0000000463,0.05000,0.451
18,linear_attn.in_proj_a,0.0000000575,0.05000,0.452
19,mlp.shared_expert_gate,0.0000000005,0.05000,0.229
20,mlp.shared_expert_gate,0.0000000006,0.05000,0.227
20,linear_attn.conv1d,fallback(rtn): 0.0056458,0.00000,0.004
20,linear_attn.in_proj_a,0.0000000815,0.05000,0.609
20,linear_attn.in_proj_b,0.0000000489,0.05000,0.614
21,mlp.shared_expert_gate,0.0000000005,0.05000,0.227
21,linear_attn.conv1d,fallback(rtn): 0.0043335,0.00000,0.002
21,linear_attn.in_proj_b,0.0000000441,0.05000,0.606
21,linear_attn.in_proj_a,0.0000000643,0.05000,0.613
22,mlp.shared_expert_gate,0.0000000003,0.05000,0.228
22,linear_attn.conv1d,fallback(rtn): 0.0036469,0.00000,0.011
22,linear_attn.in_proj_a,0.0000000598,0.05000,0.407
22,linear_attn.in_proj_b,0.0000000453,0.05000,0.406
23,mlp.shared_expert_gate,0.0000000004,0.05000,0.230
24,mlp.shared_expert_gate,0.0000000006,0.05000,0.227
24,linear_attn.conv1d,fallback(rtn): 0.0038605,0.00000,0.006
24,linear_attn.in_proj_b,0.0000000423,0.05000,0.613
24,linear_attn.in_proj_a,0.0000000583,0.05000,0.620
25,mlp.shared_expert_gate,0.0000000005,0.05000,0.228
25,linear_attn.conv1d,fallback(rtn): 0.0043945,0.00000,0.006
25,linear_attn.in_proj_a,0.0000000718,0.05000,0.611
25,linear_attn.in_proj_b,0.0000000451,0.05000,0.617
26,mlp.shared_expert_gate,0.0000000004,0.05000,0.230
26,linear_attn.conv1d,fallback(rtn): 0.0047913,0.00000,0.011
26,linear_attn.in_proj_a,0.0000000673,0.05000,0.440
26,linear_attn.in_proj_b,0.0000000510,0.05000,0.446
27,mlp.shared_expert_gate,0.0000000005,0.05000,0.227
28,mlp.shared_expert_gate,0.0000000005,0.05000,0.227
28,linear_attn.conv1d,fallback(rtn): 0.0040283,0.00000,0.005
28,linear_attn.in_proj_a,0.0000000644,0.05000,0.400
28,linear_attn.in_proj_b,0.0000000472,0.05000,0.403
29,mlp.shared_expert_gate,0.0000000005,0.05000,0.226
29,linear_attn.conv1d,fallback(rtn): 0.0042725,0.00000,0.010
29,linear_attn.in_proj_b,0.0000000466,0.05000,0.440
29,linear_attn.in_proj_a,0.0000000594,0.05000,0.447
30,mlp.shared_expert_gate,0.0000000005,0.05000,0.226
30,linear_attn.conv1d,fallback(rtn): 0.0045471,0.00000,0.002
30,linear_attn.in_proj_a,0.0000000686,0.05000,0.610
30,linear_attn.in_proj_b,0.0000000572,0.05000,0.618
31,mlp.shared_expert_gate,0.0000000005,0.05000,0.225
32,mlp.shared_expert_gate,0.0000000004,0.05000,0.228
32,linear_attn.conv1d,fallback(rtn): 0.0040588,0.00000,0.011
32,linear_attn.in_proj_b,0.0000000539,0.05000,0.606
32,linear_attn.in_proj_a,0.0000000782,0.05000,0.616
33,mlp.shared_expert_gate,0.0000000004,0.05000,0.227
33,linear_attn.conv1d,fallback(rtn): 0.0041504,0.00000,0.006
33,linear_attn.in_proj_a,0.0000000806,0.05000,0.615
33,linear_attn.in_proj_b,0.0000000507,0.05000,0.621
34,mlp.shared_expert_gate,0.0000000004,0.05000,0.228
34,linear_attn.conv1d,fallback(rtn): 0.0044861,0.00000,0.005
34,linear_attn.in_proj_b,0.0000000513,0.05000,0.620
34,linear_attn.in_proj_a,0.0000000973,0.05000,0.628
35,mlp.shared_expert_gate,0.0000000004,0.05000,0.225
36,mlp.shared_expert_gate,0.0000000003,0.05000,0.228
36,linear_attn.conv1d,fallback(rtn): 0.0070496,0.00000,0.003
36,linear_attn.in_proj_b,0.0000000560,0.05000,0.618
36,linear_attn.in_proj_a,0.0000001325,0.05000,0.622
37,mlp.shared_expert_gate,0.0000000003,0.05000,0.227
37,linear_attn.conv1d,fallback(rtn): 0.0072632,0.00000,0.004
37,linear_attn.in_proj_a,0.0000001435,0.05000,0.431
37,linear_attn.in_proj_b,0.0000000616,0.05000,0.438
38,mlp.shared_expert_gate,0.0000000002,0.05000,0.226
38,linear_attn.conv1d,fallback(rtn): 0.0062561,0.00000,0.003
38,linear_attn.in_proj_b,0.0000000656,0.05000,0.398
38,linear_attn.in_proj_a,0.0000001249,0.05000,0.403
39,mlp.shared_expert_gate,0.0000000002,0.05000,0.231
|