senasi-v20_gpu / quant_log.csv
thhiep's picture
SenASI v20
d46ba0c verified
layer,module,loss,samples,damp,time
0,mlp.shared_expert_gate,0.0000000001,0.05000,0.499
0,linear_attn.conv1d,fallback(rtn): 0.0025482,0.00000,0.027
0,linear_attn.in_proj_b,0.0000000155,0.05000,0.618
0,linear_attn.in_proj_a,0.0000000626,0.05000,0.624
1,mlp.shared_expert_gate,0.0000000002,0.05000,0.226
1,linear_attn.conv1d,fallback(rtn): 0.0029144,0.00000,0.003
1,linear_attn.in_proj_a,0.0000000509,0.05000,0.404
1,linear_attn.in_proj_b,0.0000000246,0.05000,0.410
2,mlp.shared_expert_gate,0.0000000002,0.05000,0.226
2,linear_attn.conv1d,fallback(rtn): 0.0023651,0.00000,0.002
2,linear_attn.in_proj_a,0.0000000371,0.05000,0.392
2,linear_attn.in_proj_b,0.0000000259,0.05000,0.393
3,mlp.shared_expert_gate,0.0000000002,0.05000,0.228
4,mlp.shared_expert_gate,0.0000000003,0.05000,0.228
4,linear_attn.conv1d,fallback(rtn): 0.0034332,0.00000,0.002
4,linear_attn.in_proj_b,0.0000000275,0.05000,0.589
4,linear_attn.in_proj_a,0.0000000457,0.05000,0.592
5,mlp.shared_expert_gate,0.0000000004,0.05000,0.230
5,linear_attn.conv1d,fallback(rtn): 0.0032959,0.00000,0.002
5,linear_attn.in_proj_a,0.0000000540,0.05000,0.592
5,linear_attn.in_proj_b,0.0000000330,0.05000,0.597
6,mlp.shared_expert_gate,0.0000000004,0.05000,0.228
6,linear_attn.conv1d,fallback(rtn): 0.0032501,0.00000,0.006
6,linear_attn.in_proj_b,0.0000000372,0.05000,0.587
6,linear_attn.in_proj_a,0.0000000537,0.05000,0.592
7,mlp.shared_expert_gate,0.0000000004,0.05000,0.226
8,mlp.shared_expert_gate,0.0000000004,0.05000,0.226
8,linear_attn.conv1d,fallback(rtn): 0.0051270,0.00000,0.003
8,linear_attn.in_proj_b,0.0000000398,0.05000,0.592
8,linear_attn.in_proj_a,0.0000000837,0.05000,0.596
9,mlp.shared_expert_gate,0.0000000003,0.05000,0.226
9,linear_attn.conv1d,fallback(rtn): 0.0042419,0.00000,0.003
9,linear_attn.in_proj_a,0.0000000661,0.05000,0.592
9,linear_attn.in_proj_b,0.0000000469,0.05000,0.598
10,mlp.shared_expert_gate,0.0000000002,0.05000,0.227
10,linear_attn.conv1d,fallback(rtn): 0.0034790,0.00000,0.007
10,linear_attn.in_proj_a,0.0000000523,0.05000,0.392
10,linear_attn.in_proj_b,0.0000000358,0.05000,0.395
11,mlp.shared_expert_gate,0.0000000003,0.05000,0.226
12,mlp.shared_expert_gate,0.0000000004,0.05000,0.227
12,linear_attn.conv1d,fallback(rtn): 0.0037537,0.00000,0.004
12,linear_attn.in_proj_a,0.0000000525,0.05000,0.467
12,linear_attn.in_proj_b,0.0000000359,0.05000,0.473
13,mlp.shared_expert_gate,0.0000000004,0.05000,0.227
13,linear_attn.conv1d,fallback(rtn): 0.0045166,0.00000,0.003
13,linear_attn.in_proj_a,0.0000000676,0.05000,0.429
13,linear_attn.in_proj_b,0.0000000426,0.05000,0.433
14,mlp.shared_expert_gate,0.0000000004,0.05000,0.227
14,linear_attn.conv1d,fallback(rtn): 0.0046387,0.00000,0.003
14,linear_attn.in_proj_a,0.0000000660,0.05000,0.428
14,linear_attn.in_proj_b,0.0000000472,0.05000,0.438
15,mlp.shared_expert_gate,0.0000000005,0.05000,0.226
16,mlp.shared_expert_gate,0.0000000005,0.05000,0.227
16,linear_attn.conv1d,fallback(rtn): 0.0041809,0.00000,0.011
16,linear_attn.in_proj_a,0.0000000582,0.05000,0.631
16,linear_attn.in_proj_b,0.0000000415,0.05000,0.630
17,mlp.shared_expert_gate,0.0000000005,0.05000,0.228
17,linear_attn.conv1d,fallback(rtn): 0.0041809,0.00000,0.002
17,linear_attn.in_proj_b,0.0000000446,0.05000,0.462
17,linear_attn.in_proj_a,0.0000000589,0.05000,0.468
18,mlp.shared_expert_gate,0.0000000004,0.05000,0.229
18,linear_attn.conv1d,fallback(rtn): 0.0042725,0.00000,0.005
18,linear_attn.in_proj_b,0.0000000463,0.05000,0.451
18,linear_attn.in_proj_a,0.0000000575,0.05000,0.452
19,mlp.shared_expert_gate,0.0000000005,0.05000,0.229
20,mlp.shared_expert_gate,0.0000000006,0.05000,0.227
20,linear_attn.conv1d,fallback(rtn): 0.0056458,0.00000,0.004
20,linear_attn.in_proj_a,0.0000000815,0.05000,0.609
20,linear_attn.in_proj_b,0.0000000489,0.05000,0.614
21,mlp.shared_expert_gate,0.0000000005,0.05000,0.227
21,linear_attn.conv1d,fallback(rtn): 0.0043335,0.00000,0.002
21,linear_attn.in_proj_b,0.0000000441,0.05000,0.606
21,linear_attn.in_proj_a,0.0000000643,0.05000,0.613
22,mlp.shared_expert_gate,0.0000000003,0.05000,0.228
22,linear_attn.conv1d,fallback(rtn): 0.0036469,0.00000,0.011
22,linear_attn.in_proj_a,0.0000000598,0.05000,0.407
22,linear_attn.in_proj_b,0.0000000453,0.05000,0.406
23,mlp.shared_expert_gate,0.0000000004,0.05000,0.230
24,mlp.shared_expert_gate,0.0000000006,0.05000,0.227
24,linear_attn.conv1d,fallback(rtn): 0.0038605,0.00000,0.006
24,linear_attn.in_proj_b,0.0000000423,0.05000,0.613
24,linear_attn.in_proj_a,0.0000000583,0.05000,0.620
25,mlp.shared_expert_gate,0.0000000005,0.05000,0.228
25,linear_attn.conv1d,fallback(rtn): 0.0043945,0.00000,0.006
25,linear_attn.in_proj_a,0.0000000718,0.05000,0.611
25,linear_attn.in_proj_b,0.0000000451,0.05000,0.617
26,mlp.shared_expert_gate,0.0000000004,0.05000,0.230
26,linear_attn.conv1d,fallback(rtn): 0.0047913,0.00000,0.011
26,linear_attn.in_proj_a,0.0000000673,0.05000,0.440
26,linear_attn.in_proj_b,0.0000000510,0.05000,0.446
27,mlp.shared_expert_gate,0.0000000005,0.05000,0.227
28,mlp.shared_expert_gate,0.0000000005,0.05000,0.227
28,linear_attn.conv1d,fallback(rtn): 0.0040283,0.00000,0.005
28,linear_attn.in_proj_a,0.0000000644,0.05000,0.400
28,linear_attn.in_proj_b,0.0000000472,0.05000,0.403
29,mlp.shared_expert_gate,0.0000000005,0.05000,0.226
29,linear_attn.conv1d,fallback(rtn): 0.0042725,0.00000,0.010
29,linear_attn.in_proj_b,0.0000000466,0.05000,0.440
29,linear_attn.in_proj_a,0.0000000594,0.05000,0.447
30,mlp.shared_expert_gate,0.0000000005,0.05000,0.226
30,linear_attn.conv1d,fallback(rtn): 0.0045471,0.00000,0.002
30,linear_attn.in_proj_a,0.0000000686,0.05000,0.610
30,linear_attn.in_proj_b,0.0000000572,0.05000,0.618
31,mlp.shared_expert_gate,0.0000000005,0.05000,0.225
32,mlp.shared_expert_gate,0.0000000004,0.05000,0.228
32,linear_attn.conv1d,fallback(rtn): 0.0040588,0.00000,0.011
32,linear_attn.in_proj_b,0.0000000539,0.05000,0.606
32,linear_attn.in_proj_a,0.0000000782,0.05000,0.616
33,mlp.shared_expert_gate,0.0000000004,0.05000,0.227
33,linear_attn.conv1d,fallback(rtn): 0.0041504,0.00000,0.006
33,linear_attn.in_proj_a,0.0000000806,0.05000,0.615
33,linear_attn.in_proj_b,0.0000000507,0.05000,0.621
34,mlp.shared_expert_gate,0.0000000004,0.05000,0.228
34,linear_attn.conv1d,fallback(rtn): 0.0044861,0.00000,0.005
34,linear_attn.in_proj_b,0.0000000513,0.05000,0.620
34,linear_attn.in_proj_a,0.0000000973,0.05000,0.628
35,mlp.shared_expert_gate,0.0000000004,0.05000,0.225
36,mlp.shared_expert_gate,0.0000000003,0.05000,0.228
36,linear_attn.conv1d,fallback(rtn): 0.0070496,0.00000,0.003
36,linear_attn.in_proj_b,0.0000000560,0.05000,0.618
36,linear_attn.in_proj_a,0.0000001325,0.05000,0.622
37,mlp.shared_expert_gate,0.0000000003,0.05000,0.227
37,linear_attn.conv1d,fallback(rtn): 0.0072632,0.00000,0.004
37,linear_attn.in_proj_a,0.0000001435,0.05000,0.431
37,linear_attn.in_proj_b,0.0000000616,0.05000,0.438
38,mlp.shared_expert_gate,0.0000000002,0.05000,0.226
38,linear_attn.conv1d,fallback(rtn): 0.0062561,0.00000,0.003
38,linear_attn.in_proj_b,0.0000000656,0.05000,0.398
38,linear_attn.in_proj_a,0.0000001249,0.05000,0.403
39,mlp.shared_expert_gate,0.0000000002,0.05000,0.231