bert-base-uncased-squadv1-52.0-sparse / linear_layer_sparse_stats_total_41M_52.0_relative_sparsity.csv
Chua, Vui Seng
Initial model commit
b0990e7
,linear_id,shape,param_count,nnz_count
0,bert.encoder.layer.0.attention.self.query,"[768, 768]",589824,93964
1,bert.encoder.layer.0.attention.self.key,"[768, 768]",589824,73700
2,bert.encoder.layer.0.attention.self.value,"[768, 768]",589824,137467
3,bert.encoder.layer.0.attention.output.dense,"[768, 768]",589824,160390
4,bert.encoder.layer.0.intermediate.dense,"[3072, 768]",2359296,1709314
5,bert.encoder.layer.0.output.dense,"[768, 3072]",2359296,1641896
6,bert.encoder.layer.1.attention.self.query,"[768, 768]",589824,159970
7,bert.encoder.layer.1.attention.self.key,"[768, 768]",589824,141738
8,bert.encoder.layer.1.attention.self.value,"[768, 768]",589824,162459
9,bert.encoder.layer.1.attention.output.dense,"[768, 768]",589824,172052
10,bert.encoder.layer.1.intermediate.dense,"[3072, 768]",2359296,1741238
11,bert.encoder.layer.1.output.dense,"[768, 3072]",2359296,1626278
12,bert.encoder.layer.2.attention.self.query,"[768, 768]",589824,186643
13,bert.encoder.layer.2.attention.self.key,"[768, 768]",589824,178576
14,bert.encoder.layer.2.attention.self.value,"[768, 768]",589824,192860
15,bert.encoder.layer.2.attention.output.dense,"[768, 768]",589824,192555
16,bert.encoder.layer.2.intermediate.dense,"[3072, 768]",2359296,1768328
17,bert.encoder.layer.2.output.dense,"[768, 3072]",2359296,1655075
18,bert.encoder.layer.3.attention.self.query,"[768, 768]",589824,198697
19,bert.encoder.layer.3.attention.self.key,"[768, 768]",589824,196134
20,bert.encoder.layer.3.attention.self.value,"[768, 768]",589824,237622
21,bert.encoder.layer.3.attention.output.dense,"[768, 768]",589824,230620
22,bert.encoder.layer.3.intermediate.dense,"[3072, 768]",2359296,1766508
23,bert.encoder.layer.3.output.dense,"[768, 3072]",2359296,1642235
24,bert.encoder.layer.4.attention.self.query,"[768, 768]",589824,197935
25,bert.encoder.layer.4.attention.self.key,"[768, 768]",589824,194664
26,bert.encoder.layer.4.attention.self.value,"[768, 768]",589824,249031
27,bert.encoder.layer.4.attention.output.dense,"[768, 768]",589824,246498
28,bert.encoder.layer.4.intermediate.dense,"[3072, 768]",2359296,1760924
29,bert.encoder.layer.4.output.dense,"[768, 3072]",2359296,1621864
30,bert.encoder.layer.5.attention.self.query,"[768, 768]",589824,183234
31,bert.encoder.layer.5.attention.self.key,"[768, 768]",589824,190974
32,bert.encoder.layer.5.attention.self.value,"[768, 768]",589824,252216
33,bert.encoder.layer.5.attention.output.dense,"[768, 768]",589824,244656
34,bert.encoder.layer.5.intermediate.dense,"[3072, 768]",2359296,1779234
35,bert.encoder.layer.5.output.dense,"[768, 3072]",2359296,1623945
36,bert.encoder.layer.6.attention.self.query,"[768, 768]",589824,185037
37,bert.encoder.layer.6.attention.self.key,"[768, 768]",589824,195911
38,bert.encoder.layer.6.attention.self.value,"[768, 768]",589824,253876
39,bert.encoder.layer.6.attention.output.dense,"[768, 768]",589824,239963
40,bert.encoder.layer.6.intermediate.dense,"[3072, 768]",2359296,1729192
41,bert.encoder.layer.6.output.dense,"[768, 3072]",2359296,1551791
42,bert.encoder.layer.7.attention.self.query,"[768, 768]",589824,141144
43,bert.encoder.layer.7.attention.self.key,"[768, 768]",589824,154678
44,bert.encoder.layer.7.attention.self.value,"[768, 768]",589824,216034
45,bert.encoder.layer.7.attention.output.dense,"[768, 768]",589824,203449
46,bert.encoder.layer.7.intermediate.dense,"[3072, 768]",2359296,1588111
47,bert.encoder.layer.7.output.dense,"[768, 3072]",2359296,1430335
48,bert.encoder.layer.8.attention.self.query,"[768, 768]",589824,171025
49,bert.encoder.layer.8.attention.self.key,"[768, 768]",589824,176631
50,bert.encoder.layer.8.attention.self.value,"[768, 768]",589824,249792
51,bert.encoder.layer.8.attention.output.dense,"[768, 768]",589824,221322
52,bert.encoder.layer.8.intermediate.dense,"[3072, 768]",2359296,1369543
53,bert.encoder.layer.8.output.dense,"[768, 3072]",2359296,1208818
54,bert.encoder.layer.9.attention.self.query,"[768, 768]",589824,143797
55,bert.encoder.layer.9.attention.self.key,"[768, 768]",589824,143411
56,bert.encoder.layer.9.attention.self.value,"[768, 768]",589824,122373
57,bert.encoder.layer.9.attention.output.dense,"[768, 768]",589824,107006
58,bert.encoder.layer.9.intermediate.dense,"[3072, 768]",2359296,918887
59,bert.encoder.layer.9.output.dense,"[768, 3072]",2359296,802323
60,bert.encoder.layer.10.attention.self.query,"[768, 768]",589824,98505
61,bert.encoder.layer.10.attention.self.key,"[768, 768]",589824,99921
62,bert.encoder.layer.10.attention.self.value,"[768, 768]",589824,66491
63,bert.encoder.layer.10.attention.output.dense,"[768, 768]",589824,51630
64,bert.encoder.layer.10.intermediate.dense,"[3072, 768]",2359296,652757
65,bert.encoder.layer.10.output.dense,"[768, 3072]",2359296,533202
66,bert.encoder.layer.11.attention.self.query,"[768, 768]",589824,50457
67,bert.encoder.layer.11.attention.self.key,"[768, 768]",589824,55196
68,bert.encoder.layer.11.attention.self.value,"[768, 768]",589824,40581
69,bert.encoder.layer.11.attention.output.dense,"[768, 768]",589824,23933
70,bert.encoder.layer.11.intermediate.dense,"[3072, 768]",2359296,535793
71,bert.encoder.layer.11.output.dense,"[768, 3072]",2359296,255106