bert-base-squadv1 / linear_layer_sparsity_85M_params_0.00_sparsity.md
Chua, Vui Seng
Add collaterals
d96569c
layer_id layer_type param_type shape nparam nnz sparsity
5 bert.encoder.layer.0.attention.self.query Linear weight [768, 768] 589824 589824 0
7 bert.encoder.layer.0.attention.self.key Linear weight [768, 768] 589824 589824 0
9 bert.encoder.layer.0.attention.self.value Linear weight [768, 768] 589824 589824 0
11 bert.encoder.layer.0.attention.output.dense Linear weight [768, 768] 589824 589824 0
15 bert.encoder.layer.0.intermediate.dense Linear weight [3072, 768] 2359296 2359296 0
17 bert.encoder.layer.0.output.dense Linear weight [768, 3072] 2359296 2359296 0
21 bert.encoder.layer.1.attention.self.query Linear weight [768, 768] 589824 589824 0
23 bert.encoder.layer.1.attention.self.key Linear weight [768, 768] 589824 589824 0
25 bert.encoder.layer.1.attention.self.value Linear weight [768, 768] 589824 589824 0
27 bert.encoder.layer.1.attention.output.dense Linear weight [768, 768] 589824 589824 0
31 bert.encoder.layer.1.intermediate.dense Linear weight [3072, 768] 2359296 2359296 0
33 bert.encoder.layer.1.output.dense Linear weight [768, 3072] 2359296 2359296 0
37 bert.encoder.layer.2.attention.self.query Linear weight [768, 768] 589824 589824 0
39 bert.encoder.layer.2.attention.self.key Linear weight [768, 768] 589824 589824 0
41 bert.encoder.layer.2.attention.self.value Linear weight [768, 768] 589824 589824 0
43 bert.encoder.layer.2.attention.output.dense Linear weight [768, 768] 589824 589824 0
47 bert.encoder.layer.2.intermediate.dense Linear weight [3072, 768] 2359296 2359296 0
49 bert.encoder.layer.2.output.dense Linear weight [768, 3072] 2359296 2359296 0
53 bert.encoder.layer.3.attention.self.query Linear weight [768, 768] 589824 589824 0
55 bert.encoder.layer.3.attention.self.key Linear weight [768, 768] 589824 589824 0
57 bert.encoder.layer.3.attention.self.value Linear weight [768, 768] 589824 589824 0
59 bert.encoder.layer.3.attention.output.dense Linear weight [768, 768] 589824 589824 0
63 bert.encoder.layer.3.intermediate.dense Linear weight [3072, 768] 2359296 2359296 0
65 bert.encoder.layer.3.output.dense Linear weight [768, 3072] 2359296 2359296 0
69 bert.encoder.layer.4.attention.self.query Linear weight [768, 768] 589824 589824 0
71 bert.encoder.layer.4.attention.self.key Linear weight [768, 768] 589824 589824 0
73 bert.encoder.layer.4.attention.self.value Linear weight [768, 768] 589824 589824 0
75 bert.encoder.layer.4.attention.output.dense Linear weight [768, 768] 589824 589824 0
79 bert.encoder.layer.4.intermediate.dense Linear weight [3072, 768] 2359296 2359296 0
81 bert.encoder.layer.4.output.dense Linear weight [768, 3072] 2359296 2359296 0
85 bert.encoder.layer.5.attention.self.query Linear weight [768, 768] 589824 589824 0
87 bert.encoder.layer.5.attention.self.key Linear weight [768, 768] 589824 589824 0
89 bert.encoder.layer.5.attention.self.value Linear weight [768, 768] 589824 589824 0
91 bert.encoder.layer.5.attention.output.dense Linear weight [768, 768] 589824 589824 0
95 bert.encoder.layer.5.intermediate.dense Linear weight [3072, 768] 2359296 2359296 0
97 bert.encoder.layer.5.output.dense Linear weight [768, 3072] 2359296 2359296 0
101 bert.encoder.layer.6.attention.self.query Linear weight [768, 768] 589824 589824 0
103 bert.encoder.layer.6.attention.self.key Linear weight [768, 768] 589824 589824 0
105 bert.encoder.layer.6.attention.self.value Linear weight [768, 768] 589824 589824 0
107 bert.encoder.layer.6.attention.output.dense Linear weight [768, 768] 589824 589824 0
111 bert.encoder.layer.6.intermediate.dense Linear weight [3072, 768] 2359296 2359296 0
113 bert.encoder.layer.6.output.dense Linear weight [768, 3072] 2359296 2359296 0
117 bert.encoder.layer.7.attention.self.query Linear weight [768, 768] 589824 589824 0
119 bert.encoder.layer.7.attention.self.key Linear weight [768, 768] 589824 589824 0
121 bert.encoder.layer.7.attention.self.value Linear weight [768, 768] 589824 589824 0
123 bert.encoder.layer.7.attention.output.dense Linear weight [768, 768] 589824 589824 0
127 bert.encoder.layer.7.intermediate.dense Linear weight [3072, 768] 2359296 2359296 0
129 bert.encoder.layer.7.output.dense Linear weight [768, 3072] 2359296 2359296 0
133 bert.encoder.layer.8.attention.self.query Linear weight [768, 768] 589824 589824 0
135 bert.encoder.layer.8.attention.self.key Linear weight [768, 768] 589824 589824 0
137 bert.encoder.layer.8.attention.self.value Linear weight [768, 768] 589824 589824 0
139 bert.encoder.layer.8.attention.output.dense Linear weight [768, 768] 589824 589824 0
143 bert.encoder.layer.8.intermediate.dense Linear weight [3072, 768] 2359296 2359296 0
145 bert.encoder.layer.8.output.dense Linear weight [768, 3072] 2359296 2359296 0
149 bert.encoder.layer.9.attention.self.query Linear weight [768, 768] 589824 589824 0
151 bert.encoder.layer.9.attention.self.key Linear weight [768, 768] 589824 589824 0
153 bert.encoder.layer.9.attention.self.value Linear weight [768, 768] 589824 589824 0
155 bert.encoder.layer.9.attention.output.dense Linear weight [768, 768] 589824 589824 0
159 bert.encoder.layer.9.intermediate.dense Linear weight [3072, 768] 2359296 2359296 0
161 bert.encoder.layer.9.output.dense Linear weight [768, 3072] 2359296 2359296 0
165 bert.encoder.layer.10.attention.self.query Linear weight [768, 768] 589824 589824 0
167 bert.encoder.layer.10.attention.self.key Linear weight [768, 768] 589824 589824 0
169 bert.encoder.layer.10.attention.self.value Linear weight [768, 768] 589824 589824 0
171 bert.encoder.layer.10.attention.output.dense Linear weight [768, 768] 589824 589824 0
175 bert.encoder.layer.10.intermediate.dense Linear weight [3072, 768] 2359296 2359296 0
177 bert.encoder.layer.10.output.dense Linear weight [768, 3072] 2359296 2359296 0
181 bert.encoder.layer.11.attention.self.query Linear weight [768, 768] 589824 589824 0
183 bert.encoder.layer.11.attention.self.key Linear weight [768, 768] 589824 589824 0
185 bert.encoder.layer.11.attention.self.value Linear weight [768, 768] 589824 589824 0
187 bert.encoder.layer.11.attention.output.dense Linear weight [768, 768] 589824 589824 0
191 bert.encoder.layer.11.intermediate.dense Linear weight [3072, 768] 2359296 2359296 0
193 bert.encoder.layer.11.output.dense Linear weight [768, 3072] 2359296 2359296 0