| ,layer_id,layer_type,param_type,shape,nparam,nnz,sparsity |
| 0,bert.embeddings.word_embeddings,Embedding,weight,"[30522, 768]",23440896,23440896,0.0 |
| 1,bert.embeddings.position_embeddings,Embedding,weight,"[512, 768]",393216,393216,0.0 |
| 2,bert.embeddings.token_type_embeddings,Embedding,weight,"[2, 768]",1536,1536,0.0 |
| 3,bert.embeddings.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 4,bert.embeddings.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 5,bert.encoder.layer.0.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 6,bert.encoder.layer.0.attention.self.query,Linear,bias,[768],768,768,0.0 |
| 7,bert.encoder.layer.0.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 8,bert.encoder.layer.0.attention.self.key,Linear,bias,[768],768,768,0.0 |
| 9,bert.encoder.layer.0.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 10,bert.encoder.layer.0.attention.self.value,Linear,bias,[768],768,768,0.0 |
| 11,bert.encoder.layer.0.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 12,bert.encoder.layer.0.attention.output.dense,Linear,bias,[768],768,768,0.0 |
| 13,bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 14,bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 15,bert.encoder.layer.0.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 |
| 16,bert.encoder.layer.0.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
| 17,bert.encoder.layer.0.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 |
| 18,bert.encoder.layer.0.output.dense,Linear,bias,[768],768,768,0.0 |
| 19,bert.encoder.layer.0.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 20,bert.encoder.layer.0.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 21,bert.encoder.layer.1.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 22,bert.encoder.layer.1.attention.self.query,Linear,bias,[768],768,768,0.0 |
| 23,bert.encoder.layer.1.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 24,bert.encoder.layer.1.attention.self.key,Linear,bias,[768],768,768,0.0 |
| 25,bert.encoder.layer.1.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 26,bert.encoder.layer.1.attention.self.value,Linear,bias,[768],768,768,0.0 |
| 27,bert.encoder.layer.1.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 28,bert.encoder.layer.1.attention.output.dense,Linear,bias,[768],768,768,0.0 |
| 29,bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 30,bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 31,bert.encoder.layer.1.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 |
| 32,bert.encoder.layer.1.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
| 33,bert.encoder.layer.1.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 |
| 34,bert.encoder.layer.1.output.dense,Linear,bias,[768],768,768,0.0 |
| 35,bert.encoder.layer.1.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 36,bert.encoder.layer.1.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 37,bert.encoder.layer.2.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 38,bert.encoder.layer.2.attention.self.query,Linear,bias,[768],768,768,0.0 |
| 39,bert.encoder.layer.2.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 40,bert.encoder.layer.2.attention.self.key,Linear,bias,[768],768,768,0.0 |
| 41,bert.encoder.layer.2.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 42,bert.encoder.layer.2.attention.self.value,Linear,bias,[768],768,768,0.0 |
| 43,bert.encoder.layer.2.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 44,bert.encoder.layer.2.attention.output.dense,Linear,bias,[768],768,768,0.0 |
| 45,bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 46,bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 47,bert.encoder.layer.2.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 |
| 48,bert.encoder.layer.2.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
| 49,bert.encoder.layer.2.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 |
| 50,bert.encoder.layer.2.output.dense,Linear,bias,[768],768,768,0.0 |
| 51,bert.encoder.layer.2.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 52,bert.encoder.layer.2.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 53,bert.encoder.layer.3.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 54,bert.encoder.layer.3.attention.self.query,Linear,bias,[768],768,768,0.0 |
| 55,bert.encoder.layer.3.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 56,bert.encoder.layer.3.attention.self.key,Linear,bias,[768],768,768,0.0 |
| 57,bert.encoder.layer.3.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 58,bert.encoder.layer.3.attention.self.value,Linear,bias,[768],768,768,0.0 |
| 59,bert.encoder.layer.3.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 60,bert.encoder.layer.3.attention.output.dense,Linear,bias,[768],768,768,0.0 |
| 61,bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 62,bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 63,bert.encoder.layer.3.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 |
| 64,bert.encoder.layer.3.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
| 65,bert.encoder.layer.3.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 |
| 66,bert.encoder.layer.3.output.dense,Linear,bias,[768],768,768,0.0 |
| 67,bert.encoder.layer.3.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 68,bert.encoder.layer.3.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 69,bert.encoder.layer.4.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 70,bert.encoder.layer.4.attention.self.query,Linear,bias,[768],768,768,0.0 |
| 71,bert.encoder.layer.4.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 72,bert.encoder.layer.4.attention.self.key,Linear,bias,[768],768,768,0.0 |
| 73,bert.encoder.layer.4.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 74,bert.encoder.layer.4.attention.self.value,Linear,bias,[768],768,768,0.0 |
| 75,bert.encoder.layer.4.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 76,bert.encoder.layer.4.attention.output.dense,Linear,bias,[768],768,768,0.0 |
| 77,bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 78,bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 79,bert.encoder.layer.4.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 |
| 80,bert.encoder.layer.4.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
| 81,bert.encoder.layer.4.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 |
| 82,bert.encoder.layer.4.output.dense,Linear,bias,[768],768,768,0.0 |
| 83,bert.encoder.layer.4.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 84,bert.encoder.layer.4.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 85,bert.encoder.layer.5.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 86,bert.encoder.layer.5.attention.self.query,Linear,bias,[768],768,768,0.0 |
| 87,bert.encoder.layer.5.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 88,bert.encoder.layer.5.attention.self.key,Linear,bias,[768],768,768,0.0 |
| 89,bert.encoder.layer.5.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 90,bert.encoder.layer.5.attention.self.value,Linear,bias,[768],768,768,0.0 |
| 91,bert.encoder.layer.5.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 92,bert.encoder.layer.5.attention.output.dense,Linear,bias,[768],768,768,0.0 |
| 93,bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 94,bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 95,bert.encoder.layer.5.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 |
| 96,bert.encoder.layer.5.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
| 97,bert.encoder.layer.5.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 |
| 98,bert.encoder.layer.5.output.dense,Linear,bias,[768],768,768,0.0 |
| 99,bert.encoder.layer.5.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 100,bert.encoder.layer.5.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 101,bert.encoder.layer.6.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 102,bert.encoder.layer.6.attention.self.query,Linear,bias,[768],768,768,0.0 |
| 103,bert.encoder.layer.6.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 104,bert.encoder.layer.6.attention.self.key,Linear,bias,[768],768,768,0.0 |
| 105,bert.encoder.layer.6.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 106,bert.encoder.layer.6.attention.self.value,Linear,bias,[768],768,768,0.0 |
| 107,bert.encoder.layer.6.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 108,bert.encoder.layer.6.attention.output.dense,Linear,bias,[768],768,768,0.0 |
| 109,bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 110,bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 111,bert.encoder.layer.6.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 |
| 112,bert.encoder.layer.6.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
| 113,bert.encoder.layer.6.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 |
| 114,bert.encoder.layer.6.output.dense,Linear,bias,[768],768,768,0.0 |
| 115,bert.encoder.layer.6.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 116,bert.encoder.layer.6.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 117,bert.encoder.layer.7.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 118,bert.encoder.layer.7.attention.self.query,Linear,bias,[768],768,768,0.0 |
| 119,bert.encoder.layer.7.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 120,bert.encoder.layer.7.attention.self.key,Linear,bias,[768],768,768,0.0 |
| 121,bert.encoder.layer.7.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 122,bert.encoder.layer.7.attention.self.value,Linear,bias,[768],768,768,0.0 |
| 123,bert.encoder.layer.7.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 124,bert.encoder.layer.7.attention.output.dense,Linear,bias,[768],768,768,0.0 |
| 125,bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 126,bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 127,bert.encoder.layer.7.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 |
| 128,bert.encoder.layer.7.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
| 129,bert.encoder.layer.7.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 |
| 130,bert.encoder.layer.7.output.dense,Linear,bias,[768],768,768,0.0 |
| 131,bert.encoder.layer.7.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 132,bert.encoder.layer.7.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 133,bert.encoder.layer.8.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 134,bert.encoder.layer.8.attention.self.query,Linear,bias,[768],768,768,0.0 |
| 135,bert.encoder.layer.8.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 136,bert.encoder.layer.8.attention.self.key,Linear,bias,[768],768,768,0.0 |
| 137,bert.encoder.layer.8.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 138,bert.encoder.layer.8.attention.self.value,Linear,bias,[768],768,768,0.0 |
| 139,bert.encoder.layer.8.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 140,bert.encoder.layer.8.attention.output.dense,Linear,bias,[768],768,768,0.0 |
| 141,bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 142,bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 143,bert.encoder.layer.8.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 |
| 144,bert.encoder.layer.8.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
| 145,bert.encoder.layer.8.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 |
| 146,bert.encoder.layer.8.output.dense,Linear,bias,[768],768,768,0.0 |
| 147,bert.encoder.layer.8.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 148,bert.encoder.layer.8.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 149,bert.encoder.layer.9.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 150,bert.encoder.layer.9.attention.self.query,Linear,bias,[768],768,768,0.0 |
| 151,bert.encoder.layer.9.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 152,bert.encoder.layer.9.attention.self.key,Linear,bias,[768],768,768,0.0 |
| 153,bert.encoder.layer.9.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 154,bert.encoder.layer.9.attention.self.value,Linear,bias,[768],768,768,0.0 |
| 155,bert.encoder.layer.9.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 156,bert.encoder.layer.9.attention.output.dense,Linear,bias,[768],768,768,0.0 |
| 157,bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 158,bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 159,bert.encoder.layer.9.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 |
| 160,bert.encoder.layer.9.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
| 161,bert.encoder.layer.9.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 |
| 162,bert.encoder.layer.9.output.dense,Linear,bias,[768],768,768,0.0 |
| 163,bert.encoder.layer.9.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 164,bert.encoder.layer.9.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 165,bert.encoder.layer.10.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 166,bert.encoder.layer.10.attention.self.query,Linear,bias,[768],768,768,0.0 |
| 167,bert.encoder.layer.10.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 168,bert.encoder.layer.10.attention.self.key,Linear,bias,[768],768,768,0.0 |
| 169,bert.encoder.layer.10.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 170,bert.encoder.layer.10.attention.self.value,Linear,bias,[768],768,768,0.0 |
| 171,bert.encoder.layer.10.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 172,bert.encoder.layer.10.attention.output.dense,Linear,bias,[768],768,768,0.0 |
| 173,bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 174,bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 175,bert.encoder.layer.10.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 |
| 176,bert.encoder.layer.10.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
| 177,bert.encoder.layer.10.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 |
| 178,bert.encoder.layer.10.output.dense,Linear,bias,[768],768,768,0.0 |
| 179,bert.encoder.layer.10.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 180,bert.encoder.layer.10.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 181,bert.encoder.layer.11.attention.self.query,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 182,bert.encoder.layer.11.attention.self.query,Linear,bias,[768],768,768,0.0 |
| 183,bert.encoder.layer.11.attention.self.key,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 184,bert.encoder.layer.11.attention.self.key,Linear,bias,[768],768,768,0.0 |
| 185,bert.encoder.layer.11.attention.self.value,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 186,bert.encoder.layer.11.attention.self.value,Linear,bias,[768],768,768,0.0 |
| 187,bert.encoder.layer.11.attention.output.dense,Linear,weight,"[768, 768]",589824,589824,0.0 |
| 188,bert.encoder.layer.11.attention.output.dense,Linear,bias,[768],768,768,0.0 |
| 189,bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 190,bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 191,bert.encoder.layer.11.intermediate.dense,Linear,weight,"[3072, 768]",2359296,2359296,0.0 |
| 192,bert.encoder.layer.11.intermediate.dense,Linear,bias,[3072],3072,3072,0.0 |
| 193,bert.encoder.layer.11.output.dense,Linear,weight,"[768, 3072]",2359296,2359296,0.0 |
| 194,bert.encoder.layer.11.output.dense,Linear,bias,[768],768,768,0.0 |
| 195,bert.encoder.layer.11.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 |
| 196,bert.encoder.layer.11.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 |
| 197,qa_outputs,Linear,weight,"[2, 768]",1536,1536,0.0 |
| 198,qa_outputs,Linear,bias,[2],2,2,0.0 |
|
|