{ "0.auto_model.encoder.layers.0.attn.Wqkv": { "type": "FactorizedLinear", "in_features": 768, "out_features": 2304, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 2304, 768 ], "tensorized_shape": "((9, 16, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.0.attn.out_proj": { "type": "FactorizedLinear", "in_features": 768, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 768 ], "tensorized_shape": "((4, 12, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.0.mlp.fc11": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.0.mlp.fc12": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.0.mlp.fc2": { "type": "FactorizedLinear", "in_features": 3072, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 3072 ], "tensorized_shape": "((4, 12, 16), (12, 16, 16))" }, "0.auto_model.encoder.layers.1.attn.Wqkv": { "type": "FactorizedLinear", "in_features": 768, "out_features": 2304, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 2304, 768 ], "tensorized_shape": "((9, 16, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.1.attn.out_proj": { "type": "FactorizedLinear", "in_features": 768, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 768 ], "tensorized_shape": "((4, 12, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.1.mlp.fc11": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.1.mlp.fc12": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.1.mlp.fc2": { "type": "FactorizedLinear", "in_features": 3072, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 3072 ], "tensorized_shape": "((4, 12, 16), (12, 16, 16))" }, "0.auto_model.encoder.layers.2.attn.Wqkv": { "type": "FactorizedLinear", "in_features": 768, "out_features": 2304, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 2304, 768 ], "tensorized_shape": "((9, 16, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.2.attn.out_proj": { "type": "FactorizedLinear", "in_features": 768, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 768 ], "tensorized_shape": "((4, 12, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.2.mlp.fc11": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.2.mlp.fc12": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.2.mlp.fc2": { "type": "FactorizedLinear", "in_features": 3072, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 3072 ], "tensorized_shape": "((4, 12, 16), (12, 16, 16))" }, "0.auto_model.encoder.layers.3.attn.Wqkv": { "type": "FactorizedLinear", "in_features": 768, "out_features": 2304, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 2304, 768 ], "tensorized_shape": "((9, 16, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.3.attn.out_proj": { "type": "FactorizedLinear", "in_features": 768, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 768 ], "tensorized_shape": "((4, 12, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.3.mlp.fc11": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.3.mlp.fc12": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.3.mlp.fc2": { "type": "FactorizedLinear", "in_features": 3072, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 3072 ], "tensorized_shape": "((4, 12, 16), (12, 16, 16))" }, "0.auto_model.encoder.layers.4.attn.Wqkv": { "type": "FactorizedLinear", "in_features": 768, "out_features": 2304, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 2304, 768 ], "tensorized_shape": "((9, 16, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.4.attn.out_proj": { "type": "FactorizedLinear", "in_features": 768, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 768 ], "tensorized_shape": "((4, 12, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.4.mlp.fc11": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.4.mlp.fc12": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.4.mlp.fc2": { "type": "FactorizedLinear", "in_features": 3072, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 3072 ], "tensorized_shape": "((4, 12, 16), (12, 16, 16))" }, "0.auto_model.encoder.layers.5.attn.Wqkv": { "type": "FactorizedLinear", "in_features": 768, "out_features": 2304, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 2304, 768 ], "tensorized_shape": "((9, 16, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.5.attn.out_proj": { "type": "FactorizedLinear", "in_features": 768, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 768 ], "tensorized_shape": "((4, 12, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.5.mlp.fc11": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.5.mlp.fc12": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.5.mlp.fc2": { "type": "FactorizedLinear", "in_features": 3072, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 3072 ], "tensorized_shape": "((4, 12, 16), (12, 16, 16))" }, "0.auto_model.encoder.layers.6.attn.Wqkv": { "type": "FactorizedLinear", "in_features": 768, "out_features": 2304, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 2304, 768 ], "tensorized_shape": "((9, 16, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.6.attn.out_proj": { "type": "FactorizedLinear", "in_features": 768, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 768 ], "tensorized_shape": "((4, 12, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.6.mlp.fc11": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.6.mlp.fc12": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.6.mlp.fc2": { "type": "FactorizedLinear", "in_features": 3072, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 3072 ], "tensorized_shape": "((4, 12, 16), (12, 16, 16))" }, "0.auto_model.encoder.layers.7.attn.Wqkv": { "type": "FactorizedLinear", "in_features": 768, "out_features": 2304, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 2304, 768 ], "tensorized_shape": "((9, 16, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.7.attn.out_proj": { "type": "FactorizedLinear", "in_features": 768, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 768 ], "tensorized_shape": "((4, 12, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.7.mlp.fc11": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.7.mlp.fc12": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.7.mlp.fc2": { "type": "FactorizedLinear", "in_features": 3072, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 3072 ], "tensorized_shape": "((4, 12, 16), (12, 16, 16))" }, "0.auto_model.encoder.layers.8.attn.Wqkv": { "type": "FactorizedLinear", "in_features": 768, "out_features": 2304, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 2304, 768 ], "tensorized_shape": "((9, 16, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.8.attn.out_proj": { "type": "FactorizedLinear", "in_features": 768, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 768 ], "tensorized_shape": "((4, 12, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.8.mlp.fc11": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.8.mlp.fc12": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.8.mlp.fc2": { "type": "FactorizedLinear", "in_features": 3072, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 3072 ], "tensorized_shape": "((4, 12, 16), (12, 16, 16))" }, "0.auto_model.encoder.layers.9.attn.Wqkv": { "type": "FactorizedLinear", "in_features": 768, "out_features": 2304, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 2304, 768 ], "tensorized_shape": "((9, 16, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.9.attn.out_proj": { "type": "FactorizedLinear", "in_features": 768, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 768 ], "tensorized_shape": "((4, 12, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.9.mlp.fc11": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.9.mlp.fc12": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.9.mlp.fc2": { "type": "FactorizedLinear", "in_features": 3072, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 3072 ], "tensorized_shape": "((4, 12, 16), (12, 16, 16))" }, "0.auto_model.encoder.layers.10.attn.Wqkv": { "type": "FactorizedLinear", "in_features": 768, "out_features": 2304, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 2304, 768 ], "tensorized_shape": "((9, 16, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.10.attn.out_proj": { "type": "FactorizedLinear", "in_features": 768, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 768 ], "tensorized_shape": "((4, 12, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.10.mlp.fc11": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.10.mlp.fc12": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.10.mlp.fc2": { "type": "FactorizedLinear", "in_features": 3072, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 3072 ], "tensorized_shape": "((4, 12, 16), (12, 16, 16))" }, "0.auto_model.encoder.layers.11.attn.Wqkv": { "type": "FactorizedLinear", "in_features": 768, "out_features": 2304, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 2304, 768 ], "tensorized_shape": "((9, 16, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.11.attn.out_proj": { "type": "FactorizedLinear", "in_features": 768, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 768 ], "tensorized_shape": "((4, 12, 16), (4, 12, 16))" }, "0.auto_model.encoder.layers.11.mlp.fc11": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.11.mlp.fc12": { "type": "FactorizedLinear", "in_features": 768, "out_features": 3072, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 3072, 768 ], "tensorized_shape": "((8, 16, 24), (4, 12, 16))" }, "0.auto_model.encoder.layers.11.mlp.fc2": { "type": "FactorizedLinear", "in_features": 3072, "out_features": 768, "bias": true, "rank": 4, "factorization": "cp", "weight_shape": [ 768, 3072 ], "tensorized_shape": "((4, 12, 16), (12, 16, 16))" } }