| { | |
| "0.auto_model.encoder.layers.0.attn.Wqkv": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 2304, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "tensorized_shape": "((9, 16, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.0.attn.out_proj": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.0.mlp.fc11": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.0.mlp.fc12": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.0.mlp.fc2": { | |
| "type": "FactorizedLinear", | |
| "in_features": 3072, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (12, 16, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.1.attn.Wqkv": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 2304, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "tensorized_shape": "((9, 16, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.1.attn.out_proj": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.1.mlp.fc11": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.1.mlp.fc12": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.1.mlp.fc2": { | |
| "type": "FactorizedLinear", | |
| "in_features": 3072, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (12, 16, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.2.attn.Wqkv": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 2304, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "tensorized_shape": "((9, 16, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.2.attn.out_proj": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.2.mlp.fc11": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.2.mlp.fc12": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.2.mlp.fc2": { | |
| "type": "FactorizedLinear", | |
| "in_features": 3072, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (12, 16, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.3.attn.Wqkv": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 2304, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "tensorized_shape": "((9, 16, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.3.attn.out_proj": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.3.mlp.fc11": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.3.mlp.fc12": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.3.mlp.fc2": { | |
| "type": "FactorizedLinear", | |
| "in_features": 3072, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (12, 16, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.4.attn.Wqkv": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 2304, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "tensorized_shape": "((9, 16, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.4.attn.out_proj": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.4.mlp.fc11": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.4.mlp.fc12": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.4.mlp.fc2": { | |
| "type": "FactorizedLinear", | |
| "in_features": 3072, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (12, 16, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.5.attn.Wqkv": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 2304, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "tensorized_shape": "((9, 16, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.5.attn.out_proj": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.5.mlp.fc11": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.5.mlp.fc12": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.5.mlp.fc2": { | |
| "type": "FactorizedLinear", | |
| "in_features": 3072, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (12, 16, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.6.attn.Wqkv": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 2304, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "tensorized_shape": "((9, 16, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.6.attn.out_proj": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.6.mlp.fc11": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.6.mlp.fc12": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.6.mlp.fc2": { | |
| "type": "FactorizedLinear", | |
| "in_features": 3072, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (12, 16, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.7.attn.Wqkv": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 2304, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "tensorized_shape": "((9, 16, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.7.attn.out_proj": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.7.mlp.fc11": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.7.mlp.fc12": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.7.mlp.fc2": { | |
| "type": "FactorizedLinear", | |
| "in_features": 3072, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (12, 16, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.8.attn.Wqkv": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 2304, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "tensorized_shape": "((9, 16, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.8.attn.out_proj": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.8.mlp.fc11": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.8.mlp.fc12": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.8.mlp.fc2": { | |
| "type": "FactorizedLinear", | |
| "in_features": 3072, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (12, 16, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.9.attn.Wqkv": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 2304, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "tensorized_shape": "((9, 16, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.9.attn.out_proj": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.9.mlp.fc11": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.9.mlp.fc12": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.9.mlp.fc2": { | |
| "type": "FactorizedLinear", | |
| "in_features": 3072, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (12, 16, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.10.attn.Wqkv": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 2304, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "tensorized_shape": "((9, 16, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.10.attn.out_proj": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.10.mlp.fc11": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.10.mlp.fc12": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.10.mlp.fc2": { | |
| "type": "FactorizedLinear", | |
| "in_features": 3072, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (12, 16, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.11.attn.Wqkv": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 2304, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "tensorized_shape": "((9, 16, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.11.attn.out_proj": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.11.mlp.fc11": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.11.mlp.fc12": { | |
| "type": "FactorizedLinear", | |
| "in_features": 768, | |
| "out_features": 3072, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 3072, | |
| 768 | |
| ], | |
| "tensorized_shape": "((8, 16, 24), (4, 12, 16))" | |
| }, | |
| "0.auto_model.encoder.layers.11.mlp.fc2": { | |
| "type": "FactorizedLinear", | |
| "in_features": 3072, | |
| "out_features": 768, | |
| "bias": true, | |
| "rank": 4, | |
| "factorization": "cp", | |
| "weight_shape": [ | |
| 768, | |
| 3072 | |
| ], | |
| "tensorized_shape": "((4, 12, 16), (12, 16, 16))" | |
| } | |
| } |