Feature Extraction
MLX
sentence-transformers
xlm-roberta
embeddings
multilingual
quantized
int8
q8
revis
text-embeddings-inference
Instructions to use mavis-ai/Multilingual-e5-large-Q8 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- MLX
How to use mavis-ai/Multilingual-e5-large-Q8 with MLX:
```bash
# Download the model from the Hub
pip install huggingface_hub[hf_xet]
huggingface-cli download --local-dir Multilingual-e5-large-Q8 mavis-ai/Multilingual-e5-large-Q8
```
- sentence-transformers
How to use mavis-ai/Multilingual-e5-large-Q8 with sentence-transformers:
```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("mavis-ai/Multilingual-e5-large-Q8")

sentences = [
    "The weather is lovely today.",
    "It's so sunny outside!",
    "He drove to the stadium."
]
embeddings = model.encode(sentences)

similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- LM Studio
| { | |
| "schemaVersion": 2, | |
| "format": "revis-xlm-roberta-e5-mlx-native-q8", | |
| "source": "intfloat/multilingual-e5-large", | |
| "baseWeights": "weights.00.safetensors", | |
| "quantizedWeights": "weights.00.safetensors", | |
| "quantization": { | |
| "type": "mlx-native-affine", | |
| "bits": 8, | |
| "group_size": 64, | |
| "mode": "affine", | |
| "quantizedTensorSuffix": ".qweight", | |
| "scaleTensorSuffix": ".scales", | |
| "biasTensorSuffix": ".biases", | |
| "linear": "mx.quantized_matmul(x, qweight, scales=scales, biases=biases, transpose=True, group_size=group_size, bits=bits, mode=mode)", | |
| "embedding": "mx.dequantize(qweight[ids], scales=scales[ids], biases=biases[ids], group_size=group_size, bits=bits, mode=mode)" | |
| }, | |
| "quantized": [ | |
| { | |
| "name": "embeddings.position_embeddings.weight", | |
| "shape": [ | |
| 514, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "embeddings.position_embeddings.weight.qweight", | |
| "scales": "embeddings.position_embeddings.weight.scales", | |
| "biases": "embeddings.position_embeddings.weight.biases" | |
| }, | |
| { | |
| "name": "embeddings.token_type_embeddings.weight", | |
| "shape": [ | |
| 1, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "embeddings.token_type_embeddings.weight.qweight", | |
| "scales": "embeddings.token_type_embeddings.weight.scales", | |
| "biases": "embeddings.token_type_embeddings.weight.biases" | |
| }, | |
| { | |
| "name": "embeddings.word_embeddings.weight", | |
| "shape": [ | |
| 250002, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "embeddings.word_embeddings.weight.qweight", | |
| "scales": "embeddings.word_embeddings.weight.scales", | |
| "biases": "embeddings.word_embeddings.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.0.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.0.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.0.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.0.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.0.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.0.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.0.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.0.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.0.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.0.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.0.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.0.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.0.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.0.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.0.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.0.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.0.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.0.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.0.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.0.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.0.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.0.output.dense.weight.qweight", | |
| "scales": "encoder.layer.0.output.dense.weight.scales", | |
| "biases": "encoder.layer.0.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.1.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.1.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.1.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.1.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.1.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.1.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.1.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.1.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.1.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.1.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.1.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.1.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.1.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.1.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.1.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.1.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.1.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.1.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.1.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.1.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.1.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.1.output.dense.weight.qweight", | |
| "scales": "encoder.layer.1.output.dense.weight.scales", | |
| "biases": "encoder.layer.1.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.10.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.10.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.10.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.10.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.10.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.10.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.10.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.10.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.10.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.10.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.10.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.10.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.10.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.10.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.10.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.10.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.10.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.10.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.10.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.10.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.10.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.10.output.dense.weight.qweight", | |
| "scales": "encoder.layer.10.output.dense.weight.scales", | |
| "biases": "encoder.layer.10.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.11.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.11.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.11.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.11.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.11.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.11.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.11.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.11.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.11.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.11.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.11.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.11.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.11.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.11.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.11.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.11.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.11.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.11.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.11.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.11.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.11.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.11.output.dense.weight.qweight", | |
| "scales": "encoder.layer.11.output.dense.weight.scales", | |
| "biases": "encoder.layer.11.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.12.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.12.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.12.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.12.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.12.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.12.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.12.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.12.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.12.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.12.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.12.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.12.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.12.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.12.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.12.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.12.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.12.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.12.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.12.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.12.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.12.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.12.output.dense.weight.qweight", | |
| "scales": "encoder.layer.12.output.dense.weight.scales", | |
| "biases": "encoder.layer.12.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.13.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.13.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.13.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.13.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.13.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.13.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.13.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.13.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.13.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.13.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.13.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.13.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.13.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.13.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.13.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.13.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.13.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.13.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.13.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.13.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.13.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.13.output.dense.weight.qweight", | |
| "scales": "encoder.layer.13.output.dense.weight.scales", | |
| "biases": "encoder.layer.13.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.14.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.14.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.14.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.14.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.14.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.14.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.14.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.14.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.14.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.14.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.14.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.14.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.14.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.14.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.14.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.14.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.14.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.14.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.14.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.14.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.14.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.14.output.dense.weight.qweight", | |
| "scales": "encoder.layer.14.output.dense.weight.scales", | |
| "biases": "encoder.layer.14.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.15.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.15.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.15.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.15.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.15.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.15.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.15.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.15.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.15.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.15.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.15.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.15.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.15.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.15.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.15.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.15.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.15.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.15.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.15.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.15.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.15.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.15.output.dense.weight.qweight", | |
| "scales": "encoder.layer.15.output.dense.weight.scales", | |
| "biases": "encoder.layer.15.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.16.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.16.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.16.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.16.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.16.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.16.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.16.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.16.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.16.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.16.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.16.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.16.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.16.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.16.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.16.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.16.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.16.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.16.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.16.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.16.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.16.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.16.output.dense.weight.qweight", | |
| "scales": "encoder.layer.16.output.dense.weight.scales", | |
| "biases": "encoder.layer.16.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.17.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.17.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.17.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.17.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.17.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.17.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.17.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.17.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.17.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.17.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.17.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.17.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.17.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.17.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.17.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.17.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.17.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.17.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.17.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.17.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.17.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.17.output.dense.weight.qweight", | |
| "scales": "encoder.layer.17.output.dense.weight.scales", | |
| "biases": "encoder.layer.17.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.18.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.18.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.18.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.18.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.18.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.18.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.18.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.18.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.18.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.18.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.18.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.18.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.18.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.18.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.18.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.18.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.18.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.18.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.18.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.18.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.18.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.18.output.dense.weight.qweight", | |
| "scales": "encoder.layer.18.output.dense.weight.scales", | |
| "biases": "encoder.layer.18.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.19.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.19.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.19.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.19.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.19.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.19.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.19.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.19.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.19.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.19.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.19.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.19.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.19.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.19.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.19.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.19.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.19.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.19.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.19.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.19.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.19.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.19.output.dense.weight.qweight", | |
| "scales": "encoder.layer.19.output.dense.weight.scales", | |
| "biases": "encoder.layer.19.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.2.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.2.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.2.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.2.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.2.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.2.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.2.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.2.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.2.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.2.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.2.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.2.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.2.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.2.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.2.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.2.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.2.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.2.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.2.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.2.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.2.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.2.output.dense.weight.qweight", | |
| "scales": "encoder.layer.2.output.dense.weight.scales", | |
| "biases": "encoder.layer.2.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.20.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.20.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.20.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.20.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.20.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.20.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.20.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.20.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.20.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.20.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.20.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.20.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.20.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.20.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.20.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.20.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.20.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.20.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.20.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.20.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.20.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.20.output.dense.weight.qweight", | |
| "scales": "encoder.layer.20.output.dense.weight.scales", | |
| "biases": "encoder.layer.20.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.21.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.21.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.21.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.21.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.21.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.21.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.21.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.21.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.21.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.21.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.21.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.21.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.21.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.21.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.21.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.21.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.21.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.21.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.21.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.21.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.21.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.21.output.dense.weight.qweight", | |
| "scales": "encoder.layer.21.output.dense.weight.scales", | |
| "biases": "encoder.layer.21.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.22.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.22.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.22.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.22.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.22.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.22.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.22.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.22.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.22.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.22.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.22.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.22.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.22.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.22.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.22.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.22.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.22.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.22.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.22.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.22.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.22.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.22.output.dense.weight.qweight", | |
| "scales": "encoder.layer.22.output.dense.weight.scales", | |
| "biases": "encoder.layer.22.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.23.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.23.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.23.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.23.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.23.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.23.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.23.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.23.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.23.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.23.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.23.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.23.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.23.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.23.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.23.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.23.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.23.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.23.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.23.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.23.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.23.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.23.output.dense.weight.qweight", | |
| "scales": "encoder.layer.23.output.dense.weight.scales", | |
| "biases": "encoder.layer.23.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.3.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.3.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.3.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.3.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.3.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.3.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.3.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.3.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.3.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.3.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.3.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.3.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.3.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.3.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.3.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.3.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.3.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.3.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.3.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.3.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.3.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.3.output.dense.weight.qweight", | |
| "scales": "encoder.layer.3.output.dense.weight.scales", | |
| "biases": "encoder.layer.3.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.4.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.4.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.4.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.4.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.4.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.4.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.4.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.4.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.4.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.4.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.4.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.4.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.4.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.4.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.4.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.4.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.4.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.4.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.4.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.4.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.4.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.4.output.dense.weight.qweight", | |
| "scales": "encoder.layer.4.output.dense.weight.scales", | |
| "biases": "encoder.layer.4.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.5.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.5.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.5.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.5.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.5.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.5.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.5.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.5.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.5.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.5.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.5.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.5.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.5.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.5.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.5.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.5.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.5.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.5.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.5.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.5.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.5.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.5.output.dense.weight.qweight", | |
| "scales": "encoder.layer.5.output.dense.weight.scales", | |
| "biases": "encoder.layer.5.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.6.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.6.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.6.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.6.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.6.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.6.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.6.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.6.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.6.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.6.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.6.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.6.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.6.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.6.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.6.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.6.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.6.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.6.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.6.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.6.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.6.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.6.output.dense.weight.qweight", | |
| "scales": "encoder.layer.6.output.dense.weight.scales", | |
| "biases": "encoder.layer.6.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.7.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.7.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.7.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.7.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.7.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.7.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.7.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.7.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.7.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.7.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.7.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.7.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.7.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.7.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.7.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.7.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.7.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.7.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.7.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.7.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.7.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.7.output.dense.weight.qweight", | |
| "scales": "encoder.layer.7.output.dense.weight.scales", | |
| "biases": "encoder.layer.7.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.8.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.8.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.8.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.8.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.8.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.8.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.8.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.8.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.8.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.8.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.8.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.8.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.8.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.8.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.8.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.8.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.8.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.8.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.8.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.8.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.8.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.8.output.dense.weight.qweight", | |
| "scales": "encoder.layer.8.output.dense.weight.scales", | |
| "biases": "encoder.layer.8.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.9.attention.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.9.attention.output.dense.weight.qweight", | |
| "scales": "encoder.layer.9.attention.output.dense.weight.scales", | |
| "biases": "encoder.layer.9.attention.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.9.attention.self.key.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.9.attention.self.key.weight.qweight", | |
| "scales": "encoder.layer.9.attention.self.key.weight.scales", | |
| "biases": "encoder.layer.9.attention.self.key.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.9.attention.self.query.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.9.attention.self.query.weight.qweight", | |
| "scales": "encoder.layer.9.attention.self.query.weight.scales", | |
| "biases": "encoder.layer.9.attention.self.query.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.9.attention.self.value.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.9.attention.self.value.weight.qweight", | |
| "scales": "encoder.layer.9.attention.self.value.weight.scales", | |
| "biases": "encoder.layer.9.attention.self.value.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.9.intermediate.dense.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.9.intermediate.dense.weight.qweight", | |
| "scales": "encoder.layer.9.intermediate.dense.weight.scales", | |
| "biases": "encoder.layer.9.intermediate.dense.weight.biases" | |
| }, | |
| { | |
| "name": "encoder.layer.9.output.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "encoder.layer.9.output.dense.weight.qweight", | |
| "scales": "encoder.layer.9.output.dense.weight.scales", | |
| "biases": "encoder.layer.9.output.dense.weight.biases" | |
| }, | |
| { | |
| "name": "pooler.dense.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "qweight": "pooler.dense.weight.qweight", | |
| "scales": "pooler.dense.weight.scales", | |
| "biases": "pooler.dense.weight.biases" | |
| } | |
| ], | |
| "kept": [ | |
| { | |
| "name": "embeddings.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "embeddings.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "embeddings.position_ids", | |
| "shape": [ | |
| 1, | |
| 514 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.0.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.0.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.0.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.0.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.0.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.0.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.0.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.0.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.0.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.0.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.1.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.1.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.1.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.1.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.1.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.1.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.1.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.1.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.1.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.1.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.10.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.10.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.10.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.10.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.10.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.10.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.10.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.10.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.10.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.10.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.11.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.11.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.11.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.11.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.11.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.11.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.11.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.11.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.11.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.11.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.12.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.12.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.12.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.12.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.12.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.12.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.12.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.12.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.12.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.12.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.13.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.13.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.13.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.13.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.13.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.13.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.13.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.13.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.13.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.13.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.14.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.14.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.14.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.14.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.14.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.14.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.14.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.14.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.14.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.14.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.15.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.15.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.15.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.15.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.15.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.15.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.15.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.15.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.15.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.15.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.16.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.16.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.16.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.16.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.16.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.16.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.16.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.16.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.16.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.16.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.17.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.17.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.17.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.17.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.17.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.17.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.17.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.17.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.17.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.17.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.18.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.18.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.18.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.18.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.18.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.18.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.18.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.18.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.18.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.18.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.19.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.19.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.19.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.19.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.19.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.19.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.19.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.19.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.19.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.19.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.2.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.2.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.2.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.2.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.2.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.2.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.2.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.2.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.2.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.2.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.20.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.20.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.20.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.20.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.20.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.20.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.20.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.20.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.20.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.20.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.21.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.21.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.21.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.21.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.21.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.21.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.21.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.21.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.21.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.21.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.22.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.22.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.22.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.22.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.22.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.22.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.22.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.22.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.22.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.22.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.23.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.23.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.23.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.23.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.23.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.23.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.23.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.23.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.23.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.23.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.3.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.3.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.3.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.3.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.3.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.3.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.3.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.3.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.3.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.3.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.4.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.4.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.4.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.4.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.4.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.4.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.4.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.4.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.4.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.4.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.5.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.5.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.5.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.5.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.5.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.5.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.5.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.5.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.5.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.5.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.6.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.6.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.6.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.6.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.6.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.6.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.6.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.6.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.6.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.6.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.7.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.7.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.7.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.7.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.7.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.7.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.7.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.7.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.7.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.7.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.8.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.8.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.8.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.8.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.8.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.8.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.8.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.8.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.8.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.8.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.9.attention.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.9.attention.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.9.attention.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.9.attention.self.key.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.9.attention.self.query.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.9.attention.self.value.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.9.intermediate.dense.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.9.output.LayerNorm.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.9.output.LayerNorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "encoder.layer.9.output.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| }, | |
| { | |
| "name": "pooler.dense.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16" | |
| } | |
| ] | |
| } | |