Instructions to use GetBeholder/Beholder-q4f16 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- MLC-LLM
How to use GetBeholder/Beholder-q4f16 with MLC-LLM:
# No code snippets available yet for this library. # To use this model, check the repository files and the library's documentation. # Want to help? PRs adding snippets are welcome at: # https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
| { | |
| "metadata": { | |
| "ParamSize": 435, | |
| "ParamBytes": 423938816.0, | |
| "BitsPerParam": 4.507631543378053 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 127139840, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_weight", | |
| "shape": [ | |
| 248320, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 127139840, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "93f284b422558fe7f3bf7a385c6f5879" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32844416, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_scale", | |
| "shape": [ | |
| 248320, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 15892480, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 15892480 | |
| }, | |
| { | |
| "name": "model.layers.0.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 15894528 | |
| }, | |
| { | |
| "name": "model.layers.0.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 15894560 | |
| }, | |
| { | |
| "name": "model.layers.0.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 15943712 | |
| }, | |
| { | |
| "name": "model.layers.0.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 15943744 | |
| }, | |
| { | |
| "name": "model.layers.0.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 15951936 | |
| }, | |
| { | |
| "name": "model.layers.0.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 15952960 | |
| }, | |
| { | |
| "name": "model.layers.0.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 15961152 | |
| }, | |
| { | |
| "name": "model.layers.0.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 15962176 | |
| }, | |
| { | |
| "name": "model.layers.0.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 19107904 | |
| }, | |
| { | |
| "name": "model.layers.0.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 19501120 | |
| }, | |
| { | |
| "name": "model.layers.0.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 20549696 | |
| }, | |
| { | |
| "name": "model.layers.0.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 20680768 | |
| }, | |
| { | |
| "name": "model.layers.0.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 20681024 | |
| }, | |
| { | |
| "name": "model.layers.0.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 21729600 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 21860672 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 23695680 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 23925056 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 27595072 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 28053824 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 28055872 | |
| }, | |
| { | |
| "name": "model.layers.1.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 28057920 | |
| }, | |
| { | |
| "name": "model.layers.1.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 28057952 | |
| }, | |
| { | |
| "name": "model.layers.1.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 28107104 | |
| }, | |
| { | |
| "name": "model.layers.1.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 28107136 | |
| }, | |
| { | |
| "name": "model.layers.1.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 28115328 | |
| }, | |
| { | |
| "name": "model.layers.1.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 28116352 | |
| }, | |
| { | |
| "name": "model.layers.1.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 28124544 | |
| }, | |
| { | |
| "name": "model.layers.1.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 28125568 | |
| }, | |
| { | |
| "name": "model.layers.1.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 31271296 | |
| }, | |
| { | |
| "name": "model.layers.1.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 31664512 | |
| }, | |
| { | |
| "name": "model.layers.1.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 32713088 | |
| }, | |
| { | |
| "name": "model.layers.1.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 32844160 | |
| } | |
| ], | |
| "md5sum": "4709fb8e1b57e74ac62247ed06a45778" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33473920, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.1.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 1048576 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 1179648 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 3014656 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 3244032 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 6914048 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 7372800 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 7374848 | |
| }, | |
| { | |
| "name": "model.layers.10.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 7376896 | |
| }, | |
| { | |
| "name": "model.layers.10.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 7376928 | |
| }, | |
| { | |
| "name": "model.layers.10.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 7426080 | |
| }, | |
| { | |
| "name": "model.layers.10.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7426112 | |
| }, | |
| { | |
| "name": "model.layers.10.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 7434304 | |
| }, | |
| { | |
| "name": "model.layers.10.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 7435328 | |
| }, | |
| { | |
| "name": "model.layers.10.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 7443520 | |
| }, | |
| { | |
| "name": "model.layers.10.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 7444544 | |
| }, | |
| { | |
| "name": "model.layers.10.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 10590272 | |
| }, | |
| { | |
| "name": "model.layers.10.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 10983488 | |
| }, | |
| { | |
| "name": "model.layers.10.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 12032064 | |
| }, | |
| { | |
| "name": "model.layers.10.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 12163136 | |
| }, | |
| { | |
| "name": "model.layers.10.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 12163392 | |
| }, | |
| { | |
| "name": "model.layers.10.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 13211968 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 13343040 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 15178048 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 15407424 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 19077440 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 19536192 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 19538240 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 19540288 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 21375296 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 21604672 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 25274688 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25733440 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.k_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 25735488 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 5120, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2621440, | |
| "byteOffset": 25736000 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 5120, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 327680, | |
| "byteOffset": 28357440 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 28685120 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 29733696 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.q_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 29864768 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 29865280 | |
| }, | |
| { | |
| "name": "model.layers.12.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 29867328 | |
| }, | |
| { | |
| "name": "model.layers.12.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 29867360 | |
| }, | |
| { | |
| "name": "model.layers.12.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 29916512 | |
| }, | |
| { | |
| "name": "model.layers.12.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 29916544 | |
| }, | |
| { | |
| "name": "model.layers.12.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 29924736 | |
| }, | |
| { | |
| "name": "model.layers.12.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 29925760 | |
| }, | |
| { | |
| "name": "model.layers.12.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 29933952 | |
| }, | |
| { | |
| "name": "model.layers.12.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 29934976 | |
| }, | |
| { | |
| "name": "model.layers.12.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 33080704 | |
| } | |
| ], | |
| "md5sum": "4f922876a6baf328159d461b78df043d" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32883584, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.12.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 1048576 | |
| }, | |
| { | |
| "name": "model.layers.12.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 1179648 | |
| }, | |
| { | |
| "name": "model.layers.12.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 1179904 | |
| }, | |
| { | |
| "name": "model.layers.12.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 2228480 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 2359552 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 4194560 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 4423936 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 8093952 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 8552704 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 8554752 | |
| }, | |
| { | |
| "name": "model.layers.13.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 8556800 | |
| }, | |
| { | |
| "name": "model.layers.13.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 8556832 | |
| }, | |
| { | |
| "name": "model.layers.13.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 8605984 | |
| }, | |
| { | |
| "name": "model.layers.13.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8606016 | |
| }, | |
| { | |
| "name": "model.layers.13.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 8614208 | |
| }, | |
| { | |
| "name": "model.layers.13.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8615232 | |
| }, | |
| { | |
| "name": "model.layers.13.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 8623424 | |
| }, | |
| { | |
| "name": "model.layers.13.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 8624448 | |
| }, | |
| { | |
| "name": "model.layers.13.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 11770176 | |
| }, | |
| { | |
| "name": "model.layers.13.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 12163392 | |
| }, | |
| { | |
| "name": "model.layers.13.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 13211968 | |
| }, | |
| { | |
| "name": "model.layers.13.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 13343040 | |
| }, | |
| { | |
| "name": "model.layers.13.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 13343296 | |
| }, | |
| { | |
| "name": "model.layers.13.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 14391872 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 14522944 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 16357952 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 16587328 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 20257344 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 20716096 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 20718144 | |
| }, | |
| { | |
| "name": "model.layers.14.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 20720192 | |
| }, | |
| { | |
| "name": "model.layers.14.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 20720224 | |
| }, | |
| { | |
| "name": "model.layers.14.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 20769376 | |
| }, | |
| { | |
| "name": "model.layers.14.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20769408 | |
| }, | |
| { | |
| "name": "model.layers.14.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 20777600 | |
| }, | |
| { | |
| "name": "model.layers.14.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 20778624 | |
| }, | |
| { | |
| "name": "model.layers.14.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 20786816 | |
| }, | |
| { | |
| "name": "model.layers.14.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 20787840 | |
| }, | |
| { | |
| "name": "model.layers.14.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 23933568 | |
| }, | |
| { | |
| "name": "model.layers.14.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 24326784 | |
| }, | |
| { | |
| "name": "model.layers.14.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 25375360 | |
| }, | |
| { | |
| "name": "model.layers.14.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 25506432 | |
| }, | |
| { | |
| "name": "model.layers.14.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 25506688 | |
| }, | |
| { | |
| "name": "model.layers.14.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 26555264 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 26686336 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 28521344 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 28750720 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 32420736 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 32879488 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 32881536 | |
| } | |
| ], | |
| "md5sum": "5c38d68d9d9d30527e4980f8a3317583" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30520960, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 1835008 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 2064384 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 6193152 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.k_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 6195200 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 5120, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2621440, | |
| "byteOffset": 6195712 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 5120, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 327680, | |
| "byteOffset": 8817152 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 9144832 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 10193408 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.q_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 10324480 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 10324992 | |
| }, | |
| { | |
| "name": "model.layers.16.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 10327040 | |
| }, | |
| { | |
| "name": "model.layers.16.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 10327072 | |
| }, | |
| { | |
| "name": "model.layers.16.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 10376224 | |
| }, | |
| { | |
| "name": "model.layers.16.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10376256 | |
| }, | |
| { | |
| "name": "model.layers.16.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 10384448 | |
| }, | |
| { | |
| "name": "model.layers.16.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10385472 | |
| }, | |
| { | |
| "name": "model.layers.16.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 10393664 | |
| }, | |
| { | |
| "name": "model.layers.16.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 10394688 | |
| }, | |
| { | |
| "name": "model.layers.16.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 13540416 | |
| }, | |
| { | |
| "name": "model.layers.16.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 13933632 | |
| }, | |
| { | |
| "name": "model.layers.16.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 14982208 | |
| }, | |
| { | |
| "name": "model.layers.16.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 15113280 | |
| }, | |
| { | |
| "name": "model.layers.16.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 15113536 | |
| }, | |
| { | |
| "name": "model.layers.16.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 16162112 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 16293184 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 18128192 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 18357568 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 22027584 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 22486336 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 22488384 | |
| }, | |
| { | |
| "name": "model.layers.17.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 22490432 | |
| }, | |
| { | |
| "name": "model.layers.17.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 22490464 | |
| }, | |
| { | |
| "name": "model.layers.17.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 22539616 | |
| }, | |
| { | |
| "name": "model.layers.17.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 22539648 | |
| }, | |
| { | |
| "name": "model.layers.17.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 22547840 | |
| }, | |
| { | |
| "name": "model.layers.17.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 22548864 | |
| }, | |
| { | |
| "name": "model.layers.17.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 22557056 | |
| }, | |
| { | |
| "name": "model.layers.17.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 22558080 | |
| }, | |
| { | |
| "name": "model.layers.17.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 25703808 | |
| }, | |
| { | |
| "name": "model.layers.17.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 26097024 | |
| }, | |
| { | |
| "name": "model.layers.17.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 27145600 | |
| }, | |
| { | |
| "name": "model.layers.17.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 27276672 | |
| }, | |
| { | |
| "name": "model.layers.17.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27276928 | |
| }, | |
| { | |
| "name": "model.layers.17.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 28325504 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 28456576 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 30291584 | |
| } | |
| ], | |
| "md5sum": "eb0c84192229de8cfedc3419c9170c29" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32589440, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 4128768 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 4130816 | |
| }, | |
| { | |
| "name": "model.layers.18.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 4132864 | |
| }, | |
| { | |
| "name": "model.layers.18.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 4132896 | |
| }, | |
| { | |
| "name": "model.layers.18.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 4182048 | |
| }, | |
| { | |
| "name": "model.layers.18.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4182080 | |
| }, | |
| { | |
| "name": "model.layers.18.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 4190272 | |
| }, | |
| { | |
| "name": "model.layers.18.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 4191296 | |
| }, | |
| { | |
| "name": "model.layers.18.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 4199488 | |
| }, | |
| { | |
| "name": "model.layers.18.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 4200512 | |
| }, | |
| { | |
| "name": "model.layers.18.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 7346240 | |
| }, | |
| { | |
| "name": "model.layers.18.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 7739456 | |
| }, | |
| { | |
| "name": "model.layers.18.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 8788032 | |
| }, | |
| { | |
| "name": "model.layers.18.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 8919104 | |
| }, | |
| { | |
| "name": "model.layers.18.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8919360 | |
| }, | |
| { | |
| "name": "model.layers.18.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 9967936 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 10099008 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 11934016 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 12163392 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 15833408 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16292160 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16294208 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 16296256 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 18131264 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 18360640 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 22030656 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 22489408 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.k_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 22491456 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 5120, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2621440, | |
| "byteOffset": 22491968 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 5120, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 327680, | |
| "byteOffset": 25113408 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 25441088 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 26489664 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.q_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 26620736 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 26621248 | |
| }, | |
| { | |
| "name": "model.layers.2.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 26623296 | |
| }, | |
| { | |
| "name": "model.layers.2.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 26623328 | |
| }, | |
| { | |
| "name": "model.layers.2.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 26672480 | |
| }, | |
| { | |
| "name": "model.layers.2.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 26672512 | |
| }, | |
| { | |
| "name": "model.layers.2.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 26680704 | |
| }, | |
| { | |
| "name": "model.layers.2.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 26681728 | |
| }, | |
| { | |
| "name": "model.layers.2.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 26689920 | |
| }, | |
| { | |
| "name": "model.layers.2.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 26690944 | |
| }, | |
| { | |
| "name": "model.layers.2.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 29836672 | |
| }, | |
| { | |
| "name": "model.layers.2.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 30229888 | |
| }, | |
| { | |
| "name": "model.layers.2.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 31278464 | |
| }, | |
| { | |
| "name": "model.layers.2.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31409536 | |
| }, | |
| { | |
| "name": "model.layers.2.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 31409792 | |
| }, | |
| { | |
| "name": "model.layers.2.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 32458368 | |
| } | |
| ], | |
| "md5sum": "7e784aa9a5b9d0746e3b9ac1d255e4bf" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30591680, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 1835008 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 2064384 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 6193152 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 6195200 | |
| }, | |
| { | |
| "name": "model.layers.20.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 6197248 | |
| }, | |
| { | |
| "name": "model.layers.20.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 6197280 | |
| }, | |
| { | |
| "name": "model.layers.20.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 6246432 | |
| }, | |
| { | |
| "name": "model.layers.20.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 6246464 | |
| }, | |
| { | |
| "name": "model.layers.20.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 6254656 | |
| }, | |
| { | |
| "name": "model.layers.20.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 6255680 | |
| }, | |
| { | |
| "name": "model.layers.20.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 6263872 | |
| }, | |
| { | |
| "name": "model.layers.20.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 6264896 | |
| }, | |
| { | |
| "name": "model.layers.20.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 9410624 | |
| }, | |
| { | |
| "name": "model.layers.20.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 9803840 | |
| }, | |
| { | |
| "name": "model.layers.20.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 10852416 | |
| }, | |
| { | |
| "name": "model.layers.20.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 10983488 | |
| }, | |
| { | |
| "name": "model.layers.20.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 10983744 | |
| }, | |
| { | |
| "name": "model.layers.20.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 12032320 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 12163392 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 13998400 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 14227776 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 17897792 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 18356544 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 18358592 | |
| }, | |
| { | |
| "name": "model.layers.21.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 18360640 | |
| }, | |
| { | |
| "name": "model.layers.21.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 18360672 | |
| }, | |
| { | |
| "name": "model.layers.21.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 18409824 | |
| }, | |
| { | |
| "name": "model.layers.21.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 18409856 | |
| }, | |
| { | |
| "name": "model.layers.21.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 18418048 | |
| }, | |
| { | |
| "name": "model.layers.21.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 18419072 | |
| }, | |
| { | |
| "name": "model.layers.21.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 18427264 | |
| }, | |
| { | |
| "name": "model.layers.21.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 18428288 | |
| }, | |
| { | |
| "name": "model.layers.21.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 21574016 | |
| }, | |
| { | |
| "name": "model.layers.21.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 21967232 | |
| }, | |
| { | |
| "name": "model.layers.21.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 23015808 | |
| }, | |
| { | |
| "name": "model.layers.21.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 23146880 | |
| }, | |
| { | |
| "name": "model.layers.21.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 23147136 | |
| }, | |
| { | |
| "name": "model.layers.21.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 24195712 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 24326784 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 26161792 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 26391168 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 30061184 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 30519936 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 30521984 | |
| }, | |
| { | |
| "name": "model.layers.22.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 30524032 | |
| }, | |
| { | |
| "name": "model.layers.22.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 30524064 | |
| }, | |
| { | |
| "name": "model.layers.22.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 30573216 | |
| }, | |
| { | |
| "name": "model.layers.22.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 30573248 | |
| }, | |
| { | |
| "name": "model.layers.22.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 30581440 | |
| }, | |
| { | |
| "name": "model.layers.22.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 30582464 | |
| }, | |
| { | |
| "name": "model.layers.22.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 30590656 | |
| } | |
| ], | |
| "md5sum": "f0182e1a8fae18264e646f95c3f9784a" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32817472, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 3145728 | |
| }, | |
| { | |
| "name": "model.layers.22.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 3538944 | |
| }, | |
| { | |
| "name": "model.layers.22.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 4587520 | |
| }, | |
| { | |
| "name": "model.layers.22.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "model.layers.22.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 4718848 | |
| }, | |
| { | |
| "name": "model.layers.22.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 5767424 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 5898496 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 7733504 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 7962880 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 11632896 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 12091648 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 12093696 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 12095744 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 13930752 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 14160128 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 17830144 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 18288896 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.k_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 18290944 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 5120, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2621440, | |
| "byteOffset": 18291456 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 5120, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 327680, | |
| "byteOffset": 20912896 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 21240576 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 22289152 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.q_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 22420224 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 22420736 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 22422784 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 24257792 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 24487168 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 28157184 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 28615936 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.k_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 28617984 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 5120, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2621440, | |
| "byteOffset": 28618496 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 5120, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 327680, | |
| "byteOffset": 31239936 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 31567616 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 32616192 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.q_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 32747264 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 32747776 | |
| }, | |
| { | |
| "name": "model.layers.4.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 32749824 | |
| }, | |
| { | |
| "name": "model.layers.4.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 32749856 | |
| }, | |
| { | |
| "name": "model.layers.4.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 32799008 | |
| }, | |
| { | |
| "name": "model.layers.4.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32799040 | |
| }, | |
| { | |
| "name": "model.layers.4.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 32807232 | |
| }, | |
| { | |
| "name": "model.layers.4.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32808256 | |
| }, | |
| { | |
| "name": "model.layers.4.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 32816448 | |
| } | |
| ], | |
| "md5sum": "3f3637f27390589bbb79094331a15055" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32289664, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.4.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 3145728 | |
| }, | |
| { | |
| "name": "model.layers.4.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 3538944 | |
| }, | |
| { | |
| "name": "model.layers.4.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 4587520 | |
| }, | |
| { | |
| "name": "model.layers.4.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 4718592 | |
| }, | |
| { | |
| "name": "model.layers.4.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 4718848 | |
| }, | |
| { | |
| "name": "model.layers.4.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 5767424 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 5898496 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 7733504 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 7962880 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 11632896 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 12091648 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 12093696 | |
| }, | |
| { | |
| "name": "model.layers.5.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 12095744 | |
| }, | |
| { | |
| "name": "model.layers.5.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 12095776 | |
| }, | |
| { | |
| "name": "model.layers.5.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 12144928 | |
| }, | |
| { | |
| "name": "model.layers.5.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 12144960 | |
| }, | |
| { | |
| "name": "model.layers.5.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 12153152 | |
| }, | |
| { | |
| "name": "model.layers.5.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 12154176 | |
| }, | |
| { | |
| "name": "model.layers.5.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 12162368 | |
| }, | |
| { | |
| "name": "model.layers.5.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 12163392 | |
| }, | |
| { | |
| "name": "model.layers.5.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 15309120 | |
| }, | |
| { | |
| "name": "model.layers.5.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 15702336 | |
| }, | |
| { | |
| "name": "model.layers.5.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 16750912 | |
| }, | |
| { | |
| "name": "model.layers.5.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 16881984 | |
| }, | |
| { | |
| "name": "model.layers.5.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 16882240 | |
| }, | |
| { | |
| "name": "model.layers.5.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 17930816 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 18061888 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 19896896 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 20126272 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 23796288 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24255040 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 24257088 | |
| }, | |
| { | |
| "name": "model.layers.6.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 24259136 | |
| }, | |
| { | |
| "name": "model.layers.6.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 24259168 | |
| }, | |
| { | |
| "name": "model.layers.6.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 24308320 | |
| }, | |
| { | |
| "name": "model.layers.6.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24308352 | |
| }, | |
| { | |
| "name": "model.layers.6.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 24316544 | |
| }, | |
| { | |
| "name": "model.layers.6.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24317568 | |
| }, | |
| { | |
| "name": "model.layers.6.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 24325760 | |
| }, | |
| { | |
| "name": "model.layers.6.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 24326784 | |
| }, | |
| { | |
| "name": "model.layers.6.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 27472512 | |
| }, | |
| { | |
| "name": "model.layers.6.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 27865728 | |
| }, | |
| { | |
| "name": "model.layers.6.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 28914304 | |
| }, | |
| { | |
| "name": "model.layers.6.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 29045376 | |
| }, | |
| { | |
| "name": "model.layers.6.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 29045632 | |
| }, | |
| { | |
| "name": "model.layers.6.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 30094208 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 30225280 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 32060288 | |
| } | |
| ], | |
| "md5sum": "3c5d2affe1b85345039e5d1bc1e7e94c" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32589440, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 3670016 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 4128768 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 4130816 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 4132864 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 5967872 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 6197248 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 9867264 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 10326016 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.k_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 10328064 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 5120, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2621440, | |
| "byteOffset": 10328576 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 5120, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 327680, | |
| "byteOffset": 12950016 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 13277696 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 14326272 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.q_norm.weight", | |
| "shape": [ | |
| 256 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 512, | |
| "byteOffset": 14457344 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 14457856 | |
| }, | |
| { | |
| "name": "model.layers.8.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 14459904 | |
| }, | |
| { | |
| "name": "model.layers.8.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 14459936 | |
| }, | |
| { | |
| "name": "model.layers.8.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 14509088 | |
| }, | |
| { | |
| "name": "model.layers.8.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 14509120 | |
| }, | |
| { | |
| "name": "model.layers.8.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 14517312 | |
| }, | |
| { | |
| "name": "model.layers.8.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 14518336 | |
| }, | |
| { | |
| "name": "model.layers.8.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 14526528 | |
| }, | |
| { | |
| "name": "model.layers.8.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 14527552 | |
| }, | |
| { | |
| "name": "model.layers.8.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 17673280 | |
| }, | |
| { | |
| "name": "model.layers.8.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 18066496 | |
| }, | |
| { | |
| "name": "model.layers.8.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 19115072 | |
| }, | |
| { | |
| "name": "model.layers.8.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 19246144 | |
| }, | |
| { | |
| "name": "model.layers.8.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 19246400 | |
| }, | |
| { | |
| "name": "model.layers.8.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 20294976 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 20426048 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 22261056 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 22490432 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 26160448 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 26619200 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 26621248 | |
| }, | |
| { | |
| "name": "model.layers.9.linear_attn.A_log", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 26623296 | |
| }, | |
| { | |
| "name": "model.layers.9.linear_attn.conv1d_weight", | |
| "shape": [ | |
| 6144, | |
| 1, | |
| 4 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49152, | |
| "byteOffset": 26623328 | |
| }, | |
| { | |
| "name": "model.layers.9.linear_attn.dt_bias", | |
| "shape": [ | |
| 16 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 32, | |
| "byteOffset": 26672480 | |
| }, | |
| { | |
| "name": "model.layers.9.linear_attn.in_proj_a.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 26672512 | |
| }, | |
| { | |
| "name": "model.layers.9.linear_attn.in_proj_a.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 26680704 | |
| }, | |
| { | |
| "name": "model.layers.9.linear_attn.in_proj_b.q_weight", | |
| "shape": [ | |
| 16, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 26681728 | |
| }, | |
| { | |
| "name": "model.layers.9.linear_attn.in_proj_b.q_scale", | |
| "shape": [ | |
| 16, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1024, | |
| "byteOffset": 26689920 | |
| }, | |
| { | |
| "name": "model.layers.9.linear_attn.in_proj_qkv.q_weight", | |
| "shape": [ | |
| 6144, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 26690944 | |
| }, | |
| { | |
| "name": "model.layers.9.linear_attn.in_proj_qkv.q_scale", | |
| "shape": [ | |
| 6144, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 29836672 | |
| }, | |
| { | |
| "name": "model.layers.9.linear_attn.in_proj_z.q_weight", | |
| "shape": [ | |
| 2048, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 30229888 | |
| }, | |
| { | |
| "name": "model.layers.9.linear_attn.in_proj_z.q_scale", | |
| "shape": [ | |
| 2048, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 31278464 | |
| }, | |
| { | |
| "name": "model.layers.9.linear_attn.norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31409536 | |
| }, | |
| { | |
| "name": "model.layers.9.linear_attn.out_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 31409792 | |
| }, | |
| { | |
| "name": "model.layers.9.linear_attn.out_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 131072, | |
| "byteOffset": 32458368 | |
| } | |
| ], | |
| "md5sum": "aaab7e99d84c3dd5ea958c381dbbb391" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 6197248, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 1024, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1835008, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 1024, | |
| 112 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 229376, | |
| "byteOffset": 1835008 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 7168, | |
| 128 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3670016, | |
| "byteOffset": 2064384 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 7168, | |
| 32 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 458752, | |
| "byteOffset": 5734400 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 6193152 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 6195200 | |
| } | |
| ], | |
| "md5sum": "2b2692e4fe995d6d0d80df38833841e7" | |
| } | |
| ] | |
| } |