diff --git "a/GLM-OCR-q4f16_1-MLC/tensor-cache.json" "b/GLM-OCR-q4f16_1-MLC/tensor-cache.json" new file mode 100644--- /dev/null +++ "b/GLM-OCR-q4f16_1-MLC/tensor-cache.json" @@ -0,0 +1,7940 @@ +{ + "metadata": { + "ParamSize": 733, + "ParamBytes": 632651776.0, + "BitsPerParam": 4.570333746050445 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 45613056, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 59392, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45613056, + "byteOffset": 0 + } + ], + "md5sum": "b4d9720c7dbb18bcf52965f395544dfd" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 45613056, + "records": [ + { + "name": "language_model.embed_tokens.q_weight", + "shape": [ + 59392, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45613056, + "byteOffset": 0 + } + ], + "md5sum": "db8538fbf9c909295e49373f5f58d37a" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 32652288, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 59392, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5701632, + "byteOffset": 0 + }, + { + "name": "language_model.embed_tokens.q_scale", + "shape": [ + 59392, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5701632, + "byteOffset": 5701632 + }, + { + "name": "language_model.layers.0.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 11403264 + }, + { + "name": "language_model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 11406336 + }, + { + "name": "language_model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 14945280 + }, + { + "name": "language_model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 15387648 + }, + { + "name": "language_model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 22465536 + }, + { + "name": "language_model.layers.0.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 23350272 + }, + { + "name": "language_model.layers.0.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 23353344 + }, + { + "name": "language_model.layers.0.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 23356416 + }, + { + "name": "language_model.layers.0.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 23359488 + }, + { + "name": "language_model.layers.0.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 24145920 + }, + { + "name": "language_model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 24244224 + }, + { + "name": "language_model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 25817088 + }, + { + "name": "language_model.layers.0.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 26013696 + }, + { + "name": "language_model.layers.0.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 27586560 + }, + { + "name": "language_model.layers.0.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 27783168 + }, + { + "name": "language_model.layers.0.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 28569600 + }, + { + "name": "language_model.layers.1.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 28667904 + }, + { + "name": "language_model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 28670976 + }, + { + "name": "language_model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 32209920 + } + ], + "md5sum": "5c86e2516f91ccab4669cf6d28d07285" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 30547968, + "records": [ + { + "name": "language_model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 0 + }, + { + "name": "language_model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 7077888 + }, + { + "name": "language_model.layers.1.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 7962624 + }, + { + "name": "language_model.layers.1.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 7965696 + }, + { + "name": "language_model.layers.1.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 7968768 + }, + { + "name": "language_model.layers.1.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 7971840 + }, + { + "name": "language_model.layers.1.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 8758272 + }, + { + "name": "language_model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 8856576 + }, + { + "name": "language_model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 10429440 + }, + { + "name": "language_model.layers.1.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 10626048 + }, + { + "name": "language_model.layers.1.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 12198912 + }, + { + "name": "language_model.layers.1.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12395520 + }, + { + "name": "language_model.layers.1.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 13181952 + }, + { + "name": "language_model.layers.10.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 13280256 + }, + { + "name": "language_model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 13283328 + }, + { + "name": "language_model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 16822272 + }, + { + "name": "language_model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 17264640 + }, + { + "name": "language_model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 24342528 + }, + { + "name": "language_model.layers.10.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 25227264 + }, + { + "name": "language_model.layers.10.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 25230336 + }, + { + "name": "language_model.layers.10.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 25233408 + }, + { + "name": "language_model.layers.10.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25236480 + }, + { + "name": "language_model.layers.10.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 26022912 + }, + { + "name": "language_model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 26121216 + }, + { + "name": "language_model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 27694080 + }, + { + "name": "language_model.layers.10.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 27890688 + }, + { + "name": "language_model.layers.10.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 29463552 + }, + { + "name": "language_model.layers.10.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 29660160 + }, + { + "name": "language_model.layers.10.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 30446592 + }, + { + "name": "language_model.layers.11.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 30544896 + } + ], + "md5sum": "d543a27575279cbd6891aee259c3ed97" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33444864, + "records": [ + { + "name": "language_model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 0 + }, + { + "name": "language_model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 3538944 + }, + { + "name": "language_model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 3981312 + }, + { + "name": "language_model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 11059200 + }, + { + "name": "language_model.layers.11.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 11943936 + }, + { + "name": "language_model.layers.11.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 11947008 + }, + { + "name": "language_model.layers.11.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 11950080 + }, + { + "name": "language_model.layers.11.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 11953152 + }, + { + "name": "language_model.layers.11.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 12739584 + }, + { + "name": "language_model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12837888 + }, + { + "name": "language_model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 14410752 + }, + { + "name": "language_model.layers.11.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 14607360 + }, + { + "name": "language_model.layers.11.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 16180224 + }, + { + "name": "language_model.layers.11.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 16376832 + }, + { + "name": "language_model.layers.11.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 17163264 + }, + { + "name": "language_model.layers.12.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 17261568 + }, + { + "name": "language_model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 17264640 + }, + { + "name": "language_model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 20803584 + }, + { + "name": "language_model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 21245952 + }, + { + "name": "language_model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 28323840 + }, + { + "name": "language_model.layers.12.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 29208576 + }, + { + "name": "language_model.layers.12.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 29211648 + }, + { + "name": "language_model.layers.12.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 29214720 + }, + { + "name": "language_model.layers.12.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 29217792 + }, + { + "name": "language_model.layers.12.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 30004224 + }, + { + "name": "language_model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 30102528 + }, + { + "name": "language_model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 31675392 + }, + { + "name": "language_model.layers.12.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 31872000 + } + ], + "md5sum": "d9001d6b2a04a12a2eb9fb2487c6103a" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 32956416, + "records": [ + { + "name": "language_model.layers.12.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 0 + }, + { + "name": "language_model.layers.12.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 196608 + }, + { + "name": "language_model.layers.12.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 983040 + }, + { + "name": "language_model.layers.13.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 1081344 + }, + { + "name": "language_model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 1084416 + }, + { + "name": "language_model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 4623360 + }, + { + "name": "language_model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 5065728 + }, + { + "name": "language_model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 12143616 + }, + { + "name": "language_model.layers.13.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 13028352 + }, + { + "name": "language_model.layers.13.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 13031424 + }, + { + "name": "language_model.layers.13.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 13034496 + }, + { + "name": "language_model.layers.13.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 13037568 + }, + { + "name": "language_model.layers.13.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 13824000 + }, + { + "name": "language_model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 13922304 + }, + { + "name": "language_model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 15495168 + }, + { + "name": "language_model.layers.13.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 15691776 + }, + { + "name": "language_model.layers.13.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 17264640 + }, + { + "name": "language_model.layers.13.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 17461248 + }, + { + "name": "language_model.layers.13.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 18247680 + }, + { + "name": "language_model.layers.14.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 18345984 + }, + { + "name": "language_model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 18349056 + }, + { + "name": "language_model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 21888000 + }, + { + "name": "language_model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 22330368 + }, + { + "name": "language_model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 29408256 + }, + { + "name": "language_model.layers.14.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 30292992 + }, + { + "name": "language_model.layers.14.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 30296064 + }, + { + "name": "language_model.layers.14.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 30299136 + }, + { + "name": "language_model.layers.14.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 30302208 + }, + { + "name": "language_model.layers.14.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 31088640 + }, + { + "name": "language_model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 31186944 + }, + { + "name": "language_model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 32759808 + } + ], + "md5sum": "35dc70826045fc235add28f87f2f245d" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 32759808, + "records": [ + { + "name": "language_model.layers.14.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "language_model.layers.14.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 1572864 + }, + { + "name": "language_model.layers.14.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 1769472 + }, + { + "name": "language_model.layers.14.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 2555904 + }, + { + "name": "language_model.layers.15.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 2654208 + }, + { + "name": "language_model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 2657280 + }, + { + "name": "language_model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 6196224 + }, + { + "name": "language_model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 6638592 + }, + { + "name": "language_model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 13716480 + }, + { + "name": "language_model.layers.15.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 14601216 + }, + { + "name": "language_model.layers.15.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 14604288 + }, + { + "name": "language_model.layers.15.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 14607360 + }, + { + "name": "language_model.layers.15.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 14610432 + }, + { + "name": "language_model.layers.15.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 15396864 + }, + { + "name": "language_model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 15495168 + }, + { + "name": "language_model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 17068032 + }, + { + "name": "language_model.layers.15.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 17264640 + }, + { + "name": "language_model.layers.15.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 18837504 + }, + { + "name": "language_model.layers.15.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 19034112 + }, + { + "name": "language_model.layers.15.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 19820544 + }, + { + "name": "language_model.layers.2.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 19918848 + }, + { + "name": "language_model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 19921920 + }, + { + "name": "language_model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 23460864 + }, + { + "name": "language_model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 23903232 + }, + { + "name": "language_model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 30981120 + }, + { + "name": "language_model.layers.2.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 31865856 + }, + { + "name": "language_model.layers.2.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 31868928 + }, + { + "name": "language_model.layers.2.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 31872000 + }, + { + "name": "language_model.layers.2.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 31875072 + }, + { + "name": "language_model.layers.2.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 32661504 + } + ], + "md5sum": "90fe98dcabb6f7a9458a57a342295ce8" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 32750592, + "records": [ + { + "name": "language_model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "language_model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 1572864 + }, + { + "name": "language_model.layers.2.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 1769472 + }, + { + "name": "language_model.layers.2.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 3342336 + }, + { + "name": "language_model.layers.2.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 3538944 + }, + { + "name": "language_model.layers.2.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 4325376 + }, + { + "name": "language_model.layers.3.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 4423680 + }, + { + "name": "language_model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 4426752 + }, + { + "name": "language_model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 7965696 + }, + { + "name": "language_model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 8408064 + }, + { + "name": "language_model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 15485952 + }, + { + "name": "language_model.layers.3.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 16370688 + }, + { + "name": "language_model.layers.3.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 16373760 + }, + { + "name": "language_model.layers.3.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 16376832 + }, + { + "name": "language_model.layers.3.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 16379904 + }, + { + "name": "language_model.layers.3.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 17166336 + }, + { + "name": "language_model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 17264640 + }, + { + "name": "language_model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 18837504 + }, + { + "name": "language_model.layers.3.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19034112 + }, + { + "name": "language_model.layers.3.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 20606976 + }, + { + "name": "language_model.layers.3.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 20803584 + }, + { + "name": "language_model.layers.3.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 21590016 + }, + { + "name": "language_model.layers.4.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 21688320 + }, + { + "name": "language_model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 21691392 + }, + { + "name": "language_model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 25230336 + }, + { + "name": "language_model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 25672704 + } + ], + "md5sum": "3de8bf5305b9b3b5ccb1ba1f34f7873a" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 27451392, + "records": [ + { + "name": "language_model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 0 + }, + { + "name": "language_model.layers.4.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 884736 + }, + { + "name": "language_model.layers.4.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 887808 + }, + { + "name": "language_model.layers.4.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 890880 + }, + { + "name": "language_model.layers.4.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 893952 + }, + { + "name": "language_model.layers.4.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 1680384 + }, + { + "name": "language_model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 1778688 + }, + { + "name": "language_model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 3351552 + }, + { + "name": "language_model.layers.4.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 3548160 + }, + { + "name": "language_model.layers.4.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 5121024 + }, + { + "name": "language_model.layers.4.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 5317632 + }, + { + "name": "language_model.layers.4.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 6104064 + }, + { + "name": "language_model.layers.5.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 6202368 + }, + { + "name": "language_model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 6205440 + }, + { + "name": "language_model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 9744384 + }, + { + "name": "language_model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 10186752 + }, + { + "name": "language_model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 17264640 + }, + { + "name": "language_model.layers.5.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 18149376 + }, + { + "name": "language_model.layers.5.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 18152448 + }, + { + "name": "language_model.layers.5.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 18155520 + }, + { + "name": "language_model.layers.5.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 18158592 + }, + { + "name": "language_model.layers.5.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 18945024 + }, + { + "name": "language_model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19043328 + }, + { + "name": "language_model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 20616192 + }, + { + "name": "language_model.layers.5.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 20812800 + }, + { + "name": "language_model.layers.5.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 22385664 + }, + { + "name": "language_model.layers.5.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 22582272 + }, + { + "name": "language_model.layers.5.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 23368704 + }, + { + "name": "language_model.layers.6.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 23467008 + }, + { + "name": "language_model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 23470080 + }, + { + "name": "language_model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 27009024 + } + ], + "md5sum": "bc5d7964280331cf0c6a0b8f53315038" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 30547968, + "records": [ + { + "name": "language_model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 0 + }, + { + "name": "language_model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 7077888 + }, + { + "name": "language_model.layers.6.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 7962624 + }, + { + "name": "language_model.layers.6.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 7965696 + }, + { + "name": "language_model.layers.6.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 7968768 + }, + { + "name": "language_model.layers.6.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 7971840 + }, + { + "name": "language_model.layers.6.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 8758272 + }, + { + "name": "language_model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 8856576 + }, + { + "name": "language_model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 10429440 + }, + { + "name": "language_model.layers.6.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 10626048 + }, + { + "name": "language_model.layers.6.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 12198912 + }, + { + "name": "language_model.layers.6.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12395520 + }, + { + "name": "language_model.layers.6.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 13181952 + }, + { + "name": "language_model.layers.7.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 13280256 + }, + { + "name": "language_model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 13283328 + }, + { + "name": "language_model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 16822272 + }, + { + "name": "language_model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 17264640 + }, + { + "name": "language_model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 24342528 + }, + { + "name": "language_model.layers.7.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 25227264 + }, + { + "name": "language_model.layers.7.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 25230336 + }, + { + "name": "language_model.layers.7.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 25233408 + }, + { + "name": "language_model.layers.7.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25236480 + }, + { + "name": "language_model.layers.7.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 26022912 + }, + { + "name": "language_model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 26121216 + }, + { + "name": "language_model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 27694080 + }, + { + "name": "language_model.layers.7.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 27890688 + }, + { + "name": "language_model.layers.7.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 29463552 + }, + { + "name": "language_model.layers.7.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 29660160 + }, + { + "name": "language_model.layers.7.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 30446592 + }, + { + "name": "language_model.layers.8.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 30544896 + } + ], + "md5sum": "297af330a75a3369b5be703029592b72" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33444864, + "records": [ + { + "name": "language_model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 0 + }, + { + "name": "language_model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 3538944 + }, + { + "name": "language_model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 3981312 + }, + { + "name": "language_model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 11059200 + }, + { + "name": "language_model.layers.8.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 11943936 + }, + { + "name": "language_model.layers.8.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 11947008 + }, + { + "name": "language_model.layers.8.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 11950080 + }, + { + "name": "language_model.layers.8.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 11953152 + }, + { + "name": "language_model.layers.8.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 12739584 + }, + { + "name": "language_model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12837888 + }, + { + "name": "language_model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 14410752 + }, + { + "name": "language_model.layers.8.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 14607360 + }, + { + "name": "language_model.layers.8.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 16180224 + }, + { + "name": "language_model.layers.8.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 16376832 + }, + { + "name": "language_model.layers.8.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 17163264 + }, + { + "name": "language_model.layers.9.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 17261568 + }, + { + "name": "language_model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 17264640 + }, + { + "name": "language_model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 20803584 + }, + { + "name": "language_model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 9216, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7077888, + "byteOffset": 21245952 + }, + { + "name": "language_model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 9216, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 884736, + "byteOffset": 28323840 + }, + { + "name": "language_model.layers.9.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 29208576 + }, + { + "name": "language_model.layers.9.post_mlp_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 29211648 + }, + { + "name": "language_model.layers.9.post_self_attn_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 29214720 + }, + { + "name": "language_model.layers.9.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 29217792 + }, + { + "name": "language_model.layers.9.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 30004224 + }, + { + "name": "language_model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 30102528 + }, + { + "name": "language_model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 31675392 + }, + { + "name": "language_model.layers.9.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 31872000 + } + ], + "md5sum": "af07eea08c226d2f95853452dcfa02dc" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 31858688, + "records": [ + { + "name": "language_model.layers.9.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 0 + }, + { + "name": "language_model.layers.9.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 196608 + }, + { + "name": "language_model.layers.9.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 98304, + "byteOffset": 983040 + }, + { + "name": "language_model.norm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 1081344 + }, + { + "name": "visual.blocks.0.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 1084416 + }, + { + "name": "visual.blocks.0.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 1084544 + }, + { + "name": "visual.blocks.0.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 1086592 + }, + { + "name": "visual.blocks.0.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 1610880 + }, + { + "name": "visual.blocks.0.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 1676416 + }, + { + "name": "visual.blocks.0.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 1676544 + }, + { + "name": "visual.blocks.0.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 1682688 + }, + { + "name": "visual.blocks.0.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 3255552 + }, + { + "name": "visual.blocks.0.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 3452160 + }, + { + "name": "visual.blocks.0.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 3454208 + }, + { + "name": "visual.blocks.0.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 5551360 + }, + { + "name": "visual.blocks.0.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5813504 + }, + { + "name": "visual.blocks.0.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 5821696 + }, + { + "name": "visual.blocks.0.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 7918848 + }, + { + "name": "visual.blocks.0.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8180992 + }, + { + "name": "visual.blocks.0.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8189184 + }, + { + "name": "visual.blocks.0.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 10286336 + }, + { + "name": "visual.blocks.0.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10548480 + }, + { + "name": "visual.blocks.0.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10550528 + }, + { + "name": "visual.blocks.1.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 10552576 + }, + { + "name": "visual.blocks.1.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10552704 + }, + { + "name": "visual.blocks.1.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 10554752 + }, + { + "name": "visual.blocks.1.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 11079040 + }, + { + "name": "visual.blocks.1.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 11144576 + }, + { + "name": "visual.blocks.1.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 11144704 + }, + { + "name": "visual.blocks.1.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 11150848 + }, + { + "name": "visual.blocks.1.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 12723712 + }, + { + "name": "visual.blocks.1.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12920320 + }, + { + "name": "visual.blocks.1.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12922368 + }, + { + "name": "visual.blocks.1.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 15019520 + }, + { + "name": "visual.blocks.1.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 15281664 + }, + { + "name": "visual.blocks.1.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 15289856 + }, + { + "name": "visual.blocks.1.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 17387008 + }, + { + "name": "visual.blocks.1.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 17649152 + }, + { + "name": "visual.blocks.1.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 17657344 + }, + { + "name": "visual.blocks.1.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 19754496 + }, + { + "name": "visual.blocks.1.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 20016640 + }, + { + "name": "visual.blocks.1.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 20018688 + }, + { + "name": "visual.blocks.10.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 20020736 + }, + { + "name": "visual.blocks.10.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 20020864 + }, + { + "name": "visual.blocks.10.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 20022912 + }, + { + "name": "visual.blocks.10.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 20547200 + }, + { + "name": "visual.blocks.10.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 20612736 + }, + { + "name": "visual.blocks.10.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 20612864 + }, + { + "name": "visual.blocks.10.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 20619008 + }, + { + "name": "visual.blocks.10.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 22191872 + }, + { + "name": "visual.blocks.10.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 22388480 + }, + { + "name": "visual.blocks.10.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 22390528 + }, + { + "name": "visual.blocks.10.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 24487680 + }, + { + "name": "visual.blocks.10.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24749824 + }, + { + "name": "visual.blocks.10.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24758016 + }, + { + "name": "visual.blocks.10.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 26855168 + }, + { + "name": "visual.blocks.10.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27117312 + }, + { + "name": "visual.blocks.10.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 27125504 + }, + { + "name": "visual.blocks.10.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 29222656 + }, + { + "name": "visual.blocks.10.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 29484800 + }, + { + "name": "visual.blocks.10.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 29486848 + }, + { + "name": "visual.blocks.11.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 29488896 + }, + { + "name": "visual.blocks.11.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 29489024 + }, + { + "name": "visual.blocks.11.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 29491072 + }, + { + "name": "visual.blocks.11.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 30015360 + }, + { + "name": "visual.blocks.11.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 30080896 + }, + { + "name": "visual.blocks.11.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30081024 + }, + { + "name": "visual.blocks.11.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 30087168 + }, + { + "name": "visual.blocks.11.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 31660032 + }, + { + "name": "visual.blocks.11.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 31856640 + } + ], + "md5sum": "9421ae0601df346d5af1718f3385b1b9" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 33139456, + "records": [ + { + "name": "visual.blocks.11.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "visual.blocks.11.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 2097152 + }, + { + "name": "visual.blocks.11.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2359296 + }, + { + "name": "visual.blocks.11.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 2367488 + }, + { + "name": "visual.blocks.11.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 4464640 + }, + { + "name": "visual.blocks.11.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4726784 + }, + { + "name": "visual.blocks.11.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 4734976 + }, + { + "name": "visual.blocks.11.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 6832128 + }, + { + "name": "visual.blocks.11.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7094272 + }, + { + "name": "visual.blocks.11.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7096320 + }, + { + "name": "visual.blocks.12.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 7098368 + }, + { + "name": "visual.blocks.12.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7098496 + }, + { + "name": "visual.blocks.12.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 7100544 + }, + { + "name": "visual.blocks.12.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 7624832 + }, + { + "name": "visual.blocks.12.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 7690368 + }, + { + "name": "visual.blocks.12.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 7690496 + }, + { + "name": "visual.blocks.12.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 7696640 + }, + { + "name": "visual.blocks.12.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 9269504 + }, + { + "name": "visual.blocks.12.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9466112 + }, + { + "name": "visual.blocks.12.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 9468160 + }, + { + "name": "visual.blocks.12.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 11565312 + }, + { + "name": "visual.blocks.12.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11827456 + }, + { + "name": "visual.blocks.12.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 11835648 + }, + { + "name": "visual.blocks.12.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 13932800 + }, + { + "name": "visual.blocks.12.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14194944 + }, + { + "name": "visual.blocks.12.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14203136 + }, + { + "name": "visual.blocks.12.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 16300288 + }, + { + "name": "visual.blocks.12.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16562432 + }, + { + "name": "visual.blocks.12.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16564480 + }, + { + "name": "visual.blocks.13.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 16566528 + }, + { + "name": "visual.blocks.13.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16566656 + }, + { + "name": "visual.blocks.13.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 16568704 + }, + { + "name": "visual.blocks.13.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 17092992 + }, + { + "name": "visual.blocks.13.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 17158528 + }, + { + "name": "visual.blocks.13.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17158656 + }, + { + "name": "visual.blocks.13.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 17164800 + }, + { + "name": "visual.blocks.13.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 18737664 + }, + { + "name": "visual.blocks.13.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18934272 + }, + { + "name": "visual.blocks.13.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18936320 + }, + { + "name": "visual.blocks.13.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21033472 + }, + { + "name": "visual.blocks.13.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21295616 + }, + { + "name": "visual.blocks.13.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 21303808 + }, + { + "name": "visual.blocks.13.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 23400960 + }, + { + "name": "visual.blocks.13.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23663104 + }, + { + "name": "visual.blocks.13.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23671296 + }, + { + "name": "visual.blocks.13.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 25768448 + }, + { + "name": "visual.blocks.13.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26030592 + }, + { + "name": "visual.blocks.13.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26032640 + }, + { + "name": "visual.blocks.14.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 26034688 + }, + { + "name": "visual.blocks.14.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26034816 + }, + { + "name": "visual.blocks.14.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 26036864 + }, + { + "name": "visual.blocks.14.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 26561152 + }, + { + "name": "visual.blocks.14.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 26626688 + }, + { + "name": "visual.blocks.14.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26626816 + }, + { + "name": "visual.blocks.14.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 26632960 + }, + { + "name": "visual.blocks.14.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 28205824 + }, + { + "name": "visual.blocks.14.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28402432 + }, + { + "name": "visual.blocks.14.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 28404480 + }, + { + "name": "visual.blocks.14.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 30501632 + }, + { + "name": "visual.blocks.14.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30763776 + }, + { + "name": "visual.blocks.14.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30771968 + }, + { + "name": "visual.blocks.14.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 32869120 + }, + { + "name": "visual.blocks.14.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33131264 + } + ], + "md5sum": "f222788e0cc2c358275624b30e3829d2" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 33137664, + "records": [ + { + "name": "visual.blocks.14.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "visual.blocks.14.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 2097152 + }, + { + "name": "visual.blocks.14.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2359296 + }, + { + "name": "visual.blocks.14.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2361344 + }, + { + "name": "visual.blocks.15.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 2363392 + }, + { + "name": "visual.blocks.15.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2363520 + }, + { + "name": "visual.blocks.15.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 2365568 + }, + { + "name": "visual.blocks.15.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 2889856 + }, + { + "name": "visual.blocks.15.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 2955392 + }, + { + "name": "visual.blocks.15.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 2955520 + }, + { + "name": "visual.blocks.15.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 2961664 + }, + { + "name": "visual.blocks.15.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 4534528 + }, + { + "name": "visual.blocks.15.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4731136 + }, + { + "name": "visual.blocks.15.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 4733184 + }, + { + "name": "visual.blocks.15.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 6830336 + }, + { + "name": "visual.blocks.15.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7092480 + }, + { + "name": "visual.blocks.15.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 7100672 + }, + { + "name": "visual.blocks.15.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 9197824 + }, + { + "name": "visual.blocks.15.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9459968 + }, + { + "name": "visual.blocks.15.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 9468160 + }, + { + "name": "visual.blocks.15.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 11565312 + }, + { + "name": "visual.blocks.15.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11827456 + }, + { + "name": "visual.blocks.15.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11829504 + }, + { + "name": "visual.blocks.16.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 11831552 + }, + { + "name": "visual.blocks.16.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11831680 + }, + { + "name": "visual.blocks.16.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 11833728 + }, + { + "name": "visual.blocks.16.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 12358016 + }, + { + "name": "visual.blocks.16.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 12423552 + }, + { + "name": "visual.blocks.16.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12423680 + }, + { + "name": "visual.blocks.16.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12429824 + }, + { + "name": "visual.blocks.16.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 14002688 + }, + { + "name": "visual.blocks.16.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14199296 + }, + { + "name": "visual.blocks.16.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14201344 + }, + { + "name": "visual.blocks.16.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 16298496 + }, + { + "name": "visual.blocks.16.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16560640 + }, + { + "name": "visual.blocks.16.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 16568832 + }, + { + "name": "visual.blocks.16.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 18665984 + }, + { + "name": "visual.blocks.16.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18928128 + }, + { + "name": "visual.blocks.16.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18936320 + }, + { + "name": "visual.blocks.16.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21033472 + }, + { + "name": "visual.blocks.16.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21295616 + }, + { + "name": "visual.blocks.16.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21297664 + }, + { + "name": "visual.blocks.17.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 21299712 + }, + { + "name": "visual.blocks.17.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21299840 + }, + { + "name": "visual.blocks.17.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 21301888 + }, + { + "name": "visual.blocks.17.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 21826176 + }, + { + "name": "visual.blocks.17.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 21891712 + }, + { + "name": "visual.blocks.17.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21891840 + }, + { + "name": "visual.blocks.17.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 21897984 + }, + { + "name": "visual.blocks.17.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 23470848 + }, + { + "name": "visual.blocks.17.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23667456 + }, + { + "name": "visual.blocks.17.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23669504 + }, + { + "name": "visual.blocks.17.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 25766656 + }, + { + "name": "visual.blocks.17.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26028800 + }, + { + "name": "visual.blocks.17.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26036992 + }, + { + "name": "visual.blocks.17.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28134144 + }, + { + "name": "visual.blocks.17.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 28396288 + }, + { + "name": "visual.blocks.17.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 28404480 + }, + { + "name": "visual.blocks.17.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 30501632 + }, + { + "name": "visual.blocks.17.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30763776 + }, + { + "name": "visual.blocks.17.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30765824 + }, + { + "name": "visual.blocks.18.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 30767872 + }, + { + "name": "visual.blocks.18.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30768000 + }, + { + "name": "visual.blocks.18.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 30770048 + }, + { + "name": "visual.blocks.18.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 31294336 + }, + { + "name": "visual.blocks.18.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 31359872 + }, + { + "name": "visual.blocks.18.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31360000 + }, + { + "name": "visual.blocks.18.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 31366144 + }, + { + "name": "visual.blocks.18.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 32939008 + }, + { + "name": "visual.blocks.18.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 33135616 + } + ], + "md5sum": "62b8e68b83ef39695caba58987b8a4e5" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33139456, + "records": [ + { + "name": "visual.blocks.18.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "visual.blocks.18.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 2097152 + }, + { + "name": "visual.blocks.18.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2359296 + }, + { + "name": "visual.blocks.18.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 2367488 + }, + { + "name": "visual.blocks.18.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 4464640 + }, + { + "name": "visual.blocks.18.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4726784 + }, + { + "name": "visual.blocks.18.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 4734976 + }, + { + "name": "visual.blocks.18.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 6832128 + }, + { + "name": "visual.blocks.18.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7094272 + }, + { + "name": "visual.blocks.18.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7096320 + }, + { + "name": "visual.blocks.19.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 7098368 + }, + { + "name": "visual.blocks.19.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7098496 + }, + { + "name": "visual.blocks.19.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 7100544 + }, + { + "name": "visual.blocks.19.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 7624832 + }, + { + "name": "visual.blocks.19.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 7690368 + }, + { + "name": "visual.blocks.19.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 7690496 + }, + { + "name": "visual.blocks.19.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 7696640 + }, + { + "name": "visual.blocks.19.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 9269504 + }, + { + "name": "visual.blocks.19.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9466112 + }, + { + "name": "visual.blocks.19.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 9468160 + }, + { + "name": "visual.blocks.19.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 11565312 + }, + { + "name": "visual.blocks.19.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11827456 + }, + { + "name": "visual.blocks.19.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 11835648 + }, + { + "name": "visual.blocks.19.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 13932800 + }, + { + "name": "visual.blocks.19.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14194944 + }, + { + "name": "visual.blocks.19.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14203136 + }, + { + "name": "visual.blocks.19.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 16300288 + }, + { + "name": "visual.blocks.19.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16562432 + }, + { + "name": "visual.blocks.19.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16564480 + }, + { + "name": "visual.blocks.2.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 16566528 + }, + { + "name": "visual.blocks.2.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16566656 + }, + { + "name": "visual.blocks.2.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 16568704 + }, + { + "name": "visual.blocks.2.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 17092992 + }, + { + "name": "visual.blocks.2.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 17158528 + }, + { + "name": "visual.blocks.2.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17158656 + }, + { + "name": "visual.blocks.2.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 17164800 + }, + { + "name": "visual.blocks.2.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 18737664 + }, + { + "name": "visual.blocks.2.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18934272 + }, + { + "name": "visual.blocks.2.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18936320 + }, + { + "name": "visual.blocks.2.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21033472 + }, + { + "name": "visual.blocks.2.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21295616 + }, + { + "name": "visual.blocks.2.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 21303808 + }, + { + "name": "visual.blocks.2.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 23400960 + }, + { + "name": "visual.blocks.2.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23663104 + }, + { + "name": "visual.blocks.2.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23671296 + }, + { + "name": "visual.blocks.2.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 25768448 + }, + { + "name": "visual.blocks.2.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26030592 + }, + { + "name": "visual.blocks.2.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26032640 + }, + { + "name": "visual.blocks.20.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 26034688 + }, + { + "name": "visual.blocks.20.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26034816 + }, + { + "name": "visual.blocks.20.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 26036864 + }, + { + "name": "visual.blocks.20.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 26561152 + }, + { + "name": "visual.blocks.20.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 26626688 + }, + { + "name": "visual.blocks.20.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26626816 + }, + { + "name": "visual.blocks.20.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 26632960 + }, + { + "name": "visual.blocks.20.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 28205824 + }, + { + "name": "visual.blocks.20.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28402432 + }, + { + "name": "visual.blocks.20.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 28404480 + }, + { + "name": "visual.blocks.20.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 30501632 + }, + { + "name": "visual.blocks.20.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30763776 + }, + { + "name": "visual.blocks.20.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30771968 + }, + { + "name": "visual.blocks.20.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 32869120 + }, + { + "name": "visual.blocks.20.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33131264 + } + ], + "md5sum": "1b958d64c856be321cd042de1e26b88c" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 33137664, + "records": [ + { + "name": "visual.blocks.20.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "visual.blocks.20.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 2097152 + }, + { + "name": "visual.blocks.20.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2359296 + }, + { + "name": "visual.blocks.20.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2361344 + }, + { + "name": "visual.blocks.21.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 2363392 + }, + { + "name": "visual.blocks.21.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2363520 + }, + { + "name": "visual.blocks.21.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 2365568 + }, + { + "name": "visual.blocks.21.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 2889856 + }, + { + "name": "visual.blocks.21.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 2955392 + }, + { + "name": "visual.blocks.21.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 2955520 + }, + { + "name": "visual.blocks.21.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 2961664 + }, + { + "name": "visual.blocks.21.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 4534528 + }, + { + "name": "visual.blocks.21.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4731136 + }, + { + "name": "visual.blocks.21.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 4733184 + }, + { + "name": "visual.blocks.21.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 6830336 + }, + { + "name": "visual.blocks.21.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7092480 + }, + { + "name": "visual.blocks.21.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 7100672 + }, + { + "name": "visual.blocks.21.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 9197824 + }, + { + "name": "visual.blocks.21.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9459968 + }, + { + "name": "visual.blocks.21.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 9468160 + }, + { + "name": "visual.blocks.21.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 11565312 + }, + { + "name": "visual.blocks.21.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11827456 + }, + { + "name": "visual.blocks.21.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11829504 + }, + { + "name": "visual.blocks.22.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 11831552 + }, + { + "name": "visual.blocks.22.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11831680 + }, + { + "name": "visual.blocks.22.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 11833728 + }, + { + "name": "visual.blocks.22.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 12358016 + }, + { + "name": "visual.blocks.22.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 12423552 + }, + { + "name": "visual.blocks.22.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12423680 + }, + { + "name": "visual.blocks.22.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12429824 + }, + { + "name": "visual.blocks.22.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 14002688 + }, + { + "name": "visual.blocks.22.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14199296 + }, + { + "name": "visual.blocks.22.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14201344 + }, + { + "name": "visual.blocks.22.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 16298496 + }, + { + "name": "visual.blocks.22.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16560640 + }, + { + "name": "visual.blocks.22.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 16568832 + }, + { + "name": "visual.blocks.22.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 18665984 + }, + { + "name": "visual.blocks.22.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18928128 + }, + { + "name": "visual.blocks.22.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18936320 + }, + { + "name": "visual.blocks.22.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21033472 + }, + { + "name": "visual.blocks.22.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21295616 + }, + { + "name": "visual.blocks.22.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21297664 + }, + { + "name": "visual.blocks.23.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 21299712 + }, + { + "name": "visual.blocks.23.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21299840 + }, + { + "name": "visual.blocks.23.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 21301888 + }, + { + "name": "visual.blocks.23.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 21826176 + }, + { + "name": "visual.blocks.23.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 21891712 + }, + { + "name": "visual.blocks.23.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21891840 + }, + { + "name": "visual.blocks.23.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 21897984 + }, + { + "name": "visual.blocks.23.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 23470848 + }, + { + "name": "visual.blocks.23.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23667456 + }, + { + "name": "visual.blocks.23.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23669504 + }, + { + "name": "visual.blocks.23.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 25766656 + }, + { + "name": "visual.blocks.23.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26028800 + }, + { + "name": "visual.blocks.23.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26036992 + }, + { + "name": "visual.blocks.23.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28134144 + }, + { + "name": "visual.blocks.23.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 28396288 + }, + { + "name": "visual.blocks.23.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 28404480 + }, + { + "name": "visual.blocks.23.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 30501632 + }, + { + "name": "visual.blocks.23.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30763776 + }, + { + "name": "visual.blocks.23.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30765824 + }, + { + "name": "visual.blocks.3.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 30767872 + }, + { + "name": "visual.blocks.3.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30768000 + }, + { + "name": "visual.blocks.3.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 30770048 + }, + { + "name": "visual.blocks.3.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 31294336 + }, + { + "name": "visual.blocks.3.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 31359872 + }, + { + "name": "visual.blocks.3.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31360000 + }, + { + "name": "visual.blocks.3.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 31366144 + }, + { + "name": "visual.blocks.3.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 32939008 + }, + { + "name": "visual.blocks.3.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 33135616 + } + ], + "md5sum": "ce4244f3752dc74526653a87a1dd90e4" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 33139456, + "records": [ + { + "name": "visual.blocks.3.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "visual.blocks.3.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 2097152 + }, + { + "name": "visual.blocks.3.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2359296 + }, + { + "name": "visual.blocks.3.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 2367488 + }, + { + "name": "visual.blocks.3.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 4464640 + }, + { + "name": "visual.blocks.3.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4726784 + }, + { + "name": "visual.blocks.3.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 4734976 + }, + { + "name": "visual.blocks.3.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 6832128 + }, + { + "name": "visual.blocks.3.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7094272 + }, + { + "name": "visual.blocks.3.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7096320 + }, + { + "name": "visual.blocks.4.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 7098368 + }, + { + "name": "visual.blocks.4.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7098496 + }, + { + "name": "visual.blocks.4.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 7100544 + }, + { + "name": "visual.blocks.4.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 7624832 + }, + { + "name": "visual.blocks.4.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 7690368 + }, + { + "name": "visual.blocks.4.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 7690496 + }, + { + "name": "visual.blocks.4.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 7696640 + }, + { + "name": "visual.blocks.4.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 9269504 + }, + { + "name": "visual.blocks.4.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9466112 + }, + { + "name": "visual.blocks.4.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 9468160 + }, + { + "name": "visual.blocks.4.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 11565312 + }, + { + "name": "visual.blocks.4.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11827456 + }, + { + "name": "visual.blocks.4.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 11835648 + }, + { + "name": "visual.blocks.4.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 13932800 + }, + { + "name": "visual.blocks.4.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14194944 + }, + { + "name": "visual.blocks.4.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14203136 + }, + { + "name": "visual.blocks.4.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 16300288 + }, + { + "name": "visual.blocks.4.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16562432 + }, + { + "name": "visual.blocks.4.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16564480 + }, + { + "name": "visual.blocks.5.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 16566528 + }, + { + "name": "visual.blocks.5.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16566656 + }, + { + "name": "visual.blocks.5.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 16568704 + }, + { + "name": "visual.blocks.5.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 17092992 + }, + { + "name": "visual.blocks.5.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 17158528 + }, + { + "name": "visual.blocks.5.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17158656 + }, + { + "name": "visual.blocks.5.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 17164800 + }, + { + "name": "visual.blocks.5.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 18737664 + }, + { + "name": "visual.blocks.5.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18934272 + }, + { + "name": "visual.blocks.5.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18936320 + }, + { + "name": "visual.blocks.5.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21033472 + }, + { + "name": "visual.blocks.5.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21295616 + }, + { + "name": "visual.blocks.5.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 21303808 + }, + { + "name": "visual.blocks.5.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 23400960 + }, + { + "name": "visual.blocks.5.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23663104 + }, + { + "name": "visual.blocks.5.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23671296 + }, + { + "name": "visual.blocks.5.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 25768448 + }, + { + "name": "visual.blocks.5.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26030592 + }, + { + "name": "visual.blocks.5.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26032640 + }, + { + "name": "visual.blocks.6.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 26034688 + }, + { + "name": "visual.blocks.6.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26034816 + }, + { + "name": "visual.blocks.6.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 26036864 + }, + { + "name": "visual.blocks.6.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 26561152 + }, + { + "name": "visual.blocks.6.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 26626688 + }, + { + "name": "visual.blocks.6.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26626816 + }, + { + "name": "visual.blocks.6.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 26632960 + }, + { + "name": "visual.blocks.6.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 28205824 + }, + { + "name": "visual.blocks.6.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28402432 + }, + { + "name": "visual.blocks.6.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 28404480 + }, + { + "name": "visual.blocks.6.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 30501632 + }, + { + "name": "visual.blocks.6.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30763776 + }, + { + "name": "visual.blocks.6.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30771968 + }, + { + "name": "visual.blocks.6.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 32869120 + }, + { + "name": "visual.blocks.6.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33131264 + } + ], + "md5sum": "9439aa8fba3b7fb250333e8900cfbea9" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 30770944, + "records": [ + { + "name": "visual.blocks.6.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "visual.blocks.6.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 2097152 + }, + { + "name": "visual.blocks.6.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2359296 + }, + { + "name": "visual.blocks.6.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2361344 + }, + { + "name": "visual.blocks.7.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 2363392 + }, + { + "name": "visual.blocks.7.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2363520 + }, + { + "name": "visual.blocks.7.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 2365568 + }, + { + "name": "visual.blocks.7.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 2889856 + }, + { + "name": "visual.blocks.7.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 2955392 + }, + { + "name": "visual.blocks.7.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 2955520 + }, + { + "name": "visual.blocks.7.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 2961664 + }, + { + "name": "visual.blocks.7.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 4534528 + }, + { + "name": "visual.blocks.7.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4731136 + }, + { + "name": "visual.blocks.7.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 4733184 + }, + { + "name": "visual.blocks.7.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 6830336 + }, + { + "name": "visual.blocks.7.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7092480 + }, + { + "name": "visual.blocks.7.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 7100672 + }, + { + "name": "visual.blocks.7.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 9197824 + }, + { + "name": "visual.blocks.7.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9459968 + }, + { + "name": "visual.blocks.7.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 9468160 + }, + { + "name": "visual.blocks.7.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 11565312 + }, + { + "name": "visual.blocks.7.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11827456 + }, + { + "name": "visual.blocks.7.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11829504 + }, + { + "name": "visual.blocks.8.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 11831552 + }, + { + "name": "visual.blocks.8.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11831680 + }, + { + "name": "visual.blocks.8.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 11833728 + }, + { + "name": "visual.blocks.8.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 12358016 + }, + { + "name": "visual.blocks.8.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 12423552 + }, + { + "name": "visual.blocks.8.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12423680 + }, + { + "name": "visual.blocks.8.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12429824 + }, + { + "name": "visual.blocks.8.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 14002688 + }, + { + "name": "visual.blocks.8.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14199296 + }, + { + "name": "visual.blocks.8.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14201344 + }, + { + "name": "visual.blocks.8.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 16298496 + }, + { + "name": "visual.blocks.8.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16560640 + }, + { + "name": "visual.blocks.8.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 16568832 + }, + { + "name": "visual.blocks.8.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 18665984 + }, + { + "name": "visual.blocks.8.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18928128 + }, + { + "name": "visual.blocks.8.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18936320 + }, + { + "name": "visual.blocks.8.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21033472 + }, + { + "name": "visual.blocks.8.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21295616 + }, + { + "name": "visual.blocks.8.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21297664 + }, + { + "name": "visual.blocks.9.attn.k_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 21299712 + }, + { + "name": "visual.blocks.9.attn.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21299840 + }, + { + "name": "visual.blocks.9.attn.proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 21301888 + }, + { + "name": "visual.blocks.9.attn.proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 21826176 + }, + { + "name": "visual.blocks.9.attn.q_norm.weight", + "shape": [ + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 128, + "byteOffset": 21891712 + }, + { + "name": "visual.blocks.9.attn.qkv.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21891840 + }, + { + "name": "visual.blocks.9.attn.qkv.q_weight", + "shape": [ + 3072, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 21897984 + }, + { + "name": "visual.blocks.9.attn.qkv.q_scale", + "shape": [ + 3072, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 196608, + "byteOffset": 23470848 + }, + { + "name": "visual.blocks.9.mlp.down_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23667456 + }, + { + "name": "visual.blocks.9.mlp.down_proj.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23669504 + }, + { + "name": "visual.blocks.9.mlp.down_proj.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 25766656 + }, + { + "name": "visual.blocks.9.mlp.gate_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26028800 + }, + { + "name": "visual.blocks.9.mlp.gate_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26036992 + }, + { + "name": "visual.blocks.9.mlp.gate_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28134144 + }, + { + "name": "visual.blocks.9.mlp.up_proj.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 28396288 + }, + { + "name": "visual.blocks.9.mlp.up_proj.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 28404480 + }, + { + "name": "visual.blocks.9.mlp.up_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 30501632 + }, + { + "name": "visual.blocks.9.norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30763776 + }, + { + "name": "visual.blocks.9.norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30765824 + }, + { + "name": "visual.downsample.bias", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 30767872 + } + ], + "md5sum": "a64bb94cf707527e9b43d46fa85bdba6" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 26546176, + "records": [ + { + "name": "visual.downsample.weight", + "shape": [ + 1536, + 1024, + 2, + 2 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "visual.merger.down_proj.q_weight", + "shape": [ + 1536, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 12582912 + }, + { + "name": "visual.merger.down_proj.q_scale", + "shape": [ + 1536, + 144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 16121856 + }, + { + "name": "visual.merger.gate_proj.q_weight", + "shape": [ + 4608, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 16564224 + }, + { + "name": "visual.merger.gate_proj.q_scale", + "shape": [ + 4608, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 20103168 + }, + { + "name": "visual.merger.post_projection_norm.bias", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 20545536 + }, + { + "name": "visual.merger.post_projection_norm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 20548608 + }, + { + "name": "visual.merger.proj.q_weight", + "shape": [ + 1536, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 20551680 + }, + { + "name": "visual.merger.proj.q_scale", + "shape": [ + 1536, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 21731328 + }, + { + "name": "visual.merger.up_proj.q_weight", + "shape": [ + 4608, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3538944, + "byteOffset": 21878784 + }, + { + "name": "visual.merger.up_proj.q_scale", + "shape": [ + 4608, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 442368, + "byteOffset": 25417728 + }, + { + "name": "visual.patch_embed.proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25860096 + }, + { + "name": "visual.patch_embed.proj.q_weight", + "shape": [ + 1024, + 148 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 606208, + "byteOffset": 25862144 + }, + { + "name": "visual.patch_embed.proj.q_scale", + "shape": [ + 1024, + 37 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75776, + "byteOffset": 26468352 + }, + { + "name": "visual.post_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26544128 + } + ], + "md5sum": "00c040c365f6d84089a22a6d9b0c0b2c" + } + ] +} \ No newline at end of file