diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,10204 @@ +{ + "metadata": { + "ParamSize": 867, + "ParamBytes": 3974904384.0, + "BitsPerParam": 4.501955601418291 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 65667072, + "records": [ + { + "name": "language_model.lm_head.q_weight", + "shape": [ + 32064, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65667072, + "byteOffset": 0 + } + ], + "md5sum": "f30f6135e47f8f58a45e11d177a11f88" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 30760960, + "records": [ + { + "name": "language_model.lm_head.q_scale", + "shape": [ + 32064, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8208384, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8208384 + }, + { + "name": "language_model.model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 8216576 + } + ], + "md5sum": "eb88dbdd3f59249886a1d6029622fa94" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "aa55c7fa2f37d79cbaeb8555b39a480a" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 31014912, + "records": [ + { + "name": "language_model.model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "language_model.model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "language_model.model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8462336 + }, + { + "name": "language_model.model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 8470528 + } + ], + "md5sum": "2a9b2259dc3bbe75f765bce898a97af4" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "bb2ad76c4883eec2c4c90b2873b75c39" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9028d1afe6bc0254b9a34e27eb690b6d" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "3f2572e2b61f7640c0d343b43e7827d1" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "272bc21c897383b31b12665eff0a3189" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7038a842ff2bcca13f2bec15006d6896" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "language_model.model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "language_model.model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "language_model.model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "language_model.model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "language_model.model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "language_model.model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "language_model.model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "language_model.model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "language_model.model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "a6b0e0b78a521225e6e8db3ac650b358" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "language_model.model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "language_model.model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "language_model.model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "9766644dad18cc4877ded3d65e389f1c" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "a9d7fe4769a54f85743b42464d16db2e" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1e8e8e91f7b4ff2ba55280ce68e55995" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "27c10d36e84003eebeffd1192c7f4ac1" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "12563853bf6a1531031ad7b57044c766" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8994db9f5e863d94e75c64dc9d70f8de" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "language_model.model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "language_model.model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "language_model.model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "language_model.model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "language_model.model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "language_model.model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "language_model.model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "language_model.model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "language_model.model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "4b6221d562b64723bb4e27d15f2827b9" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "language_model.model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "language_model.model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "language_model.model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "b5edaa376864b9165680b433cf63f5a5" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "34d9ca0a86fa5d003c442bf4f76679b0" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "56891cf232266f6240cd74f0dd5acc0c" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "86a6b66c21f42230df7e9ed2ced1cd81" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "4460744e5400cbbefede76b590ed2a00" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b0f53f7b1cc185f1550d86850c0f1817" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "language_model.model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "language_model.model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "language_model.model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "language_model.model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "language_model.model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "language_model.model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "language_model.model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "language_model.model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "language_model.model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "4a02c6165ada8478feae499855d37355" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "language_model.model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "language_model.model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "language_model.model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "780065c7091ec1bd90070acf76f67a12" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "c099ddf52dce2ea4c2dc5b2199f92b06" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "dbd084e3a420b664fca7092eb09098a3" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "9bffe452785f4caefe98438e64744367" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "7bdfcaca1569ed5f21cbe25d079ac763" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "fd3cd3fd827bb13957d1c1064cec87b2" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "language_model.model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "language_model.model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "language_model.model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "language_model.model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "language_model.model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "language_model.model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "language_model.model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "language_model.model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "language_model.model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "42ff5800273d916307f56c90e1338547" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "language_model.model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "language_model.model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "language_model.model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "9d003f5d85c9f9f5290feb9e2f6ee983" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ab080d1324b3eb3f94bb3cdcc3050cd2" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "3e30b87842607404596b1cae144b15ca" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 65667072, + "records": [ + { + "name": "language_model.model.embed_tokens.q_weight", + "shape": [ + 32064, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65667072, + "byteOffset": 0 + } + ], + "md5sum": "16f8b578e2bf470f33307572889a3123" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "7fb85bf2f483696cf97fc2bd772189db" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ae690613d9a7f6259cb0947f58a92974" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 32088064, + "records": [ + { + "name": "language_model.model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "language_model.model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "language_model.model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "language_model.model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "language_model.model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "language_model.model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.embed_tokens.q_scale", + "shape": [ + 32064, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8208384, + "byteOffset": 21053440 + }, + { + "name": "language_model.model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29261824 + }, + { + "name": "language_model.model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 29270016 + } + ], + "md5sum": "83a4763b088cbcc6c1137ed8b042f03c" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 30810112, + "records": [ + { + "name": "language_model.model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5636096 + }, + { + "name": "language_model.model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 5644288 + } + ], + "md5sum": "4f8b4dbda904fd71e1c7ab40b41c959e" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "c590a8b2103d274df51ddcb51573c408" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "80b611b8eebf5fc0a90a152be92c0476" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "86eded4c5e30b31ca5bc1b5e0634c7d0" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "language_model.model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "language_model.model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "language_model.model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "language_model.model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "language_model.model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "language_model.model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "language_model.model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "14a6c59b58b5ce5d6cc8bd80dc8ba82e" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "ddf605b6d16a05bbf50711982ef36026" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "4a31d97996821213023bf3f2234f0392" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 32186368, + "records": [ + { + "name": "language_model.model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 1048576 + }, + { + "name": "language_model.model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 26214400 + }, + { + "name": "language_model.model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29360128 + }, + { + "name": "language_model.model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 29368320 + } + ], + "md5sum": "64492b908ce4b68ff535b7c25727c770" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 30810112, + "records": [ + { + "name": "language_model.model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5636096 + }, + { + "name": "language_model.model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 5644288 + } + ], + "md5sum": "a8f3d4ac0a4dc4d2f75025f448cb4b7a" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "5e6985b2b112c51855433057ea942b0e" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "38eac9f60cee720045b2801362c59454" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a33952bd1d8645a2ec8555bc39679112" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "language_model.model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "language_model.model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "language_model.model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "language_model.model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "language_model.model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "language_model.model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "language_model.model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "864d0d1addd48c08e33c869269445e4d" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "584c758ce2027a1e5c3180e1240f274e" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "bafcba32fba85b40d05b2738ca8b57b5" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "language_model.model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "language_model.model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "language_model.model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "language_model.model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "language_model.model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "0dc458e4475fbab075554b9c74701b96" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "43e678659dd0d1a85196d1e4fcdb7c3f" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "0067d34f8b1bf9b4da1066c0a0c14334" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "30c39daf9b89bd85411908c57d19db0c" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "language_model.model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "language_model.model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "language_model.model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "language_model.model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "language_model.model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "language_model.model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "language_model.model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "df6ba1f8648417543828ff0227a57e7d" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "474dd594b99b412f7ecf07cede00ba77" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "dd113aab92bd8d85ca5c3d1419eb5330" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "language_model.model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "language_model.model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "language_model.model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "language_model.model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "language_model.model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "663ff081054719480d8405c9c876b3f1" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "13b3c8c6f0537c1f9fcea3c238c40e1d" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "7ba9fe7f0a0a03c8916f2824d3b8acb1" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "24d6e3395f4492511c02009a637b26a8" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "language_model.model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "language_model.model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "language_model.model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "language_model.model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "language_model.model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "language_model.model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "language_model.model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "c3ac0e2e03e4e4c99bcccb24cf056f72" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ab63ca17b63bb2febf27b3735f75340e" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "dd0e4937a1fff29908346739a80a3975" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "language_model.model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "language_model.model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "language_model.model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "language_model.model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "language_model.model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "45e77f575dc134c7168f2d675075c48d" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "d86922697d62592339cd578357d3d382" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "8b9faa0d5223aefc72281adbf999313e" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "24abeb92dff8224c1d26bf5d77913e9a" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "language_model.model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "language_model.model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "language_model.model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "language_model.model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "language_model.model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "language_model.model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "language_model.model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "73132616ad7c04792021d3fbad9ce268" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 33348160, + "records": [ + { + "name": "language_model.model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "multi_modal_projector.linear_1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "multi_modal_projector.linear_1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 1056768 + }, + { + "name": "multi_modal_projector.linear_1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 3153920 + }, + { + "name": "multi_modal_projector.linear_2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3416064 + }, + { + "name": "multi_modal_projector.linear_2.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3424256 + }, + { + "name": "multi_modal_projector.linear_2.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11812864 + }, + { + "name": "vision_tower.vision_model.embeddings.class_embedding", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12861440 + }, + { + "name": "vision_tower.vision_model.embeddings.patch_embedding.weight", + "shape": [ + 1024, + 3, + 14, + 14 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1204224, + "byteOffset": 12863488 + }, + { + "name": "vision_tower.vision_model.embeddings.position_embedding.q_weight", + "shape": [ + 577, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 295424, + "byteOffset": 14067712 + }, + { + "name": "vision_tower.vision_model.embeddings.position_embedding.q_scale", + "shape": [ + 577, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 36928, + "byteOffset": 14363136 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14400064 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14402112 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14404160 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14406208 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14408256 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14416448 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 16513600 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16775744 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 16777792 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 18874944 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 19137088 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 19139136 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 19663424 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 19728960 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 19731008 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 20255296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 20320832 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 20322880 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 20847168 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 20912704 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 20914752 + }, + { + "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 21439040 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21504576 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21506624 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21508672 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21510720 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21512768 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 21520960 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 23618112 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23880256 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23882304 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 25979456 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26241600 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 26243648 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 26767936 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26833472 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 26835520 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 27359808 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 27425344 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 27427392 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 27951680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28017216 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 28019264 + }, + { + "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 28543552 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28609088 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28611136 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28613184 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28615232 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 28617280 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 28625472 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 30722624 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30984768 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30986816 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 33083968 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 33346112 + } + ], + "md5sum": "7d7f69b01f99267b95c261a0a31e1d44" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 33161216, + "records": [ + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 0 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 524288 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 589824 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 591872 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 1116160 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 1181696 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 1183744 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 1708032 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 1773568 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 1775616 + }, + { + "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 2299904 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2365440 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2367488 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2369536 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2371584 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2373632 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 2381824 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 4478976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4741120 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 4743168 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 6840320 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7102464 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 7104512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 7628800 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7694336 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 7696384 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 8220672 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8286208 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 8288256 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 8812544 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8878080 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 8880128 + }, + { + "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 9404416 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9469952 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9472000 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9474048 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9476096 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9478144 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 9486336 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 11583488 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11845632 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 11847680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 13944832 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14206976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 14209024 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 14733312 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14798848 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 14800896 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 15325184 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 15390720 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 15392768 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 15917056 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 15982592 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 15984640 + }, + { + "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 16508928 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16574464 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16576512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16578560 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16580608 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16582656 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 16590848 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 18688000 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18950144 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18952192 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21049344 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21311488 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 21313536 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 21837824 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21903360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 21905408 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 22429696 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 22495232 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 22497280 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 23021568 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23087104 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 23089152 + }, + { + "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 23613440 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23678976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23681024 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23683072 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23685120 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23687168 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23695360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 25792512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26054656 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26056704 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28153856 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28416000 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 28418048 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 28942336 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 29007872 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 29009920 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 29534208 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 29599744 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 29601792 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 30126080 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30191616 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 30193664 + }, + { + "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 30717952 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30783488 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30785536 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30787584 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30789632 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30791680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30799872 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 32897024 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 33159168 + } + ], + "md5sum": "897dea616a9e76d01189a7b7b5d3f1c0" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 33161216, + "records": [ + { + "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 2097152 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2359296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 2361344 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 2885632 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2951168 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 2953216 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 3477504 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 3543040 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 3545088 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 4069376 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4134912 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 4136960 + }, + { + "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 4661248 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4726784 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4728832 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4730880 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4732928 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4734976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 4743168 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 6840320 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7102464 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 7104512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 9201664 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9463808 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 9465856 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 9990144 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10055680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 10057728 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 10582016 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10647552 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 10649600 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 11173888 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11239424 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 11241472 + }, + { + "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 11765760 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11831296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11833344 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11835392 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11837440 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11839488 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 11847680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 13944832 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14206976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14209024 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 16306176 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16568320 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 16570368 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 17094656 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 17160192 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 17162240 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 17686528 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 17752064 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 17754112 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 18278400 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18343936 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 18345984 + }, + { + "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 18870272 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18935808 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18937856 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18939904 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18941952 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18944000 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18952192 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21049344 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21311488 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 21313536 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 23410688 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23672832 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 23674880 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 24199168 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 24264704 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 24266752 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 24791040 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 24856576 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 24858624 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 25382912 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25448448 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 25450496 + }, + { + "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 25974784 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26040320 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26042368 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26044416 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26046464 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26048512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26056704 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28153856 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28416000 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 28418048 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 30515200 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30777344 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 30779392 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 31303680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 31369216 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 31371264 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 31895552 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 31961088 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 31963136 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 32487424 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 32552960 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 32555008 + }, + { + "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 33079296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 33144832 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 33146880 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 33148928 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 33150976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33153024 + } + ], + "md5sum": "4cd446a56771ef3c5938dc30a8015598" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 33140736, + "records": [ + { + "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 2097152 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2359296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 2361344 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 4458496 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4720640 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 4722688 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 5246976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 5312512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 5314560 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 5838848 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 5904384 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 5906432 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 6430720 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 6496256 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 6498304 + }, + { + "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 7022592 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7088128 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7090176 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7092224 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7094272 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7096320 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 7104512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 9201664 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9463808 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 9465856 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 11563008 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11825152 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 11827200 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 12351488 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12417024 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 12419072 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 12943360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 13008896 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 13010944 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 13535232 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 13600768 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 13602816 + }, + { + "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 14127104 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14192640 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14194688 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14196736 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14198784 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14200832 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14209024 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 16306176 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16568320 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 16570368 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 18667520 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18929664 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 18931712 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 19456000 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 19521536 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 19523584 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 20047872 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 20113408 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 20115456 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 20639744 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 20705280 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 20707328 + }, + { + "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 21231616 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21297152 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21299200 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21301248 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21303296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21305344 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 21313536 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 23410688 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23672832 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23674880 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 25772032 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26034176 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 26036224 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 26560512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26626048 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 26628096 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 27152384 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 27217920 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 27219968 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 27744256 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 27809792 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 27811840 + }, + { + "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 28336128 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28401664 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28403712 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28405760 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28407808 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 28409856 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 28418048 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 30515200 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30777344 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30779392 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 32876544 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 33138688 + } + ], + "md5sum": "d1abe6bea9d17e48148f6e8d8c126e0f" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 33161216, + "records": [ + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 0 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 524288 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 589824 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 591872 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 1116160 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 1181696 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 1183744 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 1708032 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 1773568 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 1775616 + }, + { + "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 2299904 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2365440 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2367488 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2369536 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2371584 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2373632 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 2381824 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 4478976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4741120 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 4743168 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 6840320 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7102464 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 7104512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 7628800 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7694336 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 7696384 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 8220672 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8286208 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 8288256 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 8812544 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8878080 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 8880128 + }, + { + "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 9404416 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9469952 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9472000 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9474048 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9476096 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9478144 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 9486336 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 11583488 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11845632 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 11847680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 13944832 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14206976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 14209024 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 14733312 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14798848 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 14800896 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 15325184 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 15390720 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 15392768 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 15917056 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 15982592 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 15984640 + }, + { + "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 16508928 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16574464 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16576512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16578560 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16580608 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16582656 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 16590848 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 18688000 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18950144 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18952192 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21049344 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21311488 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 21313536 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 21837824 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21903360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 21905408 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 22429696 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 22495232 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 22497280 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 23021568 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23087104 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 23089152 + }, + { + "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 23613440 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23678976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23681024 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23683072 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23685120 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23687168 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23695360 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 25792512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 26054656 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26056704 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28153856 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 28416000 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 28418048 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 28942336 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 29007872 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 29009920 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 29534208 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 29599744 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 29601792 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 30126080 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30191616 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 30193664 + }, + { + "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 30717952 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30783488 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30785536 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30787584 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 30789632 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30791680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30799872 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 32897024 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 33159168 + } + ], + "md5sum": "832ae41602fb7e78da1a5efa3003ff1c" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "59685a7a8960f4986281d02932a452b5" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "fc2cb5f2298f2af9f43b6eaf8bc2f7a3" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 27414528, + "records": [ + { + "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 2097152 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2359296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 2361344 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 2885632 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2951168 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 2953216 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 3477504 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 3543040 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 3545088 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 4069376 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4134912 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 4136960 + }, + { + "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 4661248 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4726784 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4728832 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4730880 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 4732928 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4734976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 4743168 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 6840320 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 7102464 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 7104512 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 9201664 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 9463808 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 9465856 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 9990144 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10055680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 10057728 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 10582016 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10647552 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 10649600 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 11173888 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11239424 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 11241472 + }, + { + "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 11765760 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11831296 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11833344 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11835392 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 11837440 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11839488 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_weight", + "shape": [ + 4096, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 11847680 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 13944832 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14206976 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_weight", + "shape": [ + 1024, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14209024 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_scale", + "shape": [ + 1024, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 16306176 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16568320 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 16570368 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 17094656 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 17160192 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 17162240 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 17686528 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 17752064 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 17754112 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 18278400 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18343936 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 128 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 524288, + "byteOffset": 18345984 + }, + { + "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 65536, + "byteOffset": 18870272 + }, + { + "name": "vision_tower.vision_model.post_layernorm.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18935808 + }, + { + "name": "vision_tower.vision_model.post_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18937856 + }, + { + "name": "vision_tower.vision_model.pre_layrnorm.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18939904 + }, + { + "name": "vision_tower.vision_model.pre_layrnorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18941952 + }, + { + "name": "language_model.model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18944000 + }, + { + "name": "language_model.model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 18952192 + }, + { + "name": "language_model.model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 21770240 + }, + { + "name": "language_model.model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27406336 + } + ], + "md5sum": "5f4c1e5b2b01eb544f02b915b6e7340a" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "language_model.model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "language_model.model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "language_model.model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "b6092107bb09d477f0ce25d2d2edfafb" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "420f4c03ff7fe441f54a413f961ea1bc" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5c3ac20331e2ff3025e5a4c034455ff6" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "0a1f90aa755ac9b0f7c0fc2901ecedd0" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "3d0112308b5fea19cd0d926d5c9e78ac" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e76830690b43f30def9748050dee9675" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "language_model.model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "language_model.model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "language_model.model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "language_model.model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "language_model.model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "language_model.model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "language_model.model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "language_model.model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "language_model.model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "680b983285ccdd97e10aa03294e26dc8" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "language_model.model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "language_model.model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "language_model.model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "5dc43555890158b5f71c0e6e9304d422" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "4c2384ce27829729a2543acbbcb95d02" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a60bb14da0d7b8703779a5f757c02873" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "b30d8d42ab5bb9dc07959ee5277131af" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "bf1cdace1824c698f98df040a7aad8fc" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "af4a1bfac065ba2d643fa5b41773e5a8" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "language_model.model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "language_model.model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "language_model.model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "language_model.model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "language_model.model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "language_model.model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "language_model.model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "language_model.model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "language_model.model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "4f3b51893bb52f22638cbefdfd911e41" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "language_model.model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "language_model.model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "language_model.model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "8b7ed773cd45b7f1ef84935cc3ee7879" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "f93e1fb7216b05fc2046ebb14e6924e0" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "69e8b3a3e4d1c08203e23e3f3872d570" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "ed495c504a5a9ba8a1bae69e2aef098a" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "3979b5628525a61d991b255eb5b2fd85" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e8060130124bb1d503ecc03ba732dca7" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "language_model.model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "language_model.model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "language_model.model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "language_model.model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "language_model.model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "language_model.model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "language_model.model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "language_model.model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "language_model.model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "82c96c7d339680084aea882851395e23" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "language_model.model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "language_model.model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "language_model.model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "30bbd4a0697d2e85805ec57e4ff2c339" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ba896f47d344b4b9ad38ead8bbbb9ccc" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c246cff50605197943abccaab1ae7ec5" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "e8abcecf08e3383cde710a596b884eba" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "167474bca6c547b2a280f660157b82f7" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "796b1f4159f111a8aee22257bed297c3" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "language_model.model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "language_model.model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "language_model.model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "language_model.model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "language_model.model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "language_model.model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "language_model.model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "language_model.model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "language_model.model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "6fabd94ed2325fd4eb9929f353c86f07" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "language_model.model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "language_model.model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "language_model.model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "4aa2ed82cf0e0398faceed8a854c3574" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "f52e37fed43c41b50f08f4d66b7f6b8d" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e5922ede550fd37162658857b5d2d9ee" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "language_model.model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "f5310b748b7737eabd61fa3c4d3f6c05" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "935f3d92b72ae1ac2391f822be551343" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "db93c094f8368593facf37baa259b9c4" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "language_model.model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "language_model.model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "language_model.model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "language_model.model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "language_model.model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "language_model.model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "language_model.model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "language_model.model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "language_model.model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "fb6db50f430ab35e158612831ea5bef7" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "language_model.model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "language_model.model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "language_model.model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "cead38a9f34424d133cecbeebb104c8c" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "language_model.model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "6dbc2515b23886b3aec49f6ac0edc59d" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "2d22c15aad260d1d1615b4875fb02c55" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "language_model.model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "3d63f60630596bb3b235820906e029d7" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 32579584, + "records": [ + { + "name": "language_model.model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "language_model.model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "language_model.model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "language_model.model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "language_model.model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "language_model.model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21045248 + }, + { + "name": "language_model.model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24190976 + } + ], + "md5sum": "0d3aa9824a8b6e2bc9421c9460433503" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 1048576, + "records": [ + { + "name": "language_model.model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + } + ], + "md5sum": "367ee4424641f71225c12a94eee3ed83" + } + ] +} \ No newline at end of file