| { | |
| "metadata": { | |
| "ParamSize": 392, | |
| "ParamBytes": 767991808.0, | |
| "BitsPerParam": 5.012134507076374 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50462720, | |
| "records": [ | |
| { | |
| "name": "transformer.wte.q_weight", | |
| "shape": [ | |
| 49280, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50462720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c8614381334074d4d9673358e897f238" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32608768, | |
| "records": [ | |
| { | |
| "name": "transformer.wte.q_scale", | |
| "shape": [ | |
| 49280, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6307840, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.wpe.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 6307840 | |
| }, | |
| { | |
| "name": "transformer.wpe.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 8404992 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8667136 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 8671232 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 8675328 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 11034624 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 11329536 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 11334144 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 13431296 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 13693440 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 13697536 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 13701632 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13705728 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22094336 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 23142912 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 23159296 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 31547904 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 32596480 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 32600576 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 32604672 | |
| } | |
| ], | |
| "md5sum": "d3d13dbccd39925612d05c08c52911ab" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28963840, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 2359296 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 2654208 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 2658816 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 4755968 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5018112 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5022208 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5026304 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 5030400 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 13419008 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 14467584 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14483968 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22872576 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23921152 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23925248 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23929344 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 26292736 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 26587648 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26592256 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28689408 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28951552 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28955648 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28959744 | |
| } | |
| ], | |
| "md5sum": "1bfeea565128cdc9041441309d723683" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33387008, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 17842176 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18890752 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18894848 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 21262336 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 21557248 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 21561856 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 23659008 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23921152 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23925248 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23929344 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 32322048 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 33370624 | |
| } | |
| ], | |
| "md5sum": "646125f0933b0304b05c767c96c1c536" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33382912, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 11808768 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 12103680 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 12108288 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 14205440 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 14467584 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 14471680 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 14475776 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14479872 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22868480 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 23917056 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 32322048 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33370624 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33374720 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33378816 | |
| } | |
| ], | |
| "md5sum": "07a2e948bf38139edff5bbb2606e088d" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28963840, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 2359296 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 2654208 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 2658816 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 4755968 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5018112 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5022208 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5026304 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 5030400 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 13419008 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 14467584 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14483968 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22872576 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23921152 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23925248 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23929344 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 26292736 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 26587648 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26592256 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28689408 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28951552 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28955648 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28959744 | |
| } | |
| ], | |
| "md5sum": "e0e1a0272165c399a60f7483b7264ec1" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33387008, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 17842176 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18890752 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18894848 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 21262336 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 21557248 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 21561856 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 23659008 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23921152 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23925248 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23929344 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 32322048 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 33370624 | |
| } | |
| ], | |
| "md5sum": "3c58db311edaf8c8ecf1dcd5357f0a3c" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33382912, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 11808768 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 12103680 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 12108288 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 14205440 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 14467584 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 14471680 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 14475776 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14479872 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22868480 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 23917056 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 32322048 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33370624 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33374720 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33378816 | |
| } | |
| ], | |
| "md5sum": "dfa499980fb93e37f26f42049a2786e9" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28963840, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 2359296 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 2654208 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 2658816 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 4755968 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5018112 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5022208 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5026304 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 5030400 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 13419008 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 14467584 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14483968 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22872576 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23921152 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23925248 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23929344 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 26292736 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 26587648 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26592256 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28689408 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28951552 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28955648 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28959744 | |
| } | |
| ], | |
| "md5sum": "b1b1f9824fe68dafd41876bb79bf47dd" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33387008, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 17842176 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18890752 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18894848 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 21262336 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 21557248 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 21561856 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 23659008 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23921152 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23925248 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23929344 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 32322048 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 33370624 | |
| } | |
| ], | |
| "md5sum": "c6b36bfdc8a029b273acad526e6aeecc" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33382912, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 11808768 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 12103680 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 12108288 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 14205440 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 14467584 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 14471680 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 14475776 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14479872 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22868480 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 23917056 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 32322048 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33370624 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33374720 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33378816 | |
| } | |
| ], | |
| "md5sum": "8f7a7056943eae2d8d15df0b46a7268c" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28963840, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 2359296 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 2654208 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 2658816 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 4755968 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5018112 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5022208 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5026304 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 5030400 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 13419008 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 14467584 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14483968 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22872576 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23921152 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23925248 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23929344 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 26292736 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 26587648 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26592256 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28689408 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28951552 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28955648 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28959744 | |
| } | |
| ], | |
| "md5sum": "44fafd8629769d0809e5415f4810dd21" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33387008, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 17842176 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18890752 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18894848 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 21262336 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 21557248 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 21561856 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 23659008 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23921152 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23925248 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23929344 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 32322048 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 33370624 | |
| } | |
| ], | |
| "md5sum": "263ede5d448481508802c07683586820" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33382912, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 11808768 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 12103680 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 12108288 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 14205440 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 14467584 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 14471680 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 14475776 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14479872 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22868480 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 23917056 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 32322048 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33370624 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33374720 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33378816 | |
| } | |
| ], | |
| "md5sum": "843815e39204d5d42a6a0a374326c928" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28963840, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 2359296 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 2654208 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 2658816 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 4755968 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5018112 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5022208 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5026304 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 5030400 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 13419008 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 14467584 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14483968 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22872576 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23921152 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23925248 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23929344 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 26292736 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 26587648 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26592256 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28689408 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28951552 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28955648 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28959744 | |
| } | |
| ], | |
| "md5sum": "200d1c8d07aa1533c067c366922aa33e" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33387008, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 17842176 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18890752 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18894848 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 21262336 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 21557248 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 21561856 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 23659008 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23921152 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23925248 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23929344 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 32322048 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 33370624 | |
| } | |
| ], | |
| "md5sum": "a60779d550f74f0c844db4bfc04f2b89" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33382912, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 9449472 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 11808768 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 12103680 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 12108288 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 14205440 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 14467584 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 14471680 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 14475776 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14479872 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22868480 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 23917056 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 32322048 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33370624 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33374720 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 33378816 | |
| } | |
| ], | |
| "md5sum": "4818301b5136ef86fe6baac5d6013d4e" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28963840, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 2359296 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 2654208 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 2658816 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 4755968 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5018112 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5022208 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 5026304 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 5030400 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 13419008 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 14467584 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 14483968 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 22872576 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23921152 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23925248 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23929344 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 26292736 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 26587648 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26592256 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28689408 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28951552 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28955648 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28959744 | |
| } | |
| ], | |
| "md5sum": "ab69b5762460fd3d39f4b78008eacfa3" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33387008, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 9453568 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 17842176 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18890752 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18894848 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln_1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_attn.q_weight", | |
| "shape": [ | |
| 2304, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2359296, | |
| "byteOffset": 18903040 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_attn.q_scale", | |
| "shape": [ | |
| 2304, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 294912, | |
| "byteOffset": 21262336 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_attn.bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4608, | |
| "byteOffset": 21557248 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 21561856 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 23659008 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23921152 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23925248 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln_2.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 23929344 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.c_fc.q_weight", | |
| "shape": [ | |
| 8192, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 23933440 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.c_fc.q_scale", | |
| "shape": [ | |
| 8192, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 32322048 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.c_fc.bias", | |
| "shape": [ | |
| 8192 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 16384, | |
| "byteOffset": 33370624 | |
| } | |
| ], | |
| "md5sum": "8de25f6d83a88ebd3e66d130b47c9526" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50462720, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_weight", | |
| "shape": [ | |
| 49280, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50462720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c8614381334074d4d9673358e897f238" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 15757312, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 1024 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.c_proj.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "transformer.ln_f.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9441280 | |
| }, | |
| { | |
| "name": "transformer.ln_f.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 9445376 | |
| }, | |
| { | |
| "name": "lm_head.q_scale", | |
| "shape": [ | |
| 49280, | |
| 64 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6307840, | |
| "byteOffset": 9449472 | |
| } | |
| ], | |
| "md5sum": "bcb946313a60ea1018a2963977a126fa" | |
| } | |
| ] | |
| } |