jcopo's picture
Add files using upload-large-folder tool
111068f verified
{"array_metadatas": [{"array_metadata": {"param_name": "blocks.0.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.self_attn.relative_attention_bias.embedding.value", "write_shape": [32, 64], "chunk_shape": [32, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.6.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.6.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.6.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.6.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.6.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.6.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.6.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.6.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.6.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.7.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.7.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.7.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.7.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.7.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.7.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.7.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.7.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.7.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.8.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.8.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.8.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.8.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.8.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.8.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.8.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.8.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.8.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.9.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.9.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.9.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.9.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.9.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.9.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.9.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.9.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.9.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.10.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.10.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.10.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.10.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.10.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.10.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.10.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.10.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.10.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.11.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.11.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.11.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.11.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.11.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.11.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.11.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.11.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.11.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.12.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.12.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.12.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.12.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.12.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.12.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.12.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.12.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.12.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.13.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.13.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.13.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.13.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.13.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.13.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.13.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.13.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.13.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.14.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.14.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.14.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.14.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.14.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.14.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.14.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.14.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.14.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.15.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.15.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.15.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.15.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.15.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.15.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.15.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.15.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.15.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.16.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.16.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.16.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.16.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.16.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.16.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.16.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.16.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.16.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.17.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.17.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.17.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.17.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.17.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.17.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.17.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.17.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.17.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.18.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.18.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.18.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.18.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.18.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.18.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.18.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.18.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.18.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.19.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.19.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.19.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.19.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.19.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.19.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.19.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.19.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.19.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.20.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.20.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.20.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.20.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.20.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.20.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.20.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.20.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.20.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.21.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.21.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.21.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.21.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.21.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.21.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.21.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.21.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.21.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.22.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.22.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.22.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.22.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.22.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.22.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.22.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.22.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.22.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.23.feed_forward.DenseReluDense.wi_0.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.23.feed_forward.DenseReluDense.wi_1.kernel.value", "write_shape": [4096, 10240], "chunk_shape": [4096, 10240], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.23.feed_forward.DenseReluDense.wo.kernel.value", "write_shape": [10240, 4096], "chunk_shape": [10240, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.23.ff_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.23.self_attn.k.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.23.self_attn.o.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.23.self_attn.q.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.23.self_attn.v.kernel.value", "write_shape": [4096, 4096], "chunk_shape": [4096, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.23.self_attn_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "embed_tokens.embedding.value", "write_shape": [32128, 4096], "chunk_shape": [32128, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "final_layer_norm.scale.value", "write_shape": [4096], "chunk_shape": [4096], "ext_metadata": null}}]}