| {"array_metadatas": [{"array_metadata": {"param_name": "lm_head.kernel.value", "write_shape": [768, 82369], "chunk_shape": [768, 82369], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.k_rope_proj.kernel.value", "write_shape": [4, 768, 64], "chunk_shape": [4, 768, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.k_up_proj.kernel.value", "write_shape": [4, 256, 384], "chunk_shape": [4, 256, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.kv_down_proj.kernel.value", "write_shape": [4, 768, 256], "chunk_shape": [4, 768, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.o_proj.kernel.value", "write_shape": [4, 768, 1536], "chunk_shape": [4, 768, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.q_down_proj.kernel.value", "write_shape": [4, 768, 512], "chunk_shape": [4, 768, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.q_rope_proj.kernel.value", "write_shape": [4, 512, 384], "chunk_shape": [4, 512, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.q_up_proj.kernel.value", "write_shape": [4, 512, 384], "chunk_shape": [4, 512, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.rotary.inv_freq", "write_shape": [2, 32], "chunk_shape": [2, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.v_up_proj.kernel.value", "write_shape": [4, 256, 768], "chunk_shape": [4, 256, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn_norm.weight.value", "write_shape": [4, 768], "chunk_shape": [4, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn_residual.norm.weight.value", "write_shape": [4, 768], "chunk_shape": [4, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn_residual.query.value", "write_shape": [4, 768], "chunk_shape": [4, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn.down_proj.kernel.value", "write_shape": [4, 2048, 1536], "chunk_shape": [4, 2048, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn.gate_proj.kernel.value", "write_shape": [4, 1536, 2048], "chunk_shape": [4, 1536, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn.up_proj.kernel.value", "write_shape": [4, 1536, 2048], "chunk_shape": [4, 1536, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn_norm.weight.value", "write_shape": [4, 768], "chunk_shape": [4, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn_residual.norm.weight.value", "write_shape": [4, 768], "chunk_shape": [4, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn_residual.query.value", "write_shape": [4, 768], "chunk_shape": [4, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.layer_idx", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.embed_tokens.embedding.value", "write_shape": [82369, 768], "chunk_shape": [82369, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.embed_tokens.scaled", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.final_norm.weight.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.final_residual.norm.weight.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.final_residual.query.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.k_rope_proj.kernel.value", "write_shape": [3, 768, 64], "chunk_shape": [3, 768, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.k_up_proj.kernel.value", "write_shape": [3, 256, 384], "chunk_shape": [3, 256, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.kv_down_proj.kernel.value", "write_shape": [3, 768, 256], "chunk_shape": [3, 768, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.o_proj.kernel.value", "write_shape": [3, 768, 1536], "chunk_shape": [3, 768, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.q_down_proj.kernel.value", "write_shape": [3, 768, 512], "chunk_shape": [3, 768, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.q_rope_proj.kernel.value", "write_shape": [3, 512, 384], "chunk_shape": [3, 512, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.q_up_proj.kernel.value", "write_shape": [3, 512, 384], "chunk_shape": [3, 512, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.rotary.inv_freq", "write_shape": [3, 16], "chunk_shape": [3, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.v_up_proj.kernel.value", "write_shape": [3, 256, 768], "chunk_shape": [3, 256, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn_norm.weight.value", "write_shape": [3, 768], "chunk_shape": [3, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn_residual.norm.weight.value", "write_shape": [3, 768], "chunk_shape": [3, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn_residual.query.value", "write_shape": [3, 768], "chunk_shape": [3, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.expert_to_group", "write_shape": [3, 8], "chunk_shape": [3, 8], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.experts.down_proj_experts.value", "write_shape": [3, 17, 512, 768], "chunk_shape": [3, 17, 512, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.experts.gate_proj_experts.value", "write_shape": [3, 17, 768, 512], "chunk_shape": [3, 17, 768, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.experts.shared_weights.kernel.value", "write_shape": [3, 768, 1], "chunk_shape": [3, 768, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.experts.up_proj_experts.value", "write_shape": [3, 17, 768, 512], "chunk_shape": [3, 17, 768, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.group_sizes", "write_shape": [3, 2], "chunk_shape": [3, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.router.kernel.value", "write_shape": [3, 768, 16], "chunk_shape": [3, 768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn_norm.weight.value", "write_shape": [3, 768], "chunk_shape": [3, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn_residual.norm.weight.value", "write_shape": [3, 768], "chunk_shape": [3, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn_residual.query.value", "write_shape": [3, 768], "chunk_shape": [3, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.layer_idx", "write_shape": [3], "chunk_shape": [3], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.k_rope_proj.kernel.value", "write_shape": [11, 768, 64], "chunk_shape": [11, 768, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.k_up_proj.kernel.value", "write_shape": [11, 256, 384], "chunk_shape": [11, 256, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.kv_down_proj.kernel.value", "write_shape": [11, 768, 256], "chunk_shape": [11, 768, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.o_proj.kernel.value", "write_shape": [11, 768, 1536], "chunk_shape": [11, 768, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.q_down_proj.kernel.value", "write_shape": [11, 768, 512], "chunk_shape": [11, 768, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.q_rope_proj.kernel.value", "write_shape": [11, 512, 384], "chunk_shape": [11, 512, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.q_up_proj.kernel.value", "write_shape": [11, 512, 384], "chunk_shape": [11, 512, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.rotary.inv_freq", "write_shape": [11, 16], "chunk_shape": [11, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.v_up_proj.kernel.value", "write_shape": [11, 256, 768], "chunk_shape": [11, 256, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn_norm.weight.value", "write_shape": [11, 768], "chunk_shape": [11, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn_residual.norm.weight.value", "write_shape": [11, 768], "chunk_shape": [11, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn_residual.query.value", "write_shape": [11, 768], "chunk_shape": [11, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.expert_to_group", "write_shape": [11, 8], "chunk_shape": [11, 8], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.experts.down_proj_experts.value", "write_shape": [11, 17, 512, 768], "chunk_shape": [11, 17, 512, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.experts.gate_proj_experts.value", "write_shape": [11, 17, 768, 512], "chunk_shape": [11, 17, 768, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.experts.shared_weights.kernel.value", "write_shape": [11, 768, 1], "chunk_shape": [11, 768, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.experts.up_proj_experts.value", "write_shape": [11, 17, 768, 512], "chunk_shape": [11, 17, 768, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.group_sizes", "write_shape": [11, 2], "chunk_shape": [11, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.router.kernel.value", "write_shape": [11, 768, 16], "chunk_shape": [11, 768, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn_norm.weight.value", "write_shape": [11, 768], "chunk_shape": [11, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn_residual.norm.weight.value", "write_shape": [11, 768], "chunk_shape": [11, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn_residual.query.value", "write_shape": [11, 768], "chunk_shape": [11, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.layer_idx", "write_shape": [11], "chunk_shape": [11], "ext_metadata": null}}, {"array_metadata": {"param_name": "rngs.params.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "rngs.params.key.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": {"random_key_impl": "threefry2x32"}}}]} |