| {"array_metadatas": [{"array_metadata": {"param_name": "lm_head.kernel.value", "write_shape": [192, 82369], "chunk_shape": [192, 82369], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.k_rope_proj.kernel.value", "write_shape": [2, 384, 64], "chunk_shape": [2, 384, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.k_up_proj.kernel.value", "write_shape": [2, 256, 192], "chunk_shape": [2, 256, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.kv_down_proj.kernel.value", "write_shape": [2, 384, 256], "chunk_shape": [2, 384, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.o_proj.kernel.value", "write_shape": [2, 384, 1536], "chunk_shape": [2, 384, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.q_down_proj.kernel.value", "write_shape": [2, 384, 512], "chunk_shape": [2, 384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.q_rope_proj.kernel.value", "write_shape": [2, 512, 192], "chunk_shape": [2, 512, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.q_up_proj.kernel.value", "write_shape": [2, 512, 192], "chunk_shape": [2, 512, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.rotary.inv_freq", "write_shape": [4, 4], "chunk_shape": [4, 4], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.v_up_proj.kernel.value", "write_shape": [2, 256, 384], "chunk_shape": [2, 256, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn_norm.weight.value", "write_shape": [2, 384], "chunk_shape": [2, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn_residual.norm.weight.value", "write_shape": [2, 384], "chunk_shape": [2, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn_residual.query.value", "write_shape": [2, 384], "chunk_shape": [2, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn.down_proj.kernel.value", "write_shape": [2, 1024, 1536], "chunk_shape": [2, 1024, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn.gate_proj.kernel.value", "write_shape": [2, 1536, 1024], "chunk_shape": [2, 1536, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn.up_proj.kernel.value", "write_shape": [2, 1536, 1024], "chunk_shape": [2, 1536, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn_norm.weight.value", "write_shape": [2, 384], "chunk_shape": [2, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn_residual.norm.weight.value", "write_shape": [2, 384], "chunk_shape": [2, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn_residual.query.value", "write_shape": [2, 384], "chunk_shape": [2, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.layer_idx", "write_shape": [4], "chunk_shape": [4], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.embed_tokens.embedding.value", "write_shape": [82369, 192], "chunk_shape": [82369, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.embed_tokens.scaled", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.final_norm.weight.value", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.final_residual.norm.weight.value", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.final_residual.query.value", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.k_rope_proj.kernel.value", "write_shape": [3, 192, 64], "chunk_shape": [3, 192, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.k_up_proj.kernel.value", "write_shape": [3, 128, 192], "chunk_shape": [3, 128, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.kv_down_proj.kernel.value", "write_shape": [3, 192, 256], "chunk_shape": [3, 192, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.o_proj.kernel.value", "write_shape": [3, 192, 1536], "chunk_shape": [3, 192, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.q_down_proj.kernel.value", "write_shape": [3, 192, 512], "chunk_shape": [3, 192, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.q_rope_proj.kernel.value", "write_shape": [3, 256, 192], "chunk_shape": [3, 256, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.q_up_proj.kernel.value", "write_shape": [3, 256, 192], "chunk_shape": [3, 256, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.rotary.inv_freq", "write_shape": [3, 4], "chunk_shape": [3, 4], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.v_up_proj.kernel.value", "write_shape": [3, 128, 384], "chunk_shape": [3, 128, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn_norm.weight.value", "write_shape": [3, 192], "chunk_shape": [3, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn_residual.norm.weight.value", "write_shape": [3, 192], "chunk_shape": [3, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn_residual.query.value", "write_shape": [3, 192], "chunk_shape": [3, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.expert_to_group", "write_shape": [3, 2], "chunk_shape": [3, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.experts.down_proj_experts.value", "write_shape": [3, 17, 256, 384], "chunk_shape": [3, 17, 256, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.experts.gate_proj_experts.value", "write_shape": [3, 17, 192, 512], "chunk_shape": [3, 17, 192, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.experts.shared_weights.kernel.value", "write_shape": [3, 192, 1], "chunk_shape": [3, 192, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.experts.up_proj_experts.value", "write_shape": [3, 17, 192, 512], "chunk_shape": [3, 17, 192, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.group_sizes", "write_shape": [3, 4], "chunk_shape": [3, 4], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.router.kernel.value", "write_shape": [3, 192, 16], "chunk_shape": [3, 192, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn_norm.weight.value", "write_shape": [3, 192], "chunk_shape": [3, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn_residual.norm.weight.value", "write_shape": [3, 192], "chunk_shape": [3, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn_residual.query.value", "write_shape": [3, 192], "chunk_shape": [3, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.layer_idx", "write_shape": [3], "chunk_shape": [3], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.k_rope_proj.kernel.value", "write_shape": [11, 192, 64], "chunk_shape": [11, 192, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.k_up_proj.kernel.value", "write_shape": [11, 128, 192], "chunk_shape": [11, 128, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.kv_down_proj.kernel.value", "write_shape": [11, 192, 256], "chunk_shape": [11, 192, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.o_proj.kernel.value", "write_shape": [11, 192, 1536], "chunk_shape": [11, 192, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.q_down_proj.kernel.value", "write_shape": [11, 192, 512], "chunk_shape": [11, 192, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.q_rope_proj.kernel.value", "write_shape": [11, 256, 192], "chunk_shape": [11, 256, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.q_up_proj.kernel.value", "write_shape": [11, 256, 192], "chunk_shape": [11, 256, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.rotary.inv_freq", "write_shape": [11, 4], "chunk_shape": [11, 4], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.v_up_proj.kernel.value", "write_shape": [11, 128, 384], "chunk_shape": [11, 128, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn_norm.weight.value", "write_shape": [11, 192], "chunk_shape": [11, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn_residual.norm.weight.value", "write_shape": [11, 192], "chunk_shape": [11, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn_residual.query.value", "write_shape": [11, 192], "chunk_shape": [11, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.expert_to_group", "write_shape": [11, 2], "chunk_shape": [11, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.experts.down_proj_experts.value", "write_shape": [11, 17, 256, 384], "chunk_shape": [11, 17, 256, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.experts.gate_proj_experts.value", "write_shape": [11, 17, 192, 512], "chunk_shape": [11, 17, 192, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.experts.shared_weights.kernel.value", "write_shape": [11, 192, 1], "chunk_shape": [11, 192, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.experts.up_proj_experts.value", "write_shape": [11, 17, 192, 512], "chunk_shape": [11, 17, 192, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.group_sizes", "write_shape": [11, 4], "chunk_shape": [11, 4], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.router.kernel.value", "write_shape": [11, 192, 16], "chunk_shape": [11, 192, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn_norm.weight.value", "write_shape": [11, 192], "chunk_shape": [11, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn_residual.norm.weight.value", "write_shape": [11, 192], "chunk_shape": [11, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn_residual.query.value", "write_shape": [11, 192], "chunk_shape": [11, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.layer_idx", "write_shape": [11], "chunk_shape": [11], "ext_metadata": null}}, {"array_metadata": {"param_name": "rngs.params.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "rngs.params.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}]} |