Shinapri's picture
Upload folder using huggingface_hub
997df35 verified
{"array_metadatas": [{"array_metadata": {"param_name": "lm_head.kernel.value", "write_shape": [1536, 82369], "chunk_shape": [1536, 82369], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.k_rope_proj.kernel.value", "write_shape": [4, 1536, 64], "chunk_shape": [4, 1536, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.k_up_proj.kernel.value", "write_shape": [4, 256, 768], "chunk_shape": [4, 256, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.kv_down_proj.kernel.value", "write_shape": [4, 1536, 256], "chunk_shape": [4, 1536, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.o_proj.kernel.value", "write_shape": [4, 1536, 1536], "chunk_shape": [4, 1536, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.q_down_proj.kernel.value", "write_shape": [4, 1536, 512], "chunk_shape": [4, 1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.q_rope_proj.kernel.value", "write_shape": [4, 512, 768], "chunk_shape": [4, 512, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.q_up_proj.kernel.value", "write_shape": [4, 512, 768], "chunk_shape": [4, 512, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.rotary.inv_freq", "write_shape": [4, 32], "chunk_shape": [4, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn.v_up_proj.kernel.value", "write_shape": [4, 256, 1536], "chunk_shape": [4, 256, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn_norm.weight.value", "write_shape": [4, 1536], "chunk_shape": [4, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn_residual.norm.weight.value", "write_shape": [4, 1536], "chunk_shape": [4, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.attn_residual.query.value", "write_shape": [4, 1536], "chunk_shape": [4, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn.down_proj.kernel.value", "write_shape": [4, 4096, 1536], "chunk_shape": [4, 4096, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn.gate_proj.kernel.value", "write_shape": [4, 1536, 4096], "chunk_shape": [4, 1536, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn.up_proj.kernel.value", "write_shape": [4, 1536, 4096], "chunk_shape": [4, 1536, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn_norm.weight.value", "write_shape": [4, 1536], "chunk_shape": [4, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn_residual.norm.weight.value", "write_shape": [4, 1536], "chunk_shape": [4, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.ffn_residual.query.value", "write_shape": [4, 1536], "chunk_shape": [4, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.dense_layers.layer_idx", "write_shape": [4], "chunk_shape": [4], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.embed_tokens.embedding.value", "write_shape": [82369, 1536], "chunk_shape": [82369, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.embed_tokens.scaled", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.final_norm.weight.value", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.final_residual.norm.weight.value", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.final_residual.query.value", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.k_rope_proj.kernel.value", "write_shape": [3, 1536, 64], "chunk_shape": [3, 1536, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.k_up_proj.kernel.value", "write_shape": [3, 256, 768], "chunk_shape": [3, 256, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.kv_down_proj.kernel.value", "write_shape": [3, 1536, 256], "chunk_shape": [3, 1536, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.o_proj.kernel.value", "write_shape": [3, 1536, 1536], "chunk_shape": [3, 1536, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.q_down_proj.kernel.value", "write_shape": [3, 1536, 512], "chunk_shape": [3, 1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.q_rope_proj.kernel.value", "write_shape": [3, 512, 768], "chunk_shape": [3, 512, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.q_up_proj.kernel.value", "write_shape": [3, 512, 768], "chunk_shape": [3, 512, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.rotary.inv_freq", "write_shape": [3, 32], "chunk_shape": [3, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn.v_up_proj.kernel.value", "write_shape": [3, 256, 1536], "chunk_shape": [3, 256, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn_norm.weight.value", "write_shape": [3, 1536], "chunk_shape": [3, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn_residual.norm.weight.value", "write_shape": [3, 1536], "chunk_shape": [3, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.attn_residual.query.value", "write_shape": [3, 1536], "chunk_shape": [3, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.expert_to_group", "write_shape": [3, 16], "chunk_shape": [3, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.experts.down_proj_experts.value", "write_shape": [3, 17, 512, 1536], "chunk_shape": [3, 17, 512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.experts.gate_proj_experts.value", "write_shape": [3, 17, 1536, 512], "chunk_shape": [3, 17, 1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.experts.shared_weights.kernel.value", "write_shape": [3, 1536, 1], "chunk_shape": [3, 1536, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.experts.up_proj_experts.value", "write_shape": [3, 17, 1536, 512], "chunk_shape": [3, 17, 1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.group_sizes", "write_shape": [3, 4], "chunk_shape": [3, 4], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn.router.kernel.value", "write_shape": [3, 1536, 16], "chunk_shape": [3, 1536, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn_norm.weight.value", "write_shape": [3, 1536], "chunk_shape": [3, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn_residual.norm.weight.value", "write_shape": [3, 1536], "chunk_shape": [3, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.ffn_residual.query.value", "write_shape": [3, 1536], "chunk_shape": [3, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_deep_layers.layer_idx", "write_shape": [3], "chunk_shape": [3], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.k_rope_proj.kernel.value", "write_shape": [11, 1536, 64], "chunk_shape": [11, 1536, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.k_up_proj.kernel.value", "write_shape": [11, 256, 768], "chunk_shape": [11, 256, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.kv_down_proj.kernel.value", "write_shape": [11, 1536, 256], "chunk_shape": [11, 1536, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.o_proj.kernel.value", "write_shape": [11, 1536, 1536], "chunk_shape": [11, 1536, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.q_down_proj.kernel.value", "write_shape": [11, 1536, 512], "chunk_shape": [11, 1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.q_rope_proj.kernel.value", "write_shape": [11, 512, 768], "chunk_shape": [11, 512, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.q_up_proj.kernel.value", "write_shape": [11, 512, 768], "chunk_shape": [11, 512, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.rotary.inv_freq", "write_shape": [11, 32], "chunk_shape": [11, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn.v_up_proj.kernel.value", "write_shape": [11, 256, 1536], "chunk_shape": [11, 256, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn_norm.weight.value", "write_shape": [11, 1536], "chunk_shape": [11, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn_residual.norm.weight.value", "write_shape": [11, 1536], "chunk_shape": [11, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.attn_residual.query.value", "write_shape": [11, 1536], "chunk_shape": [11, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.expert_to_group", "write_shape": [11, 16], "chunk_shape": [11, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.experts.down_proj_experts.value", "write_shape": [11, 17, 512, 1536], "chunk_shape": [11, 17, 512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.experts.gate_proj_experts.value", "write_shape": [11, 17, 1536, 512], "chunk_shape": [11, 17, 1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.experts.shared_weights.kernel.value", "write_shape": [11, 1536, 1], "chunk_shape": [11, 1536, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.experts.up_proj_experts.value", "write_shape": [11, 17, 1536, 512], "chunk_shape": [11, 17, 1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.group_sizes", "write_shape": [11, 4], "chunk_shape": [11, 4], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn.router.kernel.value", "write_shape": [11, 1536, 16], "chunk_shape": [11, 1536, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn_norm.weight.value", "write_shape": [11, 1536], "chunk_shape": [11, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn_residual.norm.weight.value", "write_shape": [11, 1536], "chunk_shape": [11, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.ffn_residual.query.value", "write_shape": [11, 1536], "chunk_shape": [11, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "model.moe_layers.layer_idx", "write_shape": [11], "chunk_shape": [11], "ext_metadata": null}}, {"array_metadata": {"param_name": "rngs.params.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "rngs.params.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}]}