clockwork7's picture
Upload folder using huggingface_hub
279317f verified
{"array_metadatas": [{"array_metadata": {"param_name": "step", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.0.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.0.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.0.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.0.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.0.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.0.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.0.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.0.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.0.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.0.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.0.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.0.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.1.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.1.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.1.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.1.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.1.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.1.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.1.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.1.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.1.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.1.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.1.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.1.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.2.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.2.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.2.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.2.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.2.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.2.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.2.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.2.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.2.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.2.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.2.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.2.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.3.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.3.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.3.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.3.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.3.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.3.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.3.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.3.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.3.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.3.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.3.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.3.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.4.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.4.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.4.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.4.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.4.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.4.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.4.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.4.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.4.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.4.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.4.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.4.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.5.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.5.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.5.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.5.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.5.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.5.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.5.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.5.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.5.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.5.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.5.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.h.5.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.ln_f.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.ln_f.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.wpe.embedding", "write_shape": [30, 512], "chunk_shape": [30, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer.wte.embedding", "write_shape": [19, 512], "chunk_shape": [19, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.count", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.0.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.0.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.0.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.0.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.0.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.0.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.0.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.0.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.0.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.0.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.0.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.0.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.1.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.1.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.1.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.1.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.1.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.1.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.1.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.1.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.1.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.1.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.1.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.1.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.2.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.2.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.2.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.2.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.2.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.2.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.2.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.2.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.2.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.2.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.2.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.2.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.3.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.3.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.3.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.3.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.3.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.3.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.3.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.3.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.3.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.3.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.3.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.3.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.4.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.4.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.4.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.4.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.4.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.4.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.4.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.4.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.4.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.4.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.4.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.4.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.5.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.5.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.5.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.5.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.5.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.5.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.5.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.5.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.5.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.5.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.5.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.h.5.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.ln_f.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.ln_f.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.wpe.embedding", "write_shape": [30, 512], "chunk_shape": [30, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer.wte.embedding", "write_shape": [19, 512], "chunk_shape": [19, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.0.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.0.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.0.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.0.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.0.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.0.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.0.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.0.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.0.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.0.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.0.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.0.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.1.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.1.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.1.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.1.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.1.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.1.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.1.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.1.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.1.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.1.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.1.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.1.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.2.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.2.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.2.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.2.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.2.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.2.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.2.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.2.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.2.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.2.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.2.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.2.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.3.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.3.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.3.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.3.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.3.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.3.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.3.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.3.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.3.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.3.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.3.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.3.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.4.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.4.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.4.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.4.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.4.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.4.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.4.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.4.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.4.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.4.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.4.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.4.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.5.attn.c_attn.bias", "write_shape": [1536], "chunk_shape": [1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.5.attn.c_attn.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.5.attn.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.5.attn.c_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.5.ln_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.5.ln_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.5.ln_2.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.5.ln_2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.5.mlp.c_fc.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.5.mlp.c_fc.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.5.mlp.c_proj.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.h.5.mlp.c_proj.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.ln_f.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.ln_f.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.wpe.embedding", "write_shape": [30, 512], "chunk_shape": [30, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer.wte.embedding", "write_shape": [19, 512], "chunk_shape": [19, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.2.count", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "dropout_rng", "write_shape": [4, 2], "chunk_shape": [4, 2], "ext_metadata": null}}]}