File size: 49,542 Bytes
39336ad
1
{"array_metadatas": [{"array_metadata": {"param_name": "step", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.TokenAndPositionEmbedding_0.pos_emb.embedding", "write_shape": [256, 512], "chunk_shape": [256, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.TokenAndPositionEmbedding_0.token_emb.embedding", "write_shape": [64000, 512], "chunk_shape": [64000, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.lm_head.bias", "write_shape": [64000], "chunk_shape": [64000], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.lm_head.kernel", "write_shape": [512, 64000], "chunk_shape": [512, 64000], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_0.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_1.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_2.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_3.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_4.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.transformer_block_5.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.count", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.TokenAndPositionEmbedding_0.pos_emb.embedding", "write_shape": [256, 512], "chunk_shape": [256, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.TokenAndPositionEmbedding_0.token_emb.embedding", "write_shape": [64000, 512], "chunk_shape": [64000, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.lm_head.bias", "write_shape": [64000], "chunk_shape": [64000], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.lm_head.kernel", "write_shape": [512, 64000], "chunk_shape": [512, 64000], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_0.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_1.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_2.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_3.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_4.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.mu.transformer_block_5.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.TokenAndPositionEmbedding_0.pos_emb.embedding", "write_shape": [256, 512], "chunk_shape": [256, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.TokenAndPositionEmbedding_0.token_emb.embedding", "write_shape": [64000, 512], "chunk_shape": [64000, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.lm_head.bias", "write_shape": [64000], "chunk_shape": [64000], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.lm_head.kernel", "write_shape": [512, 64000], "chunk_shape": [512, 64000], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_0.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_1.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_2.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_3.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_4.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.Dense_0.bias", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.Dense_0.kernel", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.Dense_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.Dense_1.kernel", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.LayerNorm_0.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.LayerNorm_0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.LayerNorm_1.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.LayerNorm_1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.SelfAttention_0.key.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.SelfAttention_0.key.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.SelfAttention_0.out.bias", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.SelfAttention_0.out.kernel", "write_shape": [8, 64, 512], "chunk_shape": [8, 64, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.SelfAttention_0.query.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.SelfAttention_0.query.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.SelfAttention_0.value.bias", "write_shape": [8, 64], "chunk_shape": [8, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.0.nu.transformer_block_5.SelfAttention_0.value.kernel", "write_shape": [512, 8, 64], "chunk_shape": [512, 8, 64], "ext_metadata": null}}]}