File size: 74,674 Bytes
c7f839a | 1 | {"array_metadatas": [{"array_metadata": {"param_name": "step", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.acc.halt_net.layers_0.bias", "write_shape": [24], "chunk_shape": [24], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.acc.halt_net.layers_0.kernel", "write_shape": [96, 192], "chunk_shape": [96, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.acc.halt_net.layers_2.bias", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.acc.halt_net.layers_2.kernel", "write_shape": [24, 1], "chunk_shape": [24, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.acc.loop_embed.embedding", "write_shape": [4, 768], "chunk_shape": [4, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.acc.state_gate.layers_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.acc.state_gate.layers_0.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.acc.state_norm.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.acc.state_norm.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.acc.state_transform.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.acc.state_transform.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.embedding.embedding", "write_shape": [50257, 96], "chunk_shape": [50257, 96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.final_norm.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.final_norm.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_0.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_0.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_0.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_0.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_0.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_0.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_0.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_0.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_0.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_0.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_1.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_1.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_1.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_1.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_1.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_1.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_1.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_1.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_1.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_1.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_10.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_10.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_10.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_10.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_10.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_10.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_10.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_10.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_10.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_10.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_11.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_11.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_11.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_11.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_11.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_11.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_11.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_11.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_11.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_11.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_2.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_2.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_2.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_2.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_2.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_2.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_2.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_2.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_2.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_2.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_3.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_3.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_3.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_3.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_3.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_3.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_3.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_3.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_3.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_3.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_4.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_4.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_4.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_4.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_4.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_4.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_4.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_4.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_4.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_4.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_5.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_5.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_5.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_5.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_5.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_5.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_5.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_5.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_5.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_5.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_6.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_6.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_6.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_6.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_6.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_6.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_6.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_6.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_6.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_6.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_7.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_7.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_7.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_7.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_7.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_7.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_7.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_7.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_7.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_7.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_8.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_8.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_8.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_8.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_8.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_8.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_8.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_8.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_8.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_8.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_9.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_9.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_9.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_9.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_9.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_9.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_9.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_9.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_9.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.layers_9.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.lm_head.kernel", "write_shape": [96, 50257], "chunk_shape": [96, 50257], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.controller.pos_encoding.embedding", "write_shape": [128, 768], "chunk_shape": [128, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.indexer.Dense_0.kernel", "write_shape": [96, 1], "chunk_shape": [96, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.indexer.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.indexer.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.indexer.Dense_2.bias", "write_shape": [48], "chunk_shape": [48], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.indexer.Dense_2.kernel", "write_shape": [96, 384], "chunk_shape": [96, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.indexer.Dense_3.bias", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.indexer.Dense_3.kernel", "write_shape": [48, 1], "chunk_shape": [48, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.indexer.Dense_4.bias", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.indexer.Dense_4.kernel", "write_shape": [48, 1], "chunk_shape": [48, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.pool.params_storage", "write_shape": [32768, 768], "chunk_shape": [32768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.retrieval_integrator.layers_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.retrieval_integrator.layers_0.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.retrieval_integrator.layers_2.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.retrieval_integrator.layers_2.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.retrieval_integrator.layers_3.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.retrieval_integrator.layers_3.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.count", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.acc.halt_net.layers_0.bias", "write_shape": [24], "chunk_shape": [24], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.acc.halt_net.layers_0.kernel", "write_shape": [96, 192], "chunk_shape": [96, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.acc.halt_net.layers_2.bias", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.acc.halt_net.layers_2.kernel", "write_shape": [24, 1], "chunk_shape": [24, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.acc.loop_embed.embedding", "write_shape": [4, 768], "chunk_shape": [4, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.acc.state_gate.layers_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.acc.state_gate.layers_0.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.acc.state_norm.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.acc.state_norm.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.acc.state_transform.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.acc.state_transform.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.embedding.embedding", "write_shape": [50257, 96], "chunk_shape": [50257, 96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.final_norm.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.final_norm.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_0.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_0.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_0.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_0.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_0.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_0.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_0.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_0.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_0.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_0.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_1.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_1.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_1.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_1.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_1.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_1.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_1.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_1.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_1.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_1.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_10.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_10.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_10.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_10.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_10.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_10.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_10.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_10.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_10.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_10.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_11.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_11.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_11.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_11.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_11.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_11.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_11.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_11.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_11.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_11.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_2.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_2.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_2.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_2.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_2.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_2.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_2.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_2.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_2.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_2.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_3.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_3.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_3.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_3.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_3.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_3.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_3.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_3.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_3.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_3.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_4.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_4.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_4.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_4.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_4.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_4.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_4.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_4.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_4.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_4.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_5.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_5.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_5.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_5.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_5.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_5.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_5.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_5.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_5.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_5.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_6.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_6.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_6.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_6.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_6.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_6.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_6.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_6.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_6.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_6.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_7.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_7.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_7.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_7.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_7.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_7.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_7.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_7.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_7.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_7.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_8.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_8.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_8.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_8.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_8.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_8.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_8.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_8.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_8.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_8.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_9.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_9.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_9.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_9.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_9.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_9.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_9.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_9.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_9.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.layers_9.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.lm_head.kernel", "write_shape": [96, 50257], "chunk_shape": [96, 50257], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.controller.pos_encoding.embedding", "write_shape": [128, 768], "chunk_shape": [128, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.indexer.Dense_0.kernel", "write_shape": [96, 1], "chunk_shape": [96, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.indexer.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.indexer.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.indexer.Dense_2.bias", "write_shape": [48], "chunk_shape": [48], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.indexer.Dense_2.kernel", "write_shape": [96, 384], "chunk_shape": [96, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.indexer.Dense_3.bias", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.indexer.Dense_3.kernel", "write_shape": [48, 1], "chunk_shape": [48, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.indexer.Dense_4.bias", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.indexer.Dense_4.kernel", "write_shape": [48, 1], "chunk_shape": [48, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.retrieval_integrator.layers_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.retrieval_integrator.layers_0.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.retrieval_integrator.layers_2.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.retrieval_integrator.layers_2.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.retrieval_integrator.layers_3.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.retrieval_integrator.layers_3.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.acc.halt_net.layers_0.bias", "write_shape": [24], "chunk_shape": [24], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.acc.halt_net.layers_0.kernel", "write_shape": [96, 192], "chunk_shape": [96, 192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.acc.halt_net.layers_2.bias", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.acc.halt_net.layers_2.kernel", "write_shape": [24, 1], "chunk_shape": [24, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.acc.loop_embed.embedding", "write_shape": [4, 768], "chunk_shape": [4, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.acc.state_gate.layers_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.acc.state_gate.layers_0.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.acc.state_norm.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.acc.state_norm.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.acc.state_transform.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.acc.state_transform.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.embedding.embedding", "write_shape": [50257, 96], "chunk_shape": [50257, 96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.final_norm.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.final_norm.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_0.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_0.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_0.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_0.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_0.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_0.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_0.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_0.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_0.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_0.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_1.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_1.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_1.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_1.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_1.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_1.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_1.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_1.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_1.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_1.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_10.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_10.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_10.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_10.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_10.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_10.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_10.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_10.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_10.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_10.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_11.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_11.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_11.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_11.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_11.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_11.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_11.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_11.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_11.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_11.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_2.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_2.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_2.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_2.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_2.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_2.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_2.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_2.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_2.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_2.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_3.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_3.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_3.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_3.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_3.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_3.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_3.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_3.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_3.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_3.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_4.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_4.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_4.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_4.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_4.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_4.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_4.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_4.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_4.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_4.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_5.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_5.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_5.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_5.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_5.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_5.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_5.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_5.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_5.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_5.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_6.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_6.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_6.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_6.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_6.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_6.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_6.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_6.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_6.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_6.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_7.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_7.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_7.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_7.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_7.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_7.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_7.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_7.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_7.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_7.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_8.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_8.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_8.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_8.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_8.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_8.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_8.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_8.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_8.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_8.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_9.FlashCausalSelfAttention_0.Dense_0.kernel", "write_shape": [96, 2304], "chunk_shape": [96, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_9.FlashCausalSelfAttention_0.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_9.LayerNorm_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_9.LayerNorm_0.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_9.LayerNorm_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_9.LayerNorm_1.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_9.TinyFFN_0.Dense_0.bias", "write_shape": [192], "chunk_shape": [192], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_9.TinyFFN_0.Dense_0.kernel", "write_shape": [96, 1536], "chunk_shape": [96, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_9.TinyFFN_0.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.layers_9.TinyFFN_0.Dense_1.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.lm_head.kernel", "write_shape": [96, 50257], "chunk_shape": [96, 50257], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.controller.pos_encoding.embedding", "write_shape": [128, 768], "chunk_shape": [128, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.indexer.Dense_0.kernel", "write_shape": [96, 1], "chunk_shape": [96, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.indexer.Dense_1.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.indexer.Dense_1.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.indexer.Dense_2.bias", "write_shape": [48], "chunk_shape": [48], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.indexer.Dense_2.kernel", "write_shape": [96, 384], "chunk_shape": [96, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.indexer.Dense_3.bias", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.indexer.Dense_3.kernel", "write_shape": [48, 1], "chunk_shape": [48, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.indexer.Dense_4.bias", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.indexer.Dense_4.kernel", "write_shape": [48, 1], "chunk_shape": [48, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.retrieval_integrator.layers_0.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.retrieval_integrator.layers_0.kernel", "write_shape": [192, 768], "chunk_shape": [192, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.retrieval_integrator.layers_2.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.retrieval_integrator.layers_2.kernel", "write_shape": [96, 768], "chunk_shape": [96, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.retrieval_integrator.layers_3.bias", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.retrieval_integrator.layers_3.scale", "write_shape": [96], "chunk_shape": [96], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.2.count", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "rng", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "pool_m", "write_shape": [32768, 768], "chunk_shape": [32768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "pool_v", "write_shape": [32768, 768], "chunk_shape": [32768, 768], "ext_metadata": null}}]} |