Wilsonwin's picture
Upload folder using huggingface_hub
39336ad verified
{"tree_metadata": {"('step',)": {"key_metadata": [{"key": "step", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('params', 'TokenAndPositionEmbedding_0', 'pos_emb', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "TokenAndPositionEmbedding_0", "key_type": 2}, {"key": "pos_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 512]}}, "('params', 'TokenAndPositionEmbedding_0', 'token_emb', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "TokenAndPositionEmbedding_0", "key_type": 2}, {"key": "token_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64000, 512]}}, "('params', 'lm_head', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "lm_head", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64000]}}, "('params', 'lm_head', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "lm_head", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 64000]}}, "('params', 'transformer_block_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('params', 'transformer_block_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('params', 'transformer_block_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('params', 'transformer_block_0', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_0', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_0', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_0', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_0', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_0', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_0', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_0', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('params', 'transformer_block_0', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_0', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_0', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_0', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('params', 'transformer_block_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('params', 'transformer_block_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('params', 'transformer_block_1', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_1', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_1', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_1', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_1', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_1', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_1', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_1', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('params', 'transformer_block_1', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_1', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_1', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_1', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_2', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('params', 'transformer_block_2', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('params', 'transformer_block_2', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_2', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('params', 'transformer_block_2', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_2', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_2', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_2', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_2', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_2', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_2', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_2', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('params', 'transformer_block_2', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_2', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_2', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_2', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_3', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('params', 'transformer_block_3', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('params', 'transformer_block_3', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_3', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('params', 'transformer_block_3', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_3', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_3', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_3', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_3', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_3', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_3', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_3', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('params', 'transformer_block_3', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_3', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_3', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_3', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_4', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('params', 'transformer_block_4', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('params', 'transformer_block_4', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_4', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('params', 'transformer_block_4', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_4', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_4', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_4', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_4', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_4', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_4', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_4', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('params', 'transformer_block_4', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_4', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_4', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_4', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_5', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('params', 'transformer_block_5', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('params', 'transformer_block_5', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_5', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('params', 'transformer_block_5', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_5', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_5', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_5', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_5', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_5', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_5', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'transformer_block_5', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('params', 'transformer_block_5', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_5', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('params', 'transformer_block_5', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('params', 'transformer_block_5', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'count')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "count", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('opt_state', '0', 'mu', 'TokenAndPositionEmbedding_0', 'pos_emb', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "TokenAndPositionEmbedding_0", "key_type": 2}, {"key": "pos_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 512]}}, "('opt_state', '0', 'mu', 'TokenAndPositionEmbedding_0', 'token_emb', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "TokenAndPositionEmbedding_0", "key_type": 2}, {"key": "token_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64000, 512]}}, "('opt_state', '0', 'mu', 'lm_head', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "lm_head", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64000]}}, "('opt_state', '0', 'mu', 'lm_head', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "lm_head", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 64000]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_0', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_1', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_2', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_3', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_4', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'mu', 'transformer_block_5', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'TokenAndPositionEmbedding_0', 'pos_emb', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "TokenAndPositionEmbedding_0", "key_type": 2}, {"key": "pos_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 512]}}, "('opt_state', '0', 'nu', 'TokenAndPositionEmbedding_0', 'token_emb', 'embedding')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "TokenAndPositionEmbedding_0", "key_type": 2}, {"key": "token_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64000, 512]}}, "('opt_state', '0', 'nu', 'lm_head', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "lm_head", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64000]}}, "('opt_state', '0', 'nu', 'lm_head', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "lm_head", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 64000]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_0', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_0", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_1', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_1", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_2', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_2", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_3', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_3", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_4', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_4", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'LayerNorm_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'LayerNorm_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'LayerNorm_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'LayerNorm_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'SelfAttention_0', 'key', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'SelfAttention_0', 'key', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'SelfAttention_0', 'out', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'SelfAttention_0', 'out', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64, 512]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'SelfAttention_0', 'query', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'SelfAttention_0', 'query', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'SelfAttention_0', 'value', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 64]}}, "('opt_state', '0', 'nu', 'transformer_block_5', 'SelfAttention_0', 'value', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "transformer_block_5", "key_type": 2}, {"key": "SelfAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 8, 64]}}, "('opt_state', '1')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '2')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "2", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}