diff --git "a/checkpoints/best_3/_METADATA" "b/checkpoints/best_3/_METADATA" new file mode 100644--- /dev/null +++ "b/checkpoints/best_3/_METADATA" @@ -0,0 +1 @@ +{"tree_metadata": {"('embed_tokens', 'embedding')": {"key_metadata": [{"key": "embed_tokens", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [50264, 768]}}, "('layers_0', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_0', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_0', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_0', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_0', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_0', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'candidate', 'bias')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_0', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'candidate', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_0', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'gate', 'bias')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_0', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'gate', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_0', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'candidate', 'bias')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_0', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'candidate', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_0', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'gate', 'bias')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_0', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'gate', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_0', 'bidirectional_rnn', 'input_proj', 'bias')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "input_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_0', 'bidirectional_rnn', 'input_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "input_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 384]}}, "('layers_0', 'bidirectional_rnn', 'layer_scale')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "layer_scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_0', 'bidirectional_rnn', 'output_proj', 'bias')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "output_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_0', 'bidirectional_rnn', 'output_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "output_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_0', 'cnn_norm', 'scale')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "cnn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_0', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_0', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_0', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_0', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_0', 'local_cnn', 'conv3', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3, 1, 768]}}, "('layers_0', 'local_cnn', 'conv3', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_0', 'local_cnn', 'conv5', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [5, 1, 768]}}, "('layers_0', 'local_cnn', 'conv5', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_0', 'local_cnn', 'conv7', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [7, 1, 768]}}, "('layers_0', 'local_cnn', 'conv7', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_0', 'local_cnn', 'fusion_gate', 'bias')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2304]}}, "('layers_0', 'local_cnn', 'fusion_gate', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('layers_0', 'local_cnn', 'layer_scale')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "layer_scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_0', 'rnn_norm', 'scale')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "rnn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_1', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_1', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_1', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_1', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_1', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_1', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_1', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_1', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_1', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_10', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_10', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_10', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_10', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_10', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_10', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_10', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_10', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_10', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_11', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_11', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_11', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_11', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_11', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_11', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_11', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_11', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_11', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_12', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_12', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_12', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_12', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_12', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_12', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'candidate', 'bias')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_12', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'candidate', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_12', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'gate', 'bias')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_12', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'gate', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_12', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'candidate', 'bias')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_12', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'candidate', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_12', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'gate', 'bias')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_12', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'gate', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_12', 'bidirectional_rnn', 'input_proj', 'bias')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "input_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_12', 'bidirectional_rnn', 'input_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "input_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 384]}}, "('layers_12', 'bidirectional_rnn', 'layer_scale')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "layer_scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_12', 'bidirectional_rnn', 'output_proj', 'bias')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "output_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_12', 'bidirectional_rnn', 'output_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "output_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_12', 'cnn_norm', 'scale')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "cnn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_12', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_12', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_12', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_12', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_12', 'local_cnn', 'conv3', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3, 1, 768]}}, "('layers_12', 'local_cnn', 'conv3', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_12', 'local_cnn', 'conv5', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [5, 1, 768]}}, "('layers_12', 'local_cnn', 'conv5', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_12', 'local_cnn', 'conv7', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [7, 1, 768]}}, "('layers_12', 'local_cnn', 'conv7', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_12', 'local_cnn', 'fusion_gate', 'bias')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2304]}}, "('layers_12', 'local_cnn', 'fusion_gate', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('layers_12', 'local_cnn', 'layer_scale')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "layer_scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_12', 'rnn_norm', 'scale')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "rnn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_13', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_13', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_13', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_13', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_13', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_13', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_13', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_13', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_13', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_14', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_14', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_14', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_14', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_14', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_14', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_14', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_14', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_14', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_15', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_15', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_15', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_15', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_15', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_15', 'cnn_norm', 'scale')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "cnn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_15', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_15', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_15', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_15', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_15', 'local_cnn', 'conv3', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3, 1, 768]}}, "('layers_15', 'local_cnn', 'conv3', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_15', 'local_cnn', 'conv5', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [5, 1, 768]}}, "('layers_15', 'local_cnn', 'conv5', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_15', 'local_cnn', 'conv7', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [7, 1, 768]}}, "('layers_15', 'local_cnn', 'conv7', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_15', 'local_cnn', 'fusion_gate', 'bias')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2304]}}, "('layers_15', 'local_cnn', 'fusion_gate', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('layers_15', 'local_cnn', 'layer_scale')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "layer_scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_16', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_16', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_16', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_16', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_16', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_16', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'candidate', 'bias')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_16', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'candidate', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_16', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'gate', 'bias')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_16', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'gate', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_16', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'candidate', 'bias')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_16', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'candidate', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_16', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'gate', 'bias')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_16', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'gate', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_16', 'bidirectional_rnn', 'input_proj', 'bias')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "input_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_16', 'bidirectional_rnn', 'input_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "input_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 384]}}, "('layers_16', 'bidirectional_rnn', 'layer_scale')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "layer_scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_16', 'bidirectional_rnn', 'output_proj', 'bias')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "output_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_16', 'bidirectional_rnn', 'output_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "output_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_16', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_16', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_16', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_16', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_16', 'rnn_norm', 'scale')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "rnn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_17', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_17', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_17', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_17', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_17', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_17', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_17', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_17', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_17', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_18', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_18', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_18', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_18', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_18', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_18', 'cnn_norm', 'scale')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "cnn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_18', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_18', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_18', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_18', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_18', 'local_cnn', 'conv3', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3, 1, 768]}}, "('layers_18', 'local_cnn', 'conv3', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_18', 'local_cnn', 'conv5', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [5, 1, 768]}}, "('layers_18', 'local_cnn', 'conv5', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_18', 'local_cnn', 'conv7', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [7, 1, 768]}}, "('layers_18', 'local_cnn', 'conv7', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_18', 'local_cnn', 'fusion_gate', 'bias')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2304]}}, "('layers_18', 'local_cnn', 'fusion_gate', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('layers_18', 'local_cnn', 'layer_scale')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "layer_scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_19', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_19', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_19', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_19', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_19', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_19', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_19', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_19', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_19', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_2', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_2', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_2', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_2', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_2', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_2', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_2', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_2', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_2', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_20', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_20', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_20', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_20', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_20', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_20', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'candidate', 'bias')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_20', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'candidate', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_20', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'gate', 'bias')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_20', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'gate', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_20', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'candidate', 'bias')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_20', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'candidate', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_20', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'gate', 'bias')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_20', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'gate', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_20', 'bidirectional_rnn', 'input_proj', 'bias')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "input_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_20', 'bidirectional_rnn', 'input_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "input_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 384]}}, "('layers_20', 'bidirectional_rnn', 'layer_scale')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "layer_scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_20', 'bidirectional_rnn', 'output_proj', 'bias')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "output_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_20', 'bidirectional_rnn', 'output_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "output_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_20', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_20', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_20', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_20', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_20', 'rnn_norm', 'scale')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "rnn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_21', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_21', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_21', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_21', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_21', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_21', 'cnn_norm', 'scale')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "cnn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_21', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_21', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_21', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_21', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_21', 'local_cnn', 'conv3', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3, 1, 768]}}, "('layers_21', 'local_cnn', 'conv3', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_21', 'local_cnn', 'conv5', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [5, 1, 768]}}, "('layers_21', 'local_cnn', 'conv5', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_21', 'local_cnn', 'conv7', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [7, 1, 768]}}, "('layers_21', 'local_cnn', 'conv7', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_21', 'local_cnn', 'fusion_gate', 'bias')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2304]}}, "('layers_21', 'local_cnn', 'fusion_gate', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('layers_21', 'local_cnn', 'layer_scale')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "layer_scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_22', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_22', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_22', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_22', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_22', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_22', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_22', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_22', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_22', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_23', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_23', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_23', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_23', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_23', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_23', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_23', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_23', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_23', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_3', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_3', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_3', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_3', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_3', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_3', 'cnn_norm', 'scale')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "cnn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_3', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_3', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_3', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_3', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_3', 'local_cnn', 'conv3', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3, 1, 768]}}, "('layers_3', 'local_cnn', 'conv3', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_3', 'local_cnn', 'conv5', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [5, 1, 768]}}, "('layers_3', 'local_cnn', 'conv5', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_3', 'local_cnn', 'conv7', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [7, 1, 768]}}, "('layers_3', 'local_cnn', 'conv7', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_3', 'local_cnn', 'fusion_gate', 'bias')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2304]}}, "('layers_3', 'local_cnn', 'fusion_gate', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('layers_3', 'local_cnn', 'layer_scale')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "layer_scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_4', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_4', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_4', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_4', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_4', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_4', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'candidate', 'bias')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_4', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'candidate', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_4', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'gate', 'bias')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_4', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'gate', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_4', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'candidate', 'bias')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_4', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'candidate', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_4', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'gate', 'bias')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_4', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'gate', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_4', 'bidirectional_rnn', 'input_proj', 'bias')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "input_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_4', 'bidirectional_rnn', 'input_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "input_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 384]}}, "('layers_4', 'bidirectional_rnn', 'layer_scale')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "layer_scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_4', 'bidirectional_rnn', 'output_proj', 'bias')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "output_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_4', 'bidirectional_rnn', 'output_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "output_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_4', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_4', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_4', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_4', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_4', 'rnn_norm', 'scale')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "rnn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_5', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_5', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_5', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_5', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_5', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_5', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_5', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_5', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_5', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_6', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_6', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_6', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_6', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_6', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_6', 'cnn_norm', 'scale')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "cnn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_6', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_6', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_6', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_6', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_6', 'local_cnn', 'conv3', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3, 1, 768]}}, "('layers_6', 'local_cnn', 'conv3', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_6', 'local_cnn', 'conv5', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [5, 1, 768]}}, "('layers_6', 'local_cnn', 'conv5', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_6', 'local_cnn', 'conv7', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [7, 1, 768]}}, "('layers_6', 'local_cnn', 'conv7', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_6', 'local_cnn', 'fusion_gate', 'bias')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2304]}}, "('layers_6', 'local_cnn', 'fusion_gate', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('layers_6', 'local_cnn', 'layer_scale')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "layer_scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_7', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_7', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_7', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_7', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_7', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_7', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_7', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_7', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_7', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_8', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_8', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_8', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_8', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_8', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_8', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'candidate', 'bias')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_8', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'candidate', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_8', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'gate', 'bias')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_8', 'bidirectional_rnn', 'backward_cell', 'MinGRUCell_0', 'gate', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "backward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_8', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'candidate', 'bias')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_8', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'candidate', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "candidate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_8', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'gate', 'bias')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_8', 'bidirectional_rnn', 'forward_cell', 'MinGRUCell_0', 'gate', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "forward_cell", "key_type": 2}, {"key": "MinGRUCell_0", "key_type": 2}, {"key": "gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 384]}}, "('layers_8', 'bidirectional_rnn', 'input_proj', 'bias')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "input_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('layers_8', 'bidirectional_rnn', 'input_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "input_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 384]}}, "('layers_8', 'bidirectional_rnn', 'layer_scale')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "layer_scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_8', 'bidirectional_rnn', 'output_proj', 'bias')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "output_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_8', 'bidirectional_rnn', 'output_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "bidirectional_rnn", "key_type": 2}, {"key": "output_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_8', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_8', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_8', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_8', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_8', 'rnn_norm', 'scale')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "rnn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_9', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_9', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_9', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('layers_9', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 128]}}, "('layers_9', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_9', 'cnn_norm', 'scale')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "cnn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_9', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1920, 768]}}, "('layers_9', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_9', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 1920]}}, "('layers_9', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('layers_9', 'local_cnn', 'conv3', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3, 1, 768]}}, "('layers_9', 'local_cnn', 'conv3', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv3", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_9', 'local_cnn', 'conv5', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [5, 1, 768]}}, "('layers_9', 'local_cnn', 'conv5', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv5", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_9', 'local_cnn', 'conv7', 'depthwise', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "depthwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [7, 1, 768]}}, "('layers_9', 'local_cnn', 'conv7', 'pointwise', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "conv7", "key_type": 2}, {"key": "pointwise", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 768, 768]}}, "('layers_9', 'local_cnn', 'fusion_gate', 'bias')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2304]}}, "('layers_9', 'local_cnn', 'fusion_gate', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "fusion_gate", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('layers_9', 'local_cnn', 'layer_scale')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "local_cnn", "key_type": 2}, {"key": "layer_scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('lm_head', 'kernel')": {"key_metadata": [{"key": "lm_head", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 50264]}}, "('norm', 'scale')": {"key_metadata": [{"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null} \ No newline at end of file