Upload folder using huggingface_hub
Browse files- .gitattributes +3 -0
- _CHECKPOINT_METADATA +1 -0
- _METADATA +1 -0
- _sharding +1 -0
- array_metadatas/process_0 +1 -0
- d/82376d3607afc6601ee09b8a73b2be12 +0 -0
- manifest.ocdbt +0 -0
- ocdbt.process_0/d/04374155753a500d09deb544b4a1b590 +3 -0
- ocdbt.process_0/d/3c0b11c7edf6e695cd3e7bd22b6be45f +0 -0
- ocdbt.process_0/d/6b82546b9dc06fc888daa0ba17d5dcc8 +0 -0
- ocdbt.process_0/d/91c38a4a8065d44b23cc087e4743577b +3 -0
- ocdbt.process_0/d/dce9fb0cac4f2ea729bf07612aa4ef2d +3 -0
- ocdbt.process_0/manifest.ocdbt +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
ocdbt.process_0/d/04374155753a500d09deb544b4a1b590 filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
ocdbt.process_0/d/91c38a4a8065d44b23cc087e4743577b filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
ocdbt.process_0/d/dce9fb0cac4f2ea729bf07612aa4ef2d filter=lfs diff=lfs merge=lfs -text
|
_CHECKPOINT_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"item_handlers": "orbax.checkpoint._src.handlers.pytree_checkpoint_handler.PyTreeCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1766045809097540341, "commit_timestamp_nsecs": 1766045811851038310, "custom_metadata": {}}
|
_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tree_metadata": {"('embedding_layer', 'pos_emb', 'embedding', 'value')": {"key_metadata": [{"key": "embedding_layer", "key_type": 2}, {"key": "pos_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 128]}}, "('embedding_layer', 'token_emb', 'embedding', 'value')": {"key_metadata": [{"key": "embedding_layer", "key_type": 2}, {"key": "token_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [50258, 32]}}, "('output_layer', 'bias', 'value')": {"key_metadata": [{"key": "output_layer", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [25129]}}, "('output_layer', 'kernel', 'value')": {"key_metadata": [{"key": "output_layer", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 25129]}}, "('transformer_blocks', '0', 'layer_norm1', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "layer_norm1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '0', 'layer_norm1', 'scale', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "layer_norm1", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '0', 'layer_norm2', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "layer_norm2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '0', 'layer_norm2', 'scale', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "layer_norm2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '0', 'linear1', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "linear1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '0', 'linear1', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "linear1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 128]}}, "('transformer_blocks', '0', 'linear2', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "linear2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '0', 'linear2', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "linear2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 128]}}, "('transformer_blocks', '0', 'mha', 'key', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32]}}, "('transformer_blocks', '0', 'mha', 'key', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 4, 32]}}, "('transformer_blocks', '0', 'mha', 'out', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '0', 'mha', 'out', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16, 256]}}, "('transformer_blocks', '0', 'mha', 'query', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32]}}, "('transformer_blocks', '0', 'mha', 'query', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 4, 32]}}, "('transformer_blocks', '0', 'mha', 'value', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32]}}, "('transformer_blocks', '0', 'mha', 'value', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 4, 32]}}, "('transformer_blocks', '1', 'layer_norm1', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "layer_norm1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '1', 'layer_norm1', 'scale', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "layer_norm1", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '1', 'layer_norm2', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "layer_norm2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '1', 'layer_norm2', 'scale', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "layer_norm2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '1', 'linear1', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "linear1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '1', 'linear1', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "linear1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 128]}}, "('transformer_blocks', '1', 'linear2', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "linear2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '1', 'linear2', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "linear2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 128]}}, "('transformer_blocks', '1', 'mha', 'key', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32]}}, "('transformer_blocks', '1', 'mha', 'key', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 4, 32]}}, "('transformer_blocks', '1', 'mha', 'out', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '1', 'mha', 'out', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16, 256]}}, "('transformer_blocks', '1', 'mha', 'query', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32]}}, "('transformer_blocks', '1', 'mha', 'query', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 4, 32]}}, "('transformer_blocks', '1', 'mha', 'value', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32]}}, "('transformer_blocks', '1', 'mha', 'value', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 4, 32]}}, "('transformer_blocks', '2', 'layer_norm1', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "layer_norm1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '2', 'layer_norm1', 'scale', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "layer_norm1", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '2', 'layer_norm2', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "layer_norm2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '2', 'layer_norm2', 'scale', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "layer_norm2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '2', 'linear1', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "linear1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '2', 'linear1', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "linear1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 128]}}, "('transformer_blocks', '2', 'linear2', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "linear2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '2', 'linear2', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "linear2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 128]}}, "('transformer_blocks', '2', 'mha', 'key', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32]}}, "('transformer_blocks', '2', 'mha', 'key', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 4, 32]}}, "('transformer_blocks', '2', 'mha', 'out', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '2', 'mha', 'out', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16, 256]}}, "('transformer_blocks', '2', 'mha', 'query', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32]}}, "('transformer_blocks', '2', 'mha', 'query', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 4, 32]}}, "('transformer_blocks', '2', 'mha', 'value', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32]}}, "('transformer_blocks', '2', 'mha', 'value', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 4, 32]}}, "('transformer_blocks', '3', 'layer_norm1', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "layer_norm1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '3', 'layer_norm1', 'scale', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "layer_norm1", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '3', 'layer_norm2', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "layer_norm2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '3', 'layer_norm2', 'scale', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "layer_norm2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '3', 'linear1', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "linear1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '3', 'linear1', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "linear1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 128]}}, "('transformer_blocks', '3', 'linear2', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "linear2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '3', 'linear2', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "linear2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 128]}}, "('transformer_blocks', '3', 'mha', 'key', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32]}}, "('transformer_blocks', '3', 'mha', 'key', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 4, 32]}}, "('transformer_blocks', '3', 'mha', 'out', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32]}}, "('transformer_blocks', '3', 'mha', 'out', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16, 256]}}, "('transformer_blocks', '3', 'mha', 'query', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32]}}, "('transformer_blocks', '3', 'mha', 'query', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 4, 32]}}, "('transformer_blocks', '3', 'mha', 'value', 'bias', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32]}}, "('transformer_blocks', '3', 'mha', 'value', 'kernel', 'value')": {"key_metadata": [{"key": "transformer_blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "mha", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 4, 32]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
_sharding
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"ZW1iZWRkaW5nX2xheWVyLnBvc19lbWIuZW1iZWRkaW5nLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","ZW1iZWRkaW5nX2xheWVyLnRva2VuX2VtYi5lbWJlZGRpbmcudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","b3V0cHV0X2xheWVyLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","b3V0cHV0X2xheWVyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubGF5ZXJfbm9ybTEuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubGF5ZXJfbm9ybTEuc2NhbGUudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubGF5ZXJfbm9ybTIuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubGF5ZXJfbm9ybTIuc2NhbGUudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubGluZWFyMS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubGluZWFyMS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubGluZWFyMi5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubGluZWFyMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubWhhLm91dC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubWhhLm91dC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubWhhLmtleS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubWhhLmtleS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubWhhLnF1ZXJ5LmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubWhhLnF1ZXJ5Lmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubWhhLnZhbHVlLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjAubWhhLnZhbHVlLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubGF5ZXJfbm9ybTEuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubGF5ZXJfbm9ybTEuc2NhbGUudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubGF5ZXJfbm9ybTIuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubGF5ZXJfbm9ybTIuc2NhbGUudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubGluZWFyMS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubGluZWFyMS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubGluZWFyMi5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubGluZWFyMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubWhhLm91dC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubWhhLm91dC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubWhhLmtleS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubWhhLmtleS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubWhhLnF1ZXJ5LmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubWhhLnF1ZXJ5Lmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubWhhLnZhbHVlLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjEubWhhLnZhbHVlLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubGF5ZXJfbm9ybTEuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubGF5ZXJfbm9ybTEuc2NhbGUudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubGF5ZXJfbm9ybTIuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubGF5ZXJfbm9ybTIuc2NhbGUudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubGluZWFyMS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubGluZWFyMS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubGluZWFyMi5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubGluZWFyMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubWhhLm91dC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubWhhLm91dC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubWhhLmtleS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubWhhLmtleS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubWhhLnF1ZXJ5LmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubWhhLnF1ZXJ5Lmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubWhhLnZhbHVlLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjIubWhhLnZhbHVlLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubGF5ZXJfbm9ybTEuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubGF5ZXJfbm9ybTEuc2NhbGUudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubGF5ZXJfbm9ybTIuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubGF5ZXJfbm9ybTIuc2NhbGUudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubGluZWFyMS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubGluZWFyMS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubGluZWFyMi5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubGluZWFyMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubWhhLm91dC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubWhhLm91dC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubWhhLmtleS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubWhhLmtleS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubWhhLnF1ZXJ5LmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubWhhLnF1ZXJ5Lmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubWhhLnZhbHVlLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [\"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}","dHJhbnNmb3JtZXJfYmxvY2tzLjMubWhhLnZhbHVlLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4, 2], \"axis_names\": [\"batch\", \"model\"], \"axis_types\": [\"AxisType.Auto\", \"AxisType.Auto\"], \"partition_spec\": [null, \"model\"], \"device_mesh\": {\"mesh\": [[{\"id\": 0}, {\"id\": 1}], [{\"id\": 2}, {\"id\": 3}], [{\"id\": 7}, {\"id\": 6}], [{\"id\": 5}, {\"id\": 4}]]}}"}
|
array_metadatas/process_0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "embedding_layer.pos_emb.embedding.value", "write_shape": [64, 128], "chunk_shape": [64, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "embedding_layer.token_emb.embedding.value", "write_shape": [50258, 32], "chunk_shape": [50258, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_layer.bias.value", "write_shape": [25129], "chunk_shape": [25129], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_layer.kernel.value", "write_shape": [64, 25129], "chunk_shape": [64, 25129], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.layer_norm1.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.layer_norm1.scale.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.layer_norm2.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.layer_norm2.scale.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.linear1.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.linear1.kernel.value", "write_shape": [64, 128], "chunk_shape": [64, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.linear2.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.linear2.kernel.value", "write_shape": [64, 128], "chunk_shape": [64, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.mha.key.bias.value", "write_shape": [1, 32], "chunk_shape": [1, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.mha.key.kernel.value", "write_shape": [64, 4, 32], "chunk_shape": [64, 4, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.mha.out.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.mha.out.kernel.value", "write_shape": [2, 16, 256], "chunk_shape": [2, 16, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.mha.query.bias.value", "write_shape": [1, 32], "chunk_shape": [1, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.mha.query.kernel.value", "write_shape": [64, 4, 32], "chunk_shape": [64, 4, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.mha.value.bias.value", "write_shape": [1, 32], "chunk_shape": [1, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.0.mha.value.kernel.value", "write_shape": [64, 4, 32], "chunk_shape": [64, 4, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.layer_norm1.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.layer_norm1.scale.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.layer_norm2.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.layer_norm2.scale.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.linear1.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.linear1.kernel.value", "write_shape": [64, 128], "chunk_shape": [64, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.linear2.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.linear2.kernel.value", "write_shape": [64, 128], "chunk_shape": [64, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.mha.key.bias.value", "write_shape": [1, 32], "chunk_shape": [1, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.mha.key.kernel.value", "write_shape": [64, 4, 32], "chunk_shape": [64, 4, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.mha.out.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.mha.out.kernel.value", "write_shape": [2, 16, 256], "chunk_shape": [2, 16, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.mha.query.bias.value", "write_shape": [1, 32], "chunk_shape": [1, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.mha.query.kernel.value", "write_shape": [64, 4, 32], "chunk_shape": [64, 4, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.mha.value.bias.value", "write_shape": [1, 32], "chunk_shape": [1, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.1.mha.value.kernel.value", "write_shape": [64, 4, 32], "chunk_shape": [64, 4, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.layer_norm1.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.layer_norm1.scale.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.layer_norm2.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.layer_norm2.scale.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.linear1.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.linear1.kernel.value", "write_shape": [64, 128], "chunk_shape": [64, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.linear2.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.linear2.kernel.value", "write_shape": [64, 128], "chunk_shape": [64, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.mha.key.bias.value", "write_shape": [1, 32], "chunk_shape": [1, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.mha.key.kernel.value", "write_shape": [64, 4, 32], "chunk_shape": [64, 4, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.mha.out.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.mha.out.kernel.value", "write_shape": [2, 16, 256], "chunk_shape": [2, 16, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.mha.query.bias.value", "write_shape": [1, 32], "chunk_shape": [1, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.mha.query.kernel.value", "write_shape": [64, 4, 32], "chunk_shape": [64, 4, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.mha.value.bias.value", "write_shape": [1, 32], "chunk_shape": [1, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.2.mha.value.kernel.value", "write_shape": [64, 4, 32], "chunk_shape": [64, 4, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.layer_norm1.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.layer_norm1.scale.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.layer_norm2.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.layer_norm2.scale.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.linear1.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.linear1.kernel.value", "write_shape": [64, 128], "chunk_shape": [64, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.linear2.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.linear2.kernel.value", "write_shape": [64, 128], "chunk_shape": [64, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.mha.key.bias.value", "write_shape": [1, 32], "chunk_shape": [1, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.mha.key.kernel.value", "write_shape": [64, 4, 32], "chunk_shape": [64, 4, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.mha.out.bias.value", "write_shape": [32], "chunk_shape": [32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.mha.out.kernel.value", "write_shape": [2, 16, 256], "chunk_shape": [2, 16, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.mha.query.bias.value", "write_shape": [1, 32], "chunk_shape": [1, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.mha.query.kernel.value", "write_shape": [64, 4, 32], "chunk_shape": [64, 4, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.mha.value.bias.value", "write_shape": [1, 32], "chunk_shape": [1, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "transformer_blocks.3.mha.value.kernel.value", "write_shape": [64, 4, 32], "chunk_shape": [64, 4, 32], "ext_metadata": null}}]}
|
d/82376d3607afc6601ee09b8a73b2be12
ADDED
|
Binary file (41.5 kB). View file
|
|
|
manifest.ocdbt
ADDED
|
Binary file (119 Bytes). View file
|
|
|
ocdbt.process_0/d/04374155753a500d09deb544b4a1b590
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27e6a788d36dcdcdeaf866bfd6d9ab1a5e43f39449a63610962ff0e2338e8a1e
|
| 3 |
+
size 2968162
|
ocdbt.process_0/d/3c0b11c7edf6e695cd3e7bd22b6be45f
ADDED
|
Binary file (562 Bytes). View file
|
|
|
ocdbt.process_0/d/6b82546b9dc06fc888daa0ba17d5dcc8
ADDED
|
Binary file (207 Bytes). View file
|
|
|
ocdbt.process_0/d/91c38a4a8065d44b23cc087e4743577b
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1440ff0f60f777a944e8315cdd1d35e560836d0b7be5fc6e1a1d186f24f02530
|
| 3 |
+
size 243641
|
ocdbt.process_0/d/dce9fb0cac4f2ea729bf07612aa4ef2d
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bc856f301218bd7541f7961fb10ca5542c5c59fcbc2ef5eefff191cde168c28
|
| 3 |
+
size 96944128
|
ocdbt.process_0/manifest.ocdbt
ADDED
|
Binary file (318 Bytes). View file
|
|
|