gradling push experiments/aiayn/baseline/runs/0001
Browse files- .gitattributes +7 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/_CHECKPOINT_METADATA +1 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/_METADATA +1 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/_sharding +1 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/array_metadatas/process_0 +1 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/d/e9cb36b30740d34ba2a49e071a8e609a +0 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/manifest.ocdbt +0 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/0cfc70b3ea1e060effbdf1a1c146e354 +3 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/107f870f364ea57b6314fff5f27fe998 +0 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/194e9e4da3431b6bd0b14ab0bef3ed4f +3 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/32f79325c59e4e268c5f96365a86a80b +0 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/34c2ca613b7475efe74d6a77b1ece744 +3 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/4ca33e0f6969474ca55df6795e9e9d64 +3 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/51156e38c40d2246cc0cb3fdb8bfc183 +0 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/ab914487ea687e2ef7ca99c47b1df979 +3 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/adce8b7b105cfac3d1a6b27f7fa4d033 +3 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/d19d8595f1810f24745814e4d13115b4 +3 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/d74db98a016c6cac0842c60cc750951a +0 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/fa331db95a502c7477a3b27a8e9f82d6 +0 -0
- experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/manifest.ocdbt +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/0cfc70b3ea1e060effbdf1a1c146e354 filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/194e9e4da3431b6bd0b14ab0bef3ed4f filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/34c2ca613b7475efe74d6a77b1ece744 filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/4ca33e0f6969474ca55df6795e9e9d64 filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/ab914487ea687e2ef7ca99c47b1df979 filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/adce8b7b105cfac3d1a6b27f7fa4d033 filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/d19d8595f1810f24745814e4d13115b4 filter=lfs diff=lfs merge=lfs -text
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/_CHECKPOINT_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"item_handlers": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1772958286722113461, "commit_timestamp_nsecs": 1772958287997065982, "custom_metadata": {}}
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tree_metadata": {"('blocks', 'layers', '0', 'ff', 'net', 'layers', '0', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('blocks', 'layers', '0', 'ff', 'net', 'layers', '0', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('blocks', 'layers', '0', 'ff', 'net', 'layers', '2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'ff', 'net', 'layers', '2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('blocks', 'layers', '0', 'ff', 'net', 'layers', '3', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '0', 'ff', 'net', 'layers', '3', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '0', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '0', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '0', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '0', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '0', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '0', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '0', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '0', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '0', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '3', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '3', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '1', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '1', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '1', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '1', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '1', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '1', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '1', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '0', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '0', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '3', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '3', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '2', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '2', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '2', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '2', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '2', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '2', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '2', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '0', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '0', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '3', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '3', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '3', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '3', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '3', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '3', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '3', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '3', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '3', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '0', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '0', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '3', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '3', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '4', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '4', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '4', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '4', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '4', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '4', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '4', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '0', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '0', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '3', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '3', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '5', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '5', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '5', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '5', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '5', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '5', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '5', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '0', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '0', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '3', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '3', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '6', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '6', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '6', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '6', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '6', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '6', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '6', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '0', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '0', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '3', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '3', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '7', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '7', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '7', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '7', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '7', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '7', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '7', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '0', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '0', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '3', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '3', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '8', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '8', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '8', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '8', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '8', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '8', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '8', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '0', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048]}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '0', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 2048]}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '2', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2048, 512]}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '3', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '3', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '9', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '9', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '9', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '9', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '9', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '9', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '9', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '10', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '10', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('lm_head', 'bias', 'value')": {"key_metadata": [{"key": "lm_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [50257]}}, "('lm_head', 'kernel', 'value')": {"key_metadata": [{"key": "lm_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 50257]}}, "('pos_emb', 'embedding', 'value')": {"key_metadata": [{"key": "pos_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 512]}}, "('tok_emb', 'embedding', 'value')": {"key_metadata": [{"key": "tok_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [50257, 512]}}}, "use_ocdbt": true, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/_sharding
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMi5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMy5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMy5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMi5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMy5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMy5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMi5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMy5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMy5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMi5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMy5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMy5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMi5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMy5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMy5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMi5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMy5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMy5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMi5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMy5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMy5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMi5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMy5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMy5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMi5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMy5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMy5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMi5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMy5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMy5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bG1faGVhZC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bG1faGVhZC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cG9zX2VtYi5lbWJlZGRpbmcudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","dG9rX2VtYi5lbWJlZGRpbmcudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}"}
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/array_metadatas/process_0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.0.bias.value", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.0.kernel.value", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.2.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.2.kernel.value", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.3.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.3.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.0.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.0.bias.value", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.0.kernel.value", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.2.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.2.kernel.value", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.3.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.3.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.1.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.0.bias.value", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.0.kernel.value", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.2.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.2.kernel.value", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.3.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.3.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.2.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.0.bias.value", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.0.kernel.value", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.2.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.2.kernel.value", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.3.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.3.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.3.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.0.bias.value", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.0.kernel.value", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.2.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.2.kernel.value", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.3.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.3.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.4.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.0.bias.value", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.0.kernel.value", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.2.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.2.kernel.value", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.3.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.3.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.5.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.0.bias.value", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.0.kernel.value", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.2.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.2.kernel.value", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.3.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.3.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.6.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.0.bias.value", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.0.kernel.value", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.2.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.2.kernel.value", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.3.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.3.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.7.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.0.bias.value", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.0.kernel.value", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.2.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.2.kernel.value", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.3.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.3.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.8.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.0.bias.value", "write_shape": [2048], "chunk_shape": [2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.0.kernel.value", "write_shape": [512, 2048], "chunk_shape": [512, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.2.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.2.kernel.value", "write_shape": [2048, 512], "chunk_shape": [2048, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.3.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.3.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.9.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.10.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.scale.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "lm_head.bias.value", "write_shape": [50257], "chunk_shape": [50257], "ext_metadata": null}}, {"array_metadata": {"param_name": "lm_head.kernel.value", "write_shape": [512, 50257], "chunk_shape": [512, 50257], "ext_metadata": null}}, {"array_metadata": {"param_name": "pos_emb.embedding.value", "write_shape": [256, 512], "chunk_shape": [256, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "tok_emb.embedding.value", "write_shape": [50257, 512], "chunk_shape": [50257, 512], "ext_metadata": null}}]}
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/d/e9cb36b30740d34ba2a49e071a8e609a
ADDED
|
Binary file (2.36 kB). View file
|
|
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/manifest.ocdbt
ADDED
|
Binary file (118 Bytes). View file
|
|
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/0cfc70b3ea1e060effbdf1a1c146e354
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2166972d2312d3659d39a721cf0af2853fe035cc3180ff73db9e0a356ce6b040
|
| 3 |
+
size 3891335
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/107f870f364ea57b6314fff5f27fe998
ADDED
|
Binary file (8.14 kB). View file
|
|
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/194e9e4da3431b6bd0b14ab0bef3ed4f
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a6f3c46ebe086e451387686d7474585b2763a7f231d20e9b728754d890fec9f
|
| 3 |
+
size 95395840
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/32f79325c59e4e268c5f96365a86a80b
ADDED
|
Binary file (230 Bytes). View file
|
|
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/34c2ca613b7475efe74d6a77b1ece744
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f4b42ed7f3fc44b2668c2394b312b727581139a5e0ad7af398e0835bd19a19f
|
| 3 |
+
size 56352768
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/4ca33e0f6969474ca55df6795e9e9d64
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4b4fb420935a6c21064c72dfd4a8683bf78dbfcbfaad6b0b645fab7e1c74a11
|
| 3 |
+
size 2918241
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/51156e38c40d2246cc0cb3fdb8bfc183
ADDED
|
Binary file (629 Bytes). View file
|
|
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/ab914487ea687e2ef7ca99c47b1df979
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:baff98a8a0ff770ec3083e9f99ddb76a6846aff8a4bbfe565b43613fe7e14626
|
| 3 |
+
size 50282496
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/adce8b7b105cfac3d1a6b27f7fa4d033
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa7636f5d649257ab5300cf0bb162c39bdbe362af66edfce3e41d43908fd4bd8
|
| 3 |
+
size 3889173
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/d19d8595f1810f24745814e4d13115b4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f01285bf724bfa8b5b5919cc2754ba2bd5f853c9d41d0eab414084bb5818d8a2
|
| 3 |
+
size 94343168
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/d74db98a016c6cac0842c60cc750951a
ADDED
|
Binary file (771 Bytes). View file
|
|
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/fa331db95a502c7477a3b27a8e9f82d6
ADDED
|
Binary file (605 Bytes). View file
|
|
|
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/manifest.ocdbt
ADDED
|
Binary file (596 Bytes). View file
|
|
|