gradling push experiments/aiayn/swiglu/runs/0001
Browse files- .gitattributes +6 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/_CHECKPOINT_METADATA +1 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/_METADATA +1 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/_sharding +1 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/array_metadatas/process_0 +1 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/d/13ef5bfc2744ca216350e310175a55c6 +0 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/manifest.ocdbt +0 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/68f93ea577886606711484aee8a861cc +3 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/837baebbc0ce6869fdb52111647fc36e +3 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/a1dbf8dcb28d3629f82fef469bfa4553 +0 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/b2ace9d5867571f9d28d7a640f7ce349 +3 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/b5190152b091d338600812b3d8e4dad3 +3 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/c87ba3fe00fdf4ea59edd754511b037d +3 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/c891ba8d91e863152f12ff098cf1bd22 +0 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/c9b3fb2fa5c8ccb271a5e0ba10c8f48c +3 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/d28956bd2fad2edff6369f6ac70ab4aa +0 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/de1c3eb1d4b0b04969ce1197655afef9 +0 -0
- experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/manifest.ocdbt +0 -0
.gitattributes
CHANGED
|
@@ -47,3 +47,9 @@ experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/7a93b9623
|
|
| 47 |
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/7be3c90ac3cda00e8d80bc1d5313ad49 filter=lfs diff=lfs merge=lfs -text
|
| 48 |
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/b783847ff8069e62546be91da69d1a15 filter=lfs diff=lfs merge=lfs -text
|
| 49 |
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/e1ede68325e621c85a8ea24fe164f695 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/7be3c90ac3cda00e8d80bc1d5313ad49 filter=lfs diff=lfs merge=lfs -text
|
| 48 |
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/b783847ff8069e62546be91da69d1a15 filter=lfs diff=lfs merge=lfs -text
|
| 49 |
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/e1ede68325e621c85a8ea24fe164f695 filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/68f93ea577886606711484aee8a861cc filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/837baebbc0ce6869fdb52111647fc36e filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/b2ace9d5867571f9d28d7a640f7ce349 filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/b5190152b091d338600812b3d8e4dad3 filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/c87ba3fe00fdf4ea59edd754511b037d filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/c9b3fb2fa5c8ccb271a5e0ba10c8f48c filter=lfs diff=lfs merge=lfs -text
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/_CHECKPOINT_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"item_handlers": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1773620150106454278, "commit_timestamp_nsecs": 1773620151109672844, "custom_metadata": {}}
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tree_metadata": {"('blocks', 'layers', '0', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '0', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '0', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '0', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '0', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '0', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '0', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '0', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '0', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '0', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '0', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '0', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '1', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '1', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '1', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '1', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '1', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '1', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '1', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '2', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '2', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '2', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '2', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '2', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '2', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '2', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '3', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '3', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '3', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '3', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '3', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '3', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '3', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '4', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '4', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '4', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '4', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '4', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '4', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '4', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '5', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '5', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '5', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '5', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '5', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '5', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '5', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '6', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '6', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '6', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '6', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '6', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '6', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '6', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '7', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '7', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '7', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '7', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '7', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '7', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '7', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '8', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '8', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '8', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '8', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '8', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '8', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '8', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '9', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '9', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '9', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '9', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 256, 256]}}, "('blocks', 'layers', '9', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '9', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '9', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '10', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '10', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('lm_head', 'bias', 'value')": {"key_metadata": [{"key": "lm_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [50257]}}, "('lm_head', 'kernel', 'value')": {"key_metadata": [{"key": "lm_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 50257]}}, "('pos_emb', 'embedding', 'value')": {"key_metadata": [{"key": "pos_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 512]}}, "('tok_emb', 'embedding', 'value')": {"key_metadata": [{"key": "tok_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [50257, 512]}}}, "use_ocdbt": true, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/_sharding
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bG1faGVhZC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bG1faGVhZC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cG9zX2VtYi5lbWJlZGRpbmcudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","dG9rX2VtYi5lbWJlZGRpbmcudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}"}
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/array_metadatas/process_0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.0.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.1.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.2.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.3.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.4.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.5.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.6.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.7.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.8.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.9.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.mask.value", "write_shape": [1, 1, 256, 256], "chunk_shape": [1, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.10.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.scale.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "lm_head.bias.value", "write_shape": [50257], "chunk_shape": [50257], "ext_metadata": null}}, {"array_metadata": {"param_name": "lm_head.kernel.value", "write_shape": [512, 50257], "chunk_shape": [512, 50257], "ext_metadata": null}}, {"array_metadata": {"param_name": "pos_emb.embedding.value", "write_shape": [256, 512], "chunk_shape": [256, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "tok_emb.embedding.value", "write_shape": [50257, 512], "chunk_shape": [50257, 512], "ext_metadata": null}}]}
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/d/13ef5bfc2744ca216350e310175a55c6
ADDED
|
Binary file (2.25 kB). View file
|
|
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/manifest.ocdbt
ADDED
|
Binary file (118 Bytes). View file
|
|
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/68f93ea577886606711484aee8a861cc
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62e0ad051f9f4836ac1cf3129a9c382ff68d0dfaee0155fe4ef211abebc7c6a3
|
| 3 |
+
size 94314496
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/837baebbc0ce6869fdb52111647fc36e
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8275897198577915afe09cac377209d3b86dee407ebf3907b2f0367267beaef
|
| 3 |
+
size 95395840
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/a1dbf8dcb28d3629f82fef469bfa4553
ADDED
|
Binary file (553 Bytes). View file
|
|
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/b2ace9d5867571f9d28d7a640f7ce349
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b557341deeb42ea4d186af035e0664391f3292a6f0587883e3535303a1234929
|
| 3 |
+
size 28680192
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/b5190152b091d338600812b3d8e4dad3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1359ab1bb0e1ebd07baac87f2b2b390c2429a7e7e344f317adfbfa3cac0b54c4
|
| 3 |
+
size 67698688
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/c87ba3fe00fdf4ea59edd754511b037d
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80317a17858c35f92ee83b266cf662cf2f651cbe9c9bf26b88be48ecc372be1c
|
| 3 |
+
size 9723904
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/c891ba8d91e863152f12ff098cf1bd22
ADDED
|
Binary file (694 Bytes). View file
|
|
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/c9b3fb2fa5c8ccb271a5e0ba10c8f48c
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:019847ac5e43b0a30b48a778cc7f5ef6024306073403b0b11dbd60e1a3537dd9
|
| 3 |
+
size 6320128
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/d28956bd2fad2edff6369f6ac70ab4aa
ADDED
|
Binary file (223 Bytes). View file
|
|
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/de1c3eb1d4b0b04969ce1197655afef9
ADDED
|
Binary file (527 Bytes). View file
|
|
|
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/manifest.ocdbt
ADDED
|
Binary file (517 Bytes). View file
|
|
|