gradling push experiments/aiayn/swiglu/runs/0002
Browse files- .gitattributes +7 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/_CHECKPOINT_METADATA +1 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/_METADATA +1 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/_sharding +1 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/array_metadatas/process_0 +1 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/d/0ad552c44f7a941a7cd9bdde8889c330 +0 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/manifest.ocdbt +0 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/017c64b3f4b82e9e5eaf160b8b027f62 +3 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/06649d0f9d2c3225e6c1add5bfea573f +0 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/0ebd87846ad88cae8d80facff73ae650 +0 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/4479afd90163c7f405fe630546f31c93 +3 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/4b2be22bb083b7018325dbd5fb3c7d42 +0 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/4c544c4f24faf80155508356ac1ef942 +3 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/6159401d1f86ee3c6caceb80f5e323a4 +0 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/7a93b9623f8e9da15a85991e3020b348 +3 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/7be3c90ac3cda00e8d80bc1d5313ad49 +3 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/b783847ff8069e62546be91da69d1a15 +3 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/e1ede68325e621c85a8ea24fe164f695 +3 -0
- experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/manifest.ocdbt +0 -0
.gitattributes
CHANGED
|
@@ -40,3 +40,10 @@ experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/4ca33e0
|
|
| 40 |
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/ab914487ea687e2ef7ca99c47b1df979 filter=lfs diff=lfs merge=lfs -text
|
| 41 |
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/adce8b7b105cfac3d1a6b27f7fa4d033 filter=lfs diff=lfs merge=lfs -text
|
| 42 |
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/d19d8595f1810f24745814e4d13115b4 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/ab914487ea687e2ef7ca99c47b1df979 filter=lfs diff=lfs merge=lfs -text
|
| 41 |
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/adce8b7b105cfac3d1a6b27f7fa4d033 filter=lfs diff=lfs merge=lfs -text
|
| 42 |
experiments/aiayn/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/d19d8595f1810f24745814e4d13115b4 filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/017c64b3f4b82e9e5eaf160b8b027f62 filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/4479afd90163c7f405fe630546f31c93 filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/4c544c4f24faf80155508356ac1ef942 filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/7a93b9623f8e9da15a85991e3020b348 filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/7be3c90ac3cda00e8d80bc1d5313ad49 filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/b783847ff8069e62546be91da69d1a15 filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/e1ede68325e621c85a8ea24fe164f695 filter=lfs diff=lfs merge=lfs -text
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/_CHECKPOINT_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"item_handlers": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1773638605872754746, "commit_timestamp_nsecs": 1773638606977248536, "custom_metadata": {}}
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tree_metadata": {"('blocks', 'layers', '0', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '0', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '0', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '0', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '0', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '0', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '0', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '0', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '0', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 512, 512]}}, "('blocks', 'layers', '0', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '0', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '0', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '0', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '1', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '1', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '1', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '1', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '1', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 512, 512]}}, "('blocks', 'layers', '1', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '1', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '1', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '1', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '2', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '2', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '2', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '2', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '2', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 512, 512]}}, "('blocks', 'layers', '2', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '2', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '2', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '2', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '3', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '3', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '3', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '3', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '3', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 512, 512]}}, "('blocks', 'layers', '3', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '3', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '3', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '3', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '4', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '4', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '4', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '4', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '4', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 512, 512]}}, "('blocks', 'layers', '4', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '4', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '4', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '4', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '5', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '5', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '5', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '5', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '5', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 512, 512]}}, "('blocks', 'layers', '5', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '5', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '5', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '5', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '6', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '6', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '6', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '6', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '6', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 512, 512]}}, "('blocks', 'layers', '6', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '6', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '6', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '6', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '7', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '7', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '7', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '7', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '7', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 512, 512]}}, "('blocks', 'layers', '7', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '7', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '7', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '7', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '8', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '8', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '8', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '8', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '8', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 512, 512]}}, "('blocks', 'layers', '8', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '8', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '8', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '8', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '0', 'V', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "V", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '0', 'W', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1280]}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '0', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1280, 512]}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '1', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '9', 'ff', 'net', 'layers', '1', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '9', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'sa_heads', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '9', 'sa_heads', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '9', 'sa_heads', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('blocks', 'layers', '9', 'sa_heads', 'mask', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "mask", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 512, 512]}}, "('blocks', 'layers', '9', 'sa_heads', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '9', 'sa_heads', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('blocks', 'layers', '9', 'sa_heads', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '9', 'sa_heads', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa_heads", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '10', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('blocks', 'layers', '10', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('lm_head', 'bias', 'value')": {"key_metadata": [{"key": "lm_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [50257]}}, "('lm_head', 'kernel', 'value')": {"key_metadata": [{"key": "lm_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 50257]}}, "('pos_emb', 'embedding', 'value')": {"key_metadata": [{"key": "pos_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('tok_emb', 'embedding', 'value')": {"key_metadata": [{"key": "tok_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [50257, 512]}}}, "use_ocdbt": true, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/_sharding
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMC5WLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMC5XLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMC5XMi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMS5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLm5ldC5sYXllcnMuMS5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLm1hc2sudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhX2hlYWRzLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bG1faGVhZC5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bG1faGVhZC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cG9zX2VtYi5lbWJlZGRpbmcudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","dG9rX2VtYi5lbWJlZGRpbmcudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}"}
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/array_metadatas/process_0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.0.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.mask.value", "write_shape": [1, 1, 512, 512], "chunk_shape": [1, 1, 512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.1.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.mask.value", "write_shape": [1, 1, 512, 512], "chunk_shape": [1, 1, 512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.2.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.mask.value", "write_shape": [1, 1, 512, 512], "chunk_shape": [1, 1, 512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.3.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.mask.value", "write_shape": [1, 1, 512, 512], "chunk_shape": [1, 1, 512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.4.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.mask.value", "write_shape": [1, 1, 512, 512], "chunk_shape": [1, 1, 512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.5.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.mask.value", "write_shape": [1, 1, 512, 512], "chunk_shape": [1, 1, 512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.6.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.mask.value", "write_shape": [1, 1, 512, 512], "chunk_shape": [1, 1, 512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.7.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.mask.value", "write_shape": [1, 1, 512, 512], "chunk_shape": [1, 1, 512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.8.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.mask.value", "write_shape": [1, 1, 512, 512], "chunk_shape": [1, 1, 512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.0.V.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.0.W.kernel.value", "write_shape": [512, 1280], "chunk_shape": [512, 1280], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.0.W2.kernel.value", "write_shape": [1280, 512], "chunk_shape": [1280, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.1.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.net.layers.1.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.9.ln1.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ln1.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ln2.beta.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ln2.gamma.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.attn.kernel.value", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.mask.value", "write_shape": [1, 1, 512, 512], "chunk_shape": [1, 1, 512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.proj.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.proj.kernel.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa_heads.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.10.bias.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.scale.value", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "lm_head.bias.value", "write_shape": [50257], "chunk_shape": [50257], "ext_metadata": null}}, {"array_metadata": {"param_name": "lm_head.kernel.value", "write_shape": [512, 50257], "chunk_shape": [512, 50257], "ext_metadata": null}}, {"array_metadata": {"param_name": "pos_emb.embedding.value", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "tok_emb.embedding.value", "write_shape": [50257, 512], "chunk_shape": [50257, 512], "ext_metadata": null}}]}
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/d/0ad552c44f7a941a7cd9bdde8889c330
ADDED
|
Binary file (2.25 kB). View file
|
|
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/manifest.ocdbt
ADDED
|
Binary file (118 Bytes). View file
|
|
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/017c64b3f4b82e9e5eaf160b8b027f62
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:645af50798cac00c09ecb16c24c4295df30dfa48cae7da29fe3afc23904670d2
|
| 3 |
+
size 95395840
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/06649d0f9d2c3225e6c1add5bfea573f
ADDED
|
Binary file (531 Bytes). View file
|
|
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/0ebd87846ad88cae8d80facff73ae650
ADDED
|
Binary file (700 Bytes). View file
|
|
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/4479afd90163c7f405fe630546f31c93
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d79fc0df7bfcbf99de2bb8951cfff950475897909cfb9b89c9a405e2b46885f
|
| 3 |
+
size 8749056
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/4b2be22bb083b7018325dbd5fb3c7d42
ADDED
|
Binary file (546 Bytes). View file
|
|
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/4c544c4f24faf80155508356ac1ef942
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3fedff1ee01a2630978c27771ccd19bdc03bfdaec437cea391168460e0103fb7
|
| 3 |
+
size 18944000
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/6159401d1f86ee3c6caceb80f5e323a4
ADDED
|
Binary file (223 Bytes). View file
|
|
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/7a93b9623f8e9da15a85991e3020b348
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84bdeb8363bebd844bd42669ba4b6d996e121436b08afe65df6dd3a14298304b
|
| 3 |
+
size 10203136
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/7be3c90ac3cda00e8d80bc1d5313ad49
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d1204586676e1a639d4c51e211a64414a0f25130b2b23dd36f3a1aaf02e8ebe
|
| 3 |
+
size 74035200
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/b783847ff8069e62546be91da69d1a15
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b8ed100f9bd3cdad621bf79ab5c21ed291d45def3d8683e1ecb4096338be41b
|
| 3 |
+
size 94334976
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/d/e1ede68325e621c85a8ea24fe164f695
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d91cd7bfbf457d98d172e216b5cfd2a551a8d820345620510d0ba86f1b83cc4
|
| 3 |
+
size 982145
|
experiments/aiayn/swiglu/runs/0002/checkpoints/final/ocdbt.process_0/manifest.ocdbt
ADDED
|
Binary file (556 Bytes). View file
|
|
|