gradling push experiments/gpt2/baseline/runs/0001
Browse files- .gitattributes +4 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/_CHECKPOINT_METADATA +1 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/_METADATA +1 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/_sharding +1 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/array_metadatas/process_0 +1 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/d/97689a3d28f46e71a6654f32574e6f1e +0 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/manifest.ocdbt +0 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/105c4e497a93eaa7665213e48a58fe5b +0 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/106472dca883594846c2510eb4be3576 +0 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/147ac55628280f9172302fa594f47101 +0 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/1bbc49fd72405501b0d8308e37a53a58 +3 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/23b3b31b2a31898d1d706a090ebb7944 +0 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/2a4699eceb062dca12c7b7f41d5ebc92 +0 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/57b03944839ad147aef22c1b80dfe2a3 +3 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/6a6b067a0eaf45d007b0dbdbae1c53b2 +0 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/96ebaa04bfc4c571ba58b11c35cd3c53 +3 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/ab7d74ce5ae3f85e3fa4fe00e69ee228 +0 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/c97574b954630bea35d0e96c69caac88 +3 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/d6aacb5b539ceed3290285dad4624963 +0 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/e643daf39903f4fdbd1cdf286e048e63 +0 -0
- experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/manifest.ocdbt +0 -0
.gitattributes
CHANGED
|
@@ -53,3 +53,7 @@ experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/b2ace9d58
|
|
| 53 |
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/b5190152b091d338600812b3d8e4dad3 filter=lfs diff=lfs merge=lfs -text
|
| 54 |
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/c87ba3fe00fdf4ea59edd754511b037d filter=lfs diff=lfs merge=lfs -text
|
| 55 |
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/c9b3fb2fa5c8ccb271a5e0ba10c8f48c filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/b5190152b091d338600812b3d8e4dad3 filter=lfs diff=lfs merge=lfs -text
|
| 54 |
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/c87ba3fe00fdf4ea59edd754511b037d filter=lfs diff=lfs merge=lfs -text
|
| 55 |
experiments/aiayn/swiglu/runs/0001/checkpoints/final/ocdbt.process_0/d/c9b3fb2fa5c8ccb271a5e0ba10c8f48c filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/1bbc49fd72405501b0d8308e37a53a58 filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/57b03944839ad147aef22c1b80dfe2a3 filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/96ebaa04bfc4c571ba58b11c35cd3c53 filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/c97574b954630bea35d0e96c69caac88 filter=lfs diff=lfs merge=lfs -text
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/_CHECKPOINT_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"item_handlers": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1774278066010548952, "commit_timestamp_nsecs": 1774278068342363458, "custom_metadata": {}}
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tree_metadata": {"('blocks', 'layers', '0', 'ff', 'dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '0', 'ff', 'dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '0', 'ff', 'fc', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072]}}, "('blocks', 'layers', '0', 'ff', 'fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 3072]}}, "('blocks', 'layers', '0', 'ff', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '0', 'ff', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072, 768]}}, "('blocks', 'layers', '0', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '0', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '0', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '0', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '0', 'sa', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '0', 'sa', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '0', 'sa', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('blocks', 'layers', '0', 'sa', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '0', 'sa', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('blocks', 'layers', '0', 'sa', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '0', 'sa', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '1', 'ff', 'dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '1', 'ff', 'dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '1', 'ff', 'fc', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072]}}, "('blocks', 'layers', '1', 'ff', 'fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 3072]}}, "('blocks', 'layers', '1', 'ff', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '1', 'ff', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072, 768]}}, "('blocks', 'layers', '1', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '1', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '1', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '1', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '1', 'sa', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '1', 'sa', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '1', 'sa', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('blocks', 'layers', '1', 'sa', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '1', 'sa', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('blocks', 'layers', '1', 'sa', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '1', 'sa', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '2', 'ff', 'dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '2', 'ff', 'dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '2', 'ff', 'fc', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072]}}, "('blocks', 'layers', '2', 'ff', 'fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 3072]}}, "('blocks', 'layers', '2', 'ff', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '2', 'ff', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072, 768]}}, "('blocks', 'layers', '2', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '2', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '2', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '2', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '2', 'sa', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '2', 'sa', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '2', 'sa', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('blocks', 'layers', '2', 'sa', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '2', 'sa', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('blocks', 'layers', '2', 'sa', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '2', 'sa', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '3', 'ff', 'dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '3', 'ff', 'dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '3', 'ff', 'fc', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072]}}, "('blocks', 'layers', '3', 'ff', 'fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 3072]}}, "('blocks', 'layers', '3', 'ff', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '3', 'ff', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072, 768]}}, "('blocks', 'layers', '3', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '3', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '3', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '3', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '3', 'sa', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '3', 'sa', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '3', 'sa', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('blocks', 'layers', '3', 'sa', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '3', 'sa', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('blocks', 'layers', '3', 'sa', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '3', 'sa', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '4', 'ff', 'dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '4', 'ff', 'dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '4', 'ff', 'fc', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072]}}, "('blocks', 'layers', '4', 'ff', 'fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 3072]}}, "('blocks', 'layers', '4', 'ff', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '4', 'ff', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072, 768]}}, "('blocks', 'layers', '4', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '4', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '4', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '4', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '4', 'sa', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '4', 'sa', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '4', 'sa', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('blocks', 'layers', '4', 'sa', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '4', 'sa', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('blocks', 'layers', '4', 'sa', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '4', 'sa', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '5', 'ff', 'dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '5', 'ff', 'dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '5', 'ff', 'fc', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072]}}, "('blocks', 'layers', '5', 'ff', 'fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 3072]}}, "('blocks', 'layers', '5', 'ff', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '5', 'ff', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072, 768]}}, "('blocks', 'layers', '5', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '5', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '5', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '5', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '5', 'sa', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '5', 'sa', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '5', 'sa', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('blocks', 'layers', '5', 'sa', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '5', 'sa', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('blocks', 'layers', '5', 'sa', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '5', 'sa', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '6', 'ff', 'dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '6', 'ff', 'dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '6', 'ff', 'fc', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072]}}, "('blocks', 'layers', '6', 'ff', 'fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 3072]}}, "('blocks', 'layers', '6', 'ff', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '6', 'ff', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072, 768]}}, "('blocks', 'layers', '6', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '6', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '6', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '6', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '6', 'sa', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '6', 'sa', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '6', 'sa', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('blocks', 'layers', '6', 'sa', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '6', 'sa', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('blocks', 'layers', '6', 'sa', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '6', 'sa', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "6", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '7', 'ff', 'dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '7', 'ff', 'dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '7', 'ff', 'fc', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072]}}, "('blocks', 'layers', '7', 'ff', 'fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 3072]}}, "('blocks', 'layers', '7', 'ff', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '7', 'ff', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072, 768]}}, "('blocks', 'layers', '7', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '7', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '7', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '7', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '7', 'sa', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '7', 'sa', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '7', 'sa', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('blocks', 'layers', '7', 'sa', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '7', 'sa', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('blocks', 'layers', '7', 'sa', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '7', 'sa', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "7", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '8', 'ff', 'dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '8', 'ff', 'dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '8', 'ff', 'fc', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072]}}, "('blocks', 'layers', '8', 'ff', 'fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 3072]}}, "('blocks', 'layers', '8', 'ff', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '8', 'ff', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072, 768]}}, "('blocks', 'layers', '8', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '8', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '8', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '8', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '8', 'sa', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '8', 'sa', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '8', 'sa', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('blocks', 'layers', '8', 'sa', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '8', 'sa', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('blocks', 'layers', '8', 'sa', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '8', 'sa', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "8", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '9', 'ff', 'dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '9', 'ff', 'dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '9', 'ff', 'fc', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072]}}, "('blocks', 'layers', '9', 'ff', 'fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 3072]}}, "('blocks', 'layers', '9', 'ff', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '9', 'ff', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072, 768]}}, "('blocks', 'layers', '9', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '9', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '9', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '9', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '9', 'sa', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '9', 'sa', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '9', 'sa', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('blocks', 'layers', '9', 'sa', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '9', 'sa', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('blocks', 'layers', '9', 'sa', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '9', 'sa', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "9", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '10', 'ff', 'dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '10', 'ff', 'dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '10', 'ff', 'fc', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072]}}, "('blocks', 'layers', '10', 'ff', 'fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 3072]}}, "('blocks', 'layers', '10', 'ff', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '10', 'ff', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072, 768]}}, "('blocks', 'layers', '10', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '10', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '10', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '10', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '10', 'sa', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '10', 'sa', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '10', 'sa', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('blocks', 'layers', '10', 'sa', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '10', 'sa', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('blocks', 'layers', '10', 'sa', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '10', 'sa', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "10", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '11', 'ff', 'dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '11', 'ff', 'dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '11', 'ff', 'fc', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072]}}, "('blocks', 'layers', '11', 'ff', 'fc', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "fc", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 3072]}}, "('blocks', 'layers', '11', 'ff', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '11', 'ff', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "ff", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [3072, 768]}}, "('blocks', 'layers', '11', 'ln1', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '11', 'ln1', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '11', 'ln2', 'beta', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '11', 'ln2', 'gamma', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '11', 'sa', 'a_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '11', 'sa', 'a_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "a_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', 'layers', '11', 'sa', 'attn', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 2304]}}, "('blocks', 'layers', '11', 'sa', 'proj', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('blocks', 'layers', '11', 'sa', 'proj', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768, 768]}}, "('blocks', 'layers', '11', 'sa', 'r_dropout', 'rngs', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', 'layers', '11', 'sa', 'r_dropout', 'rngs', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "11", "key_type": 2}, {"key": "sa", "key_type": 2}, {"key": "r_dropout", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('ln_f', 'beta', 'value')": {"key_metadata": [{"key": "ln_f", "key_type": 2}, {"key": "beta", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('ln_f', 'gamma', 'value')": {"key_metadata": [{"key": "ln_f", "key_type": 2}, {"key": "gamma", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [768]}}, "('pos_emb', 'embedding', 'value')": {"key_metadata": [{"key": "pos_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 768]}}, "('tok_emb', 'embedding', 'value')": {"key_metadata": [{"key": "tok_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [50257, 768]}}}, "use_ocdbt": true, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/_sharding
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"YmxvY2tzLmxheWVycy40LmZmLmRyb3BvdXQucm5ncy5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLmRyb3BvdXQucm5ncy5rZXkudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLmZjLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLmZjLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmZmLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy40LnNhLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLmRyb3BvdXQucm5ncy5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLmRyb3BvdXQucm5ncy5rZXkudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLmZjLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLmZjLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmZmLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy41LnNhLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLmRyb3BvdXQucm5ncy5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLmRyb3BvdXQucm5ncy5rZXkudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLmZjLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLmZjLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmZmLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy42LnNhLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLmRyb3BvdXQucm5ncy5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLmRyb3BvdXQucm5ncy5rZXkudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLmZjLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLmZjLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmZmLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy43LnNhLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLmRyb3BvdXQucm5ncy5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLmRyb3BvdXQucm5ncy5rZXkudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLmZjLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLmZjLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmZmLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy44LnNhLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLmRyb3BvdXQucm5ncy5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLmRyb3BvdXQucm5ncy5rZXkudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLmZjLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLmZjLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmZmLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy45LnNhLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLmRyb3BvdXQucm5ncy5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLmRyb3BvdXQucm5ncy5rZXkudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLmZjLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLmZjLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmZmLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4wLnNhLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLmRyb3BvdXQucm5ncy5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLmRyb3BvdXQucm5ncy5rZXkudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLmZjLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLmZjLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmZmLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xLnNhLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5mZi5kcm9wb3V0LnJuZ3MuY291bnQudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5mZi5kcm9wb3V0LnJuZ3Mua2V5LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5mZi5mYy5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5mZi5mYy5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5mZi5wcm9qLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5mZi5wcm9qLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5sbjEuYmV0YS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5sbjEuZ2FtbWEudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5sbjIuYmV0YS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5sbjIuZ2FtbWEudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5zYS5hX2Ryb3BvdXQucm5ncy5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5zYS5hX2Ryb3BvdXQucm5ncy5rZXkudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5zYS5hdHRuLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5zYS5wcm9qLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5zYS5wcm9qLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5zYS5yX2Ryb3BvdXQucm5ncy5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMC5zYS5yX2Ryb3BvdXQucm5ncy5rZXkudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5mZi5kcm9wb3V0LnJuZ3MuY291bnQudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5mZi5kcm9wb3V0LnJuZ3Mua2V5LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5mZi5mYy5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5mZi5mYy5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5mZi5wcm9qLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5mZi5wcm9qLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5sbjEuYmV0YS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5sbjEuZ2FtbWEudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5sbjIuYmV0YS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5sbjIuZ2FtbWEudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5zYS5hX2Ryb3BvdXQucm5ncy5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5zYS5hX2Ryb3BvdXQucm5ncy5rZXkudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5zYS5hdHRuLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5zYS5wcm9qLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5zYS5wcm9qLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5zYS5yX2Ryb3BvdXQucm5ncy5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4xMS5zYS5yX2Ryb3BvdXQucm5ncy5rZXkudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLmRyb3BvdXQucm5ncy5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLmRyb3BvdXQucm5ncy5rZXkudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLmZjLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLmZjLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmZmLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4yLnNhLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLmRyb3BvdXQucm5ncy5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLmRyb3BvdXQucm5ncy5rZXkudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLmZjLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLmZjLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmZmLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMS5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMS5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLmxuMi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhLmF0dG4ua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhLmFfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhLmFfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhLnByb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhLnByb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhLnJfZHJvcG91dC5ybmdzLmNvdW50LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","YmxvY2tzLmxheWVycy4zLnNhLnJfZHJvcG91dC5ybmdzLmtleS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bG5fZi5iZXRhLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","bG5fZi5nYW1tYS52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cG9zX2VtYi5lbWJlZGRpbmcudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","dG9rX2VtYi5lbWJlZGRpbmcudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}"}
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/array_metadatas/process_0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "blocks.layers.0.ff.dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.fc.bias.value", "write_shape": [3072], "chunk_shape": [3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.fc.kernel.value", "write_shape": [768, 3072], "chunk_shape": [768, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ff.proj.kernel.value", "write_shape": [3072, 768], "chunk_shape": [3072, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ln1.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ln1.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ln2.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.ln2.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.0.sa.attn.kernel.value", "write_shape": [768, 2304], "chunk_shape": [768, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa.proj.kernel.value", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.0.sa.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.fc.bias.value", "write_shape": [3072], "chunk_shape": [3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.fc.kernel.value", "write_shape": [768, 3072], "chunk_shape": [768, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ff.proj.kernel.value", "write_shape": [3072, 768], "chunk_shape": [3072, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ln1.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ln1.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ln2.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.ln2.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.1.sa.attn.kernel.value", "write_shape": [768, 2304], "chunk_shape": [768, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa.proj.kernel.value", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.1.sa.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.fc.bias.value", "write_shape": [3072], "chunk_shape": [3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.fc.kernel.value", "write_shape": [768, 3072], "chunk_shape": [768, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ff.proj.kernel.value", "write_shape": [3072, 768], "chunk_shape": [3072, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ln1.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ln1.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ln2.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.ln2.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.2.sa.attn.kernel.value", "write_shape": [768, 2304], "chunk_shape": [768, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa.proj.kernel.value", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.2.sa.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.fc.bias.value", "write_shape": [3072], "chunk_shape": [3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.fc.kernel.value", "write_shape": [768, 3072], "chunk_shape": [768, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ff.proj.kernel.value", "write_shape": [3072, 768], "chunk_shape": [3072, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ln1.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ln1.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ln2.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.ln2.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.3.sa.attn.kernel.value", "write_shape": [768, 2304], "chunk_shape": [768, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa.proj.kernel.value", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.3.sa.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.fc.bias.value", "write_shape": [3072], "chunk_shape": [3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.fc.kernel.value", "write_shape": [768, 3072], "chunk_shape": [768, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ff.proj.kernel.value", "write_shape": [3072, 768], "chunk_shape": [3072, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ln1.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ln1.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ln2.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.ln2.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.4.sa.attn.kernel.value", "write_shape": [768, 2304], "chunk_shape": [768, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa.proj.kernel.value", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.4.sa.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.fc.bias.value", "write_shape": [3072], "chunk_shape": [3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.fc.kernel.value", "write_shape": [768, 3072], "chunk_shape": [768, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ff.proj.kernel.value", "write_shape": [3072, 768], "chunk_shape": [3072, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ln1.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ln1.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ln2.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.ln2.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.5.sa.attn.kernel.value", "write_shape": [768, 2304], "chunk_shape": [768, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa.proj.kernel.value", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.5.sa.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.fc.bias.value", "write_shape": [3072], "chunk_shape": [3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.fc.kernel.value", "write_shape": [768, 3072], "chunk_shape": [768, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ff.proj.kernel.value", "write_shape": [3072, 768], "chunk_shape": [3072, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ln1.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ln1.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ln2.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.ln2.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.6.sa.attn.kernel.value", "write_shape": [768, 2304], "chunk_shape": [768, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa.proj.kernel.value", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.6.sa.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.fc.bias.value", "write_shape": [3072], "chunk_shape": [3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.fc.kernel.value", "write_shape": [768, 3072], "chunk_shape": [768, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ff.proj.kernel.value", "write_shape": [3072, 768], "chunk_shape": [3072, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ln1.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ln1.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ln2.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.ln2.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.7.sa.attn.kernel.value", "write_shape": [768, 2304], "chunk_shape": [768, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa.proj.kernel.value", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.7.sa.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.fc.bias.value", "write_shape": [3072], "chunk_shape": [3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.fc.kernel.value", "write_shape": [768, 3072], "chunk_shape": [768, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ff.proj.kernel.value", "write_shape": [3072, 768], "chunk_shape": [3072, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ln1.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ln1.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ln2.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.ln2.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.8.sa.attn.kernel.value", "write_shape": [768, 2304], "chunk_shape": [768, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa.proj.kernel.value", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.8.sa.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.fc.bias.value", "write_shape": [3072], "chunk_shape": [3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.fc.kernel.value", "write_shape": [768, 3072], "chunk_shape": [768, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ff.proj.kernel.value", "write_shape": [3072, 768], "chunk_shape": [3072, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ln1.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ln1.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ln2.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.ln2.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.9.sa.attn.kernel.value", "write_shape": [768, 2304], "chunk_shape": [768, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa.proj.kernel.value", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.9.sa.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.10.ff.dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.ff.dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.10.ff.fc.bias.value", "write_shape": [3072], "chunk_shape": [3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.ff.fc.kernel.value", "write_shape": [768, 3072], "chunk_shape": [768, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.ff.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.ff.proj.kernel.value", "write_shape": [3072, 768], "chunk_shape": [3072, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.ln1.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.ln1.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.ln2.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.ln2.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.sa.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.sa.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.10.sa.attn.kernel.value", "write_shape": [768, 2304], "chunk_shape": [768, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.sa.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.sa.proj.kernel.value", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.sa.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.10.sa.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.11.ff.dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.11.ff.dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.11.ff.fc.bias.value", "write_shape": [3072], "chunk_shape": [3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.11.ff.fc.kernel.value", "write_shape": [768, 3072], "chunk_shape": [768, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.11.ff.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.11.ff.proj.kernel.value", "write_shape": [3072, 768], "chunk_shape": [3072, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.11.ln1.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.11.ln1.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.11.ln2.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.11.ln2.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.11.sa.a_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.11.sa.a_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.layers.11.sa.attn.kernel.value", "write_shape": [768, 2304], "chunk_shape": [768, 2304], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.11.sa.proj.bias.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.11.sa.proj.kernel.value", "write_shape": [768, 768], "chunk_shape": [768, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.11.sa.r_dropout.rngs.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.layers.11.sa.r_dropout.rngs.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "ln_f.beta.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "ln_f.gamma.value", "write_shape": [768], "chunk_shape": [768], "ext_metadata": null}}, {"array_metadata": {"param_name": "pos_emb.embedding.value", "write_shape": [1024, 768], "chunk_shape": [1024, 768], "ext_metadata": null}}, {"array_metadata": {"param_name": "tok_emb.embedding.value", "write_shape": [50257, 768], "chunk_shape": [50257, 768], "ext_metadata": null}}]}
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/d/97689a3d28f46e71a6654f32574e6f1e
ADDED
|
Binary file (2.36 kB). View file
|
|
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/manifest.ocdbt
ADDED
|
Binary file (118 Bytes). View file
|
|
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/105c4e497a93eaa7665213e48a58fe5b
ADDED
|
Binary file (28.9 kB). View file
|
|
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/106472dca883594846c2510eb4be3576
ADDED
|
Binary file (222 Bytes). View file
|
|
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/147ac55628280f9172302fa594f47101
ADDED
|
Binary file (787 Bytes). View file
|
|
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/1bbc49fd72405501b0d8308e37a53a58
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:221025bffb31a8c5da40ba917af5bc3afd21a8b8b954023d9fab02db651834c1
|
| 3 |
+
size 256921600
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/23b3b31b2a31898d1d706a090ebb7944
ADDED
|
Binary file (628 Bytes). View file
|
|
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/2a4699eceb062dca12c7b7f41d5ebc92
ADDED
|
Binary file (495 Bytes). View file
|
|
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/57b03944839ad147aef22c1b80dfe2a3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a987e60285032402bcf478560e923eb7626cce16799925219134a98a1c1b8ed4
|
| 3 |
+
size 59097088
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/6a6b067a0eaf45d007b0dbdbae1c53b2
ADDED
|
Binary file (14.7 kB). View file
|
|
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/96ebaa04bfc4c571ba58b11c35cd3c53
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be8e2ab2955e4664818f27dd6ed131fbd5a19aacc87a80653e4547073739fbf5
|
| 3 |
+
size 142786560
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/ab7d74ce5ae3f85e3fa4fe00e69ee228
ADDED
|
Binary file (11.7 kB). View file
|
|
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/c97574b954630bea35d0e96c69caac88
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ec0a5997795a18f789cd0693b9417beb66a7b4cb28d6af396fb821b4ec2e934
|
| 3 |
+
size 2199607
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/d6aacb5b539ceed3290285dad4624963
ADDED
|
Binary file (519 Bytes). View file
|
|
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/d/e643daf39903f4fdbd1cdf286e048e63
ADDED
|
Binary file (500 Bytes). View file
|
|
|
experiments/gpt2/baseline/runs/0001/checkpoints/final/ocdbt.process_0/manifest.ocdbt
ADDED
|
Binary file (613 Bytes). View file
|
|
|