diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..ea84339472cd71d254823ab865e951547f82b736 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,26 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +params/ocdbt.process_0/d/06f38ac8ce3b4d335b4d8874c6f4cbdd filter=lfs diff=lfs merge=lfs -text +params/ocdbt.process_0/d/10793e6211fcc7417882f7e0045e4716 filter=lfs diff=lfs merge=lfs -text +params/ocdbt.process_0/d/1fd2df50f9903ae9af667f64e5b419d0 filter=lfs diff=lfs merge=lfs -text +params/ocdbt.process_0/d/2c941f15cc97858f9b30af0ee43f29f7 filter=lfs diff=lfs merge=lfs -text +params/ocdbt.process_0/d/4169568cfbf46b0dae6fc22a0bf2eda7 filter=lfs diff=lfs merge=lfs -text +params/ocdbt.process_0/d/688c267d7a9b74953496a770eb397a95 filter=lfs diff=lfs merge=lfs -text +params/ocdbt.process_0/d/7d4706d06df24d39c8f3fa6c0ba0f148 filter=lfs diff=lfs merge=lfs -text +params/ocdbt.process_0/d/e4524ef6248870c9924af8ded3aeeb89 filter=lfs diff=lfs merge=lfs -text +params/ocdbt.process_0/d/ed213ba2737a0c76ac9df23fffdf10c3 filter=lfs diff=lfs merge=lfs -text +train_state/ocdbt.process_0/d/20b1984374c9e8d0f8b3086460f0624d filter=lfs diff=lfs merge=lfs -text +train_state/ocdbt.process_0/d/317b48e2d515a62a10f3c179f63210c5 filter=lfs diff=lfs merge=lfs -text +train_state/ocdbt.process_0/d/4a68b4b49959ff5989903c8fc59872fa filter=lfs diff=lfs merge=lfs -text +train_state/ocdbt.process_0/d/801ff81d3ffbfc3a95a38a9c0b7e5396 filter=lfs diff=lfs merge=lfs -text +train_state/ocdbt.process_0/d/8e20e44db20ee68f51e3c6b17376eaa1 filter=lfs diff=lfs merge=lfs -text +train_state/ocdbt.process_0/d/9780ab93e1f3f37834522e628b7da7ca filter=lfs diff=lfs merge=lfs -text +train_state/ocdbt.process_0/d/a5dda2ec41f2c2119adb5dd7f9f0c8f1 filter=lfs diff=lfs merge=lfs -text +train_state/ocdbt.process_0/d/b0695e4b5e01aeb5dfd91c3393fb4ff3 filter=lfs diff=lfs merge=lfs -text +train_state/ocdbt.process_0/d/c0e01bd55c77234cd1d9d9a213c03648 filter=lfs diff=lfs merge=lfs -text +train_state/ocdbt.process_0/d/c5bc03ad4c8fde9c32dd602c45f3b1fe filter=lfs diff=lfs merge=lfs -text +train_state/ocdbt.process_0/d/d051c4aa2a7a185100c6c85ee87f5c01 filter=lfs diff=lfs merge=lfs -text +train_state/ocdbt.process_0/d/d3c7961f794bd59de794145208ca8c4a filter=lfs diff=lfs merge=lfs -text +train_state/ocdbt.process_0/d/e745ec5c30c817cca44a428b33884cc4 filter=lfs diff=lfs merge=lfs -text +train_state/ocdbt.process_0/d/f12d64cd97e2bf0a3919c020b3f01955 filter=lfs diff=lfs merge=lfs -text diff --git a/_CHECKPOINT_METADATA b/_CHECKPOINT_METADATA new file mode 100644 index 0000000000000000000000000000000000000000..7eee9338ac56713516c4d52bf0f7d62d41318eed --- /dev/null +++ b/_CHECKPOINT_METADATA @@ -0,0 +1 @@ +{"item_handlers": {"assets": "openpi.training.checkpoints.CallbackHandler", "params": "orbax.checkpoint._src.handlers.pytree_checkpoint_handler.PyTreeCheckpointHandler", "train_state": "orbax.checkpoint._src.handlers.pytree_checkpoint_handler.PyTreeCheckpointHandler"}, "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1758204301595457911, "commit_timestamp_nsecs": 1758204323094941522, "custom_metadata": {}} \ No newline at end of file diff --git a/assets/vlabench/vlabench_ft_primitive/norm_stats.json b/assets/vlabench/vlabench_ft_primitive/norm_stats.json new file mode 100644 index 0000000000000000000000000000000000000000..3d60e65da019b00a73770bf1b7e5095433a0be68 --- /dev/null +++ b/assets/vlabench/vlabench_ft_primitive/norm_stats.json @@ -0,0 +1,280 @@ +{ + "norm_stats": { + "state": { + "mean": [ + 0.02967369742691517, + 0.39555251598358154, + 0.25719010829925537, + 0.4673101305961609, + -0.37718722224235535, + -0.11985507607460022, + 0.44498270750045776, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "std": [ + 0.1581457555294037, + 0.13765743374824524, + 0.1370578110218048, + 2.873211145401001, + 0.625612199306488, + 1.7478904724121094, + 0.49696388840675354, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "q01": [ + -0.2880732418596744, + 0.16642616012468936, + 0.016219121195189656, + -3.1415927410125732, + -1.5606922270774841, + -3.1378225915908815, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "q99": [ + 0.33706892974972724, + 0.7168794604014606, + 0.4705753956319765, + 3.1403361039161677, + 0.029222321224212644, + 3.1353093175888063, + 0.9998, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "actions": { + "mean": [ + -0.00008998554403660819, + 0.004090703558176756, + 0.0006838542176410556, + 0.5465309023857117, + -0.004646406974643469, + -0.019831420853734016, + 0.5484480857849121, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "std": [ + 0.012184085324406624, + 0.013969186693429947, + 0.021805619820952415, + 3.0321147441864014, + 0.028920045122504234, + 0.7395531535148621, + 0.497647225856781, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "q01": [ + -0.01572922980785374, + -0.011965612983703577, + -0.012324783468246447, + -6.28066982460022, + -0.07970612406730648, + -1.807010852813721, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "q99": [ + 0.014919160246849028, + 0.016237698739767104, + 0.020379505300521905, + 6.280669824600221, + 0.06261572251319891, + 1.7844429779052735, + 0.9998, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + } + } +} \ No newline at end of file diff --git a/params/_METADATA b/params/_METADATA new file mode 100644 index 0000000000000000000000000000000000000000..38153ee2995836aae38081a4f1df72f0d55bb8a3 --- /dev/null +++ b/params/_METADATA @@ -0,0 +1 @@ +{"tree_metadata": {"('params', 'PaliGemma', 'img', 'Transformer', 'encoder_norm', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoder_norm", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoder_norm', 'scale', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoder_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_0', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_0', 'scale', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_1', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_1', 'scale', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 538]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 4304]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_1', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_1', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 538, 1152]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'key', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'key', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 16, 72]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'out', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'out', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72, 1152]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'query', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'query', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 16, 72]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'value', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'value', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 16, 72]}}, "('params', 'PaliGemma', 'img', 'embedding', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144]}}, "('params', 'PaliGemma', 'img', 'embedding', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [14, 14, 3, 144]}}, "('params', 'PaliGemma', 'img', 'head', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('params', 'PaliGemma', 'img', 'head', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144, 2048]}}, "('params', 'PaliGemma', 'img', 'pos_embedding', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "pos_embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32, 1152]}}, "('params', 'PaliGemma', 'llm', 'embedder', 'input_embedding', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "embedder", "key_type": 2}, {"key": "input_embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32144, 2048]}}, "('params', 'PaliGemma', 'llm', 'final_norm', 'scale', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "final_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('params', 'PaliGemma', 'llm', 'final_norm_1', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "final_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('params', 'PaliGemma', 'llm', 'final_norm_1', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "final_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 3072]}}, "('params', 'PaliGemma', 'llm', 'layers', 'attn', 'attn_vec_einsum', 'w', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 256, 2048]}}, "('params', 'PaliGemma', 'llm', 'layers', 'attn', 'attn_vec_einsum_1', 'w', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "attn_vec_einsum_1", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 256, 1024]}}, "('params', 'PaliGemma', 'llm', 'layers', 'attn', 'kv_einsum', 'w', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 1, 256, 256]}}, "('params', 'PaliGemma', 'llm', 'layers', 'attn', 'kv_einsum_1', 'w', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kv_einsum_1", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 1, 128, 256]}}, "('params', 'PaliGemma', 'llm', 'layers', 'attn', 'q_einsum', 'w', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 2048, 256]}}, "('params', 'PaliGemma', 'llm', 'layers', 'attn', 'q_einsum_1', 'w', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_einsum_1", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 1024, 256]}}, "('params', 'PaliGemma', 'llm', 'layers', 'mlp', 'gating_einsum', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "gating_einsum", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 256, 16384]}}, "('params', 'PaliGemma', 'llm', 'layers', 'mlp', 'linear', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "linear", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2048, 2048]}}, "('params', 'PaliGemma', 'llm', 'layers', 'mlp_1', 'gating_einsum', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp_1", "key_type": 2}, {"key": "gating_einsum", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 128, 4096]}}, "('params', 'PaliGemma', 'llm', 'layers', 'mlp_1', 'linear', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp_1", "key_type": 2}, {"key": "linear", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 512, 1024]}}, "('params', 'PaliGemma', 'llm', 'layers', 'pre_attention_norm', 'scale', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 256]}}, "('params', 'PaliGemma', 'llm', 'layers', 'pre_attention_norm_1', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_attention_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 384]}}, "('params', 'PaliGemma', 'llm', 'layers', 'pre_attention_norm_1', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_attention_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 128, 3072]}}, "('params', 'PaliGemma', 'llm', 'layers', 'pre_ffw_norm', 'scale', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 256]}}, "('params', 'PaliGemma', 'llm', 'layers', 'pre_ffw_norm_1', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_ffw_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 384]}}, "('params', 'PaliGemma', 'llm', 'layers', 'pre_ffw_norm_1', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_ffw_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 128, 3072]}}, "('params', 'action_in_proj', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "action_in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('params', 'action_in_proj', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "action_in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [4, 1024]}}, "('params', 'action_out_proj', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "action_out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [4]}}, "('params', 'action_out_proj', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "action_out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 32]}}, "('params', 'time_mlp_in', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_mlp_in", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('params', 'time_mlp_in', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_mlp_in", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1024]}}, "('params', 'time_mlp_out', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_mlp_out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('params', 'time_mlp_out', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_mlp_out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1024]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null} \ No newline at end of file diff --git a/params/_sharding b/params/_sharding new file mode 100644 index 0000000000000000000000000000000000000000..dee7b906f3d22f39dcb6cbfd9ace2b27c46842ea --- /dev/null +++ b/params/_sharding @@ -0,0 +1 @@ +{"cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2Rlcl9ub3JtLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2Rlcl9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC52YWx1ZS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC52YWx1ZS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC5rZXkuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC5rZXkua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC5vdXQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC5vdXQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC5xdWVyeS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC5xdWVyeS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk1scEJsb2NrXzAuRGVuc2VfMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk1scEJsb2NrXzAuRGVuc2VfMC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk1scEJsb2NrXzAuRGVuc2VfMS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk1scEJsb2NrXzAuRGVuc2VfMS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLkxheWVyTm9ybV8wLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLkxheWVyTm9ybV8wLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLkxheWVyTm9ybV8xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLkxheWVyTm9ybV8xLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuZW1iZWRkaW5nLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuZW1iZWRkaW5nLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuaGVhZC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuaGVhZC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcucG9zX2VtYmVkZGluZy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0uZW1iZWRkZXIuaW5wdXRfZW1iZWRkaW5nLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0uZmluYWxfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0uZmluYWxfbm9ybV8xLkRlbnNlXzAuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0uZmluYWxfbm9ybV8xLkRlbnNlXzAua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLm1scC5nYXRpbmdfZWluc3VtLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLm1scC5saW5lYXIudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLm1scF8xLmdhdGluZ19laW5zdW0udmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLm1scF8xLmxpbmVhci52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLmF0dG4uYXR0bl92ZWNfZWluc3VtLncudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLmF0dG4uYXR0bl92ZWNfZWluc3VtXzEudy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLmF0dG4ua3ZfZWluc3VtLncudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLmF0dG4ua3ZfZWluc3VtXzEudy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLmF0dG4ucV9laW5zdW0udy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLmF0dG4ucV9laW5zdW1fMS53LnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLnByZV9hdHRlbnRpb25fbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLnByZV9hdHRlbnRpb25fbm9ybV8xLkRlbnNlXzAuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLnByZV9hdHRlbnRpb25fbm9ybV8xLkRlbnNlXzAua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLnByZV9mZndfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLnByZV9mZndfbm9ybV8xLkRlbnNlXzAuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLnByZV9mZndfbm9ybV8xLkRlbnNlXzAua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLmFjdGlvbl9pbl9wcm9qLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLmFjdGlvbl9pbl9wcm9qLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLmFjdGlvbl9vdXRfcHJvai5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLmFjdGlvbl9vdXRfcHJvai5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLnRpbWVfbWxwX291dC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLnRpbWVfbWxwX291dC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLnRpbWVfbWxwX2luLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLnRpbWVfbWxwX2luLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}"} \ No newline at end of file diff --git a/params/array_metadatas/process_0 b/params/array_metadatas/process_0 new file mode 100644 index 0000000000000000000000000000000000000000..cd7d761cfc471c71b47aa46f0b76895f2c0795d1 --- /dev/null +++ b/params/array_metadatas/process_0 @@ -0,0 +1 @@ +{"array_metadatas": [{"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoder_norm.bias.value", "write_shape": [144], "chunk_shape": [144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoder_norm.scale.value", "write_shape": [144], "chunk_shape": [144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.LayerNorm_0.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.LayerNorm_0.scale.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.LayerNorm_1.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.LayerNorm_1.scale.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_0.bias.value", "write_shape": [27, 538], "chunk_shape": [27, 538], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_0.kernel.value", "write_shape": [27, 144, 4304], "chunk_shape": [27, 144, 4304], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_1.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_1.kernel.value", "write_shape": [27, 538, 1152], "chunk_shape": [27, 538, 1152], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.key.bias.value", "write_shape": [27, 2, 72], "chunk_shape": [27, 2, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.key.kernel.value", "write_shape": [27, 144, 16, 72], "chunk_shape": [27, 144, 16, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.out.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.out.kernel.value", "write_shape": [27, 2, 72, 1152], "chunk_shape": [27, 2, 72, 1152], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.query.bias.value", "write_shape": [27, 2, 72], "chunk_shape": [27, 2, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.query.kernel.value", "write_shape": [27, 144, 16, 72], "chunk_shape": [27, 144, 16, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.value.bias.value", "write_shape": [27, 2, 72], "chunk_shape": [27, 2, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.value.kernel.value", "write_shape": [27, 144, 16, 72], "chunk_shape": [27, 144, 16, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.embedding.bias.value", "write_shape": [144], "chunk_shape": [144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.embedding.kernel.value", "write_shape": [14, 14, 3, 144], "chunk_shape": [14, 14, 3, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.head.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.head.kernel.value", "write_shape": [144, 2048], "chunk_shape": [144, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.pos_embedding.value", "write_shape": [1, 32, 1152], "chunk_shape": [1, 32, 1152], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.embedder.input_embedding.value", "write_shape": [32144, 2048], "chunk_shape": [32144, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.final_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.final_norm_1.Dense_0.bias.value", "write_shape": [384], "chunk_shape": [384], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.final_norm_1.Dense_0.kernel.value", "write_shape": [128, 3072], "chunk_shape": [128, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.attn.attn_vec_einsum.w.value", "write_shape": [18, 1, 256, 2048], "chunk_shape": [18, 1, 256, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.attn.attn_vec_einsum_1.w.value", "write_shape": [18, 1, 256, 1024], "chunk_shape": [18, 1, 256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.attn.kv_einsum.w.value", "write_shape": [18, 2, 1, 256, 256], "chunk_shape": [18, 2, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.attn.kv_einsum_1.w.value", "write_shape": [18, 2, 1, 128, 256], "chunk_shape": [18, 2, 1, 128, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.attn.q_einsum.w.value", "write_shape": [18, 1, 2048, 256], "chunk_shape": [18, 1, 2048, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.attn.q_einsum_1.w.value", "write_shape": [18, 1, 1024, 256], "chunk_shape": [18, 1, 1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.mlp.gating_einsum.value", "write_shape": [18, 2, 256, 16384], "chunk_shape": [18, 2, 256, 16384], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.mlp.linear.value", "write_shape": [18, 2048, 2048], "chunk_shape": [18, 2048, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.mlp_1.gating_einsum.value", "write_shape": [18, 2, 128, 4096], "chunk_shape": [18, 2, 128, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.mlp_1.linear.value", "write_shape": [18, 512, 1024], "chunk_shape": [18, 512, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.pre_attention_norm.scale.value", "write_shape": [18, 256], "chunk_shape": [18, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.pre_attention_norm_1.Dense_0.bias.value", "write_shape": [18, 384], "chunk_shape": [18, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.pre_attention_norm_1.Dense_0.kernel.value", "write_shape": [18, 128, 3072], "chunk_shape": [18, 128, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.pre_ffw_norm.scale.value", "write_shape": [18, 256], "chunk_shape": [18, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.pre_ffw_norm_1.Dense_0.bias.value", "write_shape": [18, 384], "chunk_shape": [18, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.pre_ffw_norm_1.Dense_0.kernel.value", "write_shape": [18, 128, 3072], "chunk_shape": [18, 128, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.action_in_proj.bias.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.action_in_proj.kernel.value", "write_shape": [4, 1024], "chunk_shape": [4, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.action_out_proj.bias.value", "write_shape": [4], "chunk_shape": [4], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.action_out_proj.kernel.value", "write_shape": [128, 32], "chunk_shape": [128, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_mlp_in.bias.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_mlp_in.kernel.value", "write_shape": [128, 1024], "chunk_shape": [128, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_mlp_out.bias.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_mlp_out.kernel.value", "write_shape": [128, 1024], "chunk_shape": [128, 1024], "ext_metadata": null}}]} \ No newline at end of file diff --git a/params/d/240db4f987e97c9477b3f39d77996210 b/params/d/240db4f987e97c9477b3f39d77996210 new file mode 100644 index 0000000000000000000000000000000000000000..f4ac982cfb43489f0051004344787ae03ff9c928 Binary files /dev/null and b/params/d/240db4f987e97c9477b3f39d77996210 differ diff --git a/params/manifest.ocdbt b/params/manifest.ocdbt new file mode 100644 index 0000000000000000000000000000000000000000..5f560982aac3ed47321ff545c20dea225fa01c4b Binary files /dev/null and b/params/manifest.ocdbt differ diff --git a/params/ocdbt.process_0/d/06f38ac8ce3b4d335b4d8874c6f4cbdd b/params/ocdbt.process_0/d/06f38ac8ce3b4d335b4d8874c6f4cbdd new file mode 100644 index 0000000000000000000000000000000000000000..bdb1954c235a6cf5d27166cc71bf1a6b2a3227b6 --- /dev/null +++ b/params/ocdbt.process_0/d/06f38ac8ce3b4d335b4d8874c6f4cbdd @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d85fdecfd8bf733236b77bb07a7cbe3ac56f5fd2709e61f457258ce9506be37 +size 1400381529 diff --git a/params/ocdbt.process_0/d/10793e6211fcc7417882f7e0045e4716 b/params/ocdbt.process_0/d/10793e6211fcc7417882f7e0045e4716 new file mode 100644 index 0000000000000000000000000000000000000000..8e456989081e6c295ae2e96404fbbf79df481522 --- /dev/null +++ b/params/ocdbt.process_0/d/10793e6211fcc7417882f7e0045e4716 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a268f12b03ffa222a4f149471d8cdb0138f920dd7e24e285f97995b7bf65d4b +size 2413742780 diff --git a/params/ocdbt.process_0/d/1ecf5103d5b0f495f5863314e86eea7b b/params/ocdbt.process_0/d/1ecf5103d5b0f495f5863314e86eea7b new file mode 100644 index 0000000000000000000000000000000000000000..145ab1a22d5883a8cda827bcda001174282898bd Binary files /dev/null and b/params/ocdbt.process_0/d/1ecf5103d5b0f495f5863314e86eea7b differ diff --git a/params/ocdbt.process_0/d/1fd2df50f9903ae9af667f64e5b419d0 b/params/ocdbt.process_0/d/1fd2df50f9903ae9af667f64e5b419d0 new file mode 100644 index 0000000000000000000000000000000000000000..27baf2f66a803cd3182d1f891b525f2a824ef895 --- /dev/null +++ b/params/ocdbt.process_0/d/1fd2df50f9903ae9af667f64e5b419d0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:441ae698cb9b4e9213e852cfc8ac776dbd8836c0aaf67c8b5ee1a6eeb14c30d0 +size 560018549 diff --git a/params/ocdbt.process_0/d/2c941f15cc97858f9b30af0ee43f29f7 b/params/ocdbt.process_0/d/2c941f15cc97858f9b30af0ee43f29f7 new file mode 100644 index 0000000000000000000000000000000000000000..b7291e6f174020db8cf09377767f2f7eb6e0a63b --- /dev/null +++ b/params/ocdbt.process_0/d/2c941f15cc97858f9b30af0ee43f29f7 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81d7e90e91ddcedf5c185161597928a0ac888e950afabe46c94046c7b11c8260 +size 2325564895 diff --git a/params/ocdbt.process_0/d/4169568cfbf46b0dae6fc22a0bf2eda7 b/params/ocdbt.process_0/d/4169568cfbf46b0dae6fc22a0bf2eda7 new file mode 100644 index 0000000000000000000000000000000000000000..8907697cd816d5acacd5623f06bbd28da2175204 --- /dev/null +++ b/params/ocdbt.process_0/d/4169568cfbf46b0dae6fc22a0bf2eda7 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9d2aaeddbd67dcff219fac2adcbd18f6b9d198babaa574524565f48f7af2e54 +size 359815733 diff --git a/params/ocdbt.process_0/d/45d9e5a421f69966c7607b581347ecbe b/params/ocdbt.process_0/d/45d9e5a421f69966c7607b581347ecbe new file mode 100644 index 0000000000000000000000000000000000000000..b4a4f19b626dd94b59f02e20e84bcb37282ac267 Binary files /dev/null and b/params/ocdbt.process_0/d/45d9e5a421f69966c7607b581347ecbe differ diff --git a/params/ocdbt.process_0/d/688c267d7a9b74953496a770eb397a95 b/params/ocdbt.process_0/d/688c267d7a9b74953496a770eb397a95 new file mode 100644 index 0000000000000000000000000000000000000000..f8cf2874c2f99ec0c937319ff7794396046bba2b --- /dev/null +++ b/params/ocdbt.process_0/d/688c267d7a9b74953496a770eb397a95 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fefd85b19c2963d7711c1e76634b469e3055d6d1e385b7dcf3d4e333d537a77e +size 2239879800 diff --git a/params/ocdbt.process_0/d/7d4706d06df24d39c8f3fa6c0ba0f148 b/params/ocdbt.process_0/d/7d4706d06df24d39c8f3fa6c0ba0f148 new file mode 100644 index 0000000000000000000000000000000000000000..e22d5abea5a293cc63d37abac9a9741dde969d9e --- /dev/null +++ b/params/ocdbt.process_0/d/7d4706d06df24d39c8f3fa6c0ba0f148 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7bfe248e437d2bfe2f28dd9d293c70d60401d1fb2c4ab280764df9a2ceaeb12 +size 489355208 diff --git a/params/ocdbt.process_0/d/b75a78724b72047ad2794d1fb82eb9b4 b/params/ocdbt.process_0/d/b75a78724b72047ad2794d1fb82eb9b4 new file mode 100644 index 0000000000000000000000000000000000000000..1a45b10796bb3b4e37b70a7c2778fef75e73ba8d Binary files /dev/null and b/params/ocdbt.process_0/d/b75a78724b72047ad2794d1fb82eb9b4 differ diff --git a/params/ocdbt.process_0/d/e4524ef6248870c9924af8ded3aeeb89 b/params/ocdbt.process_0/d/e4524ef6248870c9924af8ded3aeeb89 new file mode 100644 index 0000000000000000000000000000000000000000..620c64dfc6b6d8564a673269f19b54b99e78a10a --- /dev/null +++ b/params/ocdbt.process_0/d/e4524ef6248870c9924af8ded3aeeb89 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:039ca41bca440a958b3d6b9ae587fff4b962e64a5a596df78643dfc84d9e372f +size 733892948 diff --git a/params/ocdbt.process_0/d/ed213ba2737a0c76ac9df23fffdf10c3 b/params/ocdbt.process_0/d/ed213ba2737a0c76ac9df23fffdf10c3 new file mode 100644 index 0000000000000000000000000000000000000000..e77a48398cdc4035ac5bbff2de0d0f2fb52e3d81 --- /dev/null +++ b/params/ocdbt.process_0/d/ed213ba2737a0c76ac9df23fffdf10c3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35ececf94f374973567e9b4175e4e1ee338c68190c9f9faedc4d1b861a672e82 +size 1918124196 diff --git a/params/ocdbt.process_0/manifest.ocdbt b/params/ocdbt.process_0/manifest.ocdbt new file mode 100644 index 0000000000000000000000000000000000000000..19ea9dcd26552fc77baa1abe23fd552173d1e617 Binary files /dev/null and b/params/ocdbt.process_0/manifest.ocdbt differ diff --git a/train_state/_METADATA b/train_state/_METADATA new file mode 100644 index 0000000000000000000000000000000000000000..cc8be922ec80d98bade87de6566f7e1df183b7b0 --- /dev/null +++ b/train_state/_METADATA @@ -0,0 +1 @@ +{"tree_metadata": {"('step',)": {"key_metadata": [{"key": "step", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoder_norm', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoder_norm", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoder_norm', 'scale', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoder_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_0', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_0', 'scale', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_1', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_1', 'scale', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 538]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 4304]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_1', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_1', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 538, 1152]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'key', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'key', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 16, 72]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'out', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'out', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72, 1152]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'query', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'query', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 16, 72]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'value', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72]}}, "('params', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'value', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 16, 72]}}, "('params', 'PaliGemma', 'img', 'embedding', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144]}}, "('params', 'PaliGemma', 'img', 'embedding', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [14, 14, 3, 144]}}, "('params', 'PaliGemma', 'img', 'head', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('params', 'PaliGemma', 'img', 'head', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144, 2048]}}, "('params', 'PaliGemma', 'img', 'pos_embedding', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "pos_embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32, 1152]}}, "('params', 'PaliGemma', 'llm', 'embedder', 'input_embedding', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "embedder", "key_type": 2}, {"key": "input_embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32144, 2048]}}, "('params', 'PaliGemma', 'llm', 'final_norm', 'scale', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "final_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('params', 'PaliGemma', 'llm', 'final_norm_1', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "final_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('params', 'PaliGemma', 'llm', 'final_norm_1', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "final_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 3072]}}, "('params', 'PaliGemma', 'llm', 'layers', 'attn', 'attn_vec_einsum', 'w', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 256, 2048]}}, "('params', 'PaliGemma', 'llm', 'layers', 'attn', 'attn_vec_einsum_1', 'w', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "attn_vec_einsum_1", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 256, 1024]}}, "('params', 'PaliGemma', 'llm', 'layers', 'attn', 'kv_einsum', 'w', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 1, 256, 256]}}, "('params', 'PaliGemma', 'llm', 'layers', 'attn', 'kv_einsum_1', 'w', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kv_einsum_1", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 1, 128, 256]}}, "('params', 'PaliGemma', 'llm', 'layers', 'attn', 'q_einsum', 'w', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 2048, 256]}}, "('params', 'PaliGemma', 'llm', 'layers', 'attn', 'q_einsum_1', 'w', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_einsum_1", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 1024, 256]}}, "('params', 'PaliGemma', 'llm', 'layers', 'mlp', 'gating_einsum', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "gating_einsum", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 256, 16384]}}, "('params', 'PaliGemma', 'llm', 'layers', 'mlp', 'linear', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "linear", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2048, 2048]}}, "('params', 'PaliGemma', 'llm', 'layers', 'mlp_1', 'gating_einsum', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp_1", "key_type": 2}, {"key": "gating_einsum", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 128, 4096]}}, "('params', 'PaliGemma', 'llm', 'layers', 'mlp_1', 'linear', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp_1", "key_type": 2}, {"key": "linear", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 512, 1024]}}, "('params', 'PaliGemma', 'llm', 'layers', 'pre_attention_norm', 'scale', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 256]}}, "('params', 'PaliGemma', 'llm', 'layers', 'pre_attention_norm_1', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_attention_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 384]}}, "('params', 'PaliGemma', 'llm', 'layers', 'pre_attention_norm_1', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_attention_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 128, 3072]}}, "('params', 'PaliGemma', 'llm', 'layers', 'pre_ffw_norm', 'scale', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 256]}}, "('params', 'PaliGemma', 'llm', 'layers', 'pre_ffw_norm_1', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_ffw_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 384]}}, "('params', 'PaliGemma', 'llm', 'layers', 'pre_ffw_norm_1', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_ffw_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 128, 3072]}}, "('params', 'action_in_proj', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "action_in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('params', 'action_in_proj', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "action_in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [4, 1024]}}, "('params', 'action_out_proj', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "action_out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [4]}}, "('params', 'action_out_proj', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "action_out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 32]}}, "('params', 'time_mlp_in', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_mlp_in", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('params', 'time_mlp_in', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_mlp_in", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1024]}}, "('params', 'time_mlp_out', 'bias', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_mlp_out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('params', 'time_mlp_out', 'kernel', 'value')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_mlp_out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1024]}}, "('model_def',)": {"key_metadata": [{"key": "model_def", "key_type": 2}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '0')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '1', '0', 'count')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "count", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoder_norm', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoder_norm", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoder_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoder_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_0', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_0', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_1', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_1', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 538]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 4304]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_1', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 538, 1152]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'key', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'key', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 16, 72]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'out', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'out', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72, 1152]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'query', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'query', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 16, 72]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'value', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'value', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 16, 72]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'embedding', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'embedding', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [14, 14, 3, 144]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'head', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'head', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144, 2048]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'img', 'pos_embedding', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "pos_embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32, 1152]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'embedder', 'input_embedding', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "embedder", "key_type": 2}, {"key": "input_embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32144, 2048]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'final_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "final_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'final_norm_1', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "final_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'final_norm_1', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "final_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 3072]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'attn', 'attn_vec_einsum', 'w', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 256, 2048]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'attn', 'attn_vec_einsum_1', 'w', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "attn_vec_einsum_1", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 256, 1024]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'attn', 'kv_einsum', 'w', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 1, 256, 256]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'attn', 'kv_einsum_1', 'w', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kv_einsum_1", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 1, 128, 256]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'attn', 'q_einsum', 'w', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 2048, 256]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'attn', 'q_einsum_1', 'w', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_einsum_1", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 1024, 256]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'mlp', 'gating_einsum', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "gating_einsum", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 256, 16384]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'mlp', 'linear', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "linear", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2048, 2048]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'mlp_1', 'gating_einsum', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp_1", "key_type": 2}, {"key": "gating_einsum", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 128, 4096]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'mlp_1', 'linear', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp_1", "key_type": 2}, {"key": "linear", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 512, 1024]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'pre_attention_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 256]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'pre_attention_norm_1', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_attention_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 384]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'pre_attention_norm_1', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_attention_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 128, 3072]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'pre_ffw_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 256]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'pre_ffw_norm_1', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_ffw_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 384]}}, "('opt_state', '1', '0', 'mu', 'PaliGemma', 'llm', 'layers', 'pre_ffw_norm_1', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_ffw_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 128, 3072]}}, "('opt_state', '1', '0', 'mu', 'action_in_proj', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "action_in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'action_in_proj', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "action_in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [4, 1024]}}, "('opt_state', '1', '0', 'mu', 'action_out_proj', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "action_out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [4]}}, "('opt_state', '1', '0', 'mu', 'action_out_proj', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "action_out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 32]}}, "('opt_state', '1', '0', 'mu', 'time_mlp_in', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "time_mlp_in", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'time_mlp_in', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "time_mlp_in", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1024]}}, "('opt_state', '1', '0', 'mu', 'time_mlp_out', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "time_mlp_out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'time_mlp_out', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "time_mlp_out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1024]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoder_norm', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoder_norm", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoder_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoder_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_0', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_0', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_0", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_1', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'LayerNorm_1', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "LayerNorm_1", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 538]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 4304]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_1', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MlpBlock_0', 'Dense_1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MlpBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 538, 1152]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'key', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'key', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 16, 72]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'out', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'out', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72, 1152]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'query', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'query', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 16, 72]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'value', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 2, 72]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'Transformer', 'encoderblock', 'MultiHeadDotProductAttention_0', 'value', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "Transformer", "key_type": 2}, {"key": "encoderblock", "key_type": 2}, {"key": "MultiHeadDotProductAttention_0", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [27, 144, 16, 72]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'embedding', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'embedding', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [14, 14, 3, 144]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'head', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'head', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [144, 2048]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'img', 'pos_embedding', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "img", "key_type": 2}, {"key": "pos_embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 32, 1152]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'embedder', 'input_embedding', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "embedder", "key_type": 2}, {"key": "input_embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [32144, 2048]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'final_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "final_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'final_norm_1', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "final_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'final_norm_1', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "final_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 3072]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'attn', 'attn_vec_einsum', 'w', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 256, 2048]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'attn', 'attn_vec_einsum_1', 'w', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "attn_vec_einsum_1", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 256, 1024]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'attn', 'kv_einsum', 'w', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 1, 256, 256]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'attn', 'kv_einsum_1', 'w', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "kv_einsum_1", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 1, 128, 256]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'attn', 'q_einsum', 'w', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 2048, 256]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'attn', 'q_einsum_1', 'w', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_einsum_1", "key_type": 2}, {"key": "w", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 1, 1024, 256]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'mlp', 'gating_einsum', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "gating_einsum", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 256, 16384]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'mlp', 'linear', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "linear", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2048, 2048]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'mlp_1', 'gating_einsum', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp_1", "key_type": 2}, {"key": "gating_einsum", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 2, 128, 4096]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'mlp_1', 'linear', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "mlp_1", "key_type": 2}, {"key": "linear", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 512, 1024]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'pre_attention_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 256]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'pre_attention_norm_1', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_attention_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 384]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'pre_attention_norm_1', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_attention_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 128, 3072]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'pre_ffw_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 256]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'pre_ffw_norm_1', 'Dense_0', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_ffw_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 384]}}, "('opt_state', '1', '0', 'nu', 'PaliGemma', 'llm', 'layers', 'pre_ffw_norm_1', 'Dense_0', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "PaliGemma", "key_type": 2}, {"key": "llm", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "pre_ffw_norm_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [18, 128, 3072]}}, "('opt_state', '1', '0', 'nu', 'action_in_proj', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "action_in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'action_in_proj', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "action_in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [4, 1024]}}, "('opt_state', '1', '0', 'nu', 'action_out_proj', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "action_out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [4]}}, "('opt_state', '1', '0', 'nu', 'action_out_proj', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "action_out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 32]}}, "('opt_state', '1', '0', 'nu', 'time_mlp_in', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "time_mlp_in", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'time_mlp_in', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "time_mlp_in", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1024]}}, "('opt_state', '1', '0', 'nu', 'time_mlp_out', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "time_mlp_out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'time_mlp_out', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "time_mlp_out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1024]}}, "('opt_state', '1', '1')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "1", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '1', '2', 'count')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "2", "key_type": 1}, {"key": "count", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('ema_params',)": {"key_metadata": [{"key": "ema_params", "key_type": 2}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null} \ No newline at end of file diff --git a/train_state/_sharding b/train_state/_sharding new file mode 100644 index 0000000000000000000000000000000000000000..744acabb7364576adf2a40a9b6f856f3bc26eb4f --- /dev/null +++ b/train_state/_sharding @@ -0,0 +1 @@ +{"b3B0X3N0YXRlLjEuMC5jb3VudA==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS50aW1lX21scF9pbi5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS50aW1lX21scF9pbi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS50aW1lX21scF9vdXQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS50aW1lX21scF9vdXQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJfbm9ybS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5MYXllck5vcm1fMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5MYXllck5vcm1fMC5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5MYXllck5vcm1fMS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5MYXllck5vcm1fMS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NbHBCbG9ja18wLkRlbnNlXzAuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NbHBCbG9ja18wLkRlbnNlXzAua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NbHBCbG9ja18wLkRlbnNlXzEuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NbHBCbG9ja18wLkRlbnNlXzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAua2V5LmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAua2V5Lmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAub3V0LmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAub3V0Lmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAucXVlcnkuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAucXVlcnkua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAudmFsdWUuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAudmFsdWUua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLmVtYmVkZGluZy5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLmVtYmVkZGluZy5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLmhlYWQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLmhlYWQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEuaW1nLnBvc19lbWJlZGRpbmcudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmVtYmVkZGVyLmlucHV0X2VtYmVkZGluZy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmZpbmFsX25vcm0uc2NhbGUudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmZpbmFsX25vcm1fMS5EZW5zZV8wLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmZpbmFsX25vcm1fMS5EZW5zZV8wLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5hdHRuLmF0dG5fdmVjX2VpbnN1bS53LnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5hdHRuLmF0dG5fdmVjX2VpbnN1bV8xLncudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5hdHRuLmt2X2VpbnN1bS53LnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5hdHRuLmt2X2VpbnN1bV8xLncudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5hdHRuLnFfZWluc3VtLncudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5hdHRuLnFfZWluc3VtXzEudy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5tbHAuZ2F0aW5nX2VpbnN1bS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5tbHAubGluZWFyLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5tbHBfMS5nYXRpbmdfZWluc3VtLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5tbHBfMS5saW5lYXIudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5wcmVfYXR0ZW50aW9uX25vcm0uc2NhbGUudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5wcmVfYXR0ZW50aW9uX25vcm1fMS5EZW5zZV8wLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5wcmVfYXR0ZW50aW9uX25vcm1fMS5EZW5zZV8wLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5wcmVfZmZ3X25vcm0uc2NhbGUudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5wcmVfZmZ3X25vcm1fMS5EZW5zZV8wLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5QYWxpR2VtbWEubGxtLmxheWVycy5wcmVfZmZ3X25vcm1fMS5EZW5zZV8wLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5hY3Rpb25faW5fcHJvai5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5hY3Rpb25faW5fcHJvai5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5hY3Rpb25fb3V0X3Byb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5tdS5hY3Rpb25fb3V0X3Byb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS50aW1lX21scF9pbi5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS50aW1lX21scF9pbi5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS50aW1lX21scF9vdXQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS50aW1lX21scF9vdXQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJfbm9ybS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5MYXllck5vcm1fMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5MYXllck5vcm1fMC5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5MYXllck5vcm1fMS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5MYXllck5vcm1fMS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NbHBCbG9ja18wLkRlbnNlXzAuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NbHBCbG9ja18wLkRlbnNlXzAua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NbHBCbG9ja18wLkRlbnNlXzEuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NbHBCbG9ja18wLkRlbnNlXzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAua2V5LmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAua2V5Lmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAub3V0LmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAub3V0Lmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAucXVlcnkuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAucXVlcnkua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAudmFsdWUuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLlRyYW5zZm9ybWVyLmVuY29kZXJibG9jay5NdWx0aUhlYWREb3RQcm9kdWN0QXR0ZW50aW9uXzAudmFsdWUua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLmVtYmVkZGluZy5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLmVtYmVkZGluZy5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLmhlYWQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLmhlYWQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEuaW1nLnBvc19lbWJlZGRpbmcudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmVtYmVkZGVyLmlucHV0X2VtYmVkZGluZy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmZpbmFsX25vcm0uc2NhbGUudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmZpbmFsX25vcm1fMS5EZW5zZV8wLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmZpbmFsX25vcm1fMS5EZW5zZV8wLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5hdHRuLmF0dG5fdmVjX2VpbnN1bS53LnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5hdHRuLmF0dG5fdmVjX2VpbnN1bV8xLncudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5hdHRuLmt2X2VpbnN1bS53LnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5hdHRuLmt2X2VpbnN1bV8xLncudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5hdHRuLnFfZWluc3VtLncudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5hdHRuLnFfZWluc3VtXzEudy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5tbHAuZ2F0aW5nX2VpbnN1bS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5tbHAubGluZWFyLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5tbHBfMS5nYXRpbmdfZWluc3VtLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5tbHBfMS5saW5lYXIudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5wcmVfYXR0ZW50aW9uX25vcm0uc2NhbGUudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5wcmVfYXR0ZW50aW9uX25vcm1fMS5EZW5zZV8wLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5wcmVfYXR0ZW50aW9uX25vcm1fMS5EZW5zZV8wLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5wcmVfZmZ3X25vcm0uc2NhbGUudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5wcmVfZmZ3X25vcm1fMS5EZW5zZV8wLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5QYWxpR2VtbWEubGxtLmxheWVycy5wcmVfZmZ3X25vcm1fMS5EZW5zZV8wLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5hY3Rpb25faW5fcHJvai5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5hY3Rpb25faW5fcHJvai5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5hY3Rpb25fb3V0X3Byb2ouYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMC5udS5hY3Rpb25fb3V0X3Byb2oua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","b3B0X3N0YXRlLjEuMi5jb3VudA==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","c3RlcA==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2Rlcl9ub3JtLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2Rlcl9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC52YWx1ZS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC52YWx1ZS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC5rZXkuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC5rZXkua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC5vdXQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC5vdXQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC5xdWVyeS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk11bHRpSGVhZERvdFByb2R1Y3RBdHRlbnRpb25fMC5xdWVyeS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk1scEJsb2NrXzAuRGVuc2VfMC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk1scEJsb2NrXzAuRGVuc2VfMC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk1scEJsb2NrXzAuRGVuc2VfMS5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLk1scEJsb2NrXzAuRGVuc2VfMS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLkxheWVyTm9ybV8wLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLkxheWVyTm9ybV8wLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLkxheWVyTm9ybV8xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuVHJhbnNmb3JtZXIuZW5jb2RlcmJsb2NrLkxheWVyTm9ybV8xLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuZW1iZWRkaW5nLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuZW1iZWRkaW5nLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuaGVhZC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcuaGVhZC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5pbWcucG9zX2VtYmVkZGluZy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0uZW1iZWRkZXIuaW5wdXRfZW1iZWRkaW5nLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0uZmluYWxfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0uZmluYWxfbm9ybV8xLkRlbnNlXzAuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0uZmluYWxfbm9ybV8xLkRlbnNlXzAua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLm1scC5nYXRpbmdfZWluc3VtLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLm1scC5saW5lYXIudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLm1scF8xLmdhdGluZ19laW5zdW0udmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLm1scF8xLmxpbmVhci52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLmF0dG4uYXR0bl92ZWNfZWluc3VtLncudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLmF0dG4uYXR0bl92ZWNfZWluc3VtXzEudy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLmF0dG4ua3ZfZWluc3VtLncudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLmF0dG4ua3ZfZWluc3VtXzEudy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLmF0dG4ucV9laW5zdW0udy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLmF0dG4ucV9laW5zdW1fMS53LnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLnByZV9hdHRlbnRpb25fbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLnByZV9hdHRlbnRpb25fbm9ybV8xLkRlbnNlXzAuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLnByZV9hdHRlbnRpb25fbm9ybV8xLkRlbnNlXzAua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLnByZV9mZndfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLnByZV9mZndfbm9ybV8xLkRlbnNlXzAuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLlBhbGlHZW1tYS5sbG0ubGF5ZXJzLnByZV9mZndfbm9ybV8xLkRlbnNlXzAua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLmFjdGlvbl9pbl9wcm9qLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLmFjdGlvbl9pbl9wcm9qLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLmFjdGlvbl9vdXRfcHJvai5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLmFjdGlvbl9vdXRfcHJvai5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLnRpbWVfbWxwX291dC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLnRpbWVfbWxwX291dC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLnRpbWVfbWxwX2luLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}","cGFyYW1zLnRpbWVfbWxwX2luLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [8, 1], \"axis_names\": [\"batch\", \"fsdp\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [[{\"id\": 0}], [{\"id\": 1}], [{\"id\": 2}], [{\"id\": 3}], [{\"id\": 4}], [{\"id\": 5}], [{\"id\": 6}], [{\"id\": 7}]]}}"} \ No newline at end of file diff --git a/train_state/array_metadatas/process_0 b/train_state/array_metadatas/process_0 new file mode 100644 index 0000000000000000000000000000000000000000..a98529526c5dd5996d8cf3c998682bf9976cfd38 --- /dev/null +++ b/train_state/array_metadatas/process_0 @@ -0,0 +1 @@ +{"array_metadatas": [{"array_metadata": {"param_name": "step", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoder_norm.bias.value", "write_shape": [144], "chunk_shape": [144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoder_norm.scale.value", "write_shape": [144], "chunk_shape": [144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.LayerNorm_0.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.LayerNorm_0.scale.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.LayerNorm_1.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.LayerNorm_1.scale.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_0.bias.value", "write_shape": [27, 538], "chunk_shape": [27, 538], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_0.kernel.value", "write_shape": [27, 144, 4304], "chunk_shape": [27, 144, 4304], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_1.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_1.kernel.value", "write_shape": [27, 538, 1152], "chunk_shape": [27, 538, 1152], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.key.bias.value", "write_shape": [27, 2, 72], "chunk_shape": [27, 2, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.key.kernel.value", "write_shape": [27, 144, 16, 72], "chunk_shape": [27, 144, 16, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.out.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.out.kernel.value", "write_shape": [27, 2, 72, 1152], "chunk_shape": [27, 2, 72, 1152], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.query.bias.value", "write_shape": [27, 2, 72], "chunk_shape": [27, 2, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.query.kernel.value", "write_shape": [27, 144, 16, 72], "chunk_shape": [27, 144, 16, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.value.bias.value", "write_shape": [27, 2, 72], "chunk_shape": [27, 2, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.value.kernel.value", "write_shape": [27, 144, 16, 72], "chunk_shape": [27, 144, 16, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.embedding.bias.value", "write_shape": [144], "chunk_shape": [144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.embedding.kernel.value", "write_shape": [14, 14, 3, 144], "chunk_shape": [14, 14, 3, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.head.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.head.kernel.value", "write_shape": [144, 2048], "chunk_shape": [144, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.img.pos_embedding.value", "write_shape": [1, 32, 1152], "chunk_shape": [1, 32, 1152], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.embedder.input_embedding.value", "write_shape": [32144, 2048], "chunk_shape": [32144, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.final_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.final_norm_1.Dense_0.bias.value", "write_shape": [384], "chunk_shape": [384], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.final_norm_1.Dense_0.kernel.value", "write_shape": [128, 3072], "chunk_shape": [128, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.attn.attn_vec_einsum.w.value", "write_shape": [18, 1, 256, 2048], "chunk_shape": [18, 1, 256, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.attn.attn_vec_einsum_1.w.value", "write_shape": [18, 1, 256, 1024], "chunk_shape": [18, 1, 256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.attn.kv_einsum.w.value", "write_shape": [18, 2, 1, 256, 256], "chunk_shape": [18, 2, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.attn.kv_einsum_1.w.value", "write_shape": [18, 2, 1, 128, 256], "chunk_shape": [18, 2, 1, 128, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.attn.q_einsum.w.value", "write_shape": [18, 1, 2048, 256], "chunk_shape": [18, 1, 2048, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.attn.q_einsum_1.w.value", "write_shape": [18, 1, 1024, 256], "chunk_shape": [18, 1, 1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.mlp.gating_einsum.value", "write_shape": [18, 2, 256, 16384], "chunk_shape": [18, 2, 256, 16384], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.mlp.linear.value", "write_shape": [18, 2048, 2048], "chunk_shape": [18, 2048, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.mlp_1.gating_einsum.value", "write_shape": [18, 2, 128, 4096], "chunk_shape": [18, 2, 128, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.mlp_1.linear.value", "write_shape": [18, 512, 1024], "chunk_shape": [18, 512, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.pre_attention_norm.scale.value", "write_shape": [18, 256], "chunk_shape": [18, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.pre_attention_norm_1.Dense_0.bias.value", "write_shape": [18, 384], "chunk_shape": [18, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.pre_attention_norm_1.Dense_0.kernel.value", "write_shape": [18, 128, 3072], "chunk_shape": [18, 128, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.pre_ffw_norm.scale.value", "write_shape": [18, 256], "chunk_shape": [18, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.pre_ffw_norm_1.Dense_0.bias.value", "write_shape": [18, 384], "chunk_shape": [18, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.PaliGemma.llm.layers.pre_ffw_norm_1.Dense_0.kernel.value", "write_shape": [18, 128, 3072], "chunk_shape": [18, 128, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.action_in_proj.bias.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.action_in_proj.kernel.value", "write_shape": [4, 1024], "chunk_shape": [4, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.action_out_proj.bias.value", "write_shape": [4], "chunk_shape": [4], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.action_out_proj.kernel.value", "write_shape": [128, 32], "chunk_shape": [128, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_mlp_in.bias.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_mlp_in.kernel.value", "write_shape": [128, 1024], "chunk_shape": [128, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_mlp_out.bias.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_mlp_out.kernel.value", "write_shape": [128, 1024], "chunk_shape": [128, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.count", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoder_norm.bias.value", "write_shape": [144], "chunk_shape": [144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoder_norm.scale.value", "write_shape": [144], "chunk_shape": [144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.LayerNorm_0.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.LayerNorm_0.scale.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.LayerNorm_1.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.LayerNorm_1.scale.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_0.bias.value", "write_shape": [27, 538], "chunk_shape": [27, 538], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_0.kernel.value", "write_shape": [27, 144, 4304], "chunk_shape": [27, 144, 4304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_1.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_1.kernel.value", "write_shape": [27, 538, 1152], "chunk_shape": [27, 538, 1152], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.key.bias.value", "write_shape": [27, 2, 72], "chunk_shape": [27, 2, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.key.kernel.value", "write_shape": [27, 144, 16, 72], "chunk_shape": [27, 144, 16, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.out.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.out.kernel.value", "write_shape": [27, 2, 72, 1152], "chunk_shape": [27, 2, 72, 1152], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.query.bias.value", "write_shape": [27, 2, 72], "chunk_shape": [27, 2, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.query.kernel.value", "write_shape": [27, 144, 16, 72], "chunk_shape": [27, 144, 16, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.value.bias.value", "write_shape": [27, 2, 72], "chunk_shape": [27, 2, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.value.kernel.value", "write_shape": [27, 144, 16, 72], "chunk_shape": [27, 144, 16, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.embedding.bias.value", "write_shape": [144], "chunk_shape": [144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.embedding.kernel.value", "write_shape": [14, 14, 3, 144], "chunk_shape": [14, 14, 3, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.head.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.head.kernel.value", "write_shape": [144, 2048], "chunk_shape": [144, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.img.pos_embedding.value", "write_shape": [1, 32, 1152], "chunk_shape": [1, 32, 1152], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.embedder.input_embedding.value", "write_shape": [32144, 2048], "chunk_shape": [32144, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.final_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.final_norm_1.Dense_0.bias.value", "write_shape": [384], "chunk_shape": [384], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.final_norm_1.Dense_0.kernel.value", "write_shape": [128, 3072], "chunk_shape": [128, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.attn.attn_vec_einsum.w.value", "write_shape": [18, 1, 256, 2048], "chunk_shape": [18, 1, 256, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.attn.attn_vec_einsum_1.w.value", "write_shape": [18, 1, 256, 1024], "chunk_shape": [18, 1, 256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.attn.kv_einsum.w.value", "write_shape": [18, 2, 1, 256, 256], "chunk_shape": [18, 2, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.attn.kv_einsum_1.w.value", "write_shape": [18, 2, 1, 128, 256], "chunk_shape": [18, 2, 1, 128, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.attn.q_einsum.w.value", "write_shape": [18, 1, 2048, 256], "chunk_shape": [18, 1, 2048, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.attn.q_einsum_1.w.value", "write_shape": [18, 1, 1024, 256], "chunk_shape": [18, 1, 1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.mlp.gating_einsum.value", "write_shape": [18, 2, 256, 16384], "chunk_shape": [18, 2, 256, 16384], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.mlp.linear.value", "write_shape": [18, 2048, 2048], "chunk_shape": [18, 2048, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.mlp_1.gating_einsum.value", "write_shape": [18, 2, 128, 4096], "chunk_shape": [18, 2, 128, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.mlp_1.linear.value", "write_shape": [18, 512, 1024], "chunk_shape": [18, 512, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.pre_attention_norm.scale.value", "write_shape": [18, 256], "chunk_shape": [18, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.pre_attention_norm_1.Dense_0.bias.value", "write_shape": [18, 384], "chunk_shape": [18, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.pre_attention_norm_1.Dense_0.kernel.value", "write_shape": [18, 128, 3072], "chunk_shape": [18, 128, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.pre_ffw_norm.scale.value", "write_shape": [18, 256], "chunk_shape": [18, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.pre_ffw_norm_1.Dense_0.bias.value", "write_shape": [18, 384], "chunk_shape": [18, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.PaliGemma.llm.layers.pre_ffw_norm_1.Dense_0.kernel.value", "write_shape": [18, 128, 3072], "chunk_shape": [18, 128, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.action_in_proj.bias.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.action_in_proj.kernel.value", "write_shape": [4, 1024], "chunk_shape": [4, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.action_out_proj.bias.value", "write_shape": [4], "chunk_shape": [4], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.action_out_proj.kernel.value", "write_shape": [128, 32], "chunk_shape": [128, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.time_mlp_in.bias.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.time_mlp_in.kernel.value", "write_shape": [128, 1024], "chunk_shape": [128, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.time_mlp_out.bias.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.time_mlp_out.kernel.value", "write_shape": [128, 1024], "chunk_shape": [128, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoder_norm.bias.value", "write_shape": [144], "chunk_shape": [144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoder_norm.scale.value", "write_shape": [144], "chunk_shape": [144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.LayerNorm_0.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.LayerNorm_0.scale.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.LayerNorm_1.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.LayerNorm_1.scale.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_0.bias.value", "write_shape": [27, 538], "chunk_shape": [27, 538], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_0.kernel.value", "write_shape": [27, 144, 4304], "chunk_shape": [27, 144, 4304], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_1.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.MlpBlock_0.Dense_1.kernel.value", "write_shape": [27, 538, 1152], "chunk_shape": [27, 538, 1152], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.key.bias.value", "write_shape": [27, 2, 72], "chunk_shape": [27, 2, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.key.kernel.value", "write_shape": [27, 144, 16, 72], "chunk_shape": [27, 144, 16, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.out.bias.value", "write_shape": [27, 144], "chunk_shape": [27, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.out.kernel.value", "write_shape": [27, 2, 72, 1152], "chunk_shape": [27, 2, 72, 1152], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.query.bias.value", "write_shape": [27, 2, 72], "chunk_shape": [27, 2, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.query.kernel.value", "write_shape": [27, 144, 16, 72], "chunk_shape": [27, 144, 16, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.value.bias.value", "write_shape": [27, 2, 72], "chunk_shape": [27, 2, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.Transformer.encoderblock.MultiHeadDotProductAttention_0.value.kernel.value", "write_shape": [27, 144, 16, 72], "chunk_shape": [27, 144, 16, 72], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.embedding.bias.value", "write_shape": [144], "chunk_shape": [144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.embedding.kernel.value", "write_shape": [14, 14, 3, 144], "chunk_shape": [14, 14, 3, 144], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.head.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.head.kernel.value", "write_shape": [144, 2048], "chunk_shape": [144, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.img.pos_embedding.value", "write_shape": [1, 32, 1152], "chunk_shape": [1, 32, 1152], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.embedder.input_embedding.value", "write_shape": [32144, 2048], "chunk_shape": [32144, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.final_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.final_norm_1.Dense_0.bias.value", "write_shape": [384], "chunk_shape": [384], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.final_norm_1.Dense_0.kernel.value", "write_shape": [128, 3072], "chunk_shape": [128, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.attn.attn_vec_einsum.w.value", "write_shape": [18, 1, 256, 2048], "chunk_shape": [18, 1, 256, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.attn.attn_vec_einsum_1.w.value", "write_shape": [18, 1, 256, 1024], "chunk_shape": [18, 1, 256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.attn.kv_einsum.w.value", "write_shape": [18, 2, 1, 256, 256], "chunk_shape": [18, 2, 1, 256, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.attn.kv_einsum_1.w.value", "write_shape": [18, 2, 1, 128, 256], "chunk_shape": [18, 2, 1, 128, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.attn.q_einsum.w.value", "write_shape": [18, 1, 2048, 256], "chunk_shape": [18, 1, 2048, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.attn.q_einsum_1.w.value", "write_shape": [18, 1, 1024, 256], "chunk_shape": [18, 1, 1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.mlp.gating_einsum.value", "write_shape": [18, 2, 256, 16384], "chunk_shape": [18, 2, 256, 16384], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.mlp.linear.value", "write_shape": [18, 2048, 2048], "chunk_shape": [18, 2048, 2048], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.mlp_1.gating_einsum.value", "write_shape": [18, 2, 128, 4096], "chunk_shape": [18, 2, 128, 4096], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.mlp_1.linear.value", "write_shape": [18, 512, 1024], "chunk_shape": [18, 512, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.pre_attention_norm.scale.value", "write_shape": [18, 256], "chunk_shape": [18, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.pre_attention_norm_1.Dense_0.bias.value", "write_shape": [18, 384], "chunk_shape": [18, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.pre_attention_norm_1.Dense_0.kernel.value", "write_shape": [18, 128, 3072], "chunk_shape": [18, 128, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.pre_ffw_norm.scale.value", "write_shape": [18, 256], "chunk_shape": [18, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.pre_ffw_norm_1.Dense_0.bias.value", "write_shape": [18, 384], "chunk_shape": [18, 384], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.PaliGemma.llm.layers.pre_ffw_norm_1.Dense_0.kernel.value", "write_shape": [18, 128, 3072], "chunk_shape": [18, 128, 3072], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.action_in_proj.bias.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.action_in_proj.kernel.value", "write_shape": [4, 1024], "chunk_shape": [4, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.action_out_proj.bias.value", "write_shape": [4], "chunk_shape": [4], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.action_out_proj.kernel.value", "write_shape": [128, 32], "chunk_shape": [128, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.time_mlp_in.bias.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.time_mlp_in.kernel.value", "write_shape": [128, 1024], "chunk_shape": [128, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.time_mlp_out.bias.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.time_mlp_out.kernel.value", "write_shape": [128, 1024], "chunk_shape": [128, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.2.count", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}]} \ No newline at end of file diff --git a/train_state/d/0e224d9d43b5d0a754417c1f63111844 b/train_state/d/0e224d9d43b5d0a754417c1f63111844 new file mode 100644 index 0000000000000000000000000000000000000000..f84b96834387a1838c889025501759db97596b22 Binary files /dev/null and b/train_state/d/0e224d9d43b5d0a754417c1f63111844 differ diff --git a/train_state/manifest.ocdbt b/train_state/manifest.ocdbt new file mode 100644 index 0000000000000000000000000000000000000000..e6aac13ded864901dcf587fd8cf1c00b04c360e4 Binary files /dev/null and b/train_state/manifest.ocdbt differ diff --git a/train_state/ocdbt.process_0/d/072d45fc552b273dc72c6f025ac362ee b/train_state/ocdbt.process_0/d/072d45fc552b273dc72c6f025ac362ee new file mode 100644 index 0000000000000000000000000000000000000000..3224878dc4abeecbf199c71536a7cb7d86a737c0 Binary files /dev/null and b/train_state/ocdbt.process_0/d/072d45fc552b273dc72c6f025ac362ee differ diff --git a/train_state/ocdbt.process_0/d/0a0f9e07775f3db0a253e48154129e8c b/train_state/ocdbt.process_0/d/0a0f9e07775f3db0a253e48154129e8c new file mode 100644 index 0000000000000000000000000000000000000000..145e1044faa7293fa93d337e4346316ce7018c8a Binary files /dev/null and b/train_state/ocdbt.process_0/d/0a0f9e07775f3db0a253e48154129e8c differ diff --git a/train_state/ocdbt.process_0/d/20b1984374c9e8d0f8b3086460f0624d b/train_state/ocdbt.process_0/d/20b1984374c9e8d0f8b3086460f0624d new file mode 100644 index 0000000000000000000000000000000000000000..6f24d7845b5959f8c0338a22ac63c5924ee9a5cd --- /dev/null +++ b/train_state/ocdbt.process_0/d/20b1984374c9e8d0f8b3086460f0624d @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2add80d2573df4bf8f4a974ce5ee9592fc9f4085d4d33a7f7a96964202feb380 +size 2335477733 diff --git a/train_state/ocdbt.process_0/d/2d041d2f5aff90e3f8c30e301ba713ba b/train_state/ocdbt.process_0/d/2d041d2f5aff90e3f8c30e301ba713ba new file mode 100644 index 0000000000000000000000000000000000000000..8c09adfcd295cc920c0c4d9e755cec3f76406d32 Binary files /dev/null and b/train_state/ocdbt.process_0/d/2d041d2f5aff90e3f8c30e301ba713ba differ diff --git a/train_state/ocdbt.process_0/d/317b48e2d515a62a10f3c179f63210c5 b/train_state/ocdbt.process_0/d/317b48e2d515a62a10f3c179f63210c5 new file mode 100644 index 0000000000000000000000000000000000000000..8212661ed1c4b5f70c62bb3c25054cf6120c2e78 --- /dev/null +++ b/train_state/ocdbt.process_0/d/317b48e2d515a62a10f3c179f63210c5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a285b82cb37a88f4ae14c598a8fa2c6dfe4bb024c3200697e508d2fde0ffd51 +size 2210815783 diff --git a/train_state/ocdbt.process_0/d/46df5f8fd252e48cd21afce655162298 b/train_state/ocdbt.process_0/d/46df5f8fd252e48cd21afce655162298 new file mode 100644 index 0000000000000000000000000000000000000000..32aeb3b90d60b0a7055cd0d594f6225851c8357d Binary files /dev/null and b/train_state/ocdbt.process_0/d/46df5f8fd252e48cd21afce655162298 differ diff --git a/train_state/ocdbt.process_0/d/4a68b4b49959ff5989903c8fc59872fa b/train_state/ocdbt.process_0/d/4a68b4b49959ff5989903c8fc59872fa new file mode 100644 index 0000000000000000000000000000000000000000..73118451bdf05c1fbf5d03c8465150f4d097c664 --- /dev/null +++ b/train_state/ocdbt.process_0/d/4a68b4b49959ff5989903c8fc59872fa @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2cc8cc89edbcd844d1798a00c2b4fe0bfd9004543d9843702559cd1b3a7a8a2 +size 2323506418 diff --git a/train_state/ocdbt.process_0/d/511856f64b17fe1cb3fa8eab7d0c7f82 b/train_state/ocdbt.process_0/d/511856f64b17fe1cb3fa8eab7d0c7f82 new file mode 100644 index 0000000000000000000000000000000000000000..ee0e3c312cdb20ccc1bc45ac7a1cd5b4af6cfceb Binary files /dev/null and b/train_state/ocdbt.process_0/d/511856f64b17fe1cb3fa8eab7d0c7f82 differ diff --git a/train_state/ocdbt.process_0/d/5a7bf4ca17051893e04991ac1bc7f550 b/train_state/ocdbt.process_0/d/5a7bf4ca17051893e04991ac1bc7f550 new file mode 100644 index 0000000000000000000000000000000000000000..861a1e75ce796e297f1c2db5fb4358d2d2b2fb0a Binary files /dev/null and b/train_state/ocdbt.process_0/d/5a7bf4ca17051893e04991ac1bc7f550 differ diff --git a/train_state/ocdbt.process_0/d/6c8f3e73521fb9e0beea50b3276638cb b/train_state/ocdbt.process_0/d/6c8f3e73521fb9e0beea50b3276638cb new file mode 100644 index 0000000000000000000000000000000000000000..7c6ca0ab209fcc421b90ff888acf9981f81a55c8 Binary files /dev/null and b/train_state/ocdbt.process_0/d/6c8f3e73521fb9e0beea50b3276638cb differ diff --git a/train_state/ocdbt.process_0/d/761d723ea96a2cb4cc33b35514fd7d46 b/train_state/ocdbt.process_0/d/761d723ea96a2cb4cc33b35514fd7d46 new file mode 100644 index 0000000000000000000000000000000000000000..80934222270bc3850c8835c9ea0d2dc443653cb4 Binary files /dev/null and b/train_state/ocdbt.process_0/d/761d723ea96a2cb4cc33b35514fd7d46 differ diff --git a/train_state/ocdbt.process_0/d/801ff81d3ffbfc3a95a38a9c0b7e5396 b/train_state/ocdbt.process_0/d/801ff81d3ffbfc3a95a38a9c0b7e5396 new file mode 100644 index 0000000000000000000000000000000000000000..af9427353d529443a0bbb7b60218494fbd5fde1b --- /dev/null +++ b/train_state/ocdbt.process_0/d/801ff81d3ffbfc3a95a38a9c0b7e5396 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20eec1c5844fc48d9e41130a961502b899f0b24992cbc68dd1fb1c993091182 +size 2434347795 diff --git a/train_state/ocdbt.process_0/d/8e20e44db20ee68f51e3c6b17376eaa1 b/train_state/ocdbt.process_0/d/8e20e44db20ee68f51e3c6b17376eaa1 new file mode 100644 index 0000000000000000000000000000000000000000..078bdcaff3af498bb76f2564e4a7d2174f2e6c46 --- /dev/null +++ b/train_state/ocdbt.process_0/d/8e20e44db20ee68f51e3c6b17376eaa1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbe6418f195f321ea79e592c8c254c9532ee1575f0c7b08fccb8bc943498bcd5 +size 2157300272 diff --git a/train_state/ocdbt.process_0/d/9780ab93e1f3f37834522e628b7da7ca b/train_state/ocdbt.process_0/d/9780ab93e1f3f37834522e628b7da7ca new file mode 100644 index 0000000000000000000000000000000000000000..f193cc124659a4b06ccdaefa7718cb62436fac52 --- /dev/null +++ b/train_state/ocdbt.process_0/d/9780ab93e1f3f37834522e628b7da7ca @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d1871f295d16b830b9c37e2b5bd2db61dc6005c2b88402c926fde54e7ba2f98 +size 2656852767 diff --git a/train_state/ocdbt.process_0/d/986fd14ab1ba4c58b7cfbf8a7d6cebf1 b/train_state/ocdbt.process_0/d/986fd14ab1ba4c58b7cfbf8a7d6cebf1 new file mode 100644 index 0000000000000000000000000000000000000000..e245f61c20695c3a4ef9cd2abd9aa37d7d1121b9 Binary files /dev/null and b/train_state/ocdbt.process_0/d/986fd14ab1ba4c58b7cfbf8a7d6cebf1 differ diff --git a/train_state/ocdbt.process_0/d/a5dda2ec41f2c2119adb5dd7f9f0c8f1 b/train_state/ocdbt.process_0/d/a5dda2ec41f2c2119adb5dd7f9f0c8f1 new file mode 100644 index 0000000000000000000000000000000000000000..3ec674ca35871ad78fac8e4130238bde591bbd7e --- /dev/null +++ b/train_state/ocdbt.process_0/d/a5dda2ec41f2c2119adb5dd7f9f0c8f1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe4634a361818a5f486cf01284d378cf90fc9ef6c26b3b72723e6fa89488a8eb +size 2187991468 diff --git a/train_state/ocdbt.process_0/d/a82739e7692cef57a17ca4b5ea9ff47c b/train_state/ocdbt.process_0/d/a82739e7692cef57a17ca4b5ea9ff47c new file mode 100644 index 0000000000000000000000000000000000000000..bf9eebff1210db599ad1512826be4f104c154e8c Binary files /dev/null and b/train_state/ocdbt.process_0/d/a82739e7692cef57a17ca4b5ea9ff47c differ diff --git a/train_state/ocdbt.process_0/d/b0695e4b5e01aeb5dfd91c3393fb4ff3 b/train_state/ocdbt.process_0/d/b0695e4b5e01aeb5dfd91c3393fb4ff3 new file mode 100644 index 0000000000000000000000000000000000000000..7e0ffb4506d3a674346134b64b1f29f9c324d307 --- /dev/null +++ b/train_state/ocdbt.process_0/d/b0695e4b5e01aeb5dfd91c3393fb4ff3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:575fdeb8620bf328869e60aa6cc333a99b7935655cbcf71f98af61e6c7b83af8 +size 1767678709 diff --git a/train_state/ocdbt.process_0/d/c0e01bd55c77234cd1d9d9a213c03648 b/train_state/ocdbt.process_0/d/c0e01bd55c77234cd1d9d9a213c03648 new file mode 100644 index 0000000000000000000000000000000000000000..e4522908a0e20a67a0903513c26aab8e823a1cda --- /dev/null +++ b/train_state/ocdbt.process_0/d/c0e01bd55c77234cd1d9d9a213c03648 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:494b0f0b241f35b9e0a7bfb0b97b6fa523db0862a52dc0b3c816b571d1a26697 +size 2643367304 diff --git a/train_state/ocdbt.process_0/d/c5bc03ad4c8fde9c32dd602c45f3b1fe b/train_state/ocdbt.process_0/d/c5bc03ad4c8fde9c32dd602c45f3b1fe new file mode 100644 index 0000000000000000000000000000000000000000..cd7018fc27752a8908c3abd5dfe069f145e5e825 --- /dev/null +++ b/train_state/ocdbt.process_0/d/c5bc03ad4c8fde9c32dd602c45f3b1fe @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90f0c7b2fac3153c179daee553dcbd7d42c104c05d713882ddbd764731ae8466 +size 2656468952 diff --git a/train_state/ocdbt.process_0/d/d051c4aa2a7a185100c6c85ee87f5c01 b/train_state/ocdbt.process_0/d/d051c4aa2a7a185100c6c85ee87f5c01 new file mode 100644 index 0000000000000000000000000000000000000000..929fb71415572be66e73dcb180a80a81ef327266 --- /dev/null +++ b/train_state/ocdbt.process_0/d/d051c4aa2a7a185100c6c85ee87f5c01 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:084ce0da426e0fb78d9114523ad9dbd0bb636a1c945c199656a91582dc44d079 +size 10672618 diff --git a/train_state/ocdbt.process_0/d/d3c7961f794bd59de794145208ca8c4a b/train_state/ocdbt.process_0/d/d3c7961f794bd59de794145208ca8c4a new file mode 100644 index 0000000000000000000000000000000000000000..94b1fb559cb55e5d0f413f0ffa6484915b9255be --- /dev/null +++ b/train_state/ocdbt.process_0/d/d3c7961f794bd59de794145208ca8c4a @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34ddcbd22cc0fa68faed4eb90c8013d51c8364b6102edd59558c2f40c3936d44 +size 2157722884 diff --git a/train_state/ocdbt.process_0/d/e745ec5c30c817cca44a428b33884cc4 b/train_state/ocdbt.process_0/d/e745ec5c30c817cca44a428b33884cc4 new file mode 100644 index 0000000000000000000000000000000000000000..6c89df18ced5b274ef6bd3d1e4c747db10f8b7cd --- /dev/null +++ b/train_state/ocdbt.process_0/d/e745ec5c30c817cca44a428b33884cc4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c476599b49a21559839e6a8fcdbc7ed491f042345dd940c038a7029b0fdeede +size 2209223059 diff --git a/train_state/ocdbt.process_0/d/f12d64cd97e2bf0a3919c020b3f01955 b/train_state/ocdbt.process_0/d/f12d64cd97e2bf0a3919c020b3f01955 new file mode 100644 index 0000000000000000000000000000000000000000..bf4747e44cff1aa5f47ac4b67ef1b6e666a8d047 --- /dev/null +++ b/train_state/ocdbt.process_0/d/f12d64cd97e2bf0a3919c020b3f01955 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b089a69f2989a89af0b7c8d15db94db071c3eb373e4bc8ae45962e7056c13ed +size 1650899336 diff --git a/train_state/ocdbt.process_0/manifest.ocdbt b/train_state/ocdbt.process_0/manifest.ocdbt new file mode 100644 index 0000000000000000000000000000000000000000..d9b1385b77e7d52f3566ff116ad6ea90a62274dc Binary files /dev/null and b/train_state/ocdbt.process_0/manifest.ocdbt differ