Upload folder using huggingface_hub
Browse files- .gitattributes +6 -0
- _CHECKPOINT_METADATA +1 -0
- ema_model/_METADATA +1 -0
- ema_model/_sharding +1 -0
- ema_model/array_metadatas/process_0 +1 -0
- ema_model/d/623503ab6ef4733c285bc9b3d25bbd80 +0 -0
- ema_model/manifest.ocdbt +0 -0
- ema_model/ocdbt.process_0/d/617ce87548d409cdc7505f0249cf47c1 +0 -0
- ema_model/ocdbt.process_0/d/82649f39b50e05cbdbce02c8b44d094c +0 -0
- ema_model/ocdbt.process_0/d/846b4b9482e83afcdd81c76d11fc14da +0 -0
- ema_model/ocdbt.process_0/d/e5503b6a42e304e6f5c8a1ae10683e2b +3 -0
- ema_model/ocdbt.process_0/d/e64d117118b1c71994010273a1351438 +3 -0
- ema_model/ocdbt.process_0/d/ec9215383c89e2a364cf47b9110a13e5 +0 -0
- ema_model/ocdbt.process_0/manifest.ocdbt +0 -0
- metrics/metrics +1 -0
- model/_METADATA +1 -0
- model/_sharding +1 -0
- model/array_metadatas/process_0 +1 -0
- model/d/c45f7ee6086f49985ad321d2b2a3ba6e +0 -0
- model/manifest.ocdbt +0 -0
- model/ocdbt.process_0/d/605c938ecfb1e3e6754e3ae00c5f72bc +0 -0
- model/ocdbt.process_0/d/7f4dae192e6eb44d0122ff17d7409f09 +0 -0
- model/ocdbt.process_0/d/8edf0dde4e7853964909c18476decf9b +0 -0
- model/ocdbt.process_0/d/cbfae2bb822befbe210a96190ca374f4 +3 -0
- model/ocdbt.process_0/d/ef1cbb95fe20efb6593873c72dd9f866 +0 -0
- model/ocdbt.process_0/d/f46f6390e2f476455c4ada9a4b7d1490 +3 -0
- model/ocdbt.process_0/manifest.ocdbt +0 -0
- opt/_METADATA +1 -0
- opt/_sharding +1 -0
- opt/array_metadatas/process_0 +1 -0
- opt/d/856bed74554b34521e07a8345817413d +0 -0
- opt/manifest.ocdbt +0 -0
- opt/ocdbt.process_0/d/0c59d47160e8738ac28124f69a4a146a +0 -0
- opt/ocdbt.process_0/d/576ab8d76627e7403bcce532a19554f2 +0 -0
- opt/ocdbt.process_0/d/6be33bd399e64a14c25071fd59e1d0ec +3 -0
- opt/ocdbt.process_0/d/c0a68afd95c19fd66840469448087b0b +0 -0
- opt/ocdbt.process_0/d/e0bf35b5fe7aefe229d76d62f40c9712 +3 -0
- opt/ocdbt.process_0/d/e889c18b0bafdee5e513aed8c4b03e2f +0 -0
- opt/ocdbt.process_0/manifest.ocdbt +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
ema_model/ocdbt.process_0/d/e5503b6a42e304e6f5c8a1ae10683e2b filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
ema_model/ocdbt.process_0/d/e64d117118b1c71994010273a1351438 filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
model/ocdbt.process_0/d/cbfae2bb822befbe210a96190ca374f4 filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
model/ocdbt.process_0/d/f46f6390e2f476455c4ada9a4b7d1490 filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
opt/ocdbt.process_0/d/6be33bd399e64a14c25071fd59e1d0ec filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
opt/ocdbt.process_0/d/e0bf35b5fe7aefe229d76d62f40c9712 filter=lfs diff=lfs merge=lfs -text
|
_CHECKPOINT_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"item_handlers": {"ema_model": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler", "metrics": "orbax.checkpoint._src.handlers.json_checkpoint_handler.JsonCheckpointHandler", "model": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler", "opt": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler"}, "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1770143260024716825, "commit_timestamp_nsecs": 1770143260622757460, "custom_metadata": {}}
|
ema_model/_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tree_metadata": {"('Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('init_y', 'scale')": {"key_metadata": [{"key": "init_y", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('init_z', 'scale')": {"key_metadata": [{"key": "init_z", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('output_head', 'bias', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('output_head', 'kernel', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
ema_model/_sharding
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"UV9oZWFkLmxheWVycy4xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","UV9oZWFkLmxheWVycy4xLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF95LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF96LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5wdXRfZW1iZWRkaW5nLmVtYmVkZGluZy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMi5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnhfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnlfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnpfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}"}
|
ema_model/array_metadatas/process_0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_y.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_z.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}]}
|
ema_model/d/623503ab6ef4733c285bc9b3d25bbd80
ADDED
|
Binary file (11.3 kB). View file
|
|
|
ema_model/manifest.ocdbt
ADDED
|
Binary file (116 Bytes). View file
|
|
|
ema_model/ocdbt.process_0/d/617ce87548d409cdc7505f0249cf47c1
ADDED
|
Binary file (199 Bytes). View file
|
|
|
ema_model/ocdbt.process_0/d/82649f39b50e05cbdbce02c8b44d094c
ADDED
|
Binary file (509 Bytes). View file
|
|
|
ema_model/ocdbt.process_0/d/846b4b9482e83afcdd81c76d11fc14da
ADDED
|
Binary file (2.58 kB). View file
|
|
|
ema_model/ocdbt.process_0/d/e5503b6a42e304e6f5c8a1ae10683e2b
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49029917a50bc0bf244306284e7b17b6bae8ac9a419b0f5020e200884f019191
|
| 3 |
+
size 2927943
|
ema_model/ocdbt.process_0/d/e64d117118b1c71994010273a1351438
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c085508b125a147948a83874348aa897532de9a5f1d153cf81addbae3a9189c0
|
| 3 |
+
size 15089664
|
ema_model/ocdbt.process_0/d/ec9215383c89e2a364cf47b9110a13e5
ADDED
|
Binary file (21.8 kB). View file
|
|
|
ema_model/ocdbt.process_0/manifest.ocdbt
ADDED
|
Binary file (344 Bytes). View file
|
|
|
metrics/metrics
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"eval/cell_acc": 0.9051730036735535, "eval/cell_acc_first_delta": 0.17279130220413208, "eval/cell_acc_halfway_delta": 0.014821231365203857, "eval/cell_acc_train_length_delta": 0.015721440315246582, "eval/effective_N_supervision": 32.0, "eval/solved_acc": 0.7630208730697632, "eval/solved_acc_first_delta": 0.6028646230697632, "eval/solved_acc_halfway_delta": 0.05078125, "eval/solved_acc_train_length_delta": 0.05338543653488159}
|
model/_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tree_metadata": {"('Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('init_y', 'scale')": {"key_metadata": [{"key": "init_y", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('init_z', 'scale')": {"key_metadata": [{"key": "init_z", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('output_head', 'bias', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('output_head', 'kernel', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
model/_sharding
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"UV9oZWFkLmxheWVycy4xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","UV9oZWFkLmxheWVycy4xLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF95LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF96LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5wdXRfZW1iZWRkaW5nLmVtYmVkZGluZy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMi5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnhfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnlfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnpfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}"}
|
model/array_metadatas/process_0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_y.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_z.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}]}
|
model/d/c45f7ee6086f49985ad321d2b2a3ba6e
ADDED
|
Binary file (11.3 kB). View file
|
|
|
model/manifest.ocdbt
ADDED
|
Binary file (116 Bytes). View file
|
|
|
model/ocdbt.process_0/d/605c938ecfb1e3e6754e3ae00c5f72bc
ADDED
|
Binary file (19.8 kB). View file
|
|
|
model/ocdbt.process_0/d/7f4dae192e6eb44d0122ff17d7409f09
ADDED
|
Binary file (553 Bytes). View file
|
|
|
model/ocdbt.process_0/d/8edf0dde4e7853964909c18476decf9b
ADDED
|
Binary file (188 Bytes). View file
|
|
|
model/ocdbt.process_0/d/cbfae2bb822befbe210a96190ca374f4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a07c5ecd0d330c842d23e9638472a478d5ec5db95b174dd8b9a7c210feee98c6
|
| 3 |
+
size 8994816
|
model/ocdbt.process_0/d/ef1cbb95fe20efb6593873c72dd9f866
ADDED
|
Binary file (509 Bytes). View file
|
|
|
model/ocdbt.process_0/d/f46f6390e2f476455c4ada9a4b7d1490
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9b7d8412566282890d093fbedb8fc65278b44b7fd17ebae4588cf8047be664b
|
| 3 |
+
size 9015296
|
model/ocdbt.process_0/manifest.ocdbt
ADDED
|
Binary file (340 Bytes). View file
|
|
|
opt/_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tree_metadata": {"('opt_state', '1', '0', 'count', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('opt_state', '1', '0', 'mu', 'Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('opt_state', '1', '0', 'mu', 'Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('opt_state', '1', '0', 'mu', 'input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'output_head', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('opt_state', '1', '0', 'mu', 'output_head', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}, "('opt_state', '1', '0', 'nu', 'Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('opt_state', '1', '0', 'nu', 'Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('opt_state', '1', '0', 'nu', 'input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'output_head', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('opt_state', '1', '0', 'nu', 'output_head', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}, "('opt_state', '1', '2', 'count', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('step', 'value')": {"key_metadata": [{"key": "step", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
opt/_sharding
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"b3B0X3N0YXRlLjEuMC5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5RX2hlYWQubGF5ZXJzLjEuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5RX2hlYWQubGF5ZXJzLjEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5pbnB1dF9lbWJlZGRpbmcuZW1iZWRkaW5nLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4yLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQueF9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQueV9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQuel9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5vdXRwdXRfaGVhZC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5vdXRwdXRfaGVhZC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5RX2hlYWQubGF5ZXJzLjEuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5RX2hlYWQubGF5ZXJzLjEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5pbnB1dF9lbWJlZGRpbmcuZW1iZWRkaW5nLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4yLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQueF9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQueV9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQuel9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5vdXRwdXRfaGVhZC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5vdXRwdXRfaGVhZC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMi5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","c3RlcC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}"}
|
opt/array_metadatas/process_0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "opt_state.1.0.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.2.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "step.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}]}
|
opt/d/856bed74554b34521e07a8345817413d
ADDED
|
Binary file (21.3 kB). View file
|
|
|
opt/manifest.ocdbt
ADDED
|
Binary file (119 Bytes). View file
|
|
|
opt/ocdbt.process_0/d/0c59d47160e8738ac28124f69a4a146a
ADDED
|
Binary file (580 Bytes). View file
|
|
|
opt/ocdbt.process_0/d/576ab8d76627e7403bcce532a19554f2
ADDED
|
Binary file (609 Bytes). View file
|
|
|
opt/ocdbt.process_0/d/6be33bd399e64a14c25071fd59e1d0ec
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f2e96c9a2ed3c959c81dba21cf92a72d0e8f6455b5e6ca1b3f9cb8ebb60e62d
|
| 3 |
+
size 18059264
|
opt/ocdbt.process_0/d/c0a68afd95c19fd66840469448087b0b
ADDED
|
Binary file (187 Bytes). View file
|
|
|
opt/ocdbt.process_0/d/e0bf35b5fe7aefe229d76d62f40c9712
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a65a16dd2cddc93695278b905c9cae8052d6883dc25c772b5b1587cae1c57cb
|
| 3 |
+
size 17768448
|
opt/ocdbt.process_0/d/e889c18b0bafdee5e513aed8c4b03e2f
ADDED
|
Binary file (618 Bytes). View file
|
|
|
opt/ocdbt.process_0/manifest.ocdbt
ADDED
|
Binary file (342 Bytes). View file
|
|
|