Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +7 -0
- 6500/_CHECKPOINT_METADATA +1 -0
- 6500/ema_model/_METADATA +1 -0
- 6500/ema_model/_sharding +1 -0
- 6500/ema_model/array_metadatas/process_0 +1 -0
- 6500/ema_model/d/e7d7a1dfc28cdf19d671e5db41a8b937 +0 -0
- 6500/ema_model/manifest.ocdbt +0 -0
- 6500/ema_model/ocdbt.process_0/d/3ce3cf8d9d0b1732e6e846245ecec9b9 +0 -0
- 6500/ema_model/ocdbt.process_0/d/8b5ab8d8a99351f87faaaed21e42b5f2 +0 -0
- 6500/ema_model/ocdbt.process_0/d/8d01b5d598b9844315a64974f052e00b +3 -0
- 6500/ema_model/ocdbt.process_0/d/92f1442c629bd3abe5613e77548d1b7d +0 -0
- 6500/ema_model/ocdbt.process_0/d/cd0df04a40836b889ce69237f458690d +3 -0
- 6500/ema_model/ocdbt.process_0/d/db55e15ace80b8eff91b714539fdb58c +0 -0
- 6500/ema_model/ocdbt.process_0/manifest.ocdbt +0 -0
- 6500/metrics/metrics +1 -0
- 6500/model/_METADATA +1 -0
- 6500/model/_sharding +1 -0
- 6500/model/array_metadatas/process_0 +1 -0
- 6500/model/d/12096fed2eb5e488c9c8858615718299 +0 -0
- 6500/model/manifest.ocdbt +0 -0
- 6500/model/ocdbt.process_0/d/07448aa3cab8ec465a9feff0ca6e73e9 +3 -0
- 6500/model/ocdbt.process_0/d/1f9d1efe85e5b88ec7a80d0b441e7720 +0 -0
- 6500/model/ocdbt.process_0/d/2c661f7b2e74f4c05eebc489e957a352 +0 -0
- 6500/model/ocdbt.process_0/d/624db43f20bbca9340da93963a4dcb3c +0 -0
- 6500/model/ocdbt.process_0/d/6ab70879b813fbf19a72c72d60755af5 +0 -0
- 6500/model/ocdbt.process_0/d/826d0b548a501b43e6160f9b79bcea6e +3 -0
- 6500/model/ocdbt.process_0/manifest.ocdbt +0 -0
- 6500/opt/_METADATA +1 -0
- 6500/opt/_sharding +1 -0
- 6500/opt/array_metadatas/process_0 +1 -0
- 6500/opt/d/e16199110a9671986644cd03b361074e +0 -0
- 6500/opt/manifest.ocdbt +0 -0
- 6500/opt/ocdbt.process_0/d/081a16d0abd59cebaf807ea915109518 +3 -0
- 6500/opt/ocdbt.process_0/d/1d5885136bed1795d875f170c905940d +0 -0
- 6500/opt/ocdbt.process_0/d/36b5cad5df5bbafddc998b42a9ed7073 +0 -0
- 6500/opt/ocdbt.process_0/d/cba9d28677c8172879542350e0fb229e +0 -0
- 6500/opt/ocdbt.process_0/d/d00652955f1571068567080ce27a255f +3 -0
- 6500/opt/ocdbt.process_0/d/dba3d1d17beb9e65e3361612dbdc7259 +3 -0
- 6500/opt/ocdbt.process_0/manifest.ocdbt +0 -0
- events.out.tfevents.1768881133.t1v-n-9c0e4925-w-0.1797621.0.v2 +3 -0
- plugins/profile/2026_01_20_03_53_12/t1v-n-9c0e4925-w-0.trace.json.gz +3 -0
- plugins/profile/2026_01_20_03_53_12/t1v-n-9c0e4925-w-0.xplane.pb +3 -0
- plugins/profile/2026_01_20_04_53_13/t1v-n-9c0e4925-w-0.trace.json.gz +3 -0
- plugins/profile/2026_01_20_04_53_13/t1v-n-9c0e4925-w-0.xplane.pb +3 -0
- plugins/profile/2026_01_20_05_53_14/t1v-n-9c0e4925-w-0.trace.json.gz +3 -0
- plugins/profile/2026_01_20_05_53_14/t1v-n-9c0e4925-w-0.xplane.pb +3 -0
- plugins/profile/2026_01_20_06_53_14/t1v-n-9c0e4925-w-0.trace.json.gz +3 -0
- plugins/profile/2026_01_20_06_53_14/t1v-n-9c0e4925-w-0.xplane.pb +3 -0
- plugins/profile/2026_01_20_07_53_15/t1v-n-9c0e4925-w-0.trace.json.gz +3 -0
- plugins/profile/2026_01_20_07_53_15/t1v-n-9c0e4925-w-0.xplane.pb +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
6500/ema_model/ocdbt.process_0/d/8d01b5d598b9844315a64974f052e00b filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
6500/ema_model/ocdbt.process_0/d/cd0df04a40836b889ce69237f458690d filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
6500/model/ocdbt.process_0/d/07448aa3cab8ec465a9feff0ca6e73e9 filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
6500/model/ocdbt.process_0/d/826d0b548a501b43e6160f9b79bcea6e filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
6500/opt/ocdbt.process_0/d/081a16d0abd59cebaf807ea915109518 filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
6500/opt/ocdbt.process_0/d/d00652955f1571068567080ce27a255f filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
6500/opt/ocdbt.process_0/d/dba3d1d17beb9e65e3361612dbdc7259 filter=lfs diff=lfs merge=lfs -text
|
6500/_CHECKPOINT_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"item_handlers": {"ema_model": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler", "metrics": "orbax.checkpoint._src.handlers.json_checkpoint_handler.JsonCheckpointHandler", "model": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler", "opt": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler"}, "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1768891910008278924, "commit_timestamp_nsecs": 1768891910591111274, "custom_metadata": {}}
|
6500/ema_model/_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tree_metadata": {"('Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('init_y', 'scale')": {"key_metadata": [{"key": "init_y", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('init_y', 'state')": {"key_metadata": [{"key": "init_y", "key_type": 2}, {"key": "state", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 128]}}, "('init_z', 'scale')": {"key_metadata": [{"key": "init_z", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('init_z', 'state')": {"key_metadata": [{"key": "init_z", "key_type": 2}, {"key": "state", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 128]}}, "('input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('output_head', 'bias', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('output_head', 'kernel', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
6500/ema_model/_sharding
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"UV9oZWFkLmxheWVycy4xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","UV9oZWFkLmxheWVycy4xLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF95LnN0YXRl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF95LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF96LnN0YXRl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF96LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5wdXRfZW1iZWRkaW5nLmVtYmVkZGluZy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMi5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnhfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnlfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnpfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}"}
|
6500/ema_model/array_metadatas/process_0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_y.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_y.state", "write_shape": [1, 1, 128], "chunk_shape": [1, 1, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_z.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_z.state", "write_shape": [1, 1, 128], "chunk_shape": [1, 1, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}]}
|
6500/ema_model/d/e7d7a1dfc28cdf19d671e5db41a8b937
ADDED
|
Binary file (15.1 kB). View file
|
|
|
6500/ema_model/manifest.ocdbt
ADDED
|
Binary file (116 Bytes). View file
|
|
|
6500/ema_model/ocdbt.process_0/d/3ce3cf8d9d0b1732e6e846245ecec9b9
ADDED
|
Binary file (584 Bytes). View file
|
|
|
6500/ema_model/ocdbt.process_0/d/8b5ab8d8a99351f87faaaed21e42b5f2
ADDED
|
Binary file (188 Bytes). View file
|
|
|
6500/ema_model/ocdbt.process_0/d/8d01b5d598b9844315a64974f052e00b
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccfe494c0694e94891aa1463a004066716e8e00558f521de086c86b911f89f25
|
| 3 |
+
size 5185536
|
6500/ema_model/ocdbt.process_0/d/92f1442c629bd3abe5613e77548d1b7d
ADDED
|
Binary file (551 Bytes). View file
|
|
|
6500/ema_model/ocdbt.process_0/d/cd0df04a40836b889ce69237f458690d
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:130db22ee5ab7ffe1334fb2faeead6a0a41e3ab362db96dce2c097b7512e18b7
|
| 3 |
+
size 12849152
|
6500/ema_model/ocdbt.process_0/d/db55e15ace80b8eff91b714539fdb58c
ADDED
|
Binary file (15 kB). View file
|
|
|
6500/ema_model/ocdbt.process_0/manifest.ocdbt
ADDED
|
Binary file (341 Bytes). View file
|
|
|
6500/metrics/metrics
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"eval/cell_acc": 0.902761697769165, "eval/cell_acc_first_delta": 0.2032536268234253, "eval/cell_acc_halfway_delta": 0.01422649621963501, "eval/solved_acc": 0.75390625, "eval/solved_acc_first_delta": 0.7122396230697632, "eval/solved_acc_halfway_delta": 0.045572876930236816}
|
6500/model/_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tree_metadata": {"('Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('init_y', 'scale')": {"key_metadata": [{"key": "init_y", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('init_y', 'state')": {"key_metadata": [{"key": "init_y", "key_type": 2}, {"key": "state", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 128]}}, "('init_z', 'scale')": {"key_metadata": [{"key": "init_z", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('init_z', 'state')": {"key_metadata": [{"key": "init_z", "key_type": 2}, {"key": "state", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 128]}}, "('input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('output_head', 'bias', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('output_head', 'kernel', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
6500/model/_sharding
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"UV9oZWFkLmxheWVycy4xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","UV9oZWFkLmxheWVycy4xLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF95LnN0YXRl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF95LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF96LnN0YXRl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF96LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5wdXRfZW1iZWRkaW5nLmVtYmVkZGluZy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMi5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnhfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnlfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnpfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}"}
|
6500/model/array_metadatas/process_0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_y.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_y.state", "write_shape": [1, 1, 128], "chunk_shape": [1, 1, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_z.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_z.state", "write_shape": [1, 1, 128], "chunk_shape": [1, 1, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}]}
|
6500/model/d/12096fed2eb5e488c9c8858615718299
ADDED
|
Binary file (15.1 kB). View file
|
|
|
6500/model/manifest.ocdbt
ADDED
|
Binary file (116 Bytes). View file
|
|
|
6500/model/ocdbt.process_0/d/07448aa3cab8ec465a9feff0ca6e73e9
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d59e35f7df87f0ed3f76121e0760a5e82ee7a132e5422632201a63bc29bc615
|
| 3 |
+
size 3035674
|
6500/model/ocdbt.process_0/d/1f9d1efe85e5b88ec7a80d0b441e7720
ADDED
|
Binary file (7.45 kB). View file
|
|
|
6500/model/ocdbt.process_0/d/2c661f7b2e74f4c05eebc489e957a352
ADDED
|
Binary file (2.6 kB). View file
|
|
|
6500/model/ocdbt.process_0/d/624db43f20bbca9340da93963a4dcb3c
ADDED
|
Binary file (189 Bytes). View file
|
|
|
6500/model/ocdbt.process_0/d/6ab70879b813fbf19a72c72d60755af5
ADDED
|
Binary file (551 Bytes). View file
|
|
|
6500/model/ocdbt.process_0/d/826d0b548a501b43e6160f9b79bcea6e
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d23eaa998e3fda0c3c05fea97524712fefc48fa90d66fd9dea49f5346faac30
|
| 3 |
+
size 14999552
|
6500/model/ocdbt.process_0/manifest.ocdbt
ADDED
|
Binary file (344 Bytes). View file
|
|
|
6500/opt/_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tree_metadata": {"('opt_state', '1', '0', 'count', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('opt_state', '1', '0', 'mu', 'Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('opt_state', '1', '0', 'mu', 'Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('opt_state', '1', '0', 'mu', 'input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'output_head', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('opt_state', '1', '0', 'mu', 'output_head', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}, "('opt_state', '1', '0', 'nu', 'Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('opt_state', '1', '0', 'nu', 'Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('opt_state', '1', '0', 'nu', 'input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'output_head', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('opt_state', '1', '0', 'nu', 'output_head', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}, "('opt_state', '1', '2', 'count', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('step', 'value')": {"key_metadata": [{"key": "step", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
6500/opt/_sharding
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"b3B0X3N0YXRlLjEuMC5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5RX2hlYWQubGF5ZXJzLjEuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5RX2hlYWQubGF5ZXJzLjEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5pbnB1dF9lbWJlZGRpbmcuZW1iZWRkaW5nLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4yLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQueF9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQueV9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQuel9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5vdXRwdXRfaGVhZC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5vdXRwdXRfaGVhZC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5RX2hlYWQubGF5ZXJzLjEuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5RX2hlYWQubGF5ZXJzLjEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5pbnB1dF9lbWJlZGRpbmcuZW1iZWRkaW5nLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4yLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQueF9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQueV9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQuel9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5vdXRwdXRfaGVhZC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5vdXRwdXRfaGVhZC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMi5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","c3RlcC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}"}
|
6500/opt/array_metadatas/process_0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "opt_state.1.0.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.2.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "step.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}]}
|
6500/opt/d/e16199110a9671986644cd03b361074e
ADDED
|
Binary file (21.5 kB). View file
|
|
|
6500/opt/manifest.ocdbt
ADDED
|
Binary file (119 Bytes). View file
|
|
|
6500/opt/ocdbt.process_0/d/081a16d0abd59cebaf807ea915109518
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49e3ed281020a068236027693628c367cbeb4c36f5031d3db6fa55ca4ee8c6a6
|
| 3 |
+
size 23920640
|
6500/opt/ocdbt.process_0/d/1d5885136bed1795d875f170c905940d
ADDED
|
Binary file (609 Bytes). View file
|
|
|
6500/opt/ocdbt.process_0/d/36b5cad5df5bbafddc998b42a9ed7073
ADDED
|
Binary file (580 Bytes). View file
|
|
|
6500/opt/ocdbt.process_0/d/cba9d28677c8172879542350e0fb229e
ADDED
|
Binary file (187 Bytes). View file
|
|
|
6500/opt/ocdbt.process_0/d/d00652955f1571068567080ce27a255f
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4ce7a0e57e496de69d86644214e050316ff705600d8c4fcdff844c40309c4ae
|
| 3 |
+
size 9768960
|
6500/opt/ocdbt.process_0/d/dba3d1d17beb9e65e3361612dbdc7259
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5275c1f8bb28123953da610b4ab753eb4567acd350ea0f0a35f2afa90fdffd8d
|
| 3 |
+
size 2206303
|
6500/opt/ocdbt.process_0/manifest.ocdbt
ADDED
|
Binary file (348 Bytes). View file
|
|
|
events.out.tfevents.1768881133.t1v-n-9c0e4925-w-0.1797621.0.v2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6aa166932498275aeda62ac63b39cec6e1a4a38f6cafacfbd9051515523fbcc8
|
| 3 |
+
size 24743850
|
plugins/profile/2026_01_20_03_53_12/t1v-n-9c0e4925-w-0.trace.json.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9382728f94d66c16d5ab4c99d92601b5621cb66ddfcf1deb7134cb2fec805c9e
|
| 3 |
+
size 27795010
|
plugins/profile/2026_01_20_03_53_12/t1v-n-9c0e4925-w-0.xplane.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35171fc73ef328787d735febf8ceca5c2be66bc6091f2b61e04163dda60143f8
|
| 3 |
+
size 188693866
|
plugins/profile/2026_01_20_04_53_13/t1v-n-9c0e4925-w-0.trace.json.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18af4c6ac3f1717915d703ae2670903548b5669a784689379ec806c1af030a06
|
| 3 |
+
size 28250526
|
plugins/profile/2026_01_20_04_53_13/t1v-n-9c0e4925-w-0.xplane.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dcc8939720d13105f44e08f32ac65666abb2db411e55669c532523953f9e04c
|
| 3 |
+
size 188640069
|
plugins/profile/2026_01_20_05_53_14/t1v-n-9c0e4925-w-0.trace.json.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:456a3e5585ea576717ac70ab6a48e7468f35446283a03229d4df82528286fae1
|
| 3 |
+
size 28134325
|
plugins/profile/2026_01_20_05_53_14/t1v-n-9c0e4925-w-0.xplane.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:310faa728330db0f90858236414d1c69807c7d5ad5ae4dc7387f068de15c8314
|
| 3 |
+
size 188605666
|
plugins/profile/2026_01_20_06_53_14/t1v-n-9c0e4925-w-0.trace.json.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:614e6f39fa8f77b6f01bd830e71de0f81a6850a650249678d58ef7fcf1f25885
|
| 3 |
+
size 27983064
|
plugins/profile/2026_01_20_06_53_14/t1v-n-9c0e4925-w-0.xplane.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c27228eaa53f7d3a910f3fe46c407d8dcdc7e3345f00047108595bf77102f0e
|
| 3 |
+
size 188764293
|
plugins/profile/2026_01_20_07_53_15/t1v-n-9c0e4925-w-0.trace.json.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bb2928399e2ba6793eb3a902b2f57b5ba05f9c6fb49a9ebb271d48bb2089797
|
| 3 |
+
size 28117692
|
plugins/profile/2026_01_20_07_53_15/t1v-n-9c0e4925-w-0.xplane.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bcc64a6b812c8fd0a5716770cfc93d3e45ef6fa8d71e4a2632122bc7f52196f
|
| 3 |
+
size 188862118
|