emiliocantuc commited on
Commit
0a205dd
·
verified ·
1 Parent(s): f14c385

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +7 -0
  2. 6500/_CHECKPOINT_METADATA +1 -0
  3. 6500/ema_model/_METADATA +1 -0
  4. 6500/ema_model/_sharding +1 -0
  5. 6500/ema_model/array_metadatas/process_0 +1 -0
  6. 6500/ema_model/d/e7d7a1dfc28cdf19d671e5db41a8b937 +0 -0
  7. 6500/ema_model/manifest.ocdbt +0 -0
  8. 6500/ema_model/ocdbt.process_0/d/3ce3cf8d9d0b1732e6e846245ecec9b9 +0 -0
  9. 6500/ema_model/ocdbt.process_0/d/8b5ab8d8a99351f87faaaed21e42b5f2 +0 -0
  10. 6500/ema_model/ocdbt.process_0/d/8d01b5d598b9844315a64974f052e00b +3 -0
  11. 6500/ema_model/ocdbt.process_0/d/92f1442c629bd3abe5613e77548d1b7d +0 -0
  12. 6500/ema_model/ocdbt.process_0/d/cd0df04a40836b889ce69237f458690d +3 -0
  13. 6500/ema_model/ocdbt.process_0/d/db55e15ace80b8eff91b714539fdb58c +0 -0
  14. 6500/ema_model/ocdbt.process_0/manifest.ocdbt +0 -0
  15. 6500/metrics/metrics +1 -0
  16. 6500/model/_METADATA +1 -0
  17. 6500/model/_sharding +1 -0
  18. 6500/model/array_metadatas/process_0 +1 -0
  19. 6500/model/d/12096fed2eb5e488c9c8858615718299 +0 -0
  20. 6500/model/manifest.ocdbt +0 -0
  21. 6500/model/ocdbt.process_0/d/07448aa3cab8ec465a9feff0ca6e73e9 +3 -0
  22. 6500/model/ocdbt.process_0/d/1f9d1efe85e5b88ec7a80d0b441e7720 +0 -0
  23. 6500/model/ocdbt.process_0/d/2c661f7b2e74f4c05eebc489e957a352 +0 -0
  24. 6500/model/ocdbt.process_0/d/624db43f20bbca9340da93963a4dcb3c +0 -0
  25. 6500/model/ocdbt.process_0/d/6ab70879b813fbf19a72c72d60755af5 +0 -0
  26. 6500/model/ocdbt.process_0/d/826d0b548a501b43e6160f9b79bcea6e +3 -0
  27. 6500/model/ocdbt.process_0/manifest.ocdbt +0 -0
  28. 6500/opt/_METADATA +1 -0
  29. 6500/opt/_sharding +1 -0
  30. 6500/opt/array_metadatas/process_0 +1 -0
  31. 6500/opt/d/e16199110a9671986644cd03b361074e +0 -0
  32. 6500/opt/manifest.ocdbt +0 -0
  33. 6500/opt/ocdbt.process_0/d/081a16d0abd59cebaf807ea915109518 +3 -0
  34. 6500/opt/ocdbt.process_0/d/1d5885136bed1795d875f170c905940d +0 -0
  35. 6500/opt/ocdbt.process_0/d/36b5cad5df5bbafddc998b42a9ed7073 +0 -0
  36. 6500/opt/ocdbt.process_0/d/cba9d28677c8172879542350e0fb229e +0 -0
  37. 6500/opt/ocdbt.process_0/d/d00652955f1571068567080ce27a255f +3 -0
  38. 6500/opt/ocdbt.process_0/d/dba3d1d17beb9e65e3361612dbdc7259 +3 -0
  39. 6500/opt/ocdbt.process_0/manifest.ocdbt +0 -0
  40. events.out.tfevents.1768881133.t1v-n-9c0e4925-w-0.1797621.0.v2 +3 -0
  41. plugins/profile/2026_01_20_03_53_12/t1v-n-9c0e4925-w-0.trace.json.gz +3 -0
  42. plugins/profile/2026_01_20_03_53_12/t1v-n-9c0e4925-w-0.xplane.pb +3 -0
  43. plugins/profile/2026_01_20_04_53_13/t1v-n-9c0e4925-w-0.trace.json.gz +3 -0
  44. plugins/profile/2026_01_20_04_53_13/t1v-n-9c0e4925-w-0.xplane.pb +3 -0
  45. plugins/profile/2026_01_20_05_53_14/t1v-n-9c0e4925-w-0.trace.json.gz +3 -0
  46. plugins/profile/2026_01_20_05_53_14/t1v-n-9c0e4925-w-0.xplane.pb +3 -0
  47. plugins/profile/2026_01_20_06_53_14/t1v-n-9c0e4925-w-0.trace.json.gz +3 -0
  48. plugins/profile/2026_01_20_06_53_14/t1v-n-9c0e4925-w-0.xplane.pb +3 -0
  49. plugins/profile/2026_01_20_07_53_15/t1v-n-9c0e4925-w-0.trace.json.gz +3 -0
  50. plugins/profile/2026_01_20_07_53_15/t1v-n-9c0e4925-w-0.xplane.pb +3 -0
.gitattributes CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ 6500/ema_model/ocdbt.process_0/d/8d01b5d598b9844315a64974f052e00b filter=lfs diff=lfs merge=lfs -text
37
+ 6500/ema_model/ocdbt.process_0/d/cd0df04a40836b889ce69237f458690d filter=lfs diff=lfs merge=lfs -text
38
+ 6500/model/ocdbt.process_0/d/07448aa3cab8ec465a9feff0ca6e73e9 filter=lfs diff=lfs merge=lfs -text
39
+ 6500/model/ocdbt.process_0/d/826d0b548a501b43e6160f9b79bcea6e filter=lfs diff=lfs merge=lfs -text
40
+ 6500/opt/ocdbt.process_0/d/081a16d0abd59cebaf807ea915109518 filter=lfs diff=lfs merge=lfs -text
41
+ 6500/opt/ocdbt.process_0/d/d00652955f1571068567080ce27a255f filter=lfs diff=lfs merge=lfs -text
42
+ 6500/opt/ocdbt.process_0/d/dba3d1d17beb9e65e3361612dbdc7259 filter=lfs diff=lfs merge=lfs -text
6500/_CHECKPOINT_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"item_handlers": {"ema_model": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler", "metrics": "orbax.checkpoint._src.handlers.json_checkpoint_handler.JsonCheckpointHandler", "model": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler", "opt": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler"}, "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1768891910008278924, "commit_timestamp_nsecs": 1768891910591111274, "custom_metadata": {}}
6500/ema_model/_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tree_metadata": {"('Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('init_y', 'scale')": {"key_metadata": [{"key": "init_y", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('init_y', 'state')": {"key_metadata": [{"key": "init_y", "key_type": 2}, {"key": "state", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 128]}}, "('init_z', 'scale')": {"key_metadata": [{"key": "init_z", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('init_z', 'state')": {"key_metadata": [{"key": "init_z", "key_type": 2}, {"key": "state", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 128]}}, "('input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('output_head', 'bias', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('output_head', 'kernel', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
6500/ema_model/_sharding ADDED
@@ -0,0 +1 @@
 
 
1
+ {"UV9oZWFkLmxheWVycy4xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","UV9oZWFkLmxheWVycy4xLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF95LnN0YXRl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF95LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF96LnN0YXRl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF96LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5wdXRfZW1iZWRkaW5nLmVtYmVkZGluZy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMi5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnhfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnlfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnpfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}"}
6500/ema_model/array_metadatas/process_0 ADDED
@@ -0,0 +1 @@
 
 
1
+ {"array_metadatas": [{"array_metadata": {"param_name": "Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_y.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_y.state", "write_shape": [1, 1, 128], "chunk_shape": [1, 1, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_z.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_z.state", "write_shape": [1, 1, 128], "chunk_shape": [1, 1, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}]}
6500/ema_model/d/e7d7a1dfc28cdf19d671e5db41a8b937 ADDED
Binary file (15.1 kB). View file
 
6500/ema_model/manifest.ocdbt ADDED
Binary file (116 Bytes). View file
 
6500/ema_model/ocdbt.process_0/d/3ce3cf8d9d0b1732e6e846245ecec9b9 ADDED
Binary file (584 Bytes). View file
 
6500/ema_model/ocdbt.process_0/d/8b5ab8d8a99351f87faaaed21e42b5f2 ADDED
Binary file (188 Bytes). View file
 
6500/ema_model/ocdbt.process_0/d/8d01b5d598b9844315a64974f052e00b ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccfe494c0694e94891aa1463a004066716e8e00558f521de086c86b911f89f25
3
+ size 5185536
6500/ema_model/ocdbt.process_0/d/92f1442c629bd3abe5613e77548d1b7d ADDED
Binary file (551 Bytes). View file
 
6500/ema_model/ocdbt.process_0/d/cd0df04a40836b889ce69237f458690d ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:130db22ee5ab7ffe1334fb2faeead6a0a41e3ab362db96dce2c097b7512e18b7
3
+ size 12849152
6500/ema_model/ocdbt.process_0/d/db55e15ace80b8eff91b714539fdb58c ADDED
Binary file (15 kB). View file
 
6500/ema_model/ocdbt.process_0/manifest.ocdbt ADDED
Binary file (341 Bytes). View file
 
6500/metrics/metrics ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval/cell_acc": 0.902761697769165, "eval/cell_acc_first_delta": 0.2032536268234253, "eval/cell_acc_halfway_delta": 0.01422649621963501, "eval/solved_acc": 0.75390625, "eval/solved_acc_first_delta": 0.7122396230697632, "eval/solved_acc_halfway_delta": 0.045572876930236816}
6500/model/_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tree_metadata": {"('Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('init_y', 'scale')": {"key_metadata": [{"key": "init_y", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('init_y', 'state')": {"key_metadata": [{"key": "init_y", "key_type": 2}, {"key": "state", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 128]}}, "('init_z', 'scale')": {"key_metadata": [{"key": "init_z", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('init_z', 'state')": {"key_metadata": [{"key": "init_z", "key_type": 2}, {"key": "state", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 1, 128]}}, "('input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('output_head', 'bias', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('output_head', 'kernel', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
6500/model/_sharding ADDED
@@ -0,0 +1 @@
 
 
1
+ {"UV9oZWFkLmxheWVycy4xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","UV9oZWFkLmxheWVycy4xLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF95LnN0YXRl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF95LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF96LnN0YXRl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF96LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5wdXRfZW1iZWRkaW5nLmVtYmVkZGluZy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMi5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnhfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnlfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnpfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}"}
6500/model/array_metadatas/process_0 ADDED
@@ -0,0 +1 @@
 
 
1
+ {"array_metadatas": [{"array_metadata": {"param_name": "Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_y.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_y.state", "write_shape": [1, 1, 128], "chunk_shape": [1, 1, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_z.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_z.state", "write_shape": [1, 1, 128], "chunk_shape": [1, 1, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}]}
6500/model/d/12096fed2eb5e488c9c8858615718299 ADDED
Binary file (15.1 kB). View file
 
6500/model/manifest.ocdbt ADDED
Binary file (116 Bytes). View file
 
6500/model/ocdbt.process_0/d/07448aa3cab8ec465a9feff0ca6e73e9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d59e35f7df87f0ed3f76121e0760a5e82ee7a132e5422632201a63bc29bc615
3
+ size 3035674
6500/model/ocdbt.process_0/d/1f9d1efe85e5b88ec7a80d0b441e7720 ADDED
Binary file (7.45 kB). View file
 
6500/model/ocdbt.process_0/d/2c661f7b2e74f4c05eebc489e957a352 ADDED
Binary file (2.6 kB). View file
 
6500/model/ocdbt.process_0/d/624db43f20bbca9340da93963a4dcb3c ADDED
Binary file (189 Bytes). View file
 
6500/model/ocdbt.process_0/d/6ab70879b813fbf19a72c72d60755af5 ADDED
Binary file (551 Bytes). View file
 
6500/model/ocdbt.process_0/d/826d0b548a501b43e6160f9b79bcea6e ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d23eaa998e3fda0c3c05fea97524712fefc48fa90d66fd9dea49f5346faac30
3
+ size 14999552
6500/model/ocdbt.process_0/manifest.ocdbt ADDED
Binary file (344 Bytes). View file
 
6500/opt/_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tree_metadata": {"('opt_state', '1', '0', 'count', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('opt_state', '1', '0', 'mu', 'Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('opt_state', '1', '0', 'mu', 'Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('opt_state', '1', '0', 'mu', 'input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'output_head', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('opt_state', '1', '0', 'mu', 'output_head', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}, "('opt_state', '1', '0', 'nu', 'Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('opt_state', '1', '0', 'nu', 'Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('opt_state', '1', '0', 'nu', 'input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'output_head', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('opt_state', '1', '0', 'nu', 'output_head', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}, "('opt_state', '1', '2', 'count', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('step', 'value')": {"key_metadata": [{"key": "step", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
6500/opt/_sharding ADDED
@@ -0,0 +1 @@
 
 
1
+ {"b3B0X3N0YXRlLjEuMC5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5RX2hlYWQubGF5ZXJzLjEuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5RX2hlYWQubGF5ZXJzLjEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5pbnB1dF9lbWJlZGRpbmcuZW1iZWRkaW5nLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4yLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQueF9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQueV9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQuel9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5vdXRwdXRfaGVhZC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5vdXRwdXRfaGVhZC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5RX2hlYWQubGF5ZXJzLjEuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5RX2hlYWQubGF5ZXJzLjEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5pbnB1dF9lbWJlZGRpbmcuZW1iZWRkaW5nLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4yLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQueF9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQueV9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQuel9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5vdXRwdXRfaGVhZC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5vdXRwdXRfaGVhZC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMi5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","c3RlcC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}"}
6500/opt/array_metadatas/process_0 ADDED
@@ -0,0 +1 @@
 
 
1
+ {"array_metadatas": [{"array_metadata": {"param_name": "opt_state.1.0.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.2.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "step.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}]}
6500/opt/d/e16199110a9671986644cd03b361074e ADDED
Binary file (21.5 kB). View file
 
6500/opt/manifest.ocdbt ADDED
Binary file (119 Bytes). View file
 
6500/opt/ocdbt.process_0/d/081a16d0abd59cebaf807ea915109518 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49e3ed281020a068236027693628c367cbeb4c36f5031d3db6fa55ca4ee8c6a6
3
+ size 23920640
6500/opt/ocdbt.process_0/d/1d5885136bed1795d875f170c905940d ADDED
Binary file (609 Bytes). View file
 
6500/opt/ocdbt.process_0/d/36b5cad5df5bbafddc998b42a9ed7073 ADDED
Binary file (580 Bytes). View file
 
6500/opt/ocdbt.process_0/d/cba9d28677c8172879542350e0fb229e ADDED
Binary file (187 Bytes). View file
 
6500/opt/ocdbt.process_0/d/d00652955f1571068567080ce27a255f ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4ce7a0e57e496de69d86644214e050316ff705600d8c4fcdff844c40309c4ae
3
+ size 9768960
6500/opt/ocdbt.process_0/d/dba3d1d17beb9e65e3361612dbdc7259 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5275c1f8bb28123953da610b4ab753eb4567acd350ea0f0a35f2afa90fdffd8d
3
+ size 2206303
6500/opt/ocdbt.process_0/manifest.ocdbt ADDED
Binary file (348 Bytes). View file
 
events.out.tfevents.1768881133.t1v-n-9c0e4925-w-0.1797621.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6aa166932498275aeda62ac63b39cec6e1a4a38f6cafacfbd9051515523fbcc8
3
+ size 24743850
plugins/profile/2026_01_20_03_53_12/t1v-n-9c0e4925-w-0.trace.json.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9382728f94d66c16d5ab4c99d92601b5621cb66ddfcf1deb7134cb2fec805c9e
3
+ size 27795010
plugins/profile/2026_01_20_03_53_12/t1v-n-9c0e4925-w-0.xplane.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35171fc73ef328787d735febf8ceca5c2be66bc6091f2b61e04163dda60143f8
3
+ size 188693866
plugins/profile/2026_01_20_04_53_13/t1v-n-9c0e4925-w-0.trace.json.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18af4c6ac3f1717915d703ae2670903548b5669a784689379ec806c1af030a06
3
+ size 28250526
plugins/profile/2026_01_20_04_53_13/t1v-n-9c0e4925-w-0.xplane.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dcc8939720d13105f44e08f32ac65666abb2db411e55669c532523953f9e04c
3
+ size 188640069
plugins/profile/2026_01_20_05_53_14/t1v-n-9c0e4925-w-0.trace.json.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456a3e5585ea576717ac70ab6a48e7468f35446283a03229d4df82528286fae1
3
+ size 28134325
plugins/profile/2026_01_20_05_53_14/t1v-n-9c0e4925-w-0.xplane.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:310faa728330db0f90858236414d1c69807c7d5ad5ae4dc7387f068de15c8314
3
+ size 188605666
plugins/profile/2026_01_20_06_53_14/t1v-n-9c0e4925-w-0.trace.json.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:614e6f39fa8f77b6f01bd830e71de0f81a6850a650249678d58ef7fcf1f25885
3
+ size 27983064
plugins/profile/2026_01_20_06_53_14/t1v-n-9c0e4925-w-0.xplane.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c27228eaa53f7d3a910f3fe46c407d8dcdc7e3345f00047108595bf77102f0e
3
+ size 188764293
plugins/profile/2026_01_20_07_53_15/t1v-n-9c0e4925-w-0.trace.json.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bb2928399e2ba6793eb3a902b2f57b5ba05f9c6fb49a9ebb271d48bb2089797
3
+ size 28117692
plugins/profile/2026_01_20_07_53_15/t1v-n-9c0e4925-w-0.xplane.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bcc64a6b812c8fd0a5716770cfc93d3e45ef6fa8d71e4a2632122bc7f52196f
3
+ size 188862118