emiliocantuc commited on
Commit
e94d9bf
·
verified ·
1 Parent(s): e1a20a8

Upload folder using huggingface_hub

Browse files
Files changed (39) hide show
  1. .gitattributes +6 -0
  2. _CHECKPOINT_METADATA +1 -0
  3. ema_model/_METADATA +1 -0
  4. ema_model/_sharding +1 -0
  5. ema_model/array_metadatas/process_0 +1 -0
  6. ema_model/d/623503ab6ef4733c285bc9b3d25bbd80 +0 -0
  7. ema_model/manifest.ocdbt +0 -0
  8. ema_model/ocdbt.process_0/d/617ce87548d409cdc7505f0249cf47c1 +0 -0
  9. ema_model/ocdbt.process_0/d/82649f39b50e05cbdbce02c8b44d094c +0 -0
  10. ema_model/ocdbt.process_0/d/846b4b9482e83afcdd81c76d11fc14da +0 -0
  11. ema_model/ocdbt.process_0/d/e5503b6a42e304e6f5c8a1ae10683e2b +3 -0
  12. ema_model/ocdbt.process_0/d/e64d117118b1c71994010273a1351438 +3 -0
  13. ema_model/ocdbt.process_0/d/ec9215383c89e2a364cf47b9110a13e5 +0 -0
  14. ema_model/ocdbt.process_0/manifest.ocdbt +0 -0
  15. metrics/metrics +1 -0
  16. model/_METADATA +1 -0
  17. model/_sharding +1 -0
  18. model/array_metadatas/process_0 +1 -0
  19. model/d/c45f7ee6086f49985ad321d2b2a3ba6e +0 -0
  20. model/manifest.ocdbt +0 -0
  21. model/ocdbt.process_0/d/605c938ecfb1e3e6754e3ae00c5f72bc +0 -0
  22. model/ocdbt.process_0/d/7f4dae192e6eb44d0122ff17d7409f09 +0 -0
  23. model/ocdbt.process_0/d/8edf0dde4e7853964909c18476decf9b +0 -0
  24. model/ocdbt.process_0/d/cbfae2bb822befbe210a96190ca374f4 +3 -0
  25. model/ocdbt.process_0/d/ef1cbb95fe20efb6593873c72dd9f866 +0 -0
  26. model/ocdbt.process_0/d/f46f6390e2f476455c4ada9a4b7d1490 +3 -0
  27. model/ocdbt.process_0/manifest.ocdbt +0 -0
  28. opt/_METADATA +1 -0
  29. opt/_sharding +1 -0
  30. opt/array_metadatas/process_0 +1 -0
  31. opt/d/856bed74554b34521e07a8345817413d +0 -0
  32. opt/manifest.ocdbt +0 -0
  33. opt/ocdbt.process_0/d/0c59d47160e8738ac28124f69a4a146a +0 -0
  34. opt/ocdbt.process_0/d/576ab8d76627e7403bcce532a19554f2 +0 -0
  35. opt/ocdbt.process_0/d/6be33bd399e64a14c25071fd59e1d0ec +3 -0
  36. opt/ocdbt.process_0/d/c0a68afd95c19fd66840469448087b0b +0 -0
  37. opt/ocdbt.process_0/d/e0bf35b5fe7aefe229d76d62f40c9712 +3 -0
  38. opt/ocdbt.process_0/d/e889c18b0bafdee5e513aed8c4b03e2f +0 -0
  39. opt/ocdbt.process_0/manifest.ocdbt +0 -0
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ ema_model/ocdbt.process_0/d/e5503b6a42e304e6f5c8a1ae10683e2b filter=lfs diff=lfs merge=lfs -text
37
+ ema_model/ocdbt.process_0/d/e64d117118b1c71994010273a1351438 filter=lfs diff=lfs merge=lfs -text
38
+ model/ocdbt.process_0/d/cbfae2bb822befbe210a96190ca374f4 filter=lfs diff=lfs merge=lfs -text
39
+ model/ocdbt.process_0/d/f46f6390e2f476455c4ada9a4b7d1490 filter=lfs diff=lfs merge=lfs -text
40
+ opt/ocdbt.process_0/d/6be33bd399e64a14c25071fd59e1d0ec filter=lfs diff=lfs merge=lfs -text
41
+ opt/ocdbt.process_0/d/e0bf35b5fe7aefe229d76d62f40c9712 filter=lfs diff=lfs merge=lfs -text
_CHECKPOINT_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"item_handlers": {"ema_model": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler", "metrics": "orbax.checkpoint._src.handlers.json_checkpoint_handler.JsonCheckpointHandler", "model": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler", "opt": "orbax.checkpoint._src.handlers.standard_checkpoint_handler.StandardCheckpointHandler"}, "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1770143260024716825, "commit_timestamp_nsecs": 1770143260622757460, "custom_metadata": {}}
ema_model/_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tree_metadata": {"('Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('init_y', 'scale')": {"key_metadata": [{"key": "init_y", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('init_z', 'scale')": {"key_metadata": [{"key": "init_z", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('output_head', 'bias', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('output_head', 'kernel', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
ema_model/_sharding ADDED
@@ -0,0 +1 @@
 
 
1
+ {"UV9oZWFkLmxheWVycy4xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","UV9oZWFkLmxheWVycy4xLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF95LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF96LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5wdXRfZW1iZWRkaW5nLmVtYmVkZGluZy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMi5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnhfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnlfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnpfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}"}
ema_model/array_metadatas/process_0 ADDED
@@ -0,0 +1 @@
 
 
1
+ {"array_metadatas": [{"array_metadata": {"param_name": "Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_y.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_z.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}]}
ema_model/d/623503ab6ef4733c285bc9b3d25bbd80 ADDED
Binary file (11.3 kB). View file
 
ema_model/manifest.ocdbt ADDED
Binary file (116 Bytes). View file
 
ema_model/ocdbt.process_0/d/617ce87548d409cdc7505f0249cf47c1 ADDED
Binary file (199 Bytes). View file
 
ema_model/ocdbt.process_0/d/82649f39b50e05cbdbce02c8b44d094c ADDED
Binary file (509 Bytes). View file
 
ema_model/ocdbt.process_0/d/846b4b9482e83afcdd81c76d11fc14da ADDED
Binary file (2.58 kB). View file
 
ema_model/ocdbt.process_0/d/e5503b6a42e304e6f5c8a1ae10683e2b ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49029917a50bc0bf244306284e7b17b6bae8ac9a419b0f5020e200884f019191
3
+ size 2927943
ema_model/ocdbt.process_0/d/e64d117118b1c71994010273a1351438 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c085508b125a147948a83874348aa897532de9a5f1d153cf81addbae3a9189c0
3
+ size 15089664
ema_model/ocdbt.process_0/d/ec9215383c89e2a364cf47b9110a13e5 ADDED
Binary file (21.8 kB). View file
 
ema_model/ocdbt.process_0/manifest.ocdbt ADDED
Binary file (344 Bytes). View file
 
metrics/metrics ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval/cell_acc": 0.9051730036735535, "eval/cell_acc_first_delta": 0.17279130220413208, "eval/cell_acc_halfway_delta": 0.014821231365203857, "eval/cell_acc_train_length_delta": 0.015721440315246582, "eval/effective_N_supervision": 32.0, "eval/solved_acc": 0.7630208730697632, "eval/solved_acc_first_delta": 0.6028646230697632, "eval/solved_acc_halfway_delta": 0.05078125, "eval/solved_acc_train_length_delta": 0.05338543653488159}
model/_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tree_metadata": {"('Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('init_y', 'scale')": {"key_metadata": [{"key": "init_y", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('init_z', 'scale')": {"key_metadata": [{"key": "init_z", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('output_head', 'bias', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('output_head', 'kernel', 'value')": {"key_metadata": [{"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
model/_sharding ADDED
@@ -0,0 +1 @@
 
 
1
+ {"UV9oZWFkLmxheWVycy4xLmJpYXMudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","UV9oZWFkLmxheWVycy4xLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF95LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5pdF96LnNjYWxl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","aW5wdXRfZW1iZWRkaW5nLmVtYmVkZGluZy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3V0cHV0X2hlYWQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMC5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5kX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcxLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlcyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMS5sX21peGVyLlczLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lm5ldC5sYXllcnMuMi5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnhfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnlfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","bmV0Lnpfbm9ybS5zY2FsZS52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}"}
model/array_metadatas/process_0 ADDED
@@ -0,0 +1 @@
 
 
1
+ {"array_metadatas": [{"array_metadata": {"param_name": "Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_y.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "init_z.scale", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}]}
model/d/c45f7ee6086f49985ad321d2b2a3ba6e ADDED
Binary file (11.3 kB). View file
 
model/manifest.ocdbt ADDED
Binary file (116 Bytes). View file
 
model/ocdbt.process_0/d/605c938ecfb1e3e6754e3ae00c5f72bc ADDED
Binary file (19.8 kB). View file
 
model/ocdbt.process_0/d/7f4dae192e6eb44d0122ff17d7409f09 ADDED
Binary file (553 Bytes). View file
 
model/ocdbt.process_0/d/8edf0dde4e7853964909c18476decf9b ADDED
Binary file (188 Bytes). View file
 
model/ocdbt.process_0/d/cbfae2bb822befbe210a96190ca374f4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a07c5ecd0d330c842d23e9638472a478d5ec5db95b174dd8b9a7c210feee98c6
3
+ size 8994816
model/ocdbt.process_0/d/ef1cbb95fe20efb6593873c72dd9f866 ADDED
Binary file (509 Bytes). View file
 
model/ocdbt.process_0/d/f46f6390e2f476455c4ada9a4b7d1490 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9b7d8412566282890d093fbedb8fc65278b44b7fd17ebae4588cf8047be664b
3
+ size 9015296
model/ocdbt.process_0/manifest.ocdbt ADDED
Binary file (340 Bytes). View file
 
opt/_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tree_metadata": {"('opt_state', '1', '0', 'count', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('opt_state', '1', '0', 'mu', 'Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('opt_state', '1', '0', 'mu', 'Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('opt_state', '1', '0', 'mu', 'input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'mu', 'net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'mu', 'output_head', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('opt_state', '1', '0', 'mu', 'output_head', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}, "('opt_state', '1', '0', 'nu', 'Q_head', 'layers', '1', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('opt_state', '1', '0', 'nu', 'Q_head', 'layers', '1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "Q_head", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1]}}, "('opt_state', '1', '0', 'nu', 'input_embedding', 'embedding', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "input_embedding", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10, 128]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '0', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'd_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'd_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [384, 512]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'd_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "d_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 1536]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'l_mixer', 'W1', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W1", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'l_mixer', 'W2', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W2", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 81]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '1', 'l_mixer', 'W3', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "l_mixer", "key_type": 2}, {"key": "W3", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [81, 64]}}, "('opt_state', '1', '0', 'nu', 'net', 'net', 'layers', '2', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "layers", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'net', 'x_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "x_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'net', 'y_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "y_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'net', 'z_norm', 'scale', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "net", "key_type": 2}, {"key": "z_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128]}}, "('opt_state', '1', '0', 'nu', 'output_head', 'bias', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [10]}}, "('opt_state', '1', '0', 'nu', 'output_head', 'kernel', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "nu", "key_type": 2}, {"key": "output_head", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [128, 10]}}, "('opt_state', '1', '2', 'count', 'value')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('step', 'value')": {"key_metadata": [{"key": "step", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
opt/_sharding ADDED
@@ -0,0 +1 @@
 
 
1
+ {"b3B0X3N0YXRlLjEuMC5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5RX2hlYWQubGF5ZXJzLjEuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5RX2hlYWQubGF5ZXJzLjEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5pbnB1dF9lbWJlZGRpbmcuZW1iZWRkaW5nLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQubmV0LmxheWVycy4yLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQueF9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQueV9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5uZXQuel9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5vdXRwdXRfaGVhZC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5tdS5vdXRwdXRfaGVhZC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5RX2hlYWQubGF5ZXJzLjEuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5RX2hlYWQubGF5ZXJzLjEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5pbnB1dF9lbWJlZGRpbmcuZW1iZWRkaW5nLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4wLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmRfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzEua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4xLmxfbWl4ZXIuVzMua2VybmVsLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQubmV0LmxheWVycy4yLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQueF9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQueV9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5uZXQuel9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5vdXRwdXRfaGVhZC5iaWFzLnZhbHVl":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMC5udS5vdXRwdXRfaGVhZC5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","b3B0X3N0YXRlLjEuMi5jb3VudC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}","c3RlcC52YWx1ZQ==":"{\"sharding_type\": \"NamedSharding\", \"shape\": [4], \"axis_names\": [\"data\"], \"axis_types\": [\"AxisType.Auto\"], \"partition_spec\": [], \"device_mesh\": {\"mesh\": [{\"id\": 0}, {\"id\": 2}, {\"id\": 1}, {\"id\": 3}]}}"}
opt/array_metadatas/process_0 ADDED
@@ -0,0 +1 @@
 
 
1
+ {"array_metadatas": [{"array_metadata": {"param_name": "opt_state.1.0.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.Q_head.layers.1.bias.value", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.Q_head.layers.1.kernel.value", "write_shape": [128, 1], "chunk_shape": [128, 1], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.input_embedding.embedding.value", "write_shape": [10, 128], "chunk_shape": [10, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.0.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.d_mixer.W1.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.d_mixer.W2.kernel.value", "write_shape": [384, 512], "chunk_shape": [384, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.d_mixer.W3.kernel.value", "write_shape": [128, 1536], "chunk_shape": [128, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.l_mixer.W1.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.l_mixer.W2.kernel.value", "write_shape": [64, 81], "chunk_shape": [64, 81], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.1.l_mixer.W3.kernel.value", "write_shape": [81, 64], "chunk_shape": [81, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.net.layers.2.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.x_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.y_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.net.z_norm.scale.value", "write_shape": [128], "chunk_shape": [128], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.output_head.bias.value", "write_shape": [10], "chunk_shape": [10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.output_head.kernel.value", "write_shape": [128, 10], "chunk_shape": [128, 10], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.2.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "step.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}]}
opt/d/856bed74554b34521e07a8345817413d ADDED
Binary file (21.3 kB). View file
 
opt/manifest.ocdbt ADDED
Binary file (119 Bytes). View file
 
opt/ocdbt.process_0/d/0c59d47160e8738ac28124f69a4a146a ADDED
Binary file (580 Bytes). View file
 
opt/ocdbt.process_0/d/576ab8d76627e7403bcce532a19554f2 ADDED
Binary file (609 Bytes). View file
 
opt/ocdbt.process_0/d/6be33bd399e64a14c25071fd59e1d0ec ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f2e96c9a2ed3c959c81dba21cf92a72d0e8f6455b5e6ca1b3f9cb8ebb60e62d
3
+ size 18059264
opt/ocdbt.process_0/d/c0a68afd95c19fd66840469448087b0b ADDED
Binary file (187 Bytes). View file
 
opt/ocdbt.process_0/d/e0bf35b5fe7aefe229d76d62f40c9712 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a65a16dd2cddc93695278b905c9cae8052d6883dc25c772b5b1587cae1c57cb
3
+ size 17768448
opt/ocdbt.process_0/d/e889c18b0bafdee5e513aed8c4b03e2f ADDED
Binary file (618 Bytes). View file
 
opt/ocdbt.process_0/manifest.ocdbt ADDED
Binary file (342 Bytes). View file