everythingchalna commited on
Commit
040bd7c
·
verified ·
1 Parent(s): 148a87c

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ ckpt_best/params/ocdbt.process_0/d/915664a8da5945599cd2c1ebb1cfa0cc filter=lfs diff=lfs merge=lfs -text
37
+ ckpt_best/params/ocdbt.process_0/d/ad70705d966286a023c17be9d869190a filter=lfs diff=lfs merge=lfs -text
ckpt_best/params/_CHECKPOINT_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"item_handlers": "orbax.checkpoint._src.handlers.pytree_checkpoint_handler.PyTreeCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1775748139882752023, "commit_timestamp_nsecs": 1775748140166566090, "custom_metadata": {}}
ckpt_best/params/_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tree_metadata": {"('params', 'block_0', 'attn', 'k_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_0', 'attn', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('params', 'block_0', 'attn', 'q_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_0', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'block_0', 'ln1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_0", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_0', 'ln2', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_0", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_0', 'mlp', 'down', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "down", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1536, 512]}}, "('params', 'block_0', 'mlp', 'up', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "up", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'block_1', 'attn', 'k_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_1', 'attn', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('params', 'block_1', 'attn', 'q_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_1', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'block_1', 'ln1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_1", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_1', 'ln2', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_1", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_1', 'mlp', 'down', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "down", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1536, 512]}}, "('params', 'block_1', 'mlp', 'up', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "up", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'block_2', 'attn', 'k_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_2', 'attn', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('params', 'block_2', 'attn', 'q_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_2', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'block_2', 'ln1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_2", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_2', 'ln2', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_2", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_2', 'mlp', 'down', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "down", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1536, 512]}}, "('params', 'block_2', 'mlp', 'up', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "up", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'block_3', 'attn', 'k_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_3', 'attn', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('params', 'block_3', 'attn', 'q_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_3', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'block_3', 'ln1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_3", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_3', 'ln2', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_3", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_3', 'mlp', 'down', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "down", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1536, 512]}}, "('params', 'block_3', 'mlp', 'up', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "up", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'block_4', 'attn', 'k_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_4', 'attn', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('params', 'block_4', 'attn', 'q_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_4', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'block_4', 'ln1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_4", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_4', 'ln2', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_4", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_4', 'mlp', 'down', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "down", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1536, 512]}}, "('params', 'block_4', 'mlp', 'up', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "up", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'block_5', 'attn', 'k_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_5', 'attn', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('params', 'block_5', 'attn', 'q_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_5', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'block_5', 'ln1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_5", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_5', 'ln2', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_5", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_5', 'mlp', 'down', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "down", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1536, 512]}}, "('params', 'block_5', 'mlp', 'up', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "up", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'block_6', 'attn', 'k_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_6', 'attn', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('params', 'block_6', 'attn', 'q_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_6', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'block_6', 'ln1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_6", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_6', 'ln2', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_6", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_6', 'mlp', 'down', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "down", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1536, 512]}}, "('params', 'block_6', 'mlp', 'up', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_6", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "up", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'block_7', 'attn', 'k_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_7', 'attn', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 512]}}, "('params', 'block_7', 'attn', 'q_norm', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'block_7', 'attn', 'qkv', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "qkv", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'block_7', 'ln1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_7", "key_type": 2}, {"key": "ln1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_7', 'ln2', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_7", "key_type": 2}, {"key": "ln2", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'block_7', 'mlp', 'down', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "down", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1536, 512]}}, "('params', 'block_7', 'mlp', 'up', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "block_7", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "up", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512, 1536]}}, "('params', 'embed', 'embedding')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "embed", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [186, 512]}}, "('params', 'ln_f', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "ln_f", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'ln_x0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "ln_x0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [512]}}, "('params', 'resid_lambda_0')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "resid_lambda_0", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('params', 'resid_lambda_1')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "resid_lambda_1", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('params', 'resid_lambda_2')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "resid_lambda_2", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('params', 'resid_lambda_3')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "resid_lambda_3", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('params', 'resid_lambda_4')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "resid_lambda_4", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('params', 'resid_lambda_5')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "resid_lambda_5", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('params', 'resid_lambda_6')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "resid_lambda_6", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('params', 'resid_lambda_7')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "resid_lambda_7", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('params', 'x0_lambda_0')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "x0_lambda_0", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('params', 'x0_lambda_1')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "x0_lambda_1", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('params', 'x0_lambda_2')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "x0_lambda_2", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('params', 'x0_lambda_3')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "x0_lambda_3", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('params', 'x0_lambda_4')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "x0_lambda_4", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('params', 'x0_lambda_5')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "x0_lambda_5", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('params', 'x0_lambda_6')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "x0_lambda_6", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}, "('params', 'x0_lambda_7')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "x0_lambda_7", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1]}}}, "use_ocdbt": true, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
ckpt_best/params/_sharding ADDED
@@ -0,0 +1 @@
 
 
1
+ {"cGFyYW1zLmJsb2NrXzAuYXR0bi5rX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzAuYXR0bi5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzAuYXR0bi5xX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzAuYXR0bi5xa3Yua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzAubG4xLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzAubG4yLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzAubWxwLmRvd24ua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzAubWxwLnVwLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzEuYXR0bi5rX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzEuYXR0bi5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzEuYXR0bi5xX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzEuYXR0bi5xa3Yua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzEubG4xLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzEubG4yLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzEubWxwLmRvd24ua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzEubWxwLnVwLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzIuYXR0bi5rX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzIuYXR0bi5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzIuYXR0bi5xX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzIuYXR0bi5xa3Yua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzIubG4xLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzIubG4yLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzIubWxwLmRvd24ua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzIubWxwLnVwLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzMuYXR0bi5rX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzMuYXR0bi5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzMuYXR0bi5xX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzMuYXR0bi5xa3Yua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzMubG4xLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzMubG4yLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzMubWxwLmRvd24ua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzMubWxwLnVwLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzQuYXR0bi5rX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzQuYXR0bi5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzQuYXR0bi5xX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzQuYXR0bi5xa3Yua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzQubG4xLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzQubG4yLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzQubWxwLmRvd24ua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzQubWxwLnVwLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzUuYXR0bi5rX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzUuYXR0bi5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzUuYXR0bi5xX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzUuYXR0bi5xa3Yua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzUubG4xLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzUubG4yLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzUubWxwLmRvd24ua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzUubWxwLnVwLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzYuYXR0bi5rX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzYuYXR0bi5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzYuYXR0bi5xX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzYuYXR0bi5xa3Yua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzYubG4xLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzYubG4yLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzYubWxwLmRvd24ua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzYubWxwLnVwLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzcuYXR0bi5rX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzcuYXR0bi5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzcuYXR0bi5xX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzcuYXR0bi5xa3Yua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzcubG4xLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzcubG4yLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzcubWxwLmRvd24ua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmJsb2NrXzcubWxwLnVwLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmVtYmVkLmVtYmVkZGluZw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmxuX2Yuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLmxuX3gwLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLnJlc2lkX2xhbWJkYV80":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLnJlc2lkX2xhbWJkYV81":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLnJlc2lkX2xhbWJkYV82":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLnJlc2lkX2xhbWJkYV83":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLnJlc2lkX2xhbWJkYV8w":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLnJlc2lkX2xhbWJkYV8x":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLnJlc2lkX2xhbWJkYV8y":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLnJlc2lkX2xhbWJkYV8z":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLngwX2xhbWJkYV80":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLngwX2xhbWJkYV81":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLngwX2xhbWJkYV82":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLngwX2xhbWJkYV83":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLngwX2xhbWJkYV8w":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLngwX2xhbWJkYV8x":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLngwX2xhbWJkYV8y":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","cGFyYW1zLngwX2xhbWJkYV8z":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}"}
ckpt_best/params/array_metadatas/process_0 ADDED
@@ -0,0 +1 @@
 
 
1
+ {"array_metadatas": [{"array_metadata": {"param_name": "params.block_0.attn.k_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_0.attn.out_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_0.attn.q_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_0.attn.qkv.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_0.ln1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_0.ln2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_0.mlp.down.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_0.mlp.up.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_1.attn.k_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_1.attn.out_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_1.attn.q_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_1.attn.qkv.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_1.ln1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_1.ln2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_1.mlp.down.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_1.mlp.up.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_2.attn.k_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_2.attn.out_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_2.attn.q_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_2.attn.qkv.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_2.ln1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_2.ln2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_2.mlp.down.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_2.mlp.up.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_3.attn.k_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_3.attn.out_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_3.attn.q_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_3.attn.qkv.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_3.ln1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_3.ln2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_3.mlp.down.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_3.mlp.up.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_4.attn.k_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_4.attn.out_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_4.attn.q_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_4.attn.qkv.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_4.ln1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_4.ln2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_4.mlp.down.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_4.mlp.up.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_5.attn.k_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_5.attn.out_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_5.attn.q_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_5.attn.qkv.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_5.ln1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_5.ln2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_5.mlp.down.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_5.mlp.up.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_6.attn.k_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_6.attn.out_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_6.attn.q_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_6.attn.qkv.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_6.ln1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_6.ln2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_6.mlp.down.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_6.mlp.up.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_7.attn.k_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_7.attn.out_proj.kernel", "write_shape": [512, 512], "chunk_shape": [512, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_7.attn.q_norm.scale", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_7.attn.qkv.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_7.ln1.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_7.ln2.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_7.mlp.down.kernel", "write_shape": [1536, 512], "chunk_shape": [1536, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.block_7.mlp.up.kernel", "write_shape": [512, 1536], "chunk_shape": [512, 1536], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.embed.embedding", "write_shape": [186, 512], "chunk_shape": [186, 512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.ln_f.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.ln_x0.scale", "write_shape": [512], "chunk_shape": [512], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.resid_lambda_0", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.resid_lambda_1", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.resid_lambda_2", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.resid_lambda_3", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.resid_lambda_4", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.resid_lambda_5", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.resid_lambda_6", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.resid_lambda_7", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.x0_lambda_0", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.x0_lambda_1", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.x0_lambda_2", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.x0_lambda_3", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.x0_lambda_4", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.x0_lambda_5", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.x0_lambda_6", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.x0_lambda_7", "write_shape": [1], "chunk_shape": [1], "ext_metadata": null}}]}
ckpt_best/params/d/271d3edbf844b7c69da5b15a099aa3df ADDED
Binary file (4.93 kB). View file
 
ckpt_best/params/manifest.ocdbt ADDED
Binary file (117 Bytes). View file
 
ckpt_best/params/ocdbt.process_0/d/41ecb8418c4f25ecc98fc0775faf5188 ADDED
Binary file (518 Bytes). View file
 
ckpt_best/params/ocdbt.process_0/d/512860d2dcdc0eb1221511510ad9c46a ADDED
Binary file (841 Bytes). View file
 
ckpt_best/params/ocdbt.process_0/d/915664a8da5945599cd2c1ebb1cfa0cc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8de89daf7c46ed80dd41ddcc0dd5aeb6045256ece695b7e8a98b4cfd2f27526d
3
+ size 48582656
ckpt_best/params/ocdbt.process_0/d/960fdd278a99c9ad0a9c08926ece4f52 ADDED
Binary file (7.02 kB). View file
 
ckpt_best/params/ocdbt.process_0/d/9b9004ed893e5ffd31690ad308a82465 ADDED
Binary file (195 Bytes). View file
 
ckpt_best/params/ocdbt.process_0/d/ad70705d966286a023c17be9d869190a ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:348d1c22638af3915677f52d74a38fcb99c552cc2730338bf4cf570f5fcb6360
3
+ size 29458432
ckpt_best/params/ocdbt.process_0/manifest.ocdbt ADDED
Binary file (347 Bytes). View file