diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fe4ffde5465c4bdf054e9f07c5ea0b1f8ce3ac21 --- /dev/null +++ b/README.md @@ -0,0 +1,179 @@ +--- +library_name: easydel +pipeline_tag: image-text-to-text +tags: + - easydel + - jax + - "glm4v" + - "ImageTextToText" + - "vanilla" +--- + +

+ + EasyDeL + +

+ +

zai-org/GLM-4.6V-Flash

+ +

+ EasyDeL checkpoint converted from zai-org/GLM-4.6V-Flash. +

+ +

+ + HuggingFace Repo + + + EasyDeL Version + + Model Type + Task + Attention Mechanism +

+ +--- + +## At a Glance + +| Field | Value | +| --- | --- | +| Repo ID | `EasyDeL/GLM-4.6V-Flash` | +| Model type | `glm4v` | +| Task | `ImageTextToText` | +| Attention | `vanilla` (`AttentionMechanisms.VANILLA`) | +| EasyDeL | `0.2.0` | + +## Overview + +This checkpoint is intended to be loaded with EasyDeL on JAX (CPU/GPU/TPU). It supports sharded loading with `auto_shard_model=True` and configurable precision via `dtype`, `param_dtype`, and `precision`. + +## Quickstart + +```python +import easydel as ed +from jax import numpy as jnp, lax + +repo_id = "EasyDeL/GLM-4.6V-Flash" + +dtype = jnp.bfloat16 # try jnp.float16 on many GPUs + +model = ed.AutoEasyDeLModelForImageTextToText.from_pretrained( + repo_id, + config_kwargs=ed.EasyDeLBaseConfigDict( + attn_dtype=dtype, + attn_mechanism=ed.AttentionMechanisms.VANILLA, + ), + dtype=dtype, + param_dtype=dtype, + precision=lax.Precision("fastest"), + auto_shard_model=True, +) +``` + +If the repository only provides PyTorch weights, pass `from_torch=True` to `from_pretrained(...)`. + +## Sharding & Parallelism (Multi-Device) + +EasyDeL can scale to multiple devices by creating a logical device mesh. Most EasyDeL loaders use a 5D mesh: + +- `dp`: data parallel (replicated parameters, different batch shards) +- `fsdp`: parameter sharding (memory saver; often the biggest axis) +- `ep`: expert parallel (MoE; keep `1` for non-MoE models) +- `tp`: tensor parallel (splits large matmuls) +- `sp`: sequence parallel (splits sequence dimension) + +Use `sharding_axis_names=("dp","fsdp","ep","tp","sp")` and choose `sharding_axis_dims` so that their product equals your device count. +You can use `-1` in `sharding_axis_dims` to let EasyDeL infer the remaining dimension. + +
+Example sharding configs + +```python +# 8 devices, pure FSDP +sharding_axis_dims = (1, 8, 1, 1, 1) + +# 8 devices, 2-way DP x 4-way FSDP +sharding_axis_dims = (2, 4, 1, 1, 1) + +# 8 devices, 4-way FSDP x 2-way TP +sharding_axis_dims = (1, 4, 1, 2, 1) +``` +
+ +## Using via `eLargeModel` (ELM) + +`eLargeModel` is a higher-level interface that wires together loading, sharding, training, and eSurge inference from a single config. + +```python +from easydel import eLargeModel + +repo_id = "EasyDeL/GLM-4.6V-Flash" + +elm = eLargeModel.from_pretrained(repo_id) # task is auto-detected +elm.set_dtype("bf16") +elm.set_sharding(axis_names=("dp", "fsdp", "ep", "tp", "sp"), axis_dims=(1, -1, 1, 1, 1)) + +model = elm.build_model() +# Optional: build an inference engine +# engine = elm.build_esurge() +``` + +
+ELM YAML config example + +```yaml +model: + name_or_path: "EasyDeL/GLM-4.6V-Flash" + +loader: + dtype: bf16 + param_dtype: bf16 + +sharding: + axis_dims: [1, -1, 1, 1, 1] + auto_shard_model: true +``` +
+ +## Features + +**EasyDeL:** +- JAX native implementation and sharded execution +- Configurable attention backends via `AttentionMechanisms.*` +- Precision control via `dtype`, `param_dtype`, and `precision` + +## Installation + +```bash +pip install easydel +``` + +## Links + +- EasyDeL GitHub: https://github.com/erfanzar/EasyDeL +- Docs: https://easydel.readthedocs.io/en/latest/ + +## Supported Tasks + +- ImageTextToText + +## Limitations + +- Refer to the original model card for training data, evaluation, and intended use. + +## License + +EasyDeL is released under the Apache-2.0 license. The license for this model's weights may differ; please consult the original repository. + +## Citation + +```bibtex +@misc{ZareChavoshi_2023, + title={EasyDeL: An open-source library for enhancing and streamlining the training process of machine learning models}, + url={https://github.com/erfanzar/EasyDeL}, + author={Zare Chavoshi, Erfan}, + year={2023} +} +``` diff --git a/checkpoint_metadata.json b/checkpoint_metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..2375d85a6de87cdb18ea77c905201010673b9e3e --- /dev/null +++ b/checkpoint_metadata.json @@ -0,0 +1,6 @@ +{ + "timestamp": "2025-12-28T04:14:13.950976", + "custom_metadata": { + "step": 0 + } +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bce23eeaf2da724ced8dee75958a34e673503b5b --- /dev/null +++ b/config.json @@ -0,0 +1,349 @@ +{ + "_external_rope_config_kwargs": {}, + "architectures": [ + "Glm4vForConditionalGeneration" + ], + "attn_mechanism": "vanilla", + "backend": null, + "bits": null, + "blocksize_b": 1, + "blocksize_k": 128, + "blocksize_q": 128, + "decode_attn_mechanism": null, + "easy_method": "train", + "fcm_max_ratio": 0.0, + "fcm_min_ratio": 0.0, + "flash_attention_backward_pass_impl": "triton", + "fsdp_is_ep_bound": true, + "gradient_checkpointing": "", + 
"gradient_checkpointing_targets": null, + "hardware_abstraction": false, + "image_end_token_id": 151340, + "image_start_token_id": 151339, + "image_token_id": 151363, + "kv_cache_quantization_config": null, + "kv_cache_sharding_sequence_axis_name": "sp", + "model_type": "glm4v", + "moe_force_xla_gmm": false, + "moe_method": "fused_moe", + "moe_tiling_size_batch": 4, + "moe_tiling_size_dim": 128, + "moe_tiling_size_seqlen": 128, + "operation_configs": null, + "pallas_k_block_size": 128, + "pallas_m_block_size": 128, + "pallas_n_block_size": 128, + "partition_axis": { + "attention_dim_axis": null, + "attention_kv_dim_axis": null, + "batch_axis": [ + "fsdp", + "dp" + ], + "bias_head_sequence_axis": null, + "bias_key_sequence_axis": null, + "data_parallel_axis": "dp", + "decode_attention_dim_axis": null, + "decode_attention_kv_dim_axis": null, + "decode_batch_axis": [ + "fsdp", + "dp" + ], + "decode_head_axis": "tp", + "decode_key_sequence_axis": "sp", + "decode_kv_head_axis": "tp", + "decode_query_sequence_axis": null, + "expert_axis": "ep", + "expert_gate_axis": null, + "expert_parallel_axis": "ep", + "fully_sharded_data_parallel_axis": "fsdp", + "head_axis": "tp", + "hidden_state_axis": "tp", + "key_sequence_axis": "sp", + "kv_head_axis": "tp", + "mlp_intermediate_axis": "tp", + "query_sequence_axis": "sp", + "sequence_axis": "sp", + "sequence_parallel_axis": "sp", + "tensor_parallel_axis": "tp", + "vocab_axis": "tp" + }, + "platform": null, + "precompute_masks": true, + "pretraining_tp": 1, + "quantization_config": null, + "scan_attention_layers": false, + "scan_mlp_chunk_size": 1024, + "scan_ring_attention": true, + "sequence_axis_name": "sp", + "sharding_axis_dims": [ + 1, + -1, + 1, + 1, + 1 + ], + "sharding_axis_names": [ + "dp", + "fsdp", + "ep", + "tp", + "sp" + ], + "sharding_dcn_axis_dims": null, + "sp_is_ep_bound": true, + "text_config": { + "_external_rope_config_kwargs": { + "repetition_style": true + }, + "architectures": [ + 
"Glm4vForConditionalGeneration" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "attn_mechanism": "vanilla", + "backend": null, + "bits": null, + "blocksize_b": 1, + "blocksize_k": 128, + "blocksize_q": 128, + "decode_attn_mechanism": null, + "dtype": "bfloat16", + "easy_method": "train", + "eos_token_id": [ + 151329, + 151336, + 151338 + ], + "fcm_max_ratio": 0.0, + "fcm_min_ratio": 0.0, + "flash_attention_backward_pass_impl": "triton", + "fsdp_is_ep_bound": true, + "gradient_checkpointing": "", + "gradient_checkpointing_targets": null, + "hardware_abstraction": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 13696, + "kv_cache_quantization_config": null, + "kv_cache_sharding_sequence_axis_name": "sp", + "max_position_embeddings": 131072, + "model_type": "glm4v_text", + "moe_force_xla_gmm": false, + "moe_method": "fused_moe", + "moe_tiling_size_batch": 4, + "moe_tiling_size_dim": 128, + "moe_tiling_size_seqlen": 128, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 2, + "operation_configs": null, + "pad_token_id": 151329, + "pallas_k_block_size": 128, + "pallas_m_block_size": 128, + "pallas_n_block_size": 128, + "partial_rotary_factor": 0.5, + "partition_axis": { + "attention_dim_axis": null, + "attention_kv_dim_axis": null, + "batch_axis": [ + "fsdp", + "dp" + ], + "bias_head_sequence_axis": null, + "bias_key_sequence_axis": null, + "data_parallel_axis": "dp", + "decode_attention_dim_axis": null, + "decode_attention_kv_dim_axis": null, + "decode_batch_axis": [ + "fsdp", + "dp" + ], + "decode_head_axis": "tp", + "decode_key_sequence_axis": "sp", + "decode_kv_head_axis": "tp", + "decode_query_sequence_axis": null, + "expert_axis": "ep", + "expert_gate_axis": null, + "expert_parallel_axis": "ep", + "fully_sharded_data_parallel_axis": "fsdp", + "head_axis": "tp", + "hidden_state_axis": "tp", + "key_sequence_axis": "sp", + "kv_head_axis": "tp", + 
"mlp_intermediate_axis": "tp", + "query_sequence_axis": "sp", + "sequence_axis": "sp", + "sequence_parallel_axis": "sp", + "tensor_parallel_axis": "tp", + "vocab_axis": "tp" + }, + "platform": null, + "precompute_masks": true, + "pretraining_tp": 1, + "quantization_config": null, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "mrope_section": [ + 8, + 12, + 12 + ], + "rope_type": "default" + }, + "rope_theta": 500000.0, + "scan_attention_layers": false, + "scan_mlp_chunk_size": 1024, + "scan_ring_attention": true, + "sequence_axis_name": "sp", + "sharding_axis_dims": [ + 1, + -1, + 1, + 1, + 1 + ], + "sharding_axis_names": [ + "dp", + "fsdp", + "ep", + "tp", + "sp" + ], + "sharding_dcn_axis_dims": null, + "sp_is_ep_bound": true, + "use_cache": true, + "use_expert_tensor_mode": false, + "use_ring_of_experts": false, + "use_scan_mlp": false, + "use_sharded_kv_caching": false, + "use_sharding_constraint": false, + "vocab_size": 151552 + }, + "tie_word_embeddings": false, + "transformers_version": "4.57.3", + "use_expert_tensor_mode": false, + "use_ring_of_experts": false, + "use_scan_mlp": false, + "use_sharded_kv_caching": false, + "use_sharding_constraint": false, + "video_end_token_id": 151342, + "video_start_token_id": 151341, + "video_token_id": 151364, + "vision_config": { + "_external_rope_config_kwargs": {}, + "architectures": [ + "Glm4vForConditionalGeneration" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_mechanism": "vanilla", + "backend": null, + "bits": null, + "blocksize_b": 1, + "blocksize_k": 128, + "blocksize_q": 128, + "decode_attn_mechanism": null, + "depth": 24, + "easy_method": "train", + "fcm_max_ratio": 0.0, + "fcm_min_ratio": 0.0, + "flash_attention_backward_pass_impl": "triton", + "fsdp_is_ep_bound": true, + "gradient_checkpointing": "", + "gradient_checkpointing_targets": null, + "hardware_abstraction": false, + "hidden_act": "silu", + "hidden_dropout_prob": 0.0, + "hidden_size": 1536, + "image_size": 336, + 
"in_channels": 3, + "initializer_range": 0.02, + "intermediate_size": 13696, + "kv_cache_quantization_config": null, + "kv_cache_sharding_sequence_axis_name": "sp", + "model_type": "glm4v_vision", + "moe_force_xla_gmm": false, + "moe_method": "fused_moe", + "moe_tiling_size_batch": 4, + "moe_tiling_size_dim": 128, + "moe_tiling_size_seqlen": 128, + "num_attention_heads": 12, + "num_heads": 12, + "operation_configs": null, + "out_hidden_size": 4096, + "pallas_k_block_size": 128, + "pallas_m_block_size": 128, + "pallas_n_block_size": 128, + "partition_axis": { + "attention_dim_axis": null, + "attention_kv_dim_axis": null, + "batch_axis": [ + "fsdp", + "dp" + ], + "bias_head_sequence_axis": null, + "bias_key_sequence_axis": null, + "data_parallel_axis": "dp", + "decode_attention_dim_axis": null, + "decode_attention_kv_dim_axis": null, + "decode_batch_axis": [ + "fsdp", + "dp" + ], + "decode_head_axis": "tp", + "decode_key_sequence_axis": "sp", + "decode_kv_head_axis": "tp", + "decode_query_sequence_axis": null, + "expert_axis": "ep", + "expert_gate_axis": null, + "expert_parallel_axis": "ep", + "fully_sharded_data_parallel_axis": "fsdp", + "head_axis": "tp", + "hidden_state_axis": "tp", + "key_sequence_axis": "sp", + "kv_head_axis": "tp", + "mlp_intermediate_axis": "tp", + "query_sequence_axis": "sp", + "sequence_axis": "sp", + "sequence_parallel_axis": "sp", + "tensor_parallel_axis": "tp", + "vocab_axis": "tp" + }, + "patch_size": 14, + "platform": null, + "precompute_masks": true, + "pretraining_tp": 1, + "quantization_config": null, + "rms_norm_eps": 1e-05, + "scan_attention_layers": false, + "scan_mlp_chunk_size": 1024, + "scan_ring_attention": true, + "sequence_axis_name": "sp", + "sharding_axis_dims": [ + 1, + -1, + 1, + 1, + 1 + ], + "sharding_axis_names": [ + "dp", + "fsdp", + "ep", + "tp", + "sp" + ], + "sharding_dcn_axis_dims": null, + "sp_is_ep_bound": true, + "spatial_merge_size": 2, + "temporal_patch_size": 2, + "use_expert_tensor_mode": false, + 
"use_ring_of_experts": false, + "use_scan_mlp": false, + "use_sharded_kv_caching": false, + "use_sharding_constraint": false + } +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..11d215c5938942409e9278376c52c42fd595025b --- /dev/null +++ b/generation_config.json @@ -0,0 +1,15 @@ +{ + "_from_model_config": true, + "do_sample": true, + "eos_token_id": [ + 151329, + 151336, + 151338, + 151348 + ], + "pad_token_id": 151329, + "temperature": 0.8, + "top_k": 2, + "top_p": 0.6, + "transformers_version": "4.57.3" +} diff --git a/model/params/model/language_model/layers/0/self_attn/k_proj/bias/0 b/model/params/model/language_model/layers/0/self_attn/k_proj/bias/0 new file mode 100644 index 0000000000000000000000000000000000000000..6219cd0bd2a359a4e7d2cc83a47625a25eff41e8 Binary files /dev/null and b/model/params/model/language_model/layers/0/self_attn/k_proj/bias/0 differ diff --git a/model/params/model/language_model/layers/0/self_attn/o_proj/kernel/.zarray b/model/params/model/language_model/layers/0/self_attn/o_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f74e87081615413ab6b0c5219a8edae5265883fe --- /dev/null +++ b/model/params/model/language_model/layers/0/self_attn/o_proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[4096,1024],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/language_model/layers/1/mlp/down_proj/kernel/.zarray b/model/params/model/language_model/layers/1/mlp/down_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..6d957709d3382e500bf428e615bed7868edcd317 --- /dev/null +++ b/model/params/model/language_model/layers/1/mlp/down_proj/kernel/.zarray @@ -0,0 +1 @@ 
+{"chunks":[13696,1024],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[13696,4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/language_model/layers/1/self_attn/q_proj/bias/0 b/model/params/model/language_model/layers/1/self_attn/q_proj/bias/0 new file mode 100644 index 0000000000000000000000000000000000000000..4e1ba1db5f7c824bdb2dc493f96290f55d53e562 Binary files /dev/null and b/model/params/model/language_model/layers/1/self_attn/q_proj/bias/0 differ diff --git a/model/params/model/language_model/layers/1/self_attn/v_proj/bias/0 b/model/params/model/language_model/layers/1/self_attn/v_proj/bias/0 new file mode 100644 index 0000000000000000000000000000000000000000..990a62417073ce6a5adf396bd5377f29997f1f1d Binary files /dev/null and b/model/params/model/language_model/layers/1/self_attn/v_proj/bias/0 differ diff --git a/model/params/model/language_model/layers/10/input_layernorm/kernel/.zarray b/model/params/model/language_model/layers/10/input_layernorm/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d --- /dev/null +++ b/model/params/model/language_model/layers/10/input_layernorm/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/language_model/layers/10/post_mlp_layernorm/kernel/.zarray b/model/params/model/language_model/layers/10/post_mlp_layernorm/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d --- /dev/null +++ b/model/params/model/language_model/layers/10/post_mlp_layernorm/kernel/.zarray @@ -0,0 +1 @@ 
+{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/language_model/layers/10/self_attn/q_proj/bias/.zarray b/model/params/model/language_model/layers/10/self_attn/q_proj/bias/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d --- /dev/null +++ b/model/params/model/language_model/layers/10/self_attn/q_proj/bias/.zarray @@ -0,0 +1 @@ +{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/language_model/layers/10/self_attn/q_proj/bias/0 b/model/params/model/language_model/layers/10/self_attn/q_proj/bias/0 new file mode 100644 index 0000000000000000000000000000000000000000..5b2464db98788ef79897bb68d8710dce9f01b110 Binary files /dev/null and b/model/params/model/language_model/layers/10/self_attn/q_proj/bias/0 differ diff --git a/model/params/model/language_model/layers/10/self_attn/q_proj/kernel/.zarray b/model/params/model/language_model/layers/10/self_attn/q_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..7ac7b3325317fe14c7a87913c61d88e3b92ab05e --- /dev/null +++ b/model/params/model/language_model/layers/10/self_attn/q_proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1024,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/language_model/layers/10/self_attn/v_proj/bias/.zarray b/model/params/model/language_model/layers/10/self_attn/v_proj/bias/.zarray new file mode 100644 index 
0000000000000000000000000000000000000000..653c877efa83da0ab518995885715f7e8b4275d7 --- /dev/null +++ b/model/params/model/language_model/layers/10/self_attn/v_proj/bias/.zarray @@ -0,0 +1 @@ +{"chunks":[256],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[256],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/language_model/layers/10/self_attn/v_proj/bias/0 b/model/params/model/language_model/layers/10/self_attn/v_proj/bias/0 new file mode 100644 index 0000000000000000000000000000000000000000..28876c6dfcd3354bd7e2bfbe1de0572d9412f82a Binary files /dev/null and b/model/params/model/language_model/layers/10/self_attn/v_proj/bias/0 differ diff --git a/model/params/model/language_model/layers/10/self_attn/v_proj/kernel/.zarray b/model/params/model/language_model/layers/10/self_attn/v_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..65d91ab909ea33fb45716acd9b2259d1c5a0cd71 --- /dev/null +++ b/model/params/model/language_model/layers/10/self_attn/v_proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1024,256],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,256],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/language_model/layers/11/input_layernorm/kernel/.zarray b/model/params/model/language_model/layers/11/input_layernorm/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d --- /dev/null +++ b/model/params/model/language_model/layers/11/input_layernorm/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2} \ No newline at end of file diff --git 
a/model/params/model/language_model/layers/11/input_layernorm/kernel/0 b/model/params/model/language_model/layers/11/input_layernorm/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..65fc306b926bffc36d56b571640d9cca2773c1d6 Binary files /dev/null and b/model/params/model/language_model/layers/11/input_layernorm/kernel/0 differ diff --git a/model/params/model/language_model/layers/11/mlp/down_proj/kernel/.zarray b/model/params/model/language_model/layers/11/mlp/down_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..6d957709d3382e500bf428e615bed7868edcd317 --- /dev/null +++ b/model/params/model/language_model/layers/11/mlp/down_proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[13696,1024],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[13696,4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/language_model/layers/11/mlp/gate_up_proj/kernel/.zarray b/model/params/model/language_model/layers/11/mlp/gate_up_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..096abde696f79cae51c28a3df025167152ce3172 --- /dev/null +++ b/model/params/model/language_model/layers/11/mlp/gate_up_proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1024,27392],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,27392],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/language_model/layers/11/post_attention_layernorm/kernel/.zarray b/model/params/model/language_model/layers/11/post_attention_layernorm/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d --- /dev/null +++ b/model/params/model/language_model/layers/11/post_attention_layernorm/kernel/.zarray @@ -0,0 +1 @@ 
+{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/language_model/layers/11/post_attention_layernorm/kernel/0 b/model/params/model/language_model/layers/11/post_attention_layernorm/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..722c2e8768405bfbf9b79515ba7f4993456913c9 Binary files /dev/null and b/model/params/model/language_model/layers/11/post_attention_layernorm/kernel/0 differ diff --git a/model/params/model/language_model/layers/11/post_mlp_layernorm/kernel/.zarray b/model/params/model/language_model/layers/11/post_mlp_layernorm/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d --- /dev/null +++ b/model/params/model/language_model/layers/11/post_mlp_layernorm/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/language_model/layers/11/post_mlp_layernorm/kernel/0 b/model/params/model/language_model/layers/11/post_mlp_layernorm/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..4e221fbc50b25c58c27cf61c273ce5b8fe9fde15 Binary files /dev/null and b/model/params/model/language_model/layers/11/post_mlp_layernorm/kernel/0 differ diff --git a/model/params/model/language_model/layers/11/post_self_attn_layernorm/kernel/.zarray b/model/params/model/language_model/layers/11/post_self_attn_layernorm/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d --- /dev/null +++ b/model/params/model/language_model/layers/11/post_self_attn_layernorm/kernel/.zarray @@ -0,0 +1 @@ 
+{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/language_model/layers/11/post_self_attn_layernorm/kernel/0 b/model/params/model/language_model/layers/11/post_self_attn_layernorm/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..51337f48bb42a12bd097f5a642b7146f5521f61d Binary files /dev/null and b/model/params/model/language_model/layers/11/post_self_attn_layernorm/kernel/0 differ diff --git a/model/params/model/language_model/layers/11/self_attn/k_proj/kernel/.zarray b/model/params/model/language_model/layers/11/self_attn/k_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..65d91ab909ea33fb45716acd9b2259d1c5a0cd71 --- /dev/null +++ b/model/params/model/language_model/layers/11/self_attn/k_proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1024,256],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,256],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/4/norm1/kernel/0 b/model/params/model/visual/blocks/4/norm1/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..b3e3fc8c468bca76232c5cc4a66d8e993b27fd84 Binary files /dev/null and b/model/params/model/visual/blocks/4/norm1/kernel/0 differ diff --git a/model/params/model/visual/blocks/4/norm2/kernel/0 b/model/params/model/visual/blocks/4/norm2/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..65e30beca0b5056c292e806025578718f88cf9c8 Binary files /dev/null and b/model/params/model/visual/blocks/4/norm2/kernel/0 differ diff --git a/model/params/model/visual/blocks/5/attn/proj/kernel/.zarray b/model/params/model/visual/blocks/5/attn/proj/kernel/.zarray new file mode 100644 index 
0000000000000000000000000000000000000000..c66b99465c1430f7e05ffd79381a2eca6ed4d48c --- /dev/null +++ b/model/params/model/visual/blocks/5/attn/proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1536,384],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,1536],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/5/attn/qkv/kernel/.zarray b/model/params/model/visual/blocks/5/attn/qkv/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..5d464d118869d3fa87097b63cf90be221aefa400 --- /dev/null +++ b/model/params/model/visual/blocks/5/attn/qkv/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[384,4608],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4608],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/5/mlp/gate_proj/kernel/.zarray b/model/params/model/visual/blocks/5/mlp/gate_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..5c3c9a990f8bbcfcaca9f0bf04c88698b23248f0 --- /dev/null +++ b/model/params/model/visual/blocks/5/mlp/gate_proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[384,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/5/mlp/up_proj/kernel/.zarray b/model/params/model/visual/blocks/5/mlp/up_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..5c3c9a990f8bbcfcaca9f0bf04c88698b23248f0 --- /dev/null +++ b/model/params/model/visual/blocks/5/mlp/up_proj/kernel/.zarray @@ -0,0 +1 @@ 
+{"chunks":[384,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/5/norm1/kernel/0 b/model/params/model/visual/blocks/5/norm1/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..3817e4632b52fa1d78521939f52e56e05aae1497 Binary files /dev/null and b/model/params/model/visual/blocks/5/norm1/kernel/0 differ diff --git a/model/params/model/visual/blocks/5/norm2/kernel/0 b/model/params/model/visual/blocks/5/norm2/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..bb76b124b1022226d7b46e32f5d8e2214d30365a Binary files /dev/null and b/model/params/model/visual/blocks/5/norm2/kernel/0 differ diff --git a/model/params/model/visual/blocks/6/attn/proj/kernel/.zarray b/model/params/model/visual/blocks/6/attn/proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..c66b99465c1430f7e05ffd79381a2eca6ed4d48c --- /dev/null +++ b/model/params/model/visual/blocks/6/attn/proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1536,384],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,1536],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/6/mlp/down_proj/kernel/.zarray b/model/params/model/visual/blocks/6/mlp/down_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..0c76e816644f867d0b1cee8aed4278aea5096ac1 --- /dev/null +++ b/model/params/model/visual/blocks/6/mlp/down_proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[4096,384],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,1536],"zarr_format":2} \ No newline at end of file diff --git 
a/model/params/model/visual/blocks/6/mlp/gate_proj/kernel/.zarray b/model/params/model/visual/blocks/6/mlp/gate_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..5c3c9a990f8bbcfcaca9f0bf04c88698b23248f0 --- /dev/null +++ b/model/params/model/visual/blocks/6/mlp/gate_proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[384,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/6/mlp/up_proj/kernel/.zarray b/model/params/model/visual/blocks/6/mlp/up_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..5c3c9a990f8bbcfcaca9f0bf04c88698b23248f0 --- /dev/null +++ b/model/params/model/visual/blocks/6/mlp/up_proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[384,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/6/norm1/kernel/.zarray b/model/params/model/visual/blocks/6/norm1/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8 --- /dev/null +++ b/model/params/model/visual/blocks/6/norm1/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/6/norm2/kernel/0 b/model/params/model/visual/blocks/6/norm2/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..f6631d32b0de4b855b169c5beec685d80a4c3444 Binary files /dev/null and b/model/params/model/visual/blocks/6/norm2/kernel/0 differ diff --git 
a/model/params/model/visual/blocks/7/attn/qkv/kernel/.zarray b/model/params/model/visual/blocks/7/attn/qkv/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..5d464d118869d3fa87097b63cf90be221aefa400 --- /dev/null +++ b/model/params/model/visual/blocks/7/attn/qkv/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[384,4608],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4608],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/7/norm1/kernel/.zarray b/model/params/model/visual/blocks/7/norm1/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8 --- /dev/null +++ b/model/params/model/visual/blocks/7/norm1/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/7/norm1/kernel/0 b/model/params/model/visual/blocks/7/norm1/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..380f86e90c9ef23ea4fcd10f5e2c49d53f811ac2 Binary files /dev/null and b/model/params/model/visual/blocks/7/norm1/kernel/0 differ diff --git a/model/params/model/visual/blocks/7/norm2/kernel/0 b/model/params/model/visual/blocks/7/norm2/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..dbbfc32aeb353970ce44ca9c562a93824742f2b5 Binary files /dev/null and b/model/params/model/visual/blocks/7/norm2/kernel/0 differ diff --git a/model/params/model/visual/blocks/8/attn/qkv/kernel/.zarray b/model/params/model/visual/blocks/8/attn/qkv/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..5d464d118869d3fa87097b63cf90be221aefa400 --- /dev/null +++ 
b/model/params/model/visual/blocks/8/attn/qkv/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[384,4608],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4608],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/8/mlp/down_proj/kernel/.zarray b/model/params/model/visual/blocks/8/mlp/down_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..0c76e816644f867d0b1cee8aed4278aea5096ac1 --- /dev/null +++ b/model/params/model/visual/blocks/8/mlp/down_proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[4096,384],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,1536],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/8/norm1/kernel/0 b/model/params/model/visual/blocks/8/norm1/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..549e67d2d11835c94ca9542f75f1d8524621a481 Binary files /dev/null and b/model/params/model/visual/blocks/8/norm1/kernel/0 differ diff --git a/model/params/model/visual/blocks/8/norm2/kernel/.zarray b/model/params/model/visual/blocks/8/norm2/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8 --- /dev/null +++ b/model/params/model/visual/blocks/8/norm2/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/9/mlp/up_proj/kernel/.zarray b/model/params/model/visual/blocks/9/mlp/up_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..5c3c9a990f8bbcfcaca9f0bf04c88698b23248f0 --- /dev/null +++ 
b/model/params/model/visual/blocks/9/mlp/up_proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[384,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/9/norm1/kernel/.zarray b/model/params/model/visual/blocks/9/norm1/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8 --- /dev/null +++ b/model/params/model/visual/blocks/9/norm1/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/9/norm1/kernel/0 b/model/params/model/visual/blocks/9/norm1/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..98e10faff5a8ba9664910b74c175bb095b94ee83 Binary files /dev/null and b/model/params/model/visual/blocks/9/norm1/kernel/0 differ diff --git a/model/params/model/visual/blocks/9/norm2/kernel/.zarray b/model/params/model/visual/blocks/9/norm2/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8 --- /dev/null +++ b/model/params/model/visual/blocks/9/norm2/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/blocks/9/norm2/kernel/0 b/model/params/model/visual/blocks/9/norm2/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..caf77ec52e00c9ea7b6e1cd4d94d4c26b8434965 Binary files /dev/null and b/model/params/model/visual/blocks/9/norm2/kernel/0 differ diff --git 
a/model/params/model/visual/downsample/bias/.zarray b/model/params/model/visual/downsample/bias/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d --- /dev/null +++ b/model/params/model/visual/downsample/bias/.zarray @@ -0,0 +1 @@ +{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/downsample/bias/0 b/model/params/model/visual/downsample/bias/0 new file mode 100644 index 0000000000000000000000000000000000000000..eceb5de47fb366acafe671c8da93cff95d1a9ef9 Binary files /dev/null and b/model/params/model/visual/downsample/bias/0 differ diff --git a/model/params/model/visual/downsample/kernel/.zarray b/model/params/model/visual/downsample/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..aa99caf9770b72231e58a7ad64f2bf4f0bc0fb1a --- /dev/null +++ b/model/params/model/visual/downsample/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[2,2,1536,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[2,2,1536,4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/merger/down_proj/kernel/.zarray b/model/params/model/visual/merger/down_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..6d957709d3382e500bf428e615bed7868edcd317 --- /dev/null +++ b/model/params/model/visual/merger/down_proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[13696,1024],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[13696,4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/merger/gate_proj/kernel/.zarray 
b/model/params/model/visual/merger/gate_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..cd759573b03445afa42a8c128ec67759636ad66d --- /dev/null +++ b/model/params/model/visual/merger/gate_proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1024,13696],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,13696],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/merger/norm/bias/.zarray b/model/params/model/visual/merger/norm/bias/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d --- /dev/null +++ b/model/params/model/visual/merger/norm/bias/.zarray @@ -0,0 +1 @@ +{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/merger/norm/bias/0 b/model/params/model/visual/merger/norm/bias/0 new file mode 100644 index 0000000000000000000000000000000000000000..f1fe50c169997afab62b7754d41a85dbbe717dc1 Binary files /dev/null and b/model/params/model/visual/merger/norm/bias/0 differ diff --git a/model/params/model/visual/merger/norm/scale/.zarray b/model/params/model/visual/merger/norm/scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d --- /dev/null +++ b/model/params/model/visual/merger/norm/scale/.zarray @@ -0,0 +1 @@ +{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/merger/proj/kernel/.zarray b/model/params/model/visual/merger/proj/kernel/.zarray new file mode 100644 index 
0000000000000000000000000000000000000000..7ac7b3325317fe14c7a87913c61d88e3b92ab05e --- /dev/null +++ b/model/params/model/visual/merger/proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1024,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,4096],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/merger/up_proj/kernel/.zarray b/model/params/model/visual/merger/up_proj/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..cd759573b03445afa42a8c128ec67759636ad66d --- /dev/null +++ b/model/params/model/visual/merger/up_proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1024,13696],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,13696],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/patch_embed/proj/bias/.zarray b/model/params/model/visual/patch_embed/proj/bias/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8 --- /dev/null +++ b/model/params/model/visual/patch_embed/proj/bias/.zarray @@ -0,0 +1 @@ +{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/patch_embed/proj/bias/0 b/model/params/model/visual/patch_embed/proj/bias/0 new file mode 100644 index 0000000000000000000000000000000000000000..09e071c870e600e51e8db805d7641a83cc318bd0 Binary files /dev/null and b/model/params/model/visual/patch_embed/proj/bias/0 differ diff --git a/model/params/model/visual/patch_embed/proj/kernel/.zarray b/model/params/model/visual/patch_embed/proj/kernel/.zarray new file mode 100644 index 
0000000000000000000000000000000000000000..9a20defd09ef9eb0e47667fe1d6ca61147f16286 --- /dev/null +++ b/model/params/model/visual/patch_embed/proj/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[2,14,14,3,1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[2,14,14,3,1536],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/pos_embed/embedding/.zarray b/model/params/model/visual/pos_embed/embedding/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ba4a9a706aa77c7b576e84bda43a0717e745f027 --- /dev/null +++ b/model/params/model/visual/pos_embed/embedding/.zarray @@ -0,0 +1 @@ +{"chunks":[144,1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[576,1536],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/post_conv_layernorm/kernel/.zarray b/model/params/model/visual/post_conv_layernorm/kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8 --- /dev/null +++ b/model/params/model/visual/post_conv_layernorm/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/post_conv_layernorm/kernel/0 b/model/params/model/visual/post_conv_layernorm/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..59eec04ae6f74bb68d8e4edf5d3074562cbd8370 Binary files /dev/null and b/model/params/model/visual/post_conv_layernorm/kernel/0 differ diff --git a/model/params/model/visual/post_layernorm/kernel/.zarray b/model/params/model/visual/post_layernorm/kernel/.zarray new file mode 100644 index 
0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8 --- /dev/null +++ b/model/params/model/visual/post_layernorm/kernel/.zarray @@ -0,0 +1 @@ +{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2} \ No newline at end of file diff --git a/model/params/model/visual/post_layernorm/kernel/0 b/model/params/model/visual/post_layernorm/kernel/0 new file mode 100644 index 0000000000000000000000000000000000000000..ef5ecb3ff335f5c59c4b2d5ef1c6978fd6b478b7 Binary files /dev/null and b/model/params/model/visual/post_layernorm/kernel/0 differ diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..c1efb0ea7f62e3bcdd84b47acc6b0c8c195d0e93 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,42 @@ +{ + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>", + "<|begin_of_audio|>", + "<|end_of_audio|>", + "<|image|>", + "<|video|>", + "<|begin_of_transcription|>", + "<|end_of_transcription|>", + "<|code_prefix|>", + "<|code_middle|>", + "<|code_suffix|>", + "/nothink" + ], + "eos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tensorstore_index.json b/tensorstore_index.json new file mode 100644 index 0000000000000000000000000000000000000000..a910231f0e89e4ddf07dc5c8ea027469d0985f84 --- /dev/null +++ b/tensorstore_index.json @@ -0,0 +1,5310 @@ +{ + "format": "tensorstore", + "version": "easydel", + "prefixes": { + "model": [ 
+ { + "path": "model/params/model/language_model/embed_tokens/embedding", + "shape": [ + 151552, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/0/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/0/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/0/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/0/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/0/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/0/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/0/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/0/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/0/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/0/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/0/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/0/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/0/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/1/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/1/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/1/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/1/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/1/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/1/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/1/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/1/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/1/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/1/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/1/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/1/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/1/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/2/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/2/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/2/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/2/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/2/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/2/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/2/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/2/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/2/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/2/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/2/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/2/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/2/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/3/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/3/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/3/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/3/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/3/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/3/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/3/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/3/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/3/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/3/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/3/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/3/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/3/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/4/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/4/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/4/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/4/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/4/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/4/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/4/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/4/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/4/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/4/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/4/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/4/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/4/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/5/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/5/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/5/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/5/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/5/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/5/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/5/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/5/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/5/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/5/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/5/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/5/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/5/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/6/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/6/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/6/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/6/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/6/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/6/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/6/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/6/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/6/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/6/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/6/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/6/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/6/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/7/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/7/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/7/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/7/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/7/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/7/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/7/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/7/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/7/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/7/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/7/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/7/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/7/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/8/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/8/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/8/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/8/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/8/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/8/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/8/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/8/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/8/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/8/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/8/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/8/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/8/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/9/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/9/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/9/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/9/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/9/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/9/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/9/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/9/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/9/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/9/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/9/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/9/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/9/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/10/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/10/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/10/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/10/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/10/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/10/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/10/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/10/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/10/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/10/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/10/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/10/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/10/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/11/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/11/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/11/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/11/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/11/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/11/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/11/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/11/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/11/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/11/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/11/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/11/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/11/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/12/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/12/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/12/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/12/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/12/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/12/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/12/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/12/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/12/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/12/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/12/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/12/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/12/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/13/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/13/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/13/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/13/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/13/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/13/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/13/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/13/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/13/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/13/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/13/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/13/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/13/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/14/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/14/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/14/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/14/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/14/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/14/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/14/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/14/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/14/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/14/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/14/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/14/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/14/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/15/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/15/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/15/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/15/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/15/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/15/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/15/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/15/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/15/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/15/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/15/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/15/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/15/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/16/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/16/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/16/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/16/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/16/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/16/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/16/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/16/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/16/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/16/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/16/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/16/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/16/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/17/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/17/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/17/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/17/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/17/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/17/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/17/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/17/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/17/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/17/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/17/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/17/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/17/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/18/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/18/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/18/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/18/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/18/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/18/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/18/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/18/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/18/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/18/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/18/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/18/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/18/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/19/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/19/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/19/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/19/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/19/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/19/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/19/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/19/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/19/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/19/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/19/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/19/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/19/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/20/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/20/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/20/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/20/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/20/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/20/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/20/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/20/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/20/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/20/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/20/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/20/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/20/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/21/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/21/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/21/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/21/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/21/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/21/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/21/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/21/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/21/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/21/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/21/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/21/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/21/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/22/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/22/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/22/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/22/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/22/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/22/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/22/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/22/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/22/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/22/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/22/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/22/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/22/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/23/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/23/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/23/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/23/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/23/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/23/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/23/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/23/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/23/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/23/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/23/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/23/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/23/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/24/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/24/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/24/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/24/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/24/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/24/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/24/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/24/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/24/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/24/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/24/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/24/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/24/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/25/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/25/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/25/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/25/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/25/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/25/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/25/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/25/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/25/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/25/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/25/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/25/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/25/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/26/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/26/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/26/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/26/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/26/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/26/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/26/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/26/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/26/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/26/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/26/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/26/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/26/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/27/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/27/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/27/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/27/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/27/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/27/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/27/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/27/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/27/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/27/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/27/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/27/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/27/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/28/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/28/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/28/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/28/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/28/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/28/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/28/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/28/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/28/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/28/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/28/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/28/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/28/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/29/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/29/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/29/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/29/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/29/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/29/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/29/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/29/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/29/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/29/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/29/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/29/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/29/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/30/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/30/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/30/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/30/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/30/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/30/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/30/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/30/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/30/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/30/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/30/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/30/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/30/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/31/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/31/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/31/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/31/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/31/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/31/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/31/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/31/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/31/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/31/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/31/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/31/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/31/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/32/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/32/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/32/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/32/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/32/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/32/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/32/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/32/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/32/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/32/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/32/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/32/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/32/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/33/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/33/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/33/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/33/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/33/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/33/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/33/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/33/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/33/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/33/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/33/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/33/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/33/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/34/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/34/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/34/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/34/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/34/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/34/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/34/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/34/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/34/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/34/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/34/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/34/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/34/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/35/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/35/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/35/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/35/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/35/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/35/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/35/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/35/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/35/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/35/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/35/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/35/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/35/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/lm_head/kernel", + "shape": [ + 4096, + 151552 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/36/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/36/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/36/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/36/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/36/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/36/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/36/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/36/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/36/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/36/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/36/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/36/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/36/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/37/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/37/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/37/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/37/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/37/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/37/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/37/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/37/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/37/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/37/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/37/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/37/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/37/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/38/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/38/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/38/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/38/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/38/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/38/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/38/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/38/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/38/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/38/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/38/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/38/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/38/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/39/input_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/39/mlp/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/39/mlp/gate_up_proj/kernel", + "shape": [ + 4096, + 27392 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/39/post_attention_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/39/post_mlp_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/39/post_self_attn_layernorm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/39/self_attn/k_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/39/self_attn/k_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/39/self_attn/o_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/language_model/layers/39/self_attn/q_proj/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/39/self_attn/q_proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/39/self_attn/v_proj/bias", + "shape": [ + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/layers/39/self_attn/v_proj/kernel", + "shape": [ + 4096, + 256 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/language_model/norm/kernel", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/0/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/0/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/0/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/0/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/0/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/0/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/0/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/1/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/1/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/1/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/visual/blocks/1/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/1/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/1/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/1/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/10/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/10/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/10/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/10/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/10/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/10/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/10/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/11/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/11/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/11/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/11/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/11/mlp/up_proj/kernel", + "shape": [ + 1536, 
+ 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/11/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/11/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/12/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/12/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/12/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/12/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/12/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/12/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/12/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/13/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/13/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/13/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/13/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/13/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/13/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/visual/blocks/13/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/14/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/14/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/14/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/14/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/14/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/14/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/14/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/15/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/15/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/15/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/15/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/15/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/15/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/15/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/16/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + 
"dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/16/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/16/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/16/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/16/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/16/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/16/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/17/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/17/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/17/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/17/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/17/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/17/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/17/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/18/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/18/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/visual/blocks/18/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/18/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/18/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/18/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/18/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/19/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/19/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/19/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/19/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/19/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/19/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/19/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/2/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/2/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/2/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/2/mlp/gate_proj/kernel", + "shape": [ + 
1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/2/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/2/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/2/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/20/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/20/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/20/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/20/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/20/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/20/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/20/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/21/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/21/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/21/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/21/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/21/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/visual/blocks/21/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/21/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/22/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/22/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/22/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/22/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/22/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/22/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/22/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/23/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/23/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/23/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/23/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/23/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/23/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/23/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": 
"bfloat16" + }, + { + "path": "model/params/model/visual/blocks/3/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/3/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/3/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/3/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/3/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/3/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/3/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/4/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/4/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/4/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/4/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/4/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/4/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/4/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/5/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/5/attn/qkv/kernel", + "shape": 
[ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/5/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/5/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/5/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/5/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/5/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/6/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/6/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/6/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/6/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/6/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/6/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/6/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/7/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/7/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/7/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": 
"model/params/model/visual/blocks/7/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/7/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/7/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/7/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/8/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/8/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/8/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/8/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/8/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/8/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/8/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/9/attn/proj/kernel", + "shape": [ + 1536, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/9/attn/qkv/kernel", + "shape": [ + 1536, + 4608 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/9/mlp/down_proj/kernel", + "shape": [ + 4096, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/9/mlp/gate_proj/kernel", + "shape": [ + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/9/mlp/up_proj/kernel", + "shape": [ + 1536, + 4096 + ], 
+ "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/9/norm1/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/blocks/9/norm2/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/downsample/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/downsample/kernel", + "shape": [ + 2, + 2, + 1536, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/pos_embed/embedding", + "shape": [ + 576, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/merger/down_proj/kernel", + "shape": [ + 13696, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/merger/gate_proj/kernel", + "shape": [ + 4096, + 13696 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/merger/norm/bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/merger/norm/scale", + "shape": [ + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/merger/proj/kernel", + "shape": [ + 4096, + 4096 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/merger/up_proj/kernel", + "shape": [ + 4096, + 13696 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/patch_embed/proj/bias", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/patch_embed/proj/kernel", + "shape": [ + 2, + 14, + 14, + 3, + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/post_conv_layernorm/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + }, + { + "path": "model/params/model/visual/post_layernorm/kernel", + "shape": [ + 1536 + ], + "dtype": "bfloat16" + } + ] + } +} \ No newline at end of file