diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fe4ffde5465c4bdf054e9f07c5ea0b1f8ce3ac21
--- /dev/null
+++ b/README.md
@@ -0,0 +1,179 @@
+---
+library_name: easydel
+pipeline_tag: image-text-to-text
+tags:
+ - easydel
+ - jax
+ - "glm4v"
+ - "ImageTextToText"
+ - "vanilla"
+---
+
+
+
+
+
+
+
+zai-org/GLM-4.6V-Flash
+
+
+ EasyDeL checkpoint converted from zai-org/GLM-4.6V-Flash.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+---
+
+## At a Glance
+
+| Field | Value |
+| --- | --- |
+| Repo ID | `EasyDeL/GLM-4.6V-Flash` |
+| Model type | `glm4v` |
+| Task | `ImageTextToText` |
+| Attention | `vanilla` (`AttentionMechanisms.VANILLA`) |
+| EasyDeL | `0.2.0` |
+
+## Overview
+
+This checkpoint is intended to be loaded with EasyDeL on JAX (CPU/GPU/TPU). It supports sharded loading with `auto_shard_model=True` and configurable precision via `dtype`, `param_dtype`, and `precision`.
+
+## Quickstart
+
+```python
+import easydel as ed
+from jax import numpy as jnp, lax
+
+repo_id = "EasyDeL/GLM-4.6V-Flash"
+
+dtype = jnp.bfloat16 # try jnp.float16 on many GPUs
+
+model = ed.AutoEasyDeLModelForImageTextToText.from_pretrained(
+ repo_id,
+ config_kwargs=ed.EasyDeLBaseConfigDict(
+ attn_dtype=dtype,
+ attn_mechanism=ed.AttentionMechanisms.VANILLA,
+ ),
+ dtype=dtype,
+ param_dtype=dtype,
+ precision=lax.Precision("fastest"),
+ auto_shard_model=True,
+)
+```
+
+If the repository only provides PyTorch weights, pass `from_torch=True` to `from_pretrained(...)`.
+
+## Sharding & Parallelism (Multi-Device)
+
+EasyDeL can scale to multiple devices by creating a logical device mesh. Most EasyDeL loaders use a 5D mesh:
+
+- `dp`: data parallel (replicated parameters, different batch shards)
+- `fsdp`: parameter sharding (memory saver; often the biggest axis)
+- `ep`: expert parallel (MoE; keep `1` for non-MoE models)
+- `tp`: tensor parallel (splits large matmuls)
+- `sp`: sequence parallel (splits sequence dimension)
+
+Use `sharding_axis_names=("dp","fsdp","ep","tp","sp")` and choose `sharding_axis_dims` so that the product of its entries equals your device count.
+You can use `-1` in `sharding_axis_dims` to let EasyDeL infer the remaining dimension.
+
+
+**Example sharding configs:**
+
+```python
+# 8 devices, pure FSDP
+sharding_axis_dims = (1, 8, 1, 1, 1)
+
+# 8 devices, 2-way DP x 4-way FSDP
+sharding_axis_dims = (2, 4, 1, 1, 1)
+
+# 8 devices, 4-way FSDP x 2-way TP
+sharding_axis_dims = (1, 4, 1, 2, 1)
+```
+
+
+## Using via `eLargeModel` (ELM)
+
+`eLargeModel` is a higher-level interface that wires together loading, sharding, training, and eSurge inference from a single config.
+
+```python
+from easydel import eLargeModel
+
+repo_id = "EasyDeL/GLM-4.6V-Flash"
+
+elm = eLargeModel.from_pretrained(repo_id) # task is auto-detected
+elm.set_dtype("bf16")
+elm.set_sharding(axis_names=("dp", "fsdp", "ep", "tp", "sp"), axis_dims=(1, -1, 1, 1, 1))
+
+model = elm.build_model()
+# Optional: build an inference engine
+# engine = elm.build_esurge()
+```
+
+
+**ELM YAML config example:**
+
+```yaml
+model:
+ name_or_path: "EasyDeL/GLM-4.6V-Flash"
+
+loader:
+ dtype: bf16
+ param_dtype: bf16
+
+sharding:
+ axis_dims: [1, -1, 1, 1, 1]
+ auto_shard_model: true
+```
+
+
+## Features
+
+**EasyDeL:**
+- JAX-native implementation and sharded execution
+- Configurable attention backends via `AttentionMechanisms.*`
+- Precision control via `dtype`, `param_dtype`, and `precision`
+
+## Installation
+
+```bash
+pip install easydel
+```
+
+## Links
+
+- EasyDeL GitHub: https://github.com/erfanzar/EasyDeL
+- Docs: https://easydel.readthedocs.io/en/latest/
+
+## Supported Tasks
+
+- ImageTextToText
+
+## Limitations
+
+- Refer to the original model card for training data, evaluation, and intended use.
+
+## License
+
+EasyDeL is released under the Apache-2.0 license. The license for this model's weights may differ; please consult the original repository.
+
+## Citation
+
+```bibtex
+@misc{ZareChavoshi_2023,
+ title={EasyDeL: An open-source library for enhancing and streamlining the training process of machine learning models},
+ url={https://github.com/erfanzar/EasyDeL},
+ author={Zare Chavoshi, Erfan},
+ year={2023}
+}
+```
diff --git a/checkpoint_metadata.json b/checkpoint_metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..2375d85a6de87cdb18ea77c905201010673b9e3e
--- /dev/null
+++ b/checkpoint_metadata.json
@@ -0,0 +1,6 @@
+{
+ "timestamp": "2025-12-28T04:14:13.950976",
+ "custom_metadata": {
+ "step": 0
+ }
+}
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bce23eeaf2da724ced8dee75958a34e673503b5b
--- /dev/null
+++ b/config.json
@@ -0,0 +1,349 @@
+{
+ "_external_rope_config_kwargs": {},
+ "architectures": [
+ "Glm4vForConditionalGeneration"
+ ],
+ "attn_mechanism": "vanilla",
+ "backend": null,
+ "bits": null,
+ "blocksize_b": 1,
+ "blocksize_k": 128,
+ "blocksize_q": 128,
+ "decode_attn_mechanism": null,
+ "easy_method": "train",
+ "fcm_max_ratio": 0.0,
+ "fcm_min_ratio": 0.0,
+ "flash_attention_backward_pass_impl": "triton",
+ "fsdp_is_ep_bound": true,
+ "gradient_checkpointing": "",
+ "gradient_checkpointing_targets": null,
+ "hardware_abstraction": false,
+ "image_end_token_id": 151340,
+ "image_start_token_id": 151339,
+ "image_token_id": 151363,
+ "kv_cache_quantization_config": null,
+ "kv_cache_sharding_sequence_axis_name": "sp",
+ "model_type": "glm4v",
+ "moe_force_xla_gmm": false,
+ "moe_method": "fused_moe",
+ "moe_tiling_size_batch": 4,
+ "moe_tiling_size_dim": 128,
+ "moe_tiling_size_seqlen": 128,
+ "operation_configs": null,
+ "pallas_k_block_size": 128,
+ "pallas_m_block_size": 128,
+ "pallas_n_block_size": 128,
+ "partition_axis": {
+ "attention_dim_axis": null,
+ "attention_kv_dim_axis": null,
+ "batch_axis": [
+ "fsdp",
+ "dp"
+ ],
+ "bias_head_sequence_axis": null,
+ "bias_key_sequence_axis": null,
+ "data_parallel_axis": "dp",
+ "decode_attention_dim_axis": null,
+ "decode_attention_kv_dim_axis": null,
+ "decode_batch_axis": [
+ "fsdp",
+ "dp"
+ ],
+ "decode_head_axis": "tp",
+ "decode_key_sequence_axis": "sp",
+ "decode_kv_head_axis": "tp",
+ "decode_query_sequence_axis": null,
+ "expert_axis": "ep",
+ "expert_gate_axis": null,
+ "expert_parallel_axis": "ep",
+ "fully_sharded_data_parallel_axis": "fsdp",
+ "head_axis": "tp",
+ "hidden_state_axis": "tp",
+ "key_sequence_axis": "sp",
+ "kv_head_axis": "tp",
+ "mlp_intermediate_axis": "tp",
+ "query_sequence_axis": "sp",
+ "sequence_axis": "sp",
+ "sequence_parallel_axis": "sp",
+ "tensor_parallel_axis": "tp",
+ "vocab_axis": "tp"
+ },
+ "platform": null,
+ "precompute_masks": true,
+ "pretraining_tp": 1,
+ "quantization_config": null,
+ "scan_attention_layers": false,
+ "scan_mlp_chunk_size": 1024,
+ "scan_ring_attention": true,
+ "sequence_axis_name": "sp",
+ "sharding_axis_dims": [
+ 1,
+ -1,
+ 1,
+ 1,
+ 1
+ ],
+ "sharding_axis_names": [
+ "dp",
+ "fsdp",
+ "ep",
+ "tp",
+ "sp"
+ ],
+ "sharding_dcn_axis_dims": null,
+ "sp_is_ep_bound": true,
+ "text_config": {
+ "_external_rope_config_kwargs": {
+ "repetition_style": true
+ },
+ "architectures": [
+ "Glm4vForConditionalGeneration"
+ ],
+ "attention_bias": true,
+ "attention_dropout": 0.0,
+ "attn_mechanism": "vanilla",
+ "backend": null,
+ "bits": null,
+ "blocksize_b": 1,
+ "blocksize_k": 128,
+ "blocksize_q": 128,
+ "decode_attn_mechanism": null,
+ "dtype": "bfloat16",
+ "easy_method": "train",
+ "eos_token_id": [
+ 151329,
+ 151336,
+ 151338
+ ],
+ "fcm_max_ratio": 0.0,
+ "fcm_min_ratio": 0.0,
+ "flash_attention_backward_pass_impl": "triton",
+ "fsdp_is_ep_bound": true,
+ "gradient_checkpointing": "",
+ "gradient_checkpointing_targets": null,
+ "hardware_abstraction": false,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 13696,
+ "kv_cache_quantization_config": null,
+ "kv_cache_sharding_sequence_axis_name": "sp",
+ "max_position_embeddings": 131072,
+ "model_type": "glm4v_text",
+ "moe_force_xla_gmm": false,
+ "moe_method": "fused_moe",
+ "moe_tiling_size_batch": 4,
+ "moe_tiling_size_dim": 128,
+ "moe_tiling_size_seqlen": 128,
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 2,
+ "operation_configs": null,
+ "pad_token_id": 151329,
+ "pallas_k_block_size": 128,
+ "pallas_m_block_size": 128,
+ "pallas_n_block_size": 128,
+ "partial_rotary_factor": 0.5,
+ "partition_axis": {
+ "attention_dim_axis": null,
+ "attention_kv_dim_axis": null,
+ "batch_axis": [
+ "fsdp",
+ "dp"
+ ],
+ "bias_head_sequence_axis": null,
+ "bias_key_sequence_axis": null,
+ "data_parallel_axis": "dp",
+ "decode_attention_dim_axis": null,
+ "decode_attention_kv_dim_axis": null,
+ "decode_batch_axis": [
+ "fsdp",
+ "dp"
+ ],
+ "decode_head_axis": "tp",
+ "decode_key_sequence_axis": "sp",
+ "decode_kv_head_axis": "tp",
+ "decode_query_sequence_axis": null,
+ "expert_axis": "ep",
+ "expert_gate_axis": null,
+ "expert_parallel_axis": "ep",
+ "fully_sharded_data_parallel_axis": "fsdp",
+ "head_axis": "tp",
+ "hidden_state_axis": "tp",
+ "key_sequence_axis": "sp",
+ "kv_head_axis": "tp",
+ "mlp_intermediate_axis": "tp",
+ "query_sequence_axis": "sp",
+ "sequence_axis": "sp",
+ "sequence_parallel_axis": "sp",
+ "tensor_parallel_axis": "tp",
+ "vocab_axis": "tp"
+ },
+ "platform": null,
+ "precompute_masks": true,
+ "pretraining_tp": 1,
+ "quantization_config": null,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "mrope_section": [
+ 8,
+ 12,
+ 12
+ ],
+ "rope_type": "default"
+ },
+ "rope_theta": 500000.0,
+ "scan_attention_layers": false,
+ "scan_mlp_chunk_size": 1024,
+ "scan_ring_attention": true,
+ "sequence_axis_name": "sp",
+ "sharding_axis_dims": [
+ 1,
+ -1,
+ 1,
+ 1,
+ 1
+ ],
+ "sharding_axis_names": [
+ "dp",
+ "fsdp",
+ "ep",
+ "tp",
+ "sp"
+ ],
+ "sharding_dcn_axis_dims": null,
+ "sp_is_ep_bound": true,
+ "use_cache": true,
+ "use_expert_tensor_mode": false,
+ "use_ring_of_experts": false,
+ "use_scan_mlp": false,
+ "use_sharded_kv_caching": false,
+ "use_sharding_constraint": false,
+ "vocab_size": 151552
+ },
+ "tie_word_embeddings": false,
+ "transformers_version": "4.57.3",
+ "use_expert_tensor_mode": false,
+ "use_ring_of_experts": false,
+ "use_scan_mlp": false,
+ "use_sharded_kv_caching": false,
+ "use_sharding_constraint": false,
+ "video_end_token_id": 151342,
+ "video_start_token_id": 151341,
+ "video_token_id": 151364,
+ "vision_config": {
+ "_external_rope_config_kwargs": {},
+ "architectures": [
+ "Glm4vForConditionalGeneration"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attn_mechanism": "vanilla",
+ "backend": null,
+ "bits": null,
+ "blocksize_b": 1,
+ "blocksize_k": 128,
+ "blocksize_q": 128,
+ "decode_attn_mechanism": null,
+ "depth": 24,
+ "easy_method": "train",
+ "fcm_max_ratio": 0.0,
+ "fcm_min_ratio": 0.0,
+ "flash_attention_backward_pass_impl": "triton",
+ "fsdp_is_ep_bound": true,
+ "gradient_checkpointing": "",
+ "gradient_checkpointing_targets": null,
+ "hardware_abstraction": false,
+ "hidden_act": "silu",
+ "hidden_dropout_prob": 0.0,
+ "hidden_size": 1536,
+ "image_size": 336,
+ "in_channels": 3,
+ "initializer_range": 0.02,
+ "intermediate_size": 13696,
+ "kv_cache_quantization_config": null,
+ "kv_cache_sharding_sequence_axis_name": "sp",
+ "model_type": "glm4v_vision",
+ "moe_force_xla_gmm": false,
+ "moe_method": "fused_moe",
+ "moe_tiling_size_batch": 4,
+ "moe_tiling_size_dim": 128,
+ "moe_tiling_size_seqlen": 128,
+ "num_attention_heads": 12,
+ "num_heads": 12,
+ "operation_configs": null,
+ "out_hidden_size": 4096,
+ "pallas_k_block_size": 128,
+ "pallas_m_block_size": 128,
+ "pallas_n_block_size": 128,
+ "partition_axis": {
+ "attention_dim_axis": null,
+ "attention_kv_dim_axis": null,
+ "batch_axis": [
+ "fsdp",
+ "dp"
+ ],
+ "bias_head_sequence_axis": null,
+ "bias_key_sequence_axis": null,
+ "data_parallel_axis": "dp",
+ "decode_attention_dim_axis": null,
+ "decode_attention_kv_dim_axis": null,
+ "decode_batch_axis": [
+ "fsdp",
+ "dp"
+ ],
+ "decode_head_axis": "tp",
+ "decode_key_sequence_axis": "sp",
+ "decode_kv_head_axis": "tp",
+ "decode_query_sequence_axis": null,
+ "expert_axis": "ep",
+ "expert_gate_axis": null,
+ "expert_parallel_axis": "ep",
+ "fully_sharded_data_parallel_axis": "fsdp",
+ "head_axis": "tp",
+ "hidden_state_axis": "tp",
+ "key_sequence_axis": "sp",
+ "kv_head_axis": "tp",
+ "mlp_intermediate_axis": "tp",
+ "query_sequence_axis": "sp",
+ "sequence_axis": "sp",
+ "sequence_parallel_axis": "sp",
+ "tensor_parallel_axis": "tp",
+ "vocab_axis": "tp"
+ },
+ "patch_size": 14,
+ "platform": null,
+ "precompute_masks": true,
+ "pretraining_tp": 1,
+ "quantization_config": null,
+ "rms_norm_eps": 1e-05,
+ "scan_attention_layers": false,
+ "scan_mlp_chunk_size": 1024,
+ "scan_ring_attention": true,
+ "sequence_axis_name": "sp",
+ "sharding_axis_dims": [
+ 1,
+ -1,
+ 1,
+ 1,
+ 1
+ ],
+ "sharding_axis_names": [
+ "dp",
+ "fsdp",
+ "ep",
+ "tp",
+ "sp"
+ ],
+ "sharding_dcn_axis_dims": null,
+ "sp_is_ep_bound": true,
+ "spatial_merge_size": 2,
+ "temporal_patch_size": 2,
+ "use_expert_tensor_mode": false,
+ "use_ring_of_experts": false,
+ "use_scan_mlp": false,
+ "use_sharded_kv_caching": false,
+ "use_sharding_constraint": false
+ }
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..11d215c5938942409e9278376c52c42fd595025b
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,15 @@
+{
+ "_from_model_config": true,
+ "do_sample": true,
+ "eos_token_id": [
+ 151329,
+ 151336,
+ 151338,
+ 151348
+ ],
+ "pad_token_id": 151329,
+ "temperature": 0.8,
+ "top_k": 2,
+ "top_p": 0.6,
+ "transformers_version": "4.57.3"
+}
diff --git a/model/params/model/language_model/layers/0/self_attn/k_proj/bias/0 b/model/params/model/language_model/layers/0/self_attn/k_proj/bias/0
new file mode 100644
index 0000000000000000000000000000000000000000..6219cd0bd2a359a4e7d2cc83a47625a25eff41e8
Binary files /dev/null and b/model/params/model/language_model/layers/0/self_attn/k_proj/bias/0 differ
diff --git a/model/params/model/language_model/layers/0/self_attn/o_proj/kernel/.zarray b/model/params/model/language_model/layers/0/self_attn/o_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..f74e87081615413ab6b0c5219a8edae5265883fe
--- /dev/null
+++ b/model/params/model/language_model/layers/0/self_attn/o_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[4096,1024],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/language_model/layers/1/mlp/down_proj/kernel/.zarray b/model/params/model/language_model/layers/1/mlp/down_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..6d957709d3382e500bf428e615bed7868edcd317
--- /dev/null
+++ b/model/params/model/language_model/layers/1/mlp/down_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[13696,1024],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[13696,4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/language_model/layers/1/self_attn/q_proj/bias/0 b/model/params/model/language_model/layers/1/self_attn/q_proj/bias/0
new file mode 100644
index 0000000000000000000000000000000000000000..4e1ba1db5f7c824bdb2dc493f96290f55d53e562
Binary files /dev/null and b/model/params/model/language_model/layers/1/self_attn/q_proj/bias/0 differ
diff --git a/model/params/model/language_model/layers/1/self_attn/v_proj/bias/0 b/model/params/model/language_model/layers/1/self_attn/v_proj/bias/0
new file mode 100644
index 0000000000000000000000000000000000000000..990a62417073ce6a5adf396bd5377f29997f1f1d
Binary files /dev/null and b/model/params/model/language_model/layers/1/self_attn/v_proj/bias/0 differ
diff --git a/model/params/model/language_model/layers/10/input_layernorm/kernel/.zarray b/model/params/model/language_model/layers/10/input_layernorm/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d
--- /dev/null
+++ b/model/params/model/language_model/layers/10/input_layernorm/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/language_model/layers/10/post_mlp_layernorm/kernel/.zarray b/model/params/model/language_model/layers/10/post_mlp_layernorm/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d
--- /dev/null
+++ b/model/params/model/language_model/layers/10/post_mlp_layernorm/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/language_model/layers/10/self_attn/q_proj/bias/.zarray b/model/params/model/language_model/layers/10/self_attn/q_proj/bias/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d
--- /dev/null
+++ b/model/params/model/language_model/layers/10/self_attn/q_proj/bias/.zarray
@@ -0,0 +1 @@
+{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/language_model/layers/10/self_attn/q_proj/bias/0 b/model/params/model/language_model/layers/10/self_attn/q_proj/bias/0
new file mode 100644
index 0000000000000000000000000000000000000000..5b2464db98788ef79897bb68d8710dce9f01b110
Binary files /dev/null and b/model/params/model/language_model/layers/10/self_attn/q_proj/bias/0 differ
diff --git a/model/params/model/language_model/layers/10/self_attn/q_proj/kernel/.zarray b/model/params/model/language_model/layers/10/self_attn/q_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..7ac7b3325317fe14c7a87913c61d88e3b92ab05e
--- /dev/null
+++ b/model/params/model/language_model/layers/10/self_attn/q_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1024,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/language_model/layers/10/self_attn/v_proj/bias/.zarray b/model/params/model/language_model/layers/10/self_attn/v_proj/bias/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..653c877efa83da0ab518995885715f7e8b4275d7
--- /dev/null
+++ b/model/params/model/language_model/layers/10/self_attn/v_proj/bias/.zarray
@@ -0,0 +1 @@
+{"chunks":[256],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[256],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/language_model/layers/10/self_attn/v_proj/bias/0 b/model/params/model/language_model/layers/10/self_attn/v_proj/bias/0
new file mode 100644
index 0000000000000000000000000000000000000000..28876c6dfcd3354bd7e2bfbe1de0572d9412f82a
Binary files /dev/null and b/model/params/model/language_model/layers/10/self_attn/v_proj/bias/0 differ
diff --git a/model/params/model/language_model/layers/10/self_attn/v_proj/kernel/.zarray b/model/params/model/language_model/layers/10/self_attn/v_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..65d91ab909ea33fb45716acd9b2259d1c5a0cd71
--- /dev/null
+++ b/model/params/model/language_model/layers/10/self_attn/v_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1024,256],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,256],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/language_model/layers/11/input_layernorm/kernel/.zarray b/model/params/model/language_model/layers/11/input_layernorm/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d
--- /dev/null
+++ b/model/params/model/language_model/layers/11/input_layernorm/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/language_model/layers/11/input_layernorm/kernel/0 b/model/params/model/language_model/layers/11/input_layernorm/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..65fc306b926bffc36d56b571640d9cca2773c1d6
Binary files /dev/null and b/model/params/model/language_model/layers/11/input_layernorm/kernel/0 differ
diff --git a/model/params/model/language_model/layers/11/mlp/down_proj/kernel/.zarray b/model/params/model/language_model/layers/11/mlp/down_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..6d957709d3382e500bf428e615bed7868edcd317
--- /dev/null
+++ b/model/params/model/language_model/layers/11/mlp/down_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[13696,1024],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[13696,4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/language_model/layers/11/mlp/gate_up_proj/kernel/.zarray b/model/params/model/language_model/layers/11/mlp/gate_up_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..096abde696f79cae51c28a3df025167152ce3172
--- /dev/null
+++ b/model/params/model/language_model/layers/11/mlp/gate_up_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1024,27392],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,27392],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/language_model/layers/11/post_attention_layernorm/kernel/.zarray b/model/params/model/language_model/layers/11/post_attention_layernorm/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d
--- /dev/null
+++ b/model/params/model/language_model/layers/11/post_attention_layernorm/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/language_model/layers/11/post_attention_layernorm/kernel/0 b/model/params/model/language_model/layers/11/post_attention_layernorm/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..722c2e8768405bfbf9b79515ba7f4993456913c9
Binary files /dev/null and b/model/params/model/language_model/layers/11/post_attention_layernorm/kernel/0 differ
diff --git a/model/params/model/language_model/layers/11/post_mlp_layernorm/kernel/.zarray b/model/params/model/language_model/layers/11/post_mlp_layernorm/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d
--- /dev/null
+++ b/model/params/model/language_model/layers/11/post_mlp_layernorm/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/language_model/layers/11/post_mlp_layernorm/kernel/0 b/model/params/model/language_model/layers/11/post_mlp_layernorm/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..4e221fbc50b25c58c27cf61c273ce5b8fe9fde15
Binary files /dev/null and b/model/params/model/language_model/layers/11/post_mlp_layernorm/kernel/0 differ
diff --git a/model/params/model/language_model/layers/11/post_self_attn_layernorm/kernel/.zarray b/model/params/model/language_model/layers/11/post_self_attn_layernorm/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d
--- /dev/null
+++ b/model/params/model/language_model/layers/11/post_self_attn_layernorm/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/language_model/layers/11/post_self_attn_layernorm/kernel/0 b/model/params/model/language_model/layers/11/post_self_attn_layernorm/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..51337f48bb42a12bd097f5a642b7146f5521f61d
Binary files /dev/null and b/model/params/model/language_model/layers/11/post_self_attn_layernorm/kernel/0 differ
diff --git a/model/params/model/language_model/layers/11/self_attn/k_proj/kernel/.zarray b/model/params/model/language_model/layers/11/self_attn/k_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..65d91ab909ea33fb45716acd9b2259d1c5a0cd71
--- /dev/null
+++ b/model/params/model/language_model/layers/11/self_attn/k_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1024,256],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,256],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/4/norm1/kernel/0 b/model/params/model/visual/blocks/4/norm1/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..b3e3fc8c468bca76232c5cc4a66d8e993b27fd84
Binary files /dev/null and b/model/params/model/visual/blocks/4/norm1/kernel/0 differ
diff --git a/model/params/model/visual/blocks/4/norm2/kernel/0 b/model/params/model/visual/blocks/4/norm2/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..65e30beca0b5056c292e806025578718f88cf9c8
Binary files /dev/null and b/model/params/model/visual/blocks/4/norm2/kernel/0 differ
diff --git a/model/params/model/visual/blocks/5/attn/proj/kernel/.zarray b/model/params/model/visual/blocks/5/attn/proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..c66b99465c1430f7e05ffd79381a2eca6ed4d48c
--- /dev/null
+++ b/model/params/model/visual/blocks/5/attn/proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1536,384],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,1536],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/5/attn/qkv/kernel/.zarray b/model/params/model/visual/blocks/5/attn/qkv/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..5d464d118869d3fa87097b63cf90be221aefa400
--- /dev/null
+++ b/model/params/model/visual/blocks/5/attn/qkv/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[384,4608],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4608],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/5/mlp/gate_proj/kernel/.zarray b/model/params/model/visual/blocks/5/mlp/gate_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..5c3c9a990f8bbcfcaca9f0bf04c88698b23248f0
--- /dev/null
+++ b/model/params/model/visual/blocks/5/mlp/gate_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[384,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/5/mlp/up_proj/kernel/.zarray b/model/params/model/visual/blocks/5/mlp/up_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..5c3c9a990f8bbcfcaca9f0bf04c88698b23248f0
--- /dev/null
+++ b/model/params/model/visual/blocks/5/mlp/up_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[384,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/5/norm1/kernel/0 b/model/params/model/visual/blocks/5/norm1/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..3817e4632b52fa1d78521939f52e56e05aae1497
Binary files /dev/null and b/model/params/model/visual/blocks/5/norm1/kernel/0 differ
diff --git a/model/params/model/visual/blocks/5/norm2/kernel/0 b/model/params/model/visual/blocks/5/norm2/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..bb76b124b1022226d7b46e32f5d8e2214d30365a
Binary files /dev/null and b/model/params/model/visual/blocks/5/norm2/kernel/0 differ
diff --git a/model/params/model/visual/blocks/6/attn/proj/kernel/.zarray b/model/params/model/visual/blocks/6/attn/proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..c66b99465c1430f7e05ffd79381a2eca6ed4d48c
--- /dev/null
+++ b/model/params/model/visual/blocks/6/attn/proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1536,384],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,1536],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/6/mlp/down_proj/kernel/.zarray b/model/params/model/visual/blocks/6/mlp/down_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..0c76e816644f867d0b1cee8aed4278aea5096ac1
--- /dev/null
+++ b/model/params/model/visual/blocks/6/mlp/down_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[4096,384],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,1536],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/6/mlp/gate_proj/kernel/.zarray b/model/params/model/visual/blocks/6/mlp/gate_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..5c3c9a990f8bbcfcaca9f0bf04c88698b23248f0
--- /dev/null
+++ b/model/params/model/visual/blocks/6/mlp/gate_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[384,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/6/mlp/up_proj/kernel/.zarray b/model/params/model/visual/blocks/6/mlp/up_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..5c3c9a990f8bbcfcaca9f0bf04c88698b23248f0
--- /dev/null
+++ b/model/params/model/visual/blocks/6/mlp/up_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[384,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/6/norm1/kernel/.zarray b/model/params/model/visual/blocks/6/norm1/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8
--- /dev/null
+++ b/model/params/model/visual/blocks/6/norm1/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/6/norm2/kernel/0 b/model/params/model/visual/blocks/6/norm2/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..f6631d32b0de4b855b169c5beec685d80a4c3444
Binary files /dev/null and b/model/params/model/visual/blocks/6/norm2/kernel/0 differ
diff --git a/model/params/model/visual/blocks/7/attn/qkv/kernel/.zarray b/model/params/model/visual/blocks/7/attn/qkv/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..5d464d118869d3fa87097b63cf90be221aefa400
--- /dev/null
+++ b/model/params/model/visual/blocks/7/attn/qkv/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[384,4608],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4608],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/7/norm1/kernel/.zarray b/model/params/model/visual/blocks/7/norm1/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8
--- /dev/null
+++ b/model/params/model/visual/blocks/7/norm1/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/7/norm1/kernel/0 b/model/params/model/visual/blocks/7/norm1/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..380f86e90c9ef23ea4fcd10f5e2c49d53f811ac2
Binary files /dev/null and b/model/params/model/visual/blocks/7/norm1/kernel/0 differ
diff --git a/model/params/model/visual/blocks/7/norm2/kernel/0 b/model/params/model/visual/blocks/7/norm2/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..dbbfc32aeb353970ce44ca9c562a93824742f2b5
Binary files /dev/null and b/model/params/model/visual/blocks/7/norm2/kernel/0 differ
diff --git a/model/params/model/visual/blocks/8/attn/qkv/kernel/.zarray b/model/params/model/visual/blocks/8/attn/qkv/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..5d464d118869d3fa87097b63cf90be221aefa400
--- /dev/null
+++ b/model/params/model/visual/blocks/8/attn/qkv/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[384,4608],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4608],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/8/mlp/down_proj/kernel/.zarray b/model/params/model/visual/blocks/8/mlp/down_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..0c76e816644f867d0b1cee8aed4278aea5096ac1
--- /dev/null
+++ b/model/params/model/visual/blocks/8/mlp/down_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[4096,384],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,1536],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/8/norm1/kernel/0 b/model/params/model/visual/blocks/8/norm1/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..549e67d2d11835c94ca9542f75f1d8524621a481
Binary files /dev/null and b/model/params/model/visual/blocks/8/norm1/kernel/0 differ
diff --git a/model/params/model/visual/blocks/8/norm2/kernel/.zarray b/model/params/model/visual/blocks/8/norm2/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8
--- /dev/null
+++ b/model/params/model/visual/blocks/8/norm2/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/9/mlp/up_proj/kernel/.zarray b/model/params/model/visual/blocks/9/mlp/up_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..5c3c9a990f8bbcfcaca9f0bf04c88698b23248f0
--- /dev/null
+++ b/model/params/model/visual/blocks/9/mlp/up_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[384,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536,4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/9/norm1/kernel/.zarray b/model/params/model/visual/blocks/9/norm1/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8
--- /dev/null
+++ b/model/params/model/visual/blocks/9/norm1/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/9/norm1/kernel/0 b/model/params/model/visual/blocks/9/norm1/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..98e10faff5a8ba9664910b74c175bb095b94ee83
Binary files /dev/null and b/model/params/model/visual/blocks/9/norm1/kernel/0 differ
diff --git a/model/params/model/visual/blocks/9/norm2/kernel/.zarray b/model/params/model/visual/blocks/9/norm2/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8
--- /dev/null
+++ b/model/params/model/visual/blocks/9/norm2/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/blocks/9/norm2/kernel/0 b/model/params/model/visual/blocks/9/norm2/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..caf77ec52e00c9ea7b6e1cd4d94d4c26b8434965
Binary files /dev/null and b/model/params/model/visual/blocks/9/norm2/kernel/0 differ
diff --git a/model/params/model/visual/downsample/bias/.zarray b/model/params/model/visual/downsample/bias/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d
--- /dev/null
+++ b/model/params/model/visual/downsample/bias/.zarray
@@ -0,0 +1 @@
+{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/downsample/bias/0 b/model/params/model/visual/downsample/bias/0
new file mode 100644
index 0000000000000000000000000000000000000000..eceb5de47fb366acafe671c8da93cff95d1a9ef9
Binary files /dev/null and b/model/params/model/visual/downsample/bias/0 differ
diff --git a/model/params/model/visual/downsample/kernel/.zarray b/model/params/model/visual/downsample/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..aa99caf9770b72231e58a7ad64f2bf4f0bc0fb1a
--- /dev/null
+++ b/model/params/model/visual/downsample/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[2,2,1536,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[2,2,1536,4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/merger/down_proj/kernel/.zarray b/model/params/model/visual/merger/down_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..6d957709d3382e500bf428e615bed7868edcd317
--- /dev/null
+++ b/model/params/model/visual/merger/down_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[13696,1024],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[13696,4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/merger/gate_proj/kernel/.zarray b/model/params/model/visual/merger/gate_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..cd759573b03445afa42a8c128ec67759636ad66d
--- /dev/null
+++ b/model/params/model/visual/merger/gate_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1024,13696],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,13696],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/merger/norm/bias/.zarray b/model/params/model/visual/merger/norm/bias/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d
--- /dev/null
+++ b/model/params/model/visual/merger/norm/bias/.zarray
@@ -0,0 +1 @@
+{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/merger/norm/bias/0 b/model/params/model/visual/merger/norm/bias/0
new file mode 100644
index 0000000000000000000000000000000000000000..f1fe50c169997afab62b7754d41a85dbbe717dc1
Binary files /dev/null and b/model/params/model/visual/merger/norm/bias/0 differ
diff --git a/model/params/model/visual/merger/norm/scale/.zarray b/model/params/model/visual/merger/norm/scale/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..6cb36eccfd612cd6d39e72807aaa8527aa28075d
--- /dev/null
+++ b/model/params/model/visual/merger/norm/scale/.zarray
@@ -0,0 +1 @@
+{"chunks":[4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/merger/proj/kernel/.zarray b/model/params/model/visual/merger/proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..7ac7b3325317fe14c7a87913c61d88e3b92ab05e
--- /dev/null
+++ b/model/params/model/visual/merger/proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1024,4096],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,4096],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/merger/up_proj/kernel/.zarray b/model/params/model/visual/merger/up_proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..cd759573b03445afa42a8c128ec67759636ad66d
--- /dev/null
+++ b/model/params/model/visual/merger/up_proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1024,13696],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[4096,13696],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/patch_embed/proj/bias/.zarray b/model/params/model/visual/patch_embed/proj/bias/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8
--- /dev/null
+++ b/model/params/model/visual/patch_embed/proj/bias/.zarray
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/patch_embed/proj/bias/0 b/model/params/model/visual/patch_embed/proj/bias/0
new file mode 100644
index 0000000000000000000000000000000000000000..09e071c870e600e51e8db805d7641a83cc318bd0
Binary files /dev/null and b/model/params/model/visual/patch_embed/proj/bias/0 differ
diff --git a/model/params/model/visual/patch_embed/proj/kernel/.zarray b/model/params/model/visual/patch_embed/proj/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..9a20defd09ef9eb0e47667fe1d6ca61147f16286
--- /dev/null
+++ b/model/params/model/visual/patch_embed/proj/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[2,14,14,3,1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[2,14,14,3,1536],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/pos_embed/embedding/.zarray b/model/params/model/visual/pos_embed/embedding/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..ba4a9a706aa77c7b576e84bda43a0717e745f027
--- /dev/null
+++ b/model/params/model/visual/pos_embed/embedding/.zarray
@@ -0,0 +1 @@
+{"chunks":[144,1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[576,1536],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/post_conv_layernorm/kernel/.zarray b/model/params/model/visual/post_conv_layernorm/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8
--- /dev/null
+++ b/model/params/model/visual/post_conv_layernorm/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/post_conv_layernorm/kernel/0 b/model/params/model/visual/post_conv_layernorm/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..59eec04ae6f74bb68d8e4edf5d3074562cbd8370
Binary files /dev/null and b/model/params/model/visual/post_conv_layernorm/kernel/0 differ
diff --git a/model/params/model/visual/post_layernorm/kernel/.zarray b/model/params/model/visual/post_layernorm/kernel/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..13570c20a475d7a4f35f655350cf560708114bb8
--- /dev/null
+++ b/model/params/model/visual/post_layernorm/kernel/.zarray
@@ -0,0 +1 @@
+{"chunks":[1536],"compressor":{"id":"zstd","level":1},"dimension_separator":".","dtype":"bfloat16","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
\ No newline at end of file
diff --git a/model/params/model/visual/post_layernorm/kernel/0 b/model/params/model/visual/post_layernorm/kernel/0
new file mode 100644
index 0000000000000000000000000000000000000000..ef5ecb3ff335f5c59c4b2d5ef1c6978fd6b478b7
Binary files /dev/null and b/model/params/model/visual/post_layernorm/kernel/0 differ
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..c1efb0ea7f62e3bcdd84b47acc6b0c8c195d0e93
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,42 @@
+{
+ "additional_special_tokens": [
+ "<|endoftext|>",
+ "[MASK]",
+ "[gMASK]",
+ "[sMASK]",
+ "",
+ "",
+ "<|system|>",
+ "<|user|>",
+ "<|assistant|>",
+ "<|observation|>",
+ "<|begin_of_image|>",
+ "<|end_of_image|>",
+ "<|begin_of_video|>",
+ "<|end_of_video|>",
+ "<|begin_of_audio|>",
+ "<|end_of_audio|>",
+ "<|image|>",
+ "<|video|>",
+ "<|begin_of_transcription|>",
+ "<|end_of_transcription|>",
+ "<|code_prefix|>",
+ "<|code_middle|>",
+ "<|code_suffix|>",
+ "/nothink"
+ ],
+ "eos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tensorstore_index.json b/tensorstore_index.json
new file mode 100644
index 0000000000000000000000000000000000000000..a910231f0e89e4ddf07dc5c8ea027469d0985f84
--- /dev/null
+++ b/tensorstore_index.json
@@ -0,0 +1,5310 @@
+{
+ "format": "tensorstore",
+ "version": "easydel",
+ "prefixes": {
+ "model": [
+ {
+ "path": "model/params/model/language_model/embed_tokens/embedding",
+ "shape": [
+ 151552,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/0/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/0/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/0/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/0/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/0/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/0/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/0/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/0/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/0/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/0/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/0/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/0/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/0/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/1/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/1/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/1/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/1/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/1/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/1/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/1/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/1/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/1/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/1/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/1/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/1/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/1/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/2/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/2/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/2/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/2/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/2/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/2/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/2/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/2/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/2/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/2/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/2/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/2/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/2/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/3/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/3/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/3/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/3/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/3/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/3/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/3/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/3/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/3/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/3/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/3/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/3/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/3/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/4/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/4/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/4/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/4/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/4/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/4/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/4/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/4/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/4/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/4/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/4/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/4/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/4/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/5/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/5/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/5/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/5/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/5/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/5/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/5/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/5/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/5/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/5/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/5/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/5/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/5/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/6/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/6/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/6/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/6/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/6/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/6/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/6/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/6/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/6/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/6/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/6/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/6/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/6/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/7/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/7/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/7/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/7/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/7/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/7/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/7/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/7/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/7/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/7/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/7/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/7/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/7/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/8/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/8/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/8/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/8/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/8/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/8/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/8/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/8/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/8/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/8/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/8/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/8/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/8/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/9/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/9/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/9/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/9/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/9/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/9/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/9/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/9/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/9/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/9/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/9/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/9/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/9/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/10/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/10/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/10/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/10/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/10/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/10/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/10/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/10/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/10/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/10/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/10/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/10/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/10/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/11/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/11/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/11/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/11/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/11/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/11/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/11/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/11/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/11/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/11/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/11/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/11/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/11/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/12/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/12/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/12/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/12/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/12/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/12/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/12/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/12/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/12/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/12/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/12/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/12/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/12/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/13/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/13/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/13/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/13/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/13/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/13/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/13/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/13/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/13/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/13/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/13/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/13/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/13/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/14/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/14/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/14/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/14/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/14/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/14/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/14/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/14/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/14/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/14/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/14/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/14/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/14/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/15/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/15/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/15/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/15/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/15/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/15/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/15/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/15/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/15/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/15/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/15/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/15/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/15/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/16/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/16/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/16/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/16/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/16/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/16/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/16/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/16/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/16/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/16/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/16/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/16/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/16/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/17/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/17/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/17/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/17/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/17/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/17/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/17/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/17/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/17/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/17/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/17/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/17/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/17/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/18/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/18/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/18/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/18/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/18/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/18/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/18/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/18/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/18/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/18/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/18/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/18/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/18/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/19/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/19/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/19/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/19/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/19/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/19/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/19/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/19/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/19/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/19/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/19/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/19/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/19/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/20/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/20/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/20/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/20/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/20/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/20/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/20/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/20/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/20/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/20/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/20/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/20/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/20/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/21/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/21/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/21/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/21/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/21/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/21/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/21/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/21/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/21/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/21/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/21/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/21/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/21/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/22/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/22/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/22/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/22/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/22/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/22/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/22/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/22/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/22/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/22/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/22/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/22/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/22/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/23/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/23/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/23/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/23/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/23/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/23/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/23/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/23/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/23/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/23/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/23/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/23/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/23/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/24/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/24/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/24/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/24/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/24/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/24/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/24/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/24/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/24/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/24/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/24/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/24/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/24/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/25/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/25/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/25/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/25/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/25/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/25/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/25/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/25/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/25/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/25/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/25/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/25/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/25/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/26/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/26/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/26/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/26/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/26/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/26/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/26/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/26/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/26/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/26/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/26/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/26/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/26/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/27/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/27/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/27/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/27/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/27/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/27/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/27/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/27/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/27/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/27/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/27/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/27/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/27/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/28/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/28/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/28/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/28/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/28/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/28/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/28/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/28/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/28/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/28/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/28/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/28/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/28/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/29/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/29/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/29/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/29/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/29/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/29/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/29/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/29/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/29/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/29/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/29/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/29/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/29/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/30/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/30/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/30/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/30/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/30/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/30/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/30/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/30/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/30/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/30/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/30/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/30/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/30/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/31/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/31/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/31/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/31/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/31/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/31/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/31/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/31/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/31/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/31/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/31/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/31/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/31/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/32/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/32/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/32/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/32/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/32/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/32/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/32/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/32/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/32/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/32/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/32/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/32/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/32/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/33/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/33/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/33/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/33/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/33/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/33/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/33/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/33/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/33/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/33/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/33/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/33/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/33/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/34/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/34/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/34/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/34/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/34/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/34/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/34/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/34/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/34/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/34/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/34/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/34/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/34/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/35/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/35/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/35/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/35/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/35/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/35/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/35/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/35/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/35/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/35/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/35/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/35/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/35/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/lm_head/kernel",
+ "shape": [
+ 4096,
+ 151552
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/36/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/36/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/36/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/36/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/36/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/36/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/36/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/36/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/36/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/36/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/36/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/36/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/36/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/37/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/37/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/37/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/37/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/37/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/37/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/37/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/37/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/37/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/37/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/37/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/37/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/37/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/38/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/38/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/38/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/38/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/38/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/38/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/38/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/38/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/38/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/38/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/38/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/38/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/38/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/39/input_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/39/mlp/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/39/mlp/gate_up_proj/kernel",
+ "shape": [
+ 4096,
+ 27392
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/39/post_attention_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/39/post_mlp_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/39/post_self_attn_layernorm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/39/self_attn/k_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/39/self_attn/k_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/39/self_attn/o_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/39/self_attn/q_proj/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/39/self_attn/q_proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/39/self_attn/v_proj/bias",
+ "shape": [
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/layers/39/self_attn/v_proj/kernel",
+ "shape": [
+ 4096,
+ 256
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/language_model/norm/kernel",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/0/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/0/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/0/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/0/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/0/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/0/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/0/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/1/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/1/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/1/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/1/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/1/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/1/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/1/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/10/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/10/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/10/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/10/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/10/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/10/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/10/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/11/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/11/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/11/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/11/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/11/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/11/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/11/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/12/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/12/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/12/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/12/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/12/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/12/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/12/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/13/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/13/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/13/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/13/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/13/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/13/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/13/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/14/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/14/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/14/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/14/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/14/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/14/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/14/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/15/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/15/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/15/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/15/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/15/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/15/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/15/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/16/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/16/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/16/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/16/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/16/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/16/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/16/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/17/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/17/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/17/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/17/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/17/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/17/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/17/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/18/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/18/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/18/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/18/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/18/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/18/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/18/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/19/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/19/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/19/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/19/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/19/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/19/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/19/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/2/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/2/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/2/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/2/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/2/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/2/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/2/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/20/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/20/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/20/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/20/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/20/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/20/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/20/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/21/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/21/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/21/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/21/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/21/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/21/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/21/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/22/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/22/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/22/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/22/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/22/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/22/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/22/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/23/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/23/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/23/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/23/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/23/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/23/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/23/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/3/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/3/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/3/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/3/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/3/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/3/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/3/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/4/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/4/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/4/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/4/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/4/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/4/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/4/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/5/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/5/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/5/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/5/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/5/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/5/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/5/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/6/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/6/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/6/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/6/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/6/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/6/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/6/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/7/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/7/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/7/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/7/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/7/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/7/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/7/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/8/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/8/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/8/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/8/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/8/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/8/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/8/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/9/attn/proj/kernel",
+ "shape": [
+ 1536,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/9/attn/qkv/kernel",
+ "shape": [
+ 1536,
+ 4608
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/9/mlp/down_proj/kernel",
+ "shape": [
+ 4096,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/9/mlp/gate_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/9/mlp/up_proj/kernel",
+ "shape": [
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/9/norm1/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/blocks/9/norm2/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/downsample/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/downsample/kernel",
+ "shape": [
+ 2,
+ 2,
+ 1536,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/pos_embed/embedding",
+ "shape": [
+ 576,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/merger/down_proj/kernel",
+ "shape": [
+ 13696,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/merger/gate_proj/kernel",
+ "shape": [
+ 4096,
+ 13696
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/merger/norm/bias",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/merger/norm/scale",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/merger/proj/kernel",
+ "shape": [
+ 4096,
+ 4096
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/merger/up_proj/kernel",
+ "shape": [
+ 4096,
+ 13696
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/patch_embed/proj/bias",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/patch_embed/proj/kernel",
+ "shape": [
+ 2,
+ 14,
+ 14,
+ 3,
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/post_conv_layernorm/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ },
+ {
+ "path": "model/params/model/visual/post_layernorm/kernel",
+ "shape": [
+ 1536
+ ],
+ "dtype": "bfloat16"
+ }
+ ]
+ }
+}
\ No newline at end of file