diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..40f7af3996c29b3c82feca30f2724698b6d74216 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint-36/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..00526be7c37fd74f93c3aba3e61c5be580151ce2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,129 @@
+---
+---
+
+[
](https://github.com/axolotl-ai-cloud/axolotl)
+See axolotl config
+
+axolotl version: `0.9.2`
+```yaml
+base_model: /capstor/scratch/cscs/bbernath/models/meditron-70B
+chat_template: llama3
+bfloat16: true
+output_dir: /capstor/store/cscs/swissai/a06/meditron/models/meditron_CHUV_2 #/capstor/scratch/cscs/bbernath/models/meditron_CHUV
+dataset_prepared_path: /capstor/scratch/cscs/bbernath/dataset/
+# - path: /capstor/store/cscs/swissai/a06/meditron/datasets/masked/special_mixture/instruction_tuning_mixture.jsonl
+# type: chat_template
+# ds_type: json
+# split: train
+# field_messages: conversations
+# message_field_role: from
+# message_field_content: value
+#pretraining_dataset:
+# - path: json
+# data_files:
+# - /capstor/store/cscs/swissai/a06/meditron/datasets/pretrain/pubmed/pubmed_3B.jsonl
+# - /capstor/store/cscs/swissai/a06/meditron/datasets/pretrain/fineweb/fineweb_400M_anglais.jsonl
+# type: pretrain
+datasets:
+ - path: /capstor/store/cscs/swissai/a06/meditron/datasets/masked/gemini/moove_gemini_2.jsonl
+ type: chat_template
+ ds_type: json
+ split: train
+ field_messages: conversations
+ message_field_role: from
+ message_field_content: value
+
+shuffle_merged_datasets: true
+dataset_processes: 128
+# max_steps: 1500
+flash_attention: true
+sequence_len: 8192
+gradient_accumulation_steps: 1
+micro_batch_size: 1
+train_on_inputs: false
+group_by_length: false
+pad_to_sequence_len: true
+sample_packing: true
+optimizer: adamw_torch
+optim_args:
+ fused: true
+cosine_min_lr_ratio: 0.1
+learning_rate: 5.0e-6
+warmup_ratio: 0
+weight_decay: 0.05
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+ use_reentrant: false
+load_in_4bit: false
+load_in_8bit: false
+num_epochs: 1
+saves_per_epoch: 1
+# evals_per_epoch: 1
+eval_set_size: 0.0
+eval_table_size: null
+lr_scheduler: cosine
+max_grad_norm: 1.0
+resume_from_checkpoint: null
+special_tokens:
+ pad_token: <|end_of_text|>
+tf32: false
+tokenizer_type: AutoTokenizer
+type: LlamaForCausalLM
+flash_attn_rms_norm: true
+flash_attn_fuse_qkv: false
+early_stopping_patience: 0
+wandb_entity: alexs-team
+wandb_name: meditron-CHUV-llama-gemini
+wandb_project: Meditron DDX
+wandb_watch: gradients
+xformers_attention: null
+logging_steps: 1
+deepspeed: /capstor/users/cscs/bbernath/meditron/axolotl_config/deepspeed_new.json
+
+```
+
+
+
+# /capstor/store/cscs/swissai/a06/meditron/models/meditron_CHUV_2
+
+This model was trained from scratch on the /capstor/store/cscs/swissai/a06/meditron/datasets/masked/gemini/moove_gemini_2.jsonl dataset.
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 5e-06
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 32
+- total_train_batch_size: 32
+- total_eval_batch_size: 32
+- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=fused=True
+- lr_scheduler_type: cosine
+- num_epochs: 1.0
+
+### Training results
+
+
+
+### Framework versions
+
+- Transformers 4.51.3
+- Pytorch 2.7.0a0+79aa17489c.nv25.04
+- Datasets 3.6.0
+- Tokenizers 0.21.1
diff --git a/checkpoint-36/config.json b/checkpoint-36/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4946add7353eabbc5791c753f11b44b5be1f89b9
--- /dev/null
+++ b/checkpoint-36/config.json
@@ -0,0 +1,35 @@
+{
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "eos_token_id": 128009,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 8192,
+ "initializer_range": 0.02,
+ "intermediate_size": 28672,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 64,
+ "num_hidden_layers": 80,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.51.3",
+ "use_cache": false,
+ "vocab_size": 128256
+}
diff --git a/checkpoint-36/generation_config.json b/checkpoint-36/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cd28e19ed02358fd2089710dcd138907439c525b
--- /dev/null
+++ b/checkpoint-36/generation_config.json
@@ -0,0 +1,8 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 128000,
+ "do_sample": true,
+ "eos_token_id": 128009,
+ "transformers_version": "4.51.3",
+ "use_cache": false
+}
diff --git a/checkpoint-36/global_step36/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-36/global_step36/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab20d02180d011251d89632bf7530fb356153d81
--- /dev/null
+++ b/checkpoint-36/global_step36/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7ae6d478a48a04b829bdf309b2109104cd87bb05761d2569f66ed4d16d09c9f
+size 26457647899
diff --git a/checkpoint-36/global_step36/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/checkpoint-36/global_step36/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a7d077968ccbd79a559c967d706480335150f4ab
--- /dev/null
+++ b/checkpoint-36/global_step36/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6fa76a3fd02bc15997d4f6e41df80985373c355d81bbe9a750d3d512d98c023
+size 26457647920
diff --git a/checkpoint-36/global_step36/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/checkpoint-36/global_step36/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1f98566c4efa4fea040a49d4bb8308e05d0cd828
--- /dev/null
+++ b/checkpoint-36/global_step36/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6524c751c24ec6f0fa0b2306a3aed370117e5d77b4a938df76112b6f9833a6b3
+size 26457647920
diff --git a/checkpoint-36/global_step36/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt b/checkpoint-36/global_step36/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6d63babb7154f9cae5416197c0aa5184854e9426
--- /dev/null
+++ b/checkpoint-36/global_step36/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed9e62f3fd69513940ce85849ed74cfdbca35f69a567d8cfc12d93da1be64286
+size 26457647920
diff --git a/checkpoint-36/global_step36/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt b/checkpoint-36/global_step36/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1961d71ee03701b3654e2b3ed2f0d6cdc86e3010
--- /dev/null
+++ b/checkpoint-36/global_step36/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8e3639691f08a961e26f5bd61d5b51be5170a0340372fe35816e6d6f48522ca
+size 26457647920
diff --git a/checkpoint-36/global_step36/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt b/checkpoint-36/global_step36/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e8d37a7d185e2a2d45d0b55a0daa3ca1411b2af
--- /dev/null
+++ b/checkpoint-36/global_step36/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80827d2a7cd25ac5bf8b86b998115b1b1c10f4689e4665bec03ca4c009acc504
+size 26457647920
diff --git a/checkpoint-36/global_step36/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/checkpoint-36/global_step36/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0888e7bb7d0af9c2e5d8178522d14e29020cfd3e
--- /dev/null
+++ b/checkpoint-36/global_step36/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38f353a8c4c4143db58a72e2cbce430c5a4840d7809cf44a603bd32ac0bf0a34
+size 26457647899
diff --git a/checkpoint-36/global_step36/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_0_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f9406c1451049457b9ea6730d1e42b4dc32df02
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_0_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bae1f93932e95493b7252837c1d4637439f0a6b8671f2a7aab6451251e24ee53
+size 368825
diff --git a/checkpoint-36/global_step36/zero_pp_rank_10_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_10_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7887e47791f76494616236081fb53b24472bbcbb
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_10_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a792d9f6f9186ae94097075f6cb897da42e5b4fc765000bded4cc4598cc4a153
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_11_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_11_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..537e692e95a1a8c6aee42e0212657f2ee45019b5
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_11_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1453c1d7975756e71e6d9bb93e0b047f1f9c5096ed28bd65bde30ab7912359db
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_12_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_12_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1fcdb3668d5954d34a0640d208307feab09490ee
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_12_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d85def5a04040f8ab38dfaea748ab029e7a19a9eda262e971a85c2f7de8fcf88
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_13_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_13_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..33cd38e1c15c5fb08a95a31235e2e21b866c083a
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_13_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1302ef54cf0bf94ceceaceceb1efedfe340ccbf3a0d91435ebf35a40a4689c42
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_14_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_14_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..166d2e499ac609bf1b5f9aff34ffa7aeca080277
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_14_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b86c96aed22c2500ccb847daac820ee04f72845e6cc13a8ef16976283cde1cd5
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_15_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_15_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c0e4789e80c08ee14a4db6962362853465514c15
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_15_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3dab2319c9e13ceccd16423a62efe56ad37f283c05adbb87846797b8c8264e6
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_16_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_16_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3cb4ee03aec29bf143f865a1b824139f6f324e6d
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_16_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d41103c9aedc28774a8a107f847709995c2e08aaca62fc927c6f786d9b8ccf2d
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_17_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_17_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..494f886b97f2accc450cc318226eee7a086d552d
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_17_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8933b76d538e3e20119faa4027be5064598ce3507d17b17ba48217ce7d2b5373
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_18_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_18_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bf57f237d31d79ae3da8b1f5fbe949270e287d57
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_18_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:319b3931bec6cf49afb98ce47bff0e0cf6f085b3fd8335372a14dbaf3d493a50
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_19_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_19_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..36d12b0f3c99dd704938648dd58e113c913c4801
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_19_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a44ddafab92a91dff54b730a33eca4f77ae283995d0a92c6dbc808a31a36857c
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_1_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_1_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..525b1b948306a2be9968c4aaae02c60bab7c2a6f
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_1_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf534431d2fbef15f117900c9e5c0e75db186219ac42541cb90dee5e0b7725ca
+size 368825
diff --git a/checkpoint-36/global_step36/zero_pp_rank_20_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_20_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fa64811aaed7690a016ae69eb977805a07cd3c16
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_20_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f380e3fdf643afeb5fc621e57f723ef594e316d7c5f703f71f4cf2dab945ef2b
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_21_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_21_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..43111c180a4ed3edd190d33e6dbf38231c5d10fd
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_21_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d69de6226909b4ebff72bb00798f1774edf1a8c723d2692f9fa6d9b0d05d416
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_22_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_22_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ddf42315d310176ad61c1799e1606d4cbe976f1f
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_22_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84f7e37e167b1f88de8cb6c7e2e6d275e1943ed99d8d4386a848114aabc97713
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_23_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_23_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8de4c114b22ef94bde388adf5fa384a1b458e6b1
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_23_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b65d408a5e14cee02057b858cca39662aa636ea4d2b20228913ea327e81aca34
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_24_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_24_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2ea3670bd0c679e96b1b3b9d1a3e0f4f6c0e2d38
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_24_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e71363f973b0416507bf87624cc8184e813d183f50d7481de1360d82b33c96f
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_25_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_25_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6b40e212c7907abadf46bdfc12f9d3d495523c13
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_25_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cdf3a013fb60e4a4c62ed3a8cf119d34cce70beab16feb38d3c3addfe228a3d
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_26_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_26_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d789353a5f7ecb40c2b809ff75fc1d9e071edfdd
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_26_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67ff0837d13ae22f4ef8e9e29a4135288dc04141df4558e8d785369b7d9d37f6
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_27_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_27_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..713efb3706889311bcd46395b52d006b3ca12164
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_27_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a38dd63881612bf78a6f35916bbe359eb4a286f60622dbe3ab0f1c74a4b6078
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_28_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_28_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a8924a81e93246d743f1d3af1caab6a78944a699
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_28_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d45b0b837f47572a7ea95937caddaae71866b1c29a225ba150191a9dcd6a0960
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_29_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_29_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..229071d5904355089775db529e922d241563c8e8
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_29_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ba15c637a97b79a7d20b322a57fed67203c8fad3064f248dc630711feef8bdd
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_2_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_2_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3923cbe8cc254b26cbae39f462488f8f5f8966c7
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_2_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddbb8400389879accf364d0361a8e87fac639509ab92a294470da6c3bf92df73
+size 368825
diff --git a/checkpoint-36/global_step36/zero_pp_rank_30_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_30_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cecb57104672e3a3fa673ebab4c53d40bdec29bb
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_30_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f94d9490207baafdd2240f6facf5bf4cee05eff2bbc4c07eef9d4199fa7a962c
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_31_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_31_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f57c3bd2ed8f80a62b7bccfecf36e64980737615
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_31_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c15b47cd9758433b93bf043bea52912f0194b5387e57c21e236f92f26f48350e
+size 369553
diff --git a/checkpoint-36/global_step36/zero_pp_rank_3_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_3_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b57a866aa8068d52ae9fc30c0469a9f491f15a4f
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_3_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:014a2aba358672f2d0c456198e41691d89aa1f79fc0e27a31205d6c7b698c824
+size 368825
diff --git a/checkpoint-36/global_step36/zero_pp_rank_4_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_4_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..46dd291cb01bd4809156e39bb2dbdf95a7882b6c
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_4_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5408b704557aacbf43c9caf0bf84765c342b337f7e7072276f593e5cb64190ac
+size 368825
diff --git a/checkpoint-36/global_step36/zero_pp_rank_5_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_5_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..62b26dce6ce6d7e193bac0fcf28a61a4f9f283be
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_5_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:376881b210f89c1a19d106b0673f057fb272d12dcbbcee4c41fb3bf20bd8391d
+size 368825
diff --git a/checkpoint-36/global_step36/zero_pp_rank_6_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_6_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2cc630bff2a43518bd406e981e5178910b9e0442
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_6_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a75f2d192b3f128caaedd747e10b0024a0af50dfe5155c4402016357702e925
+size 368825
diff --git a/checkpoint-36/global_step36/zero_pp_rank_7_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_7_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0c8beff411cf3ac49840bbcb3eac70148f390cc7
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_7_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da265d76fa82e5eda92c28d4914332a1f0eaeb13540bd5312d805b00728326cf
+size 368825
diff --git a/checkpoint-36/global_step36/zero_pp_rank_8_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_8_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0af1bf8e6e55962c965d83365baed7a1596961e1
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_8_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a00e4dea0649853fe5168359203ad013f85f3b740a31136ca595fed5d137f9af
+size 368825
diff --git a/checkpoint-36/global_step36/zero_pp_rank_9_mp_rank_00_model_states.pt b/checkpoint-36/global_step36/zero_pp_rank_9_mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aab25707d15d6d8eb0832c4a140f4d03b29b96b2
--- /dev/null
+++ b/checkpoint-36/global_step36/zero_pp_rank_9_mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc83fec43838c8cf0785e8cff37959b7eadeab0eaecccbb294bfd0a24054cfaa
+size 368825
diff --git a/checkpoint-36/latest b/checkpoint-36/latest
new file mode 100644
index 0000000000000000000000000000000000000000..377336ce371ac0248aa71731610cacc22e84b848
--- /dev/null
+++ b/checkpoint-36/latest
@@ -0,0 +1 @@
+global_step36
\ No newline at end of file
diff --git a/checkpoint-36/model-00001-of-00030.safetensors b/checkpoint-36/model-00001-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..033593d292ed2fcef62eb2e37f36218b92eac53d
--- /dev/null
+++ b/checkpoint-36/model-00001-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4cabb5d59084cfa0516e1ee9733929ab22844b6f28be8133e20a098dd1d40e1
+size 4584408808
diff --git a/checkpoint-36/model-00002-of-00030.safetensors b/checkpoint-36/model-00002-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..446e42a65a5f73bc5529b2a8c690902e576df3a0
--- /dev/null
+++ b/checkpoint-36/model-00002-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ddc59c1cc80f904fc1987d24f0c421df9eefaf8074e787b0ee14b67b90a4f8b
+size 4664167376
diff --git a/checkpoint-36/model-00003-of-00030.safetensors b/checkpoint-36/model-00003-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..71559b7831aad3c8b70f8c0824c5814a3d841844
--- /dev/null
+++ b/checkpoint-36/model-00003-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22513694fe0f63a9a036561351b57c0cfae48fc00cf36c12b662649ce8cb078e
+size 4999711704
diff --git a/checkpoint-36/model-00004-of-00030.safetensors b/checkpoint-36/model-00004-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bb62ce95c63f77b715a9ff87eef0e9dfc1ceee43
--- /dev/null
+++ b/checkpoint-36/model-00004-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:691e9246e1d9092c75352c97f0eccb8dd02639553042f0f5a51db2531b6c74f3
+size 4966157032
diff --git a/checkpoint-36/model-00005-of-00030.safetensors b/checkpoint-36/model-00005-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9f842617523301d625fcc209775b9dc4ad793dc4
--- /dev/null
+++ b/checkpoint-36/model-00005-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1e5a69ab2e16687429773b4b3f681852518869dbbcec8e61caa012064cf0f0a
+size 4664134408
diff --git a/checkpoint-36/model-00006-of-00030.safetensors b/checkpoint-36/model-00006-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8984b19cd0deb7d687ca52345399b891a8fa8cea
--- /dev/null
+++ b/checkpoint-36/model-00006-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e3a1b59792281c7d8ae475ce26bf81cf057c68b243cb62946c97a07dc4594ee
+size 4664167408
diff --git a/checkpoint-36/model-00007-of-00030.safetensors b/checkpoint-36/model-00007-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..147a1fb80453297df1f994aee9e7697b2980bec3
--- /dev/null
+++ b/checkpoint-36/model-00007-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d748bcea229e07f1eb39f922e2722b88f64ed8b1c927d8468a5a7255722cc1f
+size 4664167408
diff --git a/checkpoint-36/model-00008-of-00030.safetensors b/checkpoint-36/model-00008-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4d1ef19cb75adaabb878baa1eec5ab10d13cb697
--- /dev/null
+++ b/checkpoint-36/model-00008-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e19bfcd9e549ef045b6f6c65da24295bc1d616fef032cfa3ed3d9d070c2e079
+size 4999711728
diff --git a/checkpoint-36/model-00009-of-00030.safetensors b/checkpoint-36/model-00009-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e9f0f96a980427add67024e806820e7b69527fc9
--- /dev/null
+++ b/checkpoint-36/model-00009-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb099d9ff85178472649e0ff678c2c773ce39107fddb3eafaed71031499a2e9b
+size 4966157056
diff --git a/checkpoint-36/model-00010-of-00030.safetensors b/checkpoint-36/model-00010-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8a43714c237c6900642f8480591d112ecd9b2a7e
--- /dev/null
+++ b/checkpoint-36/model-00010-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afbf674bac3fabfce368a362c07ce6657d0cba056611928f06589c35cce5f747
+size 4664134408
diff --git a/checkpoint-36/model-00011-of-00030.safetensors b/checkpoint-36/model-00011-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1f85a9ef08e7a95f94f930b46eaa0476b9e3cf92
--- /dev/null
+++ b/checkpoint-36/model-00011-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e42f2db2ecfc3ee540cb85949151df7dbc675ca10fe9bd70937ad3ab97d7fd52
+size 4664167408
diff --git a/checkpoint-36/model-00012-of-00030.safetensors b/checkpoint-36/model-00012-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6f196eae0cd1c6e943bc388a4cb803eee86c1949
--- /dev/null
+++ b/checkpoint-36/model-00012-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:677690d7c7590a54095252fe70aa438cbc8e237cf6af60da9069f733bf4ff4ce
+size 4664167408
diff --git a/checkpoint-36/model-00013-of-00030.safetensors b/checkpoint-36/model-00013-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e7c9ffc9f09d43ca8ccd6a839008b246d96bf38d
--- /dev/null
+++ b/checkpoint-36/model-00013-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d703720cd04e2b35c0528029a2ece4a6d621ab43b075d39d929df54202a267ee
+size 4999711728
diff --git a/checkpoint-36/model-00014-of-00030.safetensors b/checkpoint-36/model-00014-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7ab6485f7a376a23b8d9eaf1398946eb5e1b202a
--- /dev/null
+++ b/checkpoint-36/model-00014-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c63ae134dbaa344665789b8b0ef2697a4984bf2a4060b7e9b4d7d2202b5e4ade
+size 4966157056
diff --git a/checkpoint-36/model-00015-of-00030.safetensors b/checkpoint-36/model-00015-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3a0af656a33007be9948cfd8d6e8b08520736eaa
--- /dev/null
+++ b/checkpoint-36/model-00015-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:960ae0a271bb777784dc8cdeb96bb88451e6d6620c1400073fe8d37552c74173
+size 4664134408
diff --git a/checkpoint-36/model-00016-of-00030.safetensors b/checkpoint-36/model-00016-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cad213e04f1b91bdd12b51e9917e5f43ac12aba8
--- /dev/null
+++ b/checkpoint-36/model-00016-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44abb02c211d7aded70a0f23c996afdb4505a287155e5b344cb7b2a43ec8640b
+size 4664167408
diff --git a/checkpoint-36/model-00017-of-00030.safetensors b/checkpoint-36/model-00017-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..42f81fcd692c27a534244a4e7b9d9915008c8685
--- /dev/null
+++ b/checkpoint-36/model-00017-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22aeaf179ffde50a10aecbda803a140446fc76b609e42c05aae821a527dbca75
+size 4664167408
diff --git a/checkpoint-36/model-00018-of-00030.safetensors b/checkpoint-36/model-00018-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b9ceb087fa0864768d72f549d3e8967e6d2966ae
--- /dev/null
+++ b/checkpoint-36/model-00018-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2798d3191824f504e59750f67a75e4f620956610e79eb5451d32ea4addf2c7f3
+size 4999711728
diff --git a/checkpoint-36/model-00019-of-00030.safetensors b/checkpoint-36/model-00019-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..11f761a31b38f451644b476ed434c05b2b0d8a57
--- /dev/null
+++ b/checkpoint-36/model-00019-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd045f6262276c5c417d6d381ea88c24ab7b506398d1172c8a06612eccc55640
+size 4966157056
diff --git a/checkpoint-36/model-00020-of-00030.safetensors b/checkpoint-36/model-00020-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4785512474540a8b10253806117ea6e210106d9b
--- /dev/null
+++ b/checkpoint-36/model-00020-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0048cf26d97567fac9f8c491373844b43fda8da0293f6eeccf4fb63e44bfea1
+size 4664134408
diff --git a/checkpoint-36/model-00021-of-00030.safetensors b/checkpoint-36/model-00021-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bc037f2544096a32cb33c50be7afb12bf071462a
--- /dev/null
+++ b/checkpoint-36/model-00021-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07330fd90ed976fb41f9ca657a413ed2d3444b8ed8a7fdca64838706e9834bad
+size 4664167408
diff --git a/checkpoint-36/model-00022-of-00030.safetensors b/checkpoint-36/model-00022-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..90b9c987a0f69f89fefa37e4fa22cb55087cdac2
--- /dev/null
+++ b/checkpoint-36/model-00022-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d3e63e361e2f21b9a8dc8a7c27a63384371427803ef8a96a6ea7d7c9fdf25bd
+size 4664167408
diff --git a/checkpoint-36/model-00023-of-00030.safetensors b/checkpoint-36/model-00023-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..17c60a61d31b33f6907d523024ced7b5ecfd3d74
--- /dev/null
+++ b/checkpoint-36/model-00023-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cdbb795719a187775a5edb2431d214e38882a8954d994d3d6fc921f817d2636
+size 4999711728
diff --git a/checkpoint-36/model-00024-of-00030.safetensors b/checkpoint-36/model-00024-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2bc3c5d0068473ef5aaaa61e30836eb91af7f059
--- /dev/null
+++ b/checkpoint-36/model-00024-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:756d4e1a1fc22a9d38285c03568635c2a3440455047aa515d7653221f1fa0e18
+size 4966157056
diff --git a/checkpoint-36/model-00025-of-00030.safetensors b/checkpoint-36/model-00025-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5d1fb58abd6dc7b4c539f9cdf9c975cd6b0472b8
--- /dev/null
+++ b/checkpoint-36/model-00025-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4b93c281b20705b010ed1af2fb461fae6317c6f3034fad649fde5fed3ac9e80
+size 4664134408
diff --git a/checkpoint-36/model-00026-of-00030.safetensors b/checkpoint-36/model-00026-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6d429b5614d0bea063a8a8679e104d9841cba596
--- /dev/null
+++ b/checkpoint-36/model-00026-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0cb003d2d89c383dcde824c418a1a832ff908f8ee2b98e8d27cbb3707e363cda
+size 4664167408
diff --git a/checkpoint-36/model-00027-of-00030.safetensors b/checkpoint-36/model-00027-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5471659e734724652b85d5f8b4c2e932c466b3e9
--- /dev/null
+++ b/checkpoint-36/model-00027-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:043f5027e70ee23b4d65d56106055018ab5746fdfbf856a4a5ca963bede6679b
+size 4664167408
diff --git a/checkpoint-36/model-00028-of-00030.safetensors b/checkpoint-36/model-00028-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6e3b477a84e9e876cf7add95c8d6fb517b5cd89f
--- /dev/null
+++ b/checkpoint-36/model-00028-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08b1d835c29f3f1ed6fb37c22dd449cb0eb587373866a2009959c22d852f1824
+size 4999711728
diff --git a/checkpoint-36/model-00029-of-00030.safetensors b/checkpoint-36/model-00029-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..40657cd94cd7484bfe53de232c0b37f31c900862
--- /dev/null
+++ b/checkpoint-36/model-00029-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b02c8b1e7875bd4be22db0d438e28a4ed47c4279914b5cf02200c5bdeccdb0c6
+size 4966173536
diff --git a/checkpoint-36/model-00030-of-00030.safetensors b/checkpoint-36/model-00030-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a92bae86487bffc2c07e97dd23d54198d8efe174
--- /dev/null
+++ b/checkpoint-36/model-00030-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a200ab4a9f6e3c246058e2742fc5a09a7586ac85a2c7215aa5c42bb051df0b5
+size 2101346432
diff --git a/checkpoint-36/model.safetensors.index.json b/checkpoint-36/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..37b1afe63cadc4ddce30aaff1b149c2f3083650c
--- /dev/null
+++ b/checkpoint-36/model.safetensors.index.json
@@ -0,0 +1,730 @@
+{
+ "metadata": {
+ "total_size": 141107412992
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00030-of-00030.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00002-of-00030.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00030.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00006-of-00030.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00006-of-00030.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00007-of-00030.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00007-of-00030.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00008-of-00030.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00008-of-00030.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.mlp.up_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00009-of-00030.safetensors",
+ "model.layers.21.mlp.down_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.21.mlp.gate_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00009-of-00030.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.mlp.down_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.mlp.gate_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.mlp.up_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.mlp.down_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.mlp.gate_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.mlp.up_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.mlp.down_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.mlp.gate_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.mlp.up_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.mlp.down_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.mlp.gate_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.mlp.up_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00011-of-00030.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00011-of-00030.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.mlp.down_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.mlp.up_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.mlp.down_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.mlp.gate_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.mlp.up_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00012-of-00030.safetensors",
+ "model.layers.29.mlp.down_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.29.mlp.gate_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.29.mlp.up_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00012-of-00030.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.mlp.down_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.mlp.gate_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.mlp.up_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.mlp.down_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.mlp.gate_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.mlp.up_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.32.input_layernorm.weight": "model-00013-of-00030.safetensors",
+ "model.layers.32.mlp.down_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.32.mlp.gate_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.32.mlp.up_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.32.post_attention_layernorm.weight": "model-00013-of-00030.safetensors",
+ "model.layers.32.self_attn.k_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.32.self_attn.o_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.32.self_attn.q_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.32.self_attn.v_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.33.input_layernorm.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.mlp.down_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.mlp.gate_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.mlp.up_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.post_attention_layernorm.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.self_attn.k_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.self_attn.o_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.self_attn.q_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.self_attn.v_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.input_layernorm.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.mlp.down_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.mlp.gate_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.mlp.up_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.post_attention_layernorm.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.self_attn.k_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.self_attn.o_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.self_attn.q_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.self_attn.v_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.35.input_layernorm.weight": "model-00014-of-00030.safetensors",
+ "model.layers.35.mlp.down_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.35.mlp.gate_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.35.mlp.up_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.35.post_attention_layernorm.weight": "model-00014-of-00030.safetensors",
+ "model.layers.35.self_attn.k_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.35.self_attn.o_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.35.self_attn.q_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.35.self_attn.v_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.36.input_layernorm.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.mlp.down_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.mlp.gate_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.mlp.up_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.post_attention_layernorm.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.self_attn.k_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.self_attn.o_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.self_attn.q_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.self_attn.v_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.input_layernorm.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.mlp.down_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.mlp.gate_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.mlp.up_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.post_attention_layernorm.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.self_attn.k_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.self_attn.o_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.self_attn.q_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.self_attn.v_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.38.input_layernorm.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.mlp.down_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.mlp.gate_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.mlp.up_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.post_attention_layernorm.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.self_attn.k_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.self_attn.o_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.self_attn.q_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.self_attn.v_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.input_layernorm.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.mlp.down_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.mlp.gate_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.mlp.up_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.post_attention_layernorm.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.self_attn.k_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.self_attn.o_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.self_attn.q_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.self_attn.v_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00003-of-00030.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00003-of-00030.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.40.input_layernorm.weight": "model-00016-of-00030.safetensors",
+ "model.layers.40.mlp.down_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.40.mlp.gate_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.40.mlp.up_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.40.post_attention_layernorm.weight": "model-00016-of-00030.safetensors",
+ "model.layers.40.self_attn.k_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.40.self_attn.o_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.40.self_attn.q_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.40.self_attn.v_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.41.input_layernorm.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.mlp.down_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.mlp.gate_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.mlp.up_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.post_attention_layernorm.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.self_attn.k_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.self_attn.o_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.self_attn.q_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.self_attn.v_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.input_layernorm.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.mlp.down_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.mlp.gate_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.mlp.up_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.post_attention_layernorm.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.self_attn.k_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.self_attn.o_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.self_attn.q_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.self_attn.v_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.43.input_layernorm.weight": "model-00017-of-00030.safetensors",
+ "model.layers.43.mlp.down_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.43.mlp.gate_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.43.mlp.up_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.43.post_attention_layernorm.weight": "model-00017-of-00030.safetensors",
+ "model.layers.43.self_attn.k_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.43.self_attn.o_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.43.self_attn.q_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.43.self_attn.v_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.44.input_layernorm.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.mlp.down_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.mlp.gate_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.mlp.up_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.post_attention_layernorm.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.self_attn.k_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.self_attn.o_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.self_attn.q_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.self_attn.v_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.input_layernorm.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.mlp.down_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.mlp.gate_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.mlp.up_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.post_attention_layernorm.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.self_attn.k_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.self_attn.o_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.self_attn.q_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.self_attn.v_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.46.input_layernorm.weight": "model-00018-of-00030.safetensors",
+ "model.layers.46.mlp.down_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.46.mlp.gate_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.46.mlp.up_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.46.post_attention_layernorm.weight": "model-00018-of-00030.safetensors",
+ "model.layers.46.self_attn.k_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.46.self_attn.o_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.46.self_attn.q_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.46.self_attn.v_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.47.input_layernorm.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.mlp.down_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.mlp.gate_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.mlp.up_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.post_attention_layernorm.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.self_attn.k_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.self_attn.o_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.self_attn.q_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.self_attn.v_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.input_layernorm.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.mlp.down_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.mlp.gate_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.mlp.up_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.post_attention_layernorm.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.self_attn.k_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.self_attn.o_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.self_attn.q_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.self_attn.v_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.49.input_layernorm.weight": "model-00019-of-00030.safetensors",
+ "model.layers.49.mlp.down_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.49.mlp.gate_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.49.mlp.up_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.49.post_attention_layernorm.weight": "model-00019-of-00030.safetensors",
+ "model.layers.49.self_attn.k_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.49.self_attn.o_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.49.self_attn.q_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.49.self_attn.v_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.mlp.down_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.mlp.gate_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.mlp.up_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.50.input_layernorm.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.mlp.down_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.mlp.gate_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.mlp.up_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.post_attention_layernorm.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.self_attn.k_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.self_attn.o_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.self_attn.q_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.self_attn.v_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.input_layernorm.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.mlp.down_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.mlp.gate_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.mlp.up_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.post_attention_layernorm.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.self_attn.k_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.self_attn.o_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.self_attn.q_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.self_attn.v_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.52.input_layernorm.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.mlp.down_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.mlp.gate_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.mlp.up_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.post_attention_layernorm.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.self_attn.k_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.self_attn.o_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.self_attn.q_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.self_attn.v_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.input_layernorm.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.mlp.down_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.mlp.gate_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.mlp.up_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.post_attention_layernorm.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.self_attn.k_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.self_attn.o_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.self_attn.q_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.self_attn.v_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.54.input_layernorm.weight": "model-00021-of-00030.safetensors",
+ "model.layers.54.mlp.down_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.54.mlp.gate_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.54.mlp.up_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.54.post_attention_layernorm.weight": "model-00021-of-00030.safetensors",
+ "model.layers.54.self_attn.k_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.54.self_attn.o_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.54.self_attn.q_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.54.self_attn.v_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.55.input_layernorm.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.mlp.down_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.mlp.gate_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.mlp.up_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.post_attention_layernorm.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.self_attn.k_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.self_attn.o_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.self_attn.q_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.self_attn.v_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.input_layernorm.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.mlp.down_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.mlp.gate_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.mlp.up_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.post_attention_layernorm.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.self_attn.k_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.self_attn.o_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.self_attn.q_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.self_attn.v_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.57.input_layernorm.weight": "model-00022-of-00030.safetensors",
+ "model.layers.57.mlp.down_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.57.mlp.gate_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.57.mlp.up_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.57.post_attention_layernorm.weight": "model-00022-of-00030.safetensors",
+ "model.layers.57.self_attn.k_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.57.self_attn.o_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.57.self_attn.q_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.57.self_attn.v_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.58.input_layernorm.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.mlp.down_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.mlp.gate_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.mlp.up_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.post_attention_layernorm.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.self_attn.k_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.self_attn.o_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.self_attn.q_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.self_attn.v_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.input_layernorm.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.mlp.down_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.mlp.gate_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.mlp.up_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.post_attention_layernorm.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.self_attn.k_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.self_attn.o_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.self_attn.q_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.self_attn.v_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.mlp.down_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.mlp.gate_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.mlp.up_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.60.input_layernorm.weight": "model-00023-of-00030.safetensors",
+ "model.layers.60.mlp.down_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.60.mlp.gate_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.60.mlp.up_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.60.post_attention_layernorm.weight": "model-00023-of-00030.safetensors",
+ "model.layers.60.self_attn.k_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.60.self_attn.o_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.60.self_attn.q_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.60.self_attn.v_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.61.input_layernorm.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.mlp.down_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.mlp.gate_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.mlp.up_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.post_attention_layernorm.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.self_attn.k_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.self_attn.o_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.self_attn.q_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.self_attn.v_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.input_layernorm.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.mlp.down_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.mlp.gate_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.mlp.up_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.post_attention_layernorm.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.self_attn.k_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.self_attn.o_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.self_attn.q_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.self_attn.v_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.63.input_layernorm.weight": "model-00024-of-00030.safetensors",
+ "model.layers.63.mlp.down_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.63.mlp.gate_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.63.mlp.up_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.63.post_attention_layernorm.weight": "model-00024-of-00030.safetensors",
+ "model.layers.63.self_attn.k_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.63.self_attn.o_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.63.self_attn.q_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.63.self_attn.v_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.64.input_layernorm.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.mlp.down_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.mlp.gate_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.mlp.up_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.post_attention_layernorm.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.self_attn.k_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.self_attn.o_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.self_attn.q_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.self_attn.v_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.input_layernorm.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.mlp.down_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.mlp.gate_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.mlp.up_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.post_attention_layernorm.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.self_attn.k_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.self_attn.o_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.self_attn.q_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.self_attn.v_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.66.input_layernorm.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.mlp.down_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.mlp.gate_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.mlp.up_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.post_attention_layernorm.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.self_attn.k_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.self_attn.o_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.self_attn.q_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.self_attn.v_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.input_layernorm.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.mlp.down_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.mlp.gate_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.mlp.up_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.post_attention_layernorm.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.self_attn.k_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.self_attn.o_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.self_attn.q_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.self_attn.v_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.68.input_layernorm.weight": "model-00026-of-00030.safetensors",
+ "model.layers.68.mlp.down_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.68.mlp.gate_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.68.mlp.up_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.68.post_attention_layernorm.weight": "model-00026-of-00030.safetensors",
+ "model.layers.68.self_attn.k_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.68.self_attn.o_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.68.self_attn.q_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.68.self_attn.v_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.69.input_layernorm.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.mlp.down_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.mlp.gate_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.mlp.up_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.post_attention_layernorm.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.self_attn.k_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.self_attn.o_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.self_attn.q_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.self_attn.v_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00004-of-00030.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.7.mlp.gate_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.7.mlp.up_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00004-of-00030.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.70.input_layernorm.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.mlp.down_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.mlp.gate_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.mlp.up_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.post_attention_layernorm.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.self_attn.k_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.self_attn.o_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.self_attn.q_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.self_attn.v_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.71.input_layernorm.weight": "model-00027-of-00030.safetensors",
+ "model.layers.71.mlp.down_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.71.mlp.gate_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.71.mlp.up_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.71.post_attention_layernorm.weight": "model-00027-of-00030.safetensors",
+ "model.layers.71.self_attn.k_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.71.self_attn.o_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.71.self_attn.q_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.71.self_attn.v_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.72.input_layernorm.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.mlp.down_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.mlp.gate_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.mlp.up_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.post_attention_layernorm.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.self_attn.k_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.self_attn.o_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.self_attn.q_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.self_attn.v_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.input_layernorm.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.mlp.down_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.mlp.gate_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.mlp.up_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.post_attention_layernorm.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.self_attn.k_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.self_attn.o_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.self_attn.q_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.self_attn.v_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.74.input_layernorm.weight": "model-00028-of-00030.safetensors",
+ "model.layers.74.mlp.down_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.74.mlp.gate_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.74.mlp.up_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.74.post_attention_layernorm.weight": "model-00028-of-00030.safetensors",
+ "model.layers.74.self_attn.k_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.74.self_attn.o_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.74.self_attn.q_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.74.self_attn.v_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.75.input_layernorm.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.mlp.down_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.mlp.gate_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.mlp.up_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.post_attention_layernorm.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.self_attn.k_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.self_attn.o_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.self_attn.q_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.self_attn.v_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.input_layernorm.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.mlp.down_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.mlp.gate_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.mlp.up_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.post_attention_layernorm.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.self_attn.k_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.self_attn.o_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.self_attn.q_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.self_attn.v_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.77.input_layernorm.weight": "model-00029-of-00030.safetensors",
+ "model.layers.77.mlp.down_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.77.mlp.gate_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.77.mlp.up_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.77.post_attention_layernorm.weight": "model-00029-of-00030.safetensors",
+ "model.layers.77.self_attn.k_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.77.self_attn.o_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.77.self_attn.q_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.77.self_attn.v_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.78.input_layernorm.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.mlp.down_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.mlp.gate_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.mlp.up_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.post_attention_layernorm.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.self_attn.k_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.self_attn.o_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.self_attn.q_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.self_attn.v_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.input_layernorm.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.mlp.down_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.mlp.gate_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.mlp.up_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.post_attention_layernorm.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.self_attn.k_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.self_attn.o_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.self_attn.q_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.self_attn.v_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00004-of-00030.safetensors",
+ "model.norm.weight": "model-00029-of-00030.safetensors"
+ }
+}
diff --git a/checkpoint-36/rng_state_0.pth b/checkpoint-36/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..845b3d030ffaa64ffa22691fc49f246ebb3407e1
--- /dev/null
+++ b/checkpoint-36/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5ed3e7dbdb967b68fdb8285c5a0a048dba7d4d232cee3a9ed4613fb8c6786f5
+size 15225
diff --git a/checkpoint-36/rng_state_1.pth b/checkpoint-36/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2e7745a1d5960bacbc91b6fffaf386faba9e1894
--- /dev/null
+++ b/checkpoint-36/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:248b8f01c99fe1ab31a8c68cb6bb99a25268a18f610cdde51234b58c12900da1
+size 15225
diff --git a/checkpoint-36/rng_state_10.pth b/checkpoint-36/rng_state_10.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e23cc357f1981b881570ef86fbbec74213fbc444
--- /dev/null
+++ b/checkpoint-36/rng_state_10.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a31ed0773e2d676af0f73f00ac0b27e550d5ade3f76f2c94f952ff45ec72ea84
+size 15235
diff --git a/checkpoint-36/rng_state_11.pth b/checkpoint-36/rng_state_11.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7c8f0043c94042e22e55dd06589aaccb229d1a55
--- /dev/null
+++ b/checkpoint-36/rng_state_11.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fa4115a9c6596e67e83e3a47763abe1026f01c8bcbb674cef98b4b3301d384d
+size 15235
diff --git a/checkpoint-36/rng_state_12.pth b/checkpoint-36/rng_state_12.pth
new file mode 100644
index 0000000000000000000000000000000000000000..41f37c5bf62899fd213f42865d207be71954ae5c
--- /dev/null
+++ b/checkpoint-36/rng_state_12.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:937b20e918053f5dca79ae2e954568b3f08a6185aa5df6e74ed58e641299da99
+size 15235
diff --git a/checkpoint-36/rng_state_13.pth b/checkpoint-36/rng_state_13.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6cb78ed9c37e118d479eb69fc32e7fce8888fce4
--- /dev/null
+++ b/checkpoint-36/rng_state_13.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce893865e90139b191aeca366e9fdb2345af1479371ad9725aa29db4a3215714
+size 15235
diff --git a/checkpoint-36/rng_state_14.pth b/checkpoint-36/rng_state_14.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6beed520f9bd03cc0e59c58c4e734acdc3fb01bc
--- /dev/null
+++ b/checkpoint-36/rng_state_14.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ae074290f69414d50818e5020ff931f02bc391cbcdbfc992d819a7a83d3a27f
+size 15235
diff --git a/checkpoint-36/rng_state_15.pth b/checkpoint-36/rng_state_15.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3880b8d71091cb181faa1604c5b16ff597cfcbe5
--- /dev/null
+++ b/checkpoint-36/rng_state_15.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70b40a9d8b148d0cd4db89c829d064d058882d411c28f2e40e4514e5909d3e0c
+size 15235
diff --git a/checkpoint-36/rng_state_16.pth b/checkpoint-36/rng_state_16.pth
new file mode 100644
index 0000000000000000000000000000000000000000..52a853d02ec7405e0b91053d174600d70b1b2473
--- /dev/null
+++ b/checkpoint-36/rng_state_16.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a7d340a63fd7280738a25a3f99f39d81fbae7dbf3b86561ebeeea8412e829b3
+size 15235
diff --git a/checkpoint-36/rng_state_17.pth b/checkpoint-36/rng_state_17.pth
new file mode 100644
index 0000000000000000000000000000000000000000..68c13f1730ddd62c6a914474a56d5ca2df4cce9e
--- /dev/null
+++ b/checkpoint-36/rng_state_17.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a253ea19c96acc58f0f0ae096581549f4ff12f8f02053cf37c97d703029cef85
+size 15235
diff --git a/checkpoint-36/rng_state_18.pth b/checkpoint-36/rng_state_18.pth
new file mode 100644
index 0000000000000000000000000000000000000000..da587415c41cfc137ea99cdbe53bac96bb721060
--- /dev/null
+++ b/checkpoint-36/rng_state_18.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06cb849e1a6ded244763891d6f14418c505fb9db25c4667a13a2fa51a93e86d3
+size 15235
diff --git a/checkpoint-36/rng_state_19.pth b/checkpoint-36/rng_state_19.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f79b923f6091365b69a892811badc1f693ec07bc
--- /dev/null
+++ b/checkpoint-36/rng_state_19.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2beb90bfae67877f958d8b482f5c164066e0272304d74164771cd8bb3f55fd94
+size 15235
diff --git a/checkpoint-36/rng_state_2.pth b/checkpoint-36/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a36215a73b69c975ed85ef08d043eeda69575433
--- /dev/null
+++ b/checkpoint-36/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62074669aba81c1c8a2f41e03b0359b479719ffca186d2afeb3fdc3cb0fda5e8
+size 15225
diff --git a/checkpoint-36/rng_state_20.pth b/checkpoint-36/rng_state_20.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2117b75a7b8bcc488d5ed7e3c37feaf7e931727c
--- /dev/null
+++ b/checkpoint-36/rng_state_20.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d382b3bac48e07c8623f3850819e4baed1c45846cabceaa6c460ae03d7656293
+size 15235
diff --git a/checkpoint-36/rng_state_21.pth b/checkpoint-36/rng_state_21.pth
new file mode 100644
index 0000000000000000000000000000000000000000..69352210f58cd442025a464ff4a6733c276cbb6e
--- /dev/null
+++ b/checkpoint-36/rng_state_21.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8cee71db176692ac948292d0531c72dc7fa5673084f57929d70215adbc5d1789
+size 15235
diff --git a/checkpoint-36/rng_state_22.pth b/checkpoint-36/rng_state_22.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0d0d2e9d180e228bece29063d95626a4cf5c35b3
--- /dev/null
+++ b/checkpoint-36/rng_state_22.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad497e9c2159308fe8b0bd0ba6209a518e3df99e8595e8063d8c23ffa82d664b
+size 15235
diff --git a/checkpoint-36/rng_state_23.pth b/checkpoint-36/rng_state_23.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6dbbdb430110d6d39fc2466857641b896eaa55f8
--- /dev/null
+++ b/checkpoint-36/rng_state_23.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c70e8f3f219bf3e0b8d6ecb9b629ec66dbf3bac1556d37a0eba77f5b2b92bd3c
+size 15235
diff --git a/checkpoint-36/rng_state_24.pth b/checkpoint-36/rng_state_24.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ff09f150db04955169ee5dbe6cfe9a976d2b01b3
--- /dev/null
+++ b/checkpoint-36/rng_state_24.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b0bac5424d44cd7418fe9e8c47d02022a26e5170f1e9f5daa9c25a4086384e9
+size 15235
diff --git a/checkpoint-36/rng_state_25.pth b/checkpoint-36/rng_state_25.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3bda1ea5dd0d93a44ac2fdc60be4b9013fc004f8
--- /dev/null
+++ b/checkpoint-36/rng_state_25.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:983093281fbc1e999ae72d4cb10e6b5fb0d863b922308e047f13deede8305251
+size 15235
diff --git a/checkpoint-36/rng_state_26.pth b/checkpoint-36/rng_state_26.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e3d5058d17cbd131261b285e32b8e7afcd3ba619
--- /dev/null
+++ b/checkpoint-36/rng_state_26.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37471e6522a08007e178d43ad08055269541ddc6fd19299e4ddfef7b2775abbb
+size 15235
diff --git a/checkpoint-36/rng_state_27.pth b/checkpoint-36/rng_state_27.pth
new file mode 100644
index 0000000000000000000000000000000000000000..189cc177982847fe3a944260bb9321af3b05f9f8
--- /dev/null
+++ b/checkpoint-36/rng_state_27.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0672c669cc3d79b39da38d58e24ef7824a257dd4098142e5d12cd8191b139267
+size 15235
diff --git a/checkpoint-36/rng_state_28.pth b/checkpoint-36/rng_state_28.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1a3287eafbf56986a8f4fb9e4e2169d3e683bc8e
--- /dev/null
+++ b/checkpoint-36/rng_state_28.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c0e320e3849da3fce8d8971559a6623ca7fa04d5ee3d56e573aef8b91bac19b
+size 15235
diff --git a/checkpoint-36/rng_state_29.pth b/checkpoint-36/rng_state_29.pth
new file mode 100644
index 0000000000000000000000000000000000000000..eba4c5abb5b00a8d2261195c2b6d7ae2d2ca7f4c
--- /dev/null
+++ b/checkpoint-36/rng_state_29.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96787158a75b38b69da5de384a0c466572c171db1bfa2f5071153e6e96847812
+size 15235
diff --git a/checkpoint-36/rng_state_3.pth b/checkpoint-36/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e83f4e9a9cc18eb150c78db70f8f9c7b9b161734
--- /dev/null
+++ b/checkpoint-36/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3980d49b261752741ff62dbd41f222267d41a1adba69c51e66ab38155c44ac8b
+size 15225
diff --git a/checkpoint-36/rng_state_30.pth b/checkpoint-36/rng_state_30.pth
new file mode 100644
index 0000000000000000000000000000000000000000..58e18fec32c305efa408a3f63046b805f4cdd351
--- /dev/null
+++ b/checkpoint-36/rng_state_30.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a06e221ee2d4a7aefa1d6d718833f9fdfaaf574be8663b9aad98e40d4027efe5
+size 15235
diff --git a/checkpoint-36/rng_state_31.pth b/checkpoint-36/rng_state_31.pth
new file mode 100644
index 0000000000000000000000000000000000000000..af5333955a88d3287ecbcbf896b8a04b21b3671c
--- /dev/null
+++ b/checkpoint-36/rng_state_31.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89fb30a398e6065203a7d81716270f1c8667451c796b98a8b0eac439f65bf0b9
+size 15235
diff --git a/checkpoint-36/rng_state_4.pth b/checkpoint-36/rng_state_4.pth
new file mode 100644
index 0000000000000000000000000000000000000000..19f5c4b6da29264dabf4acd1372136d3b659d195
--- /dev/null
+++ b/checkpoint-36/rng_state_4.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d78d44315bb391ea0221f074566cd19b85a6700ba09dacc5ae343f4a97b19edf
+size 15225
diff --git a/checkpoint-36/rng_state_5.pth b/checkpoint-36/rng_state_5.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2d14578a0cd04b46881f7b84d6ba61f395aab17c
--- /dev/null
+++ b/checkpoint-36/rng_state_5.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34085e17b1badb36490ecc5e40f9e8c68c1123a64b6a7f398c1b8dc83a85bb26
+size 15225
diff --git a/checkpoint-36/rng_state_6.pth b/checkpoint-36/rng_state_6.pth
new file mode 100644
index 0000000000000000000000000000000000000000..534549c62365f585f2081cfebbbe82a64e171f03
--- /dev/null
+++ b/checkpoint-36/rng_state_6.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9284c112cff88ca749b027a620721ed6dd34678c9a3aaa25e7367ddec9e7e9df
+size 15225
diff --git a/checkpoint-36/rng_state_7.pth b/checkpoint-36/rng_state_7.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3fe6e65442f61ce0b963448c5aaa68c1b1d56e79
--- /dev/null
+++ b/checkpoint-36/rng_state_7.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9cdd3098269742b740e38c9415b24f6e65b60297d17fd3552ca40bc37a1cfcae
+size 15225
diff --git a/checkpoint-36/rng_state_8.pth b/checkpoint-36/rng_state_8.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1cd98ea9efebe3e62ad6cdabe1af7e31cbbd1e81
--- /dev/null
+++ b/checkpoint-36/rng_state_8.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:def9822579f0a2e9331d44796767e6393d1f86bada96b444306eca210dee6ec1
+size 15225
diff --git a/checkpoint-36/rng_state_9.pth b/checkpoint-36/rng_state_9.pth
new file mode 100644
index 0000000000000000000000000000000000000000..26c726bae3401d53e74023483d8318c440665fb6
--- /dev/null
+++ b/checkpoint-36/rng_state_9.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb6d237cb2133cc7b028031b21c66ab100f6337e0f627493827006a198997821
+size 15225
diff --git a/checkpoint-36/scheduler.pt b/checkpoint-36/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2141f5fb3c557e77b3ad1f60aac811bb750bef31
--- /dev/null
+++ b/checkpoint-36/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24561c546eb547ded000acf838359da5900ea244e83a180abae31aa89e88bb35
+size 1263
diff --git a/checkpoint-36/special_tokens_map.json b/checkpoint-36/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..278b7f0f84be865c4687700ee7b3c63d89a51e18
--- /dev/null
+++ b/checkpoint-36/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/checkpoint-36/tokenizer.json b/checkpoint-36/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..af2d22fff23798bea98b1730ae7cdacaee0b087a
--- /dev/null
+++ b/checkpoint-36/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ade1dac458f86f9bea8bf35b713f14e1bbed24228429534038e9f7e54ea3e8b6
+size 17208712
diff --git a/checkpoint-36/tokenizer_config.json b/checkpoint-36/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..809816119c8164bd275957569b7c8c708c8d1092
--- /dev/null
+++ b/checkpoint-36/tokenizer_config.json
@@ -0,0 +1,2064 @@
+{
+ "added_tokens_decoder": {
+ "128000": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128001": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128002": {
+ "content": "<|reserved_special_token_0|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128003": {
+ "content": "<|reserved_special_token_1|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128004": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128005": {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128006": {
+ "content": "<|start_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128007": {
+ "content": "<|end_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128008": {
+ "content": "<|eom_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128009": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128010": {
+ "content": "<|python_tag|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128011": {
+ "content": "<|reserved_special_token_3|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128012": {
+ "content": "<|reserved_special_token_4|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128013": {
+ "content": "<|reserved_special_token_5|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128014": {
+ "content": "<|reserved_special_token_6|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128015": {
+ "content": "<|reserved_special_token_7|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128016": {
+ "content": "<|reserved_special_token_8|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128017": {
+ "content": "<|reserved_special_token_9|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128018": {
+ "content": "<|reserved_special_token_10|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128019": {
+ "content": "<|reserved_special_token_11|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128020": {
+ "content": "<|reserved_special_token_12|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128021": {
+ "content": "<|reserved_special_token_13|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128022": {
+ "content": "<|reserved_special_token_14|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128023": {
+ "content": "<|reserved_special_token_15|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128024": {
+ "content": "<|reserved_special_token_16|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128025": {
+ "content": "<|reserved_special_token_17|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128026": {
+ "content": "<|reserved_special_token_18|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128027": {
+ "content": "<|reserved_special_token_19|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128028": {
+ "content": "<|reserved_special_token_20|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128029": {
+ "content": "<|reserved_special_token_21|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128030": {
+ "content": "<|reserved_special_token_22|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128031": {
+ "content": "<|reserved_special_token_23|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128032": {
+ "content": "<|reserved_special_token_24|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128033": {
+ "content": "<|reserved_special_token_25|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128034": {
+ "content": "<|reserved_special_token_26|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128035": {
+ "content": "<|reserved_special_token_27|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128036": {
+ "content": "<|reserved_special_token_28|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128037": {
+ "content": "<|reserved_special_token_29|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128038": {
+ "content": "<|reserved_special_token_30|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128039": {
+ "content": "<|reserved_special_token_31|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128040": {
+ "content": "<|reserved_special_token_32|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128041": {
+ "content": "<|reserved_special_token_33|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128042": {
+ "content": "<|reserved_special_token_34|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128043": {
+ "content": "<|reserved_special_token_35|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128044": {
+ "content": "<|reserved_special_token_36|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128045": {
+ "content": "<|reserved_special_token_37|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128046": {
+ "content": "<|reserved_special_token_38|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128047": {
+ "content": "<|reserved_special_token_39|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128048": {
+ "content": "<|reserved_special_token_40|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128049": {
+ "content": "<|reserved_special_token_41|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128050": {
+ "content": "<|reserved_special_token_42|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128051": {
+ "content": "<|reserved_special_token_43|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128052": {
+ "content": "<|reserved_special_token_44|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128053": {
+ "content": "<|reserved_special_token_45|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128054": {
+ "content": "<|reserved_special_token_46|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128055": {
+ "content": "<|reserved_special_token_47|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128056": {
+ "content": "<|reserved_special_token_48|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128057": {
+ "content": "<|reserved_special_token_49|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128058": {
+ "content": "<|reserved_special_token_50|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128059": {
+ "content": "<|reserved_special_token_51|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128060": {
+ "content": "<|reserved_special_token_52|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128061": {
+ "content": "<|reserved_special_token_53|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128062": {
+ "content": "<|reserved_special_token_54|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128063": {
+ "content": "<|reserved_special_token_55|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128064": {
+ "content": "<|reserved_special_token_56|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128065": {
+ "content": "<|reserved_special_token_57|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128066": {
+ "content": "<|reserved_special_token_58|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128067": {
+ "content": "<|reserved_special_token_59|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128068": {
+ "content": "<|reserved_special_token_60|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128069": {
+ "content": "<|reserved_special_token_61|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128070": {
+ "content": "<|reserved_special_token_62|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128071": {
+ "content": "<|reserved_special_token_63|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128072": {
+ "content": "<|reserved_special_token_64|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128073": {
+ "content": "<|reserved_special_token_65|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128074": {
+ "content": "<|reserved_special_token_66|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128075": {
+ "content": "<|reserved_special_token_67|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128076": {
+ "content": "<|reserved_special_token_68|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128077": {
+ "content": "<|reserved_special_token_69|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128078": {
+ "content": "<|reserved_special_token_70|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128079": {
+ "content": "<|reserved_special_token_71|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128080": {
+ "content": "<|reserved_special_token_72|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128081": {
+ "content": "<|reserved_special_token_73|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128082": {
+ "content": "<|reserved_special_token_74|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128083": {
+ "content": "<|reserved_special_token_75|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128084": {
+ "content": "<|reserved_special_token_76|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128085": {
+ "content": "<|reserved_special_token_77|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128086": {
+ "content": "<|reserved_special_token_78|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128087": {
+ "content": "<|reserved_special_token_79|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128088": {
+ "content": "<|reserved_special_token_80|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128089": {
+ "content": "<|reserved_special_token_81|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128090": {
+ "content": "<|reserved_special_token_82|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128091": {
+ "content": "<|reserved_special_token_83|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128092": {
+ "content": "<|reserved_special_token_84|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128093": {
+ "content": "<|reserved_special_token_85|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128094": {
+ "content": "<|reserved_special_token_86|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128095": {
+ "content": "<|reserved_special_token_87|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128096": {
+ "content": "<|reserved_special_token_88|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128097": {
+ "content": "<|reserved_special_token_89|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128098": {
+ "content": "<|reserved_special_token_90|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128099": {
+ "content": "<|reserved_special_token_91|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128100": {
+ "content": "<|reserved_special_token_92|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128101": {
+ "content": "<|reserved_special_token_93|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128102": {
+ "content": "<|reserved_special_token_94|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128103": {
+ "content": "<|reserved_special_token_95|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128104": {
+ "content": "<|reserved_special_token_96|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128105": {
+ "content": "<|reserved_special_token_97|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128106": {
+ "content": "<|reserved_special_token_98|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128107": {
+ "content": "<|reserved_special_token_99|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128108": {
+ "content": "<|reserved_special_token_100|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128109": {
+ "content": "<|reserved_special_token_101|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128110": {
+ "content": "<|reserved_special_token_102|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128111": {
+ "content": "<|reserved_special_token_103|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128112": {
+ "content": "<|reserved_special_token_104|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128113": {
+ "content": "<|reserved_special_token_105|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128114": {
+ "content": "<|reserved_special_token_106|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128115": {
+ "content": "<|reserved_special_token_107|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128116": {
+ "content": "<|reserved_special_token_108|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128117": {
+ "content": "<|reserved_special_token_109|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128118": {
+ "content": "<|reserved_special_token_110|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128119": {
+ "content": "<|reserved_special_token_111|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128120": {
+ "content": "<|reserved_special_token_112|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128121": {
+ "content": "<|reserved_special_token_113|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128122": {
+ "content": "<|reserved_special_token_114|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128123": {
+ "content": "<|reserved_special_token_115|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128124": {
+ "content": "<|reserved_special_token_116|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128125": {
+ "content": "<|reserved_special_token_117|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128126": {
+ "content": "<|reserved_special_token_118|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128127": {
+ "content": "<|reserved_special_token_119|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128128": {
+ "content": "<|reserved_special_token_120|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128129": {
+ "content": "<|reserved_special_token_121|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128130": {
+ "content": "<|reserved_special_token_122|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128131": {
+ "content": "<|reserved_special_token_123|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128132": {
+ "content": "<|reserved_special_token_124|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128133": {
+ "content": "<|reserved_special_token_125|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128134": {
+ "content": "<|reserved_special_token_126|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128135": {
+ "content": "<|reserved_special_token_127|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128136": {
+ "content": "<|reserved_special_token_128|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128137": {
+ "content": "<|reserved_special_token_129|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128138": {
+ "content": "<|reserved_special_token_130|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128139": {
+ "content": "<|reserved_special_token_131|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128140": {
+ "content": "<|reserved_special_token_132|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128141": {
+ "content": "<|reserved_special_token_133|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128142": {
+ "content": "<|reserved_special_token_134|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128143": {
+ "content": "<|reserved_special_token_135|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128144": {
+ "content": "<|reserved_special_token_136|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128145": {
+ "content": "<|reserved_special_token_137|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128146": {
+ "content": "<|reserved_special_token_138|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128147": {
+ "content": "<|reserved_special_token_139|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128148": {
+ "content": "<|reserved_special_token_140|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128149": {
+ "content": "<|reserved_special_token_141|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128150": {
+ "content": "<|reserved_special_token_142|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128151": {
+ "content": "<|reserved_special_token_143|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128152": {
+ "content": "<|reserved_special_token_144|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128153": {
+ "content": "<|reserved_special_token_145|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128154": {
+ "content": "<|reserved_special_token_146|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128155": {
+ "content": "<|reserved_special_token_147|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128156": {
+ "content": "<|reserved_special_token_148|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128157": {
+ "content": "<|reserved_special_token_149|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128158": {
+ "content": "<|reserved_special_token_150|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128159": {
+ "content": "<|reserved_special_token_151|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128160": {
+ "content": "<|reserved_special_token_152|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128161": {
+ "content": "<|reserved_special_token_153|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128162": {
+ "content": "<|reserved_special_token_154|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128163": {
+ "content": "<|reserved_special_token_155|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128164": {
+ "content": "<|reserved_special_token_156|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128165": {
+ "content": "<|reserved_special_token_157|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128166": {
+ "content": "<|reserved_special_token_158|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128167": {
+ "content": "<|reserved_special_token_159|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128168": {
+ "content": "<|reserved_special_token_160|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128169": {
+ "content": "<|reserved_special_token_161|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128170": {
+ "content": "<|reserved_special_token_162|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128171": {
+ "content": "<|reserved_special_token_163|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128172": {
+ "content": "<|reserved_special_token_164|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128173": {
+ "content": "<|reserved_special_token_165|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128174": {
+ "content": "<|reserved_special_token_166|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128175": {
+ "content": "<|reserved_special_token_167|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128176": {
+ "content": "<|reserved_special_token_168|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128177": {
+ "content": "<|reserved_special_token_169|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128178": {
+ "content": "<|reserved_special_token_170|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128179": {
+ "content": "<|reserved_special_token_171|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128180": {
+ "content": "<|reserved_special_token_172|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128181": {
+ "content": "<|reserved_special_token_173|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128182": {
+ "content": "<|reserved_special_token_174|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128183": {
+ "content": "<|reserved_special_token_175|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128184": {
+ "content": "<|reserved_special_token_176|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128185": {
+ "content": "<|reserved_special_token_177|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128186": {
+ "content": "<|reserved_special_token_178|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128187": {
+ "content": "<|reserved_special_token_179|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128188": {
+ "content": "<|reserved_special_token_180|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128189": {
+ "content": "<|reserved_special_token_181|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128190": {
+ "content": "<|reserved_special_token_182|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128191": {
+ "content": "<|reserved_special_token_183|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128192": {
+ "content": "<|reserved_special_token_184|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128193": {
+ "content": "<|reserved_special_token_185|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128194": {
+ "content": "<|reserved_special_token_186|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128195": {
+ "content": "<|reserved_special_token_187|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128196": {
+ "content": "<|reserved_special_token_188|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128197": {
+ "content": "<|reserved_special_token_189|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128198": {
+ "content": "<|reserved_special_token_190|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128199": {
+ "content": "<|reserved_special_token_191|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128200": {
+ "content": "<|reserved_special_token_192|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128201": {
+ "content": "<|reserved_special_token_193|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128202": {
+ "content": "<|reserved_special_token_194|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128203": {
+ "content": "<|reserved_special_token_195|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128204": {
+ "content": "<|reserved_special_token_196|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128205": {
+ "content": "<|reserved_special_token_197|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128206": {
+ "content": "<|reserved_special_token_198|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128207": {
+ "content": "<|reserved_special_token_199|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128208": {
+ "content": "<|reserved_special_token_200|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128209": {
+ "content": "<|reserved_special_token_201|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128210": {
+ "content": "<|reserved_special_token_202|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128211": {
+ "content": "<|reserved_special_token_203|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128212": {
+ "content": "<|reserved_special_token_204|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128213": {
+ "content": "<|reserved_special_token_205|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128214": {
+ "content": "<|reserved_special_token_206|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128215": {
+ "content": "<|reserved_special_token_207|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128216": {
+ "content": "<|reserved_special_token_208|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128217": {
+ "content": "<|reserved_special_token_209|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128218": {
+ "content": "<|reserved_special_token_210|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128219": {
+ "content": "<|reserved_special_token_211|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128220": {
+ "content": "<|reserved_special_token_212|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128221": {
+ "content": "<|reserved_special_token_213|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128222": {
+ "content": "<|reserved_special_token_214|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128223": {
+ "content": "<|reserved_special_token_215|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128224": {
+ "content": "<|reserved_special_token_216|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128225": {
+ "content": "<|reserved_special_token_217|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128226": {
+ "content": "<|reserved_special_token_218|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128227": {
+ "content": "<|reserved_special_token_219|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128228": {
+ "content": "<|reserved_special_token_220|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128229": {
+ "content": "<|reserved_special_token_221|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128230": {
+ "content": "<|reserved_special_token_222|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128231": {
+ "content": "<|reserved_special_token_223|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128232": {
+ "content": "<|reserved_special_token_224|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128233": {
+ "content": "<|reserved_special_token_225|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128234": {
+ "content": "<|reserved_special_token_226|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128235": {
+ "content": "<|reserved_special_token_227|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128236": {
+ "content": "<|reserved_special_token_228|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128237": {
+ "content": "<|reserved_special_token_229|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128238": {
+ "content": "<|reserved_special_token_230|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128239": {
+ "content": "<|reserved_special_token_231|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128240": {
+ "content": "<|reserved_special_token_232|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128241": {
+ "content": "<|reserved_special_token_233|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128242": {
+ "content": "<|reserved_special_token_234|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128243": {
+ "content": "<|reserved_special_token_235|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128244": {
+ "content": "<|reserved_special_token_236|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128245": {
+ "content": "<|reserved_special_token_237|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128246": {
+ "content": "<|reserved_special_token_238|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128247": {
+ "content": "<|reserved_special_token_239|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128248": {
+ "content": "<|reserved_special_token_240|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128249": {
+ "content": "<|reserved_special_token_241|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128250": {
+ "content": "<|reserved_special_token_242|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128251": {
+ "content": "<|reserved_special_token_243|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128252": {
+ "content": "<|reserved_special_token_244|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128253": {
+ "content": "<|reserved_special_token_245|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128254": {
+ "content": "<|reserved_special_token_246|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128255": {
+ "content": "<|reserved_special_token_247|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|begin_of_text|>",
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|eot_id|>",
+ "extra_special_tokens": {},
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 131072,
+ "pad_token": "<|end_of_text|>",
+ "tokenizer_class": "PreTrainedTokenizer"
+}
diff --git a/checkpoint-36/trainer_state.json b/checkpoint-36/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..1538fb376bec2efb785623fbab6dd723ad20f6e3
--- /dev/null
+++ b/checkpoint-36/trainer_state.json
@@ -0,0 +1,286 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.0,
+ "eval_steps": 500,
+ "global_step": 36,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.027777777777777776,
+ "grad_norm": 33.53114241539531,
+ "learning_rate": 5e-06,
+ "loss": 19.062,
+ "step": 1
+ },
+ {
+ "epoch": 0.05555555555555555,
+ "grad_norm": 42.84474547203142,
+ "learning_rate": 4.9914380707064284e-06,
+ "loss": 18.2499,
+ "step": 2
+ },
+ {
+ "epoch": 0.08333333333333333,
+ "grad_norm": 62.29220504145651,
+ "learning_rate": 4.965817444277469e-06,
+ "loss": 15.809,
+ "step": 3
+ },
+ {
+ "epoch": 0.1111111111111111,
+ "grad_norm": 22.95196099229298,
+ "learning_rate": 4.923333109150404e-06,
+ "loss": 14.897,
+ "step": 4
+ },
+ {
+ "epoch": 0.1388888888888889,
+ "grad_norm": 165.7977927348648,
+ "learning_rate": 4.864308396768294e-06,
+ "loss": 14.9434,
+ "step": 5
+ },
+ {
+ "epoch": 0.16666666666666666,
+ "grad_norm": 45.03607660519894,
+ "learning_rate": 4.789192520832463e-06,
+ "loss": 13.6098,
+ "step": 6
+ },
+ {
+ "epoch": 0.19444444444444445,
+ "grad_norm": 20.126716948755167,
+ "learning_rate": 4.698557158514988e-06,
+ "loss": 12.9685,
+ "step": 7
+ },
+ {
+ "epoch": 0.2222222222222222,
+ "grad_norm": 20.06570448778295,
+ "learning_rate": 4.593092099650232e-06,
+ "loss": 12.4369,
+ "step": 8
+ },
+ {
+ "epoch": 0.25,
+ "grad_norm": 22.65063979014986,
+ "learning_rate": 4.473599997017701e-06,
+ "loss": 12.0047,
+ "step": 9
+ },
+ {
+ "epoch": 0.2777777777777778,
+ "grad_norm": 70.91542109599729,
+ "learning_rate": 4.340990257669732e-06,
+ "loss": 11.5876,
+ "step": 10
+ },
+ {
+ "epoch": 0.3055555555555556,
+ "grad_norm": 12.082557329221926,
+ "learning_rate": 4.196272121794714e-06,
+ "loss": 11.2749,
+ "step": 11
+ },
+ {
+ "epoch": 0.3333333333333333,
+ "grad_norm": 12.550168999423985,
+ "learning_rate": 4.040546981789854e-06,
+ "loss": 10.7477,
+ "step": 12
+ },
+ {
+ "epoch": 0.3611111111111111,
+ "grad_norm": 8.827348020968335,
+ "learning_rate": 3.875e-06,
+ "loss": 10.2632,
+ "step": 13
+ },
+ {
+ "epoch": 0.3888888888888889,
+ "grad_norm": 12.394367079322855,
+ "learning_rate": 3.700891088916574e-06,
+ "loss": 10.0137,
+ "step": 14
+ },
+ {
+ "epoch": 0.4166666666666667,
+ "grad_norm": 9.286757446404465,
+ "learning_rate": 3.5195453224827552e-06,
+ "loss": 9.7255,
+ "step": 15
+ },
+ {
+ "epoch": 0.4444444444444444,
+ "grad_norm": 10.15211072828483,
+ "learning_rate": 3.332342851480672e-06,
+ "loss": 9.2203,
+ "step": 16
+ },
+ {
+ "epoch": 0.4722222222222222,
+ "grad_norm": 10.426428146187629,
+ "learning_rate": 3.140708399750594e-06,
+ "loss": 8.8231,
+ "step": 17
+ },
+ {
+ "epoch": 0.5,
+ "grad_norm": 10.580506627149108,
+ "learning_rate": 2.9461004211822313e-06,
+ "loss": 8.5927,
+ "step": 18
+ },
+ {
+ "epoch": 0.5277777777777778,
+ "grad_norm": 9.614867948226818,
+ "learning_rate": 2.7500000000000004e-06,
+ "loss": 8.4726,
+ "step": 19
+ },
+ {
+ "epoch": 0.5555555555555556,
+ "grad_norm": 7.358015240330244,
+ "learning_rate": 2.553899578817769e-06,
+ "loss": 7.9606,
+ "step": 20
+ },
+ {
+ "epoch": 0.5833333333333334,
+ "grad_norm": 13.145060234596546,
+ "learning_rate": 2.359291600249407e-06,
+ "loss": 7.6972,
+ "step": 21
+ },
+ {
+ "epoch": 0.6111111111111112,
+ "grad_norm": 11.804666920273716,
+ "learning_rate": 2.167657148519328e-06,
+ "loss": 7.5072,
+ "step": 22
+ },
+ {
+ "epoch": 0.6388888888888888,
+ "grad_norm": 8.3548484530948,
+ "learning_rate": 1.9804546775172455e-06,
+ "loss": 7.3315,
+ "step": 23
+ },
+ {
+ "epoch": 0.6666666666666666,
+ "grad_norm": 9.105638737736317,
+ "learning_rate": 1.799108911083427e-06,
+ "loss": 7.3131,
+ "step": 24
+ },
+ {
+ "epoch": 0.6944444444444444,
+ "grad_norm": 6.945053218616064,
+ "learning_rate": 1.6250000000000007e-06,
+ "loss": 7.0754,
+ "step": 25
+ },
+ {
+ "epoch": 0.7222222222222222,
+ "grad_norm": 7.24944737566883,
+ "learning_rate": 1.4594530182101472e-06,
+ "loss": 6.9281,
+ "step": 26
+ },
+ {
+ "epoch": 0.75,
+ "grad_norm": 6.41161136997122,
+ "learning_rate": 1.3037278782052865e-06,
+ "loss": 6.6446,
+ "step": 27
+ },
+ {
+ "epoch": 0.7777777777777778,
+ "grad_norm": 5.712279511665765,
+ "learning_rate": 1.1590097423302683e-06,
+ "loss": 6.5429,
+ "step": 28
+ },
+ {
+ "epoch": 0.8055555555555556,
+ "grad_norm": 7.0400325928064085,
+ "learning_rate": 1.0264000029823e-06,
+ "loss": 6.4264,
+ "step": 29
+ },
+ {
+ "epoch": 0.8333333333333334,
+ "grad_norm": 5.489757665173993,
+ "learning_rate": 9.069079003497683e-07,
+ "loss": 6.3249,
+ "step": 30
+ },
+ {
+ "epoch": 0.8611111111111112,
+ "grad_norm": 5.7783196208782766,
+ "learning_rate": 8.01442841485013e-07,
+ "loss": 6.0586,
+ "step": 31
+ },
+ {
+ "epoch": 0.8888888888888888,
+ "grad_norm": 4.6024061019552915,
+ "learning_rate": 7.108074791675378e-07,
+ "loss": 6.2605,
+ "step": 32
+ },
+ {
+ "epoch": 0.9166666666666666,
+ "grad_norm": 5.5663806338408,
+ "learning_rate": 6.356916032317063e-07,
+ "loss": 6.0488,
+ "step": 33
+ },
+ {
+ "epoch": 0.9444444444444444,
+ "grad_norm": 4.246970082642438,
+ "learning_rate": 5.766668908495966e-07,
+ "loss": 6.1624,
+ "step": 34
+ },
+ {
+ "epoch": 0.9722222222222222,
+ "grad_norm": 5.215470422371497,
+ "learning_rate": 5.341825557225321e-07,
+ "loss": 5.9146,
+ "step": 35
+ },
+ {
+ "epoch": 1.0,
+ "grad_norm": 7.122849870050112,
+ "learning_rate": 5.085619292935726e-07,
+ "loss": 5.8958,
+ "step": 36
+ }
+ ],
+ "logging_steps": 1,
+ "max_steps": 36,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 1,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 74680891342848.0,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-36/training_args.bin b/checkpoint-36/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1285b0a62e3c1abbe7883517f10388cc0c705fda
--- /dev/null
+++ b/checkpoint-36/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94aa0eaccf48363aa0ce66e0d66748ccb8a33089eb92307bbe1842f7894e4f55
+size 9539
diff --git a/checkpoint-36/zero_to_fp32.py b/checkpoint-36/zero_to_fp32.py
new file mode 100644
index 0000000000000000000000000000000000000000..24cc342e78d1a006c782b3a4cd68d9ce786d8fd8
--- /dev/null
+++ b/checkpoint-36/zero_to_fp32.py
@@ -0,0 +1,604 @@
+#!/usr/bin/env python
+
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
+# DeepSpeed Team
+
+# This script extracts fp32 consolidated weights from ZeRO 1, 2 and 3 DeepSpeed checkpoints. It gets
+# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in
+# the future. Once extracted, the weights don't require DeepSpeed and can be used in any
+# application.
+#
+# example: python zero_to_fp32.py . pytorch_model.bin
+
+import argparse
+import torch
+import glob
+import math
+import os
+import re
+from collections import OrderedDict
+from dataclasses import dataclass
+
+# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with
+# DeepSpeed data structures it has to be available in the current python environment.
+from deepspeed.utils import logger
+from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS,
+ FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES,
+ FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS)
+
+
+@dataclass
+class zero_model_state:
+ buffers: dict()
+ param_shapes: dict()
+ shared_params: list
+ ds_version: int
+ frozen_param_shapes: dict()
+ frozen_param_fragments: dict()
+
+
+debug = 0
+
+# load to cpu
+device = torch.device('cpu')
+
+
+def atoi(text):
+ return int(text) if text.isdigit() else text
+
+
+def natural_keys(text):
+ '''
+ alist.sort(key=natural_keys) sorts in human order
+ http://nedbatchelder.com/blog/200712/human_sorting.html
+ (See Toothy's implementation in the comments)
+ '''
+ return [atoi(c) for c in re.split(r'(\d+)', text)]
+
+
+def get_model_state_file(checkpoint_dir, zero_stage):
+ if not os.path.isdir(checkpoint_dir):
+ raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")
+
+ # there should be only one file
+ if zero_stage <= 2:
+ file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
+ elif zero_stage == 3:
+ file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")
+
+ if not os.path.exists(file):
+ raise FileNotFoundError(f"can't find model states file at '{file}'")
+
+ return file
+
+
+def get_checkpoint_files(checkpoint_dir, glob_pattern):
+ # XXX: need to test that this simple glob rule works for multi-node setup too
+ ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys)
+
+ if len(ckpt_files) == 0:
+ raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'")
+
+ return ckpt_files
+
+
+def get_optim_files(checkpoint_dir):
+ return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt")
+
+
+def get_model_state_files(checkpoint_dir):
+ return get_checkpoint_files(checkpoint_dir, "*_model_states.pt")
+
+
+def parse_model_states(files):
+ zero_model_states = []
+ for file in files:
+ state_dict = torch.load(file, map_location=device)
+
+ if BUFFER_NAMES not in state_dict:
+ raise ValueError(f"{file} is not a model state checkpoint")
+ buffer_names = state_dict[BUFFER_NAMES]
+ if debug:
+ print("Found buffers:", buffer_names)
+
+ # recover just the buffers while restoring them to fp32 if they were saved in fp16
+ buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names}
+ param_shapes = state_dict[PARAM_SHAPES]
+
+ # collect parameters that are included in param_shapes
+ param_names = []
+ for s in param_shapes:
+ for name in s.keys():
+ param_names.append(name)
+
+ # update with frozen parameters
+ frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None)
+ if frozen_param_shapes is not None:
+ if debug:
+ print(f"Found frozen_param_shapes: {frozen_param_shapes}")
+ param_names += list(frozen_param_shapes.keys())
+
+ # handle shared params
+ shared_params = [[k, v] for k, v in state_dict["shared_params"].items()]
+
+ ds_version = state_dict.get(DS_VERSION, None)
+
+ frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None)
+
+ z_model_state = zero_model_state(buffers=buffers,
+ param_shapes=param_shapes,
+ shared_params=shared_params,
+ ds_version=ds_version,
+ frozen_param_shapes=frozen_param_shapes,
+ frozen_param_fragments=frozen_param_fragments)
+ zero_model_states.append(z_model_state)
+
+ return zero_model_states
+
+
+def parse_optim_states(files, ds_checkpoint_dir):
+
+ total_files = len(files)
+ state_dicts = []
+ for f in files:
+ state_dict = torch.load(f, map_location=device)
+        # immediately discard the two potentially huge optimizer states as we only care for fp32 master weights
+ # and also handle the case where it was already removed by another helper script
+ state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None)
+ state_dicts.append(state_dict)
+
+ if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]:
+ raise ValueError(f"{files[0]} is not a zero checkpoint")
+ zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE]
+ world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT]
+
+ # For ZeRO-2 each param group can have different partition_count as data parallelism for expert
+ # parameters can be different from data parallelism for non-expert parameters. So we can just
+ # use the max of the partition_count to get the dp world_size.
+
+ if type(world_size) is list:
+ world_size = max(world_size)
+
+ if world_size != total_files:
+ raise ValueError(
+ f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. "
+ "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes."
+ )
+
+ # the groups are named differently in each stage
+ if zero_stage <= 2:
+ fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS
+ elif zero_stage == 3:
+ fp32_groups_key = FP32_FLAT_GROUPS
+ else:
+ raise ValueError(f"unknown zero stage {zero_stage}")
+
+ if zero_stage <= 2:
+ fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))]
+ elif zero_stage == 3:
+ # if there is more than one param group, there will be multiple flattened tensors - one
+ # flattened tensor per group - for simplicity merge them into a single tensor
+ #
+ # XXX: could make the script more memory efficient for when there are multiple groups - it
+ # will require matching the sub-lists of param_shapes for each param group flattened tensor
+
+ fp32_flat_groups = [
+ torch.cat(state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key], 0) for i in range(len(state_dicts))
+ ]
+
+ return zero_stage, world_size, fp32_flat_groups
+
+
+def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters):
+ """
+ Returns fp32 state_dict reconstructed from ds checkpoint
+
+ Args:
+ - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are)
+
+ """
+ print(f"Processing zero checkpoint '{ds_checkpoint_dir}'")
+
+ optim_files = get_optim_files(ds_checkpoint_dir)
+ zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir)
+ print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}")
+
+ model_files = get_model_state_files(ds_checkpoint_dir)
+
+ zero_model_states = parse_model_states(model_files)
+ print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}')
+
+ if zero_stage <= 2:
+ return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+ exclude_frozen_parameters)
+ elif zero_stage == 3:
+ return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+ exclude_frozen_parameters)
+
+
+def _zero2_merge_frozen_params(state_dict, zero_model_states):
+ if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
+ return
+
+ frozen_param_shapes = zero_model_states[0].frozen_param_shapes
+ frozen_param_fragments = zero_model_states[0].frozen_param_fragments
+
+ if debug:
+ num_elem = sum(s.numel() for s in frozen_param_shapes.values())
+ print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')
+
+ wanted_params = len(frozen_param_shapes)
+ wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
+ avail_numel = sum([p.numel() for p in frozen_param_fragments.values()])
+ print(f'Frozen params: Have {avail_numel} numels to process.')
+ print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')
+
+ total_params = 0
+ total_numel = 0
+ for name, shape in frozen_param_shapes.items():
+ total_params += 1
+ unpartitioned_numel = shape.numel()
+ total_numel += unpartitioned_numel
+
+ state_dict[name] = frozen_param_fragments[name]
+
+ if debug:
+ print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
+
+ print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
+
+
+def _has_callable(obj, fn):
+ attr = getattr(obj, fn, None)
+ return callable(attr)
+
+
+def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
+ param_shapes = zero_model_states[0].param_shapes
+
+ # Reconstruction protocol:
+ #
+ # XXX: document this
+
+ if debug:
+ for i in range(world_size):
+ for j in range(len(fp32_flat_groups[0])):
+ print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}")
+
+ # XXX: memory usage doubles here (zero2)
+ num_param_groups = len(fp32_flat_groups[0])
+ merged_single_partition_of_fp32_groups = []
+ for i in range(num_param_groups):
+ merged_partitions = [sd[i] for sd in fp32_flat_groups]
+ full_single_fp32_vector = torch.cat(merged_partitions, 0)
+ merged_single_partition_of_fp32_groups.append(full_single_fp32_vector)
+ avail_numel = sum(
+ [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups])
+
+ if debug:
+ wanted_params = sum([len(shapes) for shapes in param_shapes])
+ wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes])
+ # not asserting if there is a mismatch due to possible padding
+ print(f"Have {avail_numel} numels to process.")
+ print(f"Need {wanted_numel} numels in {wanted_params} params.")
+
+ # params
+ # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
+ # out-of-core computing solution
+ total_numel = 0
+ total_params = 0
+ for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups):
+ offset = 0
+ avail_numel = full_single_fp32_vector.numel()
+ for name, shape in shapes.items():
+
+ unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape)
+ total_numel += unpartitioned_numel
+ total_params += 1
+
+ if debug:
+ print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
+ state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape)
+ offset += unpartitioned_numel
+
+ # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and
+ # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex
+ # paddings performed in the code it's almost impossible to predict the exact numbers w/o the
+ # live optimizer object, so we are checking that the numbers are within the right range
+ align_to = 2 * world_size
+
+ def zero2_align(x):
+ return align_to * math.ceil(x / align_to)
+
+ if debug:
+ print(f"original offset={offset}, avail_numel={avail_numel}")
+
+ offset = zero2_align(offset)
+ avail_numel = zero2_align(avail_numel)
+
+ if debug:
+ print(f"aligned offset={offset}, avail_numel={avail_numel}")
+
+ # Sanity check
+ if offset != avail_numel:
+ raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")
+
+ print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements")
+
+
+def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+ exclude_frozen_parameters):
+ state_dict = OrderedDict()
+
+ # buffers
+ buffers = zero_model_states[0].buffers
+ state_dict.update(buffers)
+ if debug:
+ print(f"added {len(buffers)} buffers")
+
+ if not exclude_frozen_parameters:
+ _zero2_merge_frozen_params(state_dict, zero_model_states)
+
+ _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)
+
+ # recover shared parameters
+ for pair in zero_model_states[0].shared_params:
+ if pair[1] in state_dict:
+ state_dict[pair[0]] = state_dict[pair[1]]
+
+ return state_dict
+
+
+def zero3_partitioned_param_info(unpartitioned_numel, world_size):
+ remainder = unpartitioned_numel % world_size
+ padding_numel = (world_size - remainder) if remainder else 0
+ partitioned_numel = math.ceil(unpartitioned_numel / world_size)
+ return partitioned_numel, padding_numel
+
+
+def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states):
+ if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
+ return
+
+ if debug:
+ for i in range(world_size):
+ num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values())
+ print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')
+
+ frozen_param_shapes = zero_model_states[0].frozen_param_shapes
+ wanted_params = len(frozen_param_shapes)
+ wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
+ avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size
+ print(f'Frozen params: Have {avail_numel} numels to process.')
+ print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')
+
+ total_params = 0
+ total_numel = 0
+ for name, shape in zero_model_states[0].frozen_param_shapes.items():
+ total_params += 1
+ unpartitioned_numel = shape.numel()
+ total_numel += unpartitioned_numel
+
+ param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states)
+ state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape)
+
+ partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)
+
+ if debug:
+ print(
+ f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
+ )
+
+ print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
+
+
+def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
+ param_shapes = zero_model_states[0].param_shapes
+ avail_numel = fp32_flat_groups[0].numel() * world_size
+ # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each
+ # param, re-consolidating each param, while dealing with padding if any
+
+ # merge list of dicts, preserving order
+ param_shapes = {k: v for d in param_shapes for k, v in d.items()}
+
+ if debug:
+ for i in range(world_size):
+ print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}")
+
+ wanted_params = len(param_shapes)
+ wanted_numel = sum(shape.numel() for shape in param_shapes.values())
+ # not asserting if there is a mismatch due to possible padding
+ avail_numel = fp32_flat_groups[0].numel() * world_size
+ print(f"Trainable params: Have {avail_numel} numels to process.")
+ print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.")
+
+ # params
+ # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
+ # out-of-core computing solution
+ offset = 0
+ total_numel = 0
+ total_params = 0
+ for name, shape in param_shapes.items():
+
+ unpartitioned_numel = shape.numel()
+ total_numel += unpartitioned_numel
+ total_params += 1
+
+ partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)
+
+ if debug:
+ print(
+ f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
+ )
+
+ # XXX: memory usage doubles here
+ state_dict[name] = torch.cat(
+ tuple(fp32_flat_groups[i].narrow(0, offset, partitioned_numel) for i in range(world_size)),
+ 0).narrow(0, 0, unpartitioned_numel).view(shape)
+ offset += partitioned_numel
+
+ offset *= world_size
+
+ # Sanity check
+ if offset != avail_numel:
+ raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")
+
+ print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements")
+
+
+def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+ exclude_frozen_parameters):
+ state_dict = OrderedDict()
+
+ # buffers
+ buffers = zero_model_states[0].buffers
+ state_dict.update(buffers)
+ if debug:
+ print(f"added {len(buffers)} buffers")
+
+ if not exclude_frozen_parameters:
+ _zero3_merge_frozen_params(state_dict, world_size, zero_model_states)
+
+ _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)
+
+ # recover shared parameters
+ for pair in zero_model_states[0].shared_params:
+ if pair[1] in state_dict:
+ state_dict[pair[0]] = state_dict[pair[1]]
+
+ return state_dict
+
+
+def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag=None, exclude_frozen_parameters=False):
+ """
+ Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with
+ ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example
+ via a model hub.
+
+ Args:
+ - ``checkpoint_dir``: path to the desired checkpoint folder
+ - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14``
+ - ``exclude_frozen_parameters``: exclude frozen parameters
+
+ Returns:
+ - pytorch ``state_dict``
+
+ Note: this approach may not work if your application doesn't have sufficient free CPU memory and
+ you may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with
+ the checkpoint.
+
+ A typical usage might be ::
+
+ from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
+ # do the training and checkpoint saving
+ state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu
+ model = model.cpu() # move to cpu
+ model.load_state_dict(state_dict)
+ # submit to model hub or save the model to share with others
+
+ In this example the ``model`` will no longer be usable in the deepspeed context of the same
+ application. i.e. you will need to re-initialize the deepspeed engine, since
+ ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.
+
+ If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead.
+
+ """
+ if tag is None:
+ latest_path = os.path.join(checkpoint_dir, 'latest')
+ if os.path.isfile(latest_path):
+ with open(latest_path, 'r') as fd:
+ tag = fd.read().strip()
+ else:
+ raise ValueError(f"Unable to find 'latest' file at {latest_path}")
+
+ ds_checkpoint_dir = os.path.join(checkpoint_dir, tag)
+
+ if not os.path.isdir(ds_checkpoint_dir):
+ raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist")
+
+ return _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters)
+
+
+def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, output_file, tag=None, exclude_frozen_parameters=False):
+ """
+ Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be
+ loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed.
+
+ Args:
+ - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
+ - ``output_file``: path to the pytorch fp32 state_dict output file (e.g. path/pytorch_model.bin)
+ - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
+ - ``exclude_frozen_parameters``: exclude frozen parameters
+ """
+
+ state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag, exclude_frozen_parameters)
+ print(f"Saving fp32 state dict to {output_file}")
+ torch.save(state_dict, output_file)
+
+
+def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None):
+ """
+ 1. Put the provided model to cpu
+ 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict``
+ 3. Load it into the provided model
+
+ Args:
+ - ``model``: the model object to update
+ - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
+ - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
+
+ Returns:
+        - ``model``: modified model
+
+ Make sure you have plenty of CPU memory available before you call this function. If you don't
+ have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it
+ conveniently placed for you in the checkpoint folder.
+
+ A typical usage might be ::
+
+ from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint
+ model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir)
+ # submit to model hub or save the model to share with others
+
+ Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context
+ of the same application. i.e. you will need to re-initialize the deepspeed engine, since
+ ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.
+
+ """
+ logger.info(f"Extracting fp32 weights")
+ state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)
+
+ logger.info(f"Overwriting model with fp32 weights")
+ model = model.cpu()
+ model.load_state_dict(state_dict, strict=False)
+
+ return model
+
+
+if __name__ == "__main__":
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument("checkpoint_dir",
+ type=str,
+ help="path to the desired checkpoint folder, e.g., path/checkpoint-12")
+ parser.add_argument(
+ "output_file",
+ type=str,
+ help="path to the pytorch fp32 state_dict output file (e.g. path/checkpoint-12/pytorch_model.bin)")
+ parser.add_argument("-t",
+ "--tag",
+ type=str,
+ default=None,
+ help="checkpoint tag used as a unique identifier for checkpoint. e.g., global_step1")
+ parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters")
+ parser.add_argument("-d", "--debug", action='store_true', help="enable debug")
+ args = parser.parse_args()
+
+ debug = args.debug
+
+ convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir,
+ args.output_file,
+ tag=args.tag,
+ exclude_frozen_parameters=args.exclude_frozen_parameters)
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4946add7353eabbc5791c753f11b44b5be1f89b9
--- /dev/null
+++ b/config.json
@@ -0,0 +1,35 @@
+{
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "eos_token_id": 128009,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 8192,
+ "initializer_range": 0.02,
+ "intermediate_size": 28672,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 64,
+ "num_hidden_layers": 80,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 8.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.51.3",
+ "use_cache": false,
+ "vocab_size": 128256
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cd28e19ed02358fd2089710dcd138907439c525b
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,8 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 128000,
+ "do_sample": true,
+ "eos_token_id": 128009,
+ "transformers_version": "4.51.3",
+ "use_cache": false
+}
diff --git a/model-00001-of-00030.safetensors b/model-00001-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..033593d292ed2fcef62eb2e37f36218b92eac53d
--- /dev/null
+++ b/model-00001-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4cabb5d59084cfa0516e1ee9733929ab22844b6f28be8133e20a098dd1d40e1
+size 4584408808
diff --git a/model-00002-of-00030.safetensors b/model-00002-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..446e42a65a5f73bc5529b2a8c690902e576df3a0
--- /dev/null
+++ b/model-00002-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ddc59c1cc80f904fc1987d24f0c421df9eefaf8074e787b0ee14b67b90a4f8b
+size 4664167376
diff --git a/model-00003-of-00030.safetensors b/model-00003-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..71559b7831aad3c8b70f8c0824c5814a3d841844
--- /dev/null
+++ b/model-00003-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22513694fe0f63a9a036561351b57c0cfae48fc00cf36c12b662649ce8cb078e
+size 4999711704
diff --git a/model-00004-of-00030.safetensors b/model-00004-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bb62ce95c63f77b715a9ff87eef0e9dfc1ceee43
--- /dev/null
+++ b/model-00004-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:691e9246e1d9092c75352c97f0eccb8dd02639553042f0f5a51db2531b6c74f3
+size 4966157032
diff --git a/model-00005-of-00030.safetensors b/model-00005-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9f842617523301d625fcc209775b9dc4ad793dc4
--- /dev/null
+++ b/model-00005-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1e5a69ab2e16687429773b4b3f681852518869dbbcec8e61caa012064cf0f0a
+size 4664134408
diff --git a/model-00006-of-00030.safetensors b/model-00006-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8984b19cd0deb7d687ca52345399b891a8fa8cea
--- /dev/null
+++ b/model-00006-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e3a1b59792281c7d8ae475ce26bf81cf057c68b243cb62946c97a07dc4594ee
+size 4664167408
diff --git a/model-00007-of-00030.safetensors b/model-00007-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..147a1fb80453297df1f994aee9e7697b2980bec3
--- /dev/null
+++ b/model-00007-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d748bcea229e07f1eb39f922e2722b88f64ed8b1c927d8468a5a7255722cc1f
+size 4664167408
diff --git a/model-00008-of-00030.safetensors b/model-00008-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4d1ef19cb75adaabb878baa1eec5ab10d13cb697
--- /dev/null
+++ b/model-00008-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e19bfcd9e549ef045b6f6c65da24295bc1d616fef032cfa3ed3d9d070c2e079
+size 4999711728
diff --git a/model-00009-of-00030.safetensors b/model-00009-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e9f0f96a980427add67024e806820e7b69527fc9
--- /dev/null
+++ b/model-00009-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb099d9ff85178472649e0ff678c2c773ce39107fddb3eafaed71031499a2e9b
+size 4966157056
diff --git a/model-00010-of-00030.safetensors b/model-00010-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8a43714c237c6900642f8480591d112ecd9b2a7e
--- /dev/null
+++ b/model-00010-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afbf674bac3fabfce368a362c07ce6657d0cba056611928f06589c35cce5f747
+size 4664134408
diff --git a/model-00011-of-00030.safetensors b/model-00011-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1f85a9ef08e7a95f94f930b46eaa0476b9e3cf92
--- /dev/null
+++ b/model-00011-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e42f2db2ecfc3ee540cb85949151df7dbc675ca10fe9bd70937ad3ab97d7fd52
+size 4664167408
diff --git a/model-00012-of-00030.safetensors b/model-00012-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6f196eae0cd1c6e943bc388a4cb803eee86c1949
--- /dev/null
+++ b/model-00012-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:677690d7c7590a54095252fe70aa438cbc8e237cf6af60da9069f733bf4ff4ce
+size 4664167408
diff --git a/model-00013-of-00030.safetensors b/model-00013-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e7c9ffc9f09d43ca8ccd6a839008b246d96bf38d
--- /dev/null
+++ b/model-00013-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d703720cd04e2b35c0528029a2ece4a6d621ab43b075d39d929df54202a267ee
+size 4999711728
diff --git a/model-00014-of-00030.safetensors b/model-00014-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7ab6485f7a376a23b8d9eaf1398946eb5e1b202a
--- /dev/null
+++ b/model-00014-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c63ae134dbaa344665789b8b0ef2697a4984bf2a4060b7e9b4d7d2202b5e4ade
+size 4966157056
diff --git a/model-00015-of-00030.safetensors b/model-00015-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3a0af656a33007be9948cfd8d6e8b08520736eaa
--- /dev/null
+++ b/model-00015-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:960ae0a271bb777784dc8cdeb96bb88451e6d6620c1400073fe8d37552c74173
+size 4664134408
diff --git a/model-00016-of-00030.safetensors b/model-00016-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cad213e04f1b91bdd12b51e9917e5f43ac12aba8
--- /dev/null
+++ b/model-00016-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44abb02c211d7aded70a0f23c996afdb4505a287155e5b344cb7b2a43ec8640b
+size 4664167408
diff --git a/model-00017-of-00030.safetensors b/model-00017-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..42f81fcd692c27a534244a4e7b9d9915008c8685
--- /dev/null
+++ b/model-00017-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22aeaf179ffde50a10aecbda803a140446fc76b609e42c05aae821a527dbca75
+size 4664167408
diff --git a/model-00018-of-00030.safetensors b/model-00018-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b9ceb087fa0864768d72f549d3e8967e6d2966ae
--- /dev/null
+++ b/model-00018-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2798d3191824f504e59750f67a75e4f620956610e79eb5451d32ea4addf2c7f3
+size 4999711728
diff --git a/model-00019-of-00030.safetensors b/model-00019-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..11f761a31b38f451644b476ed434c05b2b0d8a57
--- /dev/null
+++ b/model-00019-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd045f6262276c5c417d6d381ea88c24ab7b506398d1172c8a06612eccc55640
+size 4966157056
diff --git a/model-00020-of-00030.safetensors b/model-00020-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4785512474540a8b10253806117ea6e210106d9b
--- /dev/null
+++ b/model-00020-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0048cf26d97567fac9f8c491373844b43fda8da0293f6eeccf4fb63e44bfea1
+size 4664134408
diff --git a/model-00021-of-00030.safetensors b/model-00021-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bc037f2544096a32cb33c50be7afb12bf071462a
--- /dev/null
+++ b/model-00021-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07330fd90ed976fb41f9ca657a413ed2d3444b8ed8a7fdca64838706e9834bad
+size 4664167408
diff --git a/model-00022-of-00030.safetensors b/model-00022-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..90b9c987a0f69f89fefa37e4fa22cb55087cdac2
--- /dev/null
+++ b/model-00022-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d3e63e361e2f21b9a8dc8a7c27a63384371427803ef8a96a6ea7d7c9fdf25bd
+size 4664167408
diff --git a/model-00023-of-00030.safetensors b/model-00023-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..17c60a61d31b33f6907d523024ced7b5ecfd3d74
--- /dev/null
+++ b/model-00023-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cdbb795719a187775a5edb2431d214e38882a8954d994d3d6fc921f817d2636
+size 4999711728
diff --git a/model-00024-of-00030.safetensors b/model-00024-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2bc3c5d0068473ef5aaaa61e30836eb91af7f059
--- /dev/null
+++ b/model-00024-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:756d4e1a1fc22a9d38285c03568635c2a3440455047aa515d7653221f1fa0e18
+size 4966157056
diff --git a/model-00025-of-00030.safetensors b/model-00025-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5d1fb58abd6dc7b4c539f9cdf9c975cd6b0472b8
--- /dev/null
+++ b/model-00025-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4b93c281b20705b010ed1af2fb461fae6317c6f3034fad649fde5fed3ac9e80
+size 4664134408
diff --git a/model-00026-of-00030.safetensors b/model-00026-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6d429b5614d0bea063a8a8679e104d9841cba596
--- /dev/null
+++ b/model-00026-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0cb003d2d89c383dcde824c418a1a832ff908f8ee2b98e8d27cbb3707e363cda
+size 4664167408
diff --git a/model-00027-of-00030.safetensors b/model-00027-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5471659e734724652b85d5f8b4c2e932c466b3e9
--- /dev/null
+++ b/model-00027-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:043f5027e70ee23b4d65d56106055018ab5746fdfbf856a4a5ca963bede6679b
+size 4664167408
diff --git a/model-00028-of-00030.safetensors b/model-00028-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6e3b477a84e9e876cf7add95c8d6fb517b5cd89f
--- /dev/null
+++ b/model-00028-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08b1d835c29f3f1ed6fb37c22dd449cb0eb587373866a2009959c22d852f1824
+size 4999711728
diff --git a/model-00029-of-00030.safetensors b/model-00029-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..40657cd94cd7484bfe53de232c0b37f31c900862
--- /dev/null
+++ b/model-00029-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b02c8b1e7875bd4be22db0d438e28a4ed47c4279914b5cf02200c5bdeccdb0c6
+size 4966173536
diff --git a/model-00030-of-00030.safetensors b/model-00030-of-00030.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a92bae86487bffc2c07e97dd23d54198d8efe174
--- /dev/null
+++ b/model-00030-of-00030.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a200ab4a9f6e3c246058e2742fc5a09a7586ac85a2c7215aa5c42bb051df0b5
+size 2101346432
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..37b1afe63cadc4ddce30aaff1b149c2f3083650c
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,730 @@
+{
+ "metadata": {
+ "total_size": 141107412992
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00030-of-00030.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00002-of-00030.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00030.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00030.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00006-of-00030.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00006-of-00030.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00005-of-00030.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00007-of-00030.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00007-of-00030.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00006-of-00030.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00008-of-00030.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00008-of-00030.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00007-of-00030.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.mlp.up_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00009-of-00030.safetensors",
+ "model.layers.21.mlp.down_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.21.mlp.gate_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00009-of-00030.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00008-of-00030.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.mlp.down_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.mlp.gate_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.mlp.up_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.mlp.down_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.mlp.gate_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.mlp.up_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00009-of-00030.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.mlp.down_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.mlp.gate_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.mlp.up_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.mlp.down_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.mlp.gate_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.mlp.up_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00011-of-00030.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00011-of-00030.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00010-of-00030.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.mlp.down_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.mlp.up_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.mlp.down_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.mlp.gate_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.mlp.up_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00012-of-00030.safetensors",
+ "model.layers.29.mlp.down_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.29.mlp.gate_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.29.mlp.up_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00012-of-00030.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00011-of-00030.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.mlp.down_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.mlp.gate_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.mlp.up_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.mlp.down_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.mlp.gate_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.mlp.up_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.32.input_layernorm.weight": "model-00013-of-00030.safetensors",
+ "model.layers.32.mlp.down_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.32.mlp.gate_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.32.mlp.up_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.32.post_attention_layernorm.weight": "model-00013-of-00030.safetensors",
+ "model.layers.32.self_attn.k_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.32.self_attn.o_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.32.self_attn.q_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.32.self_attn.v_proj.weight": "model-00012-of-00030.safetensors",
+ "model.layers.33.input_layernorm.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.mlp.down_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.mlp.gate_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.mlp.up_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.post_attention_layernorm.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.self_attn.k_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.self_attn.o_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.self_attn.q_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.33.self_attn.v_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.input_layernorm.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.mlp.down_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.mlp.gate_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.mlp.up_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.post_attention_layernorm.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.self_attn.k_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.self_attn.o_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.self_attn.q_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.34.self_attn.v_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.35.input_layernorm.weight": "model-00014-of-00030.safetensors",
+ "model.layers.35.mlp.down_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.35.mlp.gate_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.35.mlp.up_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.35.post_attention_layernorm.weight": "model-00014-of-00030.safetensors",
+ "model.layers.35.self_attn.k_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.35.self_attn.o_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.35.self_attn.q_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.35.self_attn.v_proj.weight": "model-00013-of-00030.safetensors",
+ "model.layers.36.input_layernorm.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.mlp.down_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.mlp.gate_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.mlp.up_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.post_attention_layernorm.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.self_attn.k_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.self_attn.o_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.self_attn.q_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.36.self_attn.v_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.input_layernorm.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.mlp.down_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.mlp.gate_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.mlp.up_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.post_attention_layernorm.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.self_attn.k_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.self_attn.o_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.self_attn.q_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.37.self_attn.v_proj.weight": "model-00014-of-00030.safetensors",
+ "model.layers.38.input_layernorm.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.mlp.down_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.mlp.gate_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.mlp.up_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.post_attention_layernorm.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.self_attn.k_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.self_attn.o_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.self_attn.q_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.38.self_attn.v_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.input_layernorm.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.mlp.down_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.mlp.gate_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.mlp.up_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.post_attention_layernorm.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.self_attn.k_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.self_attn.o_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.self_attn.q_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.39.self_attn.v_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00003-of-00030.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00003-of-00030.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00030.safetensors",
+ "model.layers.40.input_layernorm.weight": "model-00016-of-00030.safetensors",
+ "model.layers.40.mlp.down_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.40.mlp.gate_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.40.mlp.up_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.40.post_attention_layernorm.weight": "model-00016-of-00030.safetensors",
+ "model.layers.40.self_attn.k_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.40.self_attn.o_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.40.self_attn.q_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.40.self_attn.v_proj.weight": "model-00015-of-00030.safetensors",
+ "model.layers.41.input_layernorm.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.mlp.down_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.mlp.gate_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.mlp.up_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.post_attention_layernorm.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.self_attn.k_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.self_attn.o_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.self_attn.q_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.41.self_attn.v_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.input_layernorm.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.mlp.down_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.mlp.gate_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.mlp.up_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.post_attention_layernorm.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.self_attn.k_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.self_attn.o_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.self_attn.q_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.42.self_attn.v_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.43.input_layernorm.weight": "model-00017-of-00030.safetensors",
+ "model.layers.43.mlp.down_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.43.mlp.gate_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.43.mlp.up_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.43.post_attention_layernorm.weight": "model-00017-of-00030.safetensors",
+ "model.layers.43.self_attn.k_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.43.self_attn.o_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.43.self_attn.q_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.43.self_attn.v_proj.weight": "model-00016-of-00030.safetensors",
+ "model.layers.44.input_layernorm.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.mlp.down_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.mlp.gate_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.mlp.up_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.post_attention_layernorm.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.self_attn.k_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.self_attn.o_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.self_attn.q_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.44.self_attn.v_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.input_layernorm.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.mlp.down_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.mlp.gate_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.mlp.up_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.post_attention_layernorm.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.self_attn.k_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.self_attn.o_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.self_attn.q_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.45.self_attn.v_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.46.input_layernorm.weight": "model-00018-of-00030.safetensors",
+ "model.layers.46.mlp.down_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.46.mlp.gate_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.46.mlp.up_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.46.post_attention_layernorm.weight": "model-00018-of-00030.safetensors",
+ "model.layers.46.self_attn.k_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.46.self_attn.o_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.46.self_attn.q_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.46.self_attn.v_proj.weight": "model-00017-of-00030.safetensors",
+ "model.layers.47.input_layernorm.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.mlp.down_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.mlp.gate_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.mlp.up_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.post_attention_layernorm.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.self_attn.k_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.self_attn.o_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.self_attn.q_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.47.self_attn.v_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.input_layernorm.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.mlp.down_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.mlp.gate_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.mlp.up_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.post_attention_layernorm.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.self_attn.k_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.self_attn.o_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.self_attn.q_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.48.self_attn.v_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.49.input_layernorm.weight": "model-00019-of-00030.safetensors",
+ "model.layers.49.mlp.down_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.49.mlp.gate_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.49.mlp.up_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.49.post_attention_layernorm.weight": "model-00019-of-00030.safetensors",
+ "model.layers.49.self_attn.k_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.49.self_attn.o_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.49.self_attn.q_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.49.self_attn.v_proj.weight": "model-00018-of-00030.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.mlp.down_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.mlp.gate_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.mlp.up_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.50.input_layernorm.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.mlp.down_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.mlp.gate_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.mlp.up_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.post_attention_layernorm.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.self_attn.k_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.self_attn.o_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.self_attn.q_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.50.self_attn.v_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.input_layernorm.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.mlp.down_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.mlp.gate_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.mlp.up_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.post_attention_layernorm.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.self_attn.k_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.self_attn.o_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.self_attn.q_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.51.self_attn.v_proj.weight": "model-00019-of-00030.safetensors",
+ "model.layers.52.input_layernorm.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.mlp.down_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.mlp.gate_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.mlp.up_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.post_attention_layernorm.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.self_attn.k_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.self_attn.o_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.self_attn.q_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.52.self_attn.v_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.input_layernorm.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.mlp.down_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.mlp.gate_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.mlp.up_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.post_attention_layernorm.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.self_attn.k_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.self_attn.o_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.self_attn.q_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.53.self_attn.v_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.54.input_layernorm.weight": "model-00021-of-00030.safetensors",
+ "model.layers.54.mlp.down_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.54.mlp.gate_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.54.mlp.up_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.54.post_attention_layernorm.weight": "model-00021-of-00030.safetensors",
+ "model.layers.54.self_attn.k_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.54.self_attn.o_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.54.self_attn.q_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.54.self_attn.v_proj.weight": "model-00020-of-00030.safetensors",
+ "model.layers.55.input_layernorm.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.mlp.down_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.mlp.gate_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.mlp.up_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.post_attention_layernorm.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.self_attn.k_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.self_attn.o_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.self_attn.q_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.55.self_attn.v_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.input_layernorm.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.mlp.down_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.mlp.gate_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.mlp.up_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.post_attention_layernorm.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.self_attn.k_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.self_attn.o_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.self_attn.q_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.56.self_attn.v_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.57.input_layernorm.weight": "model-00022-of-00030.safetensors",
+ "model.layers.57.mlp.down_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.57.mlp.gate_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.57.mlp.up_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.57.post_attention_layernorm.weight": "model-00022-of-00030.safetensors",
+ "model.layers.57.self_attn.k_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.57.self_attn.o_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.57.self_attn.q_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.57.self_attn.v_proj.weight": "model-00021-of-00030.safetensors",
+ "model.layers.58.input_layernorm.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.mlp.down_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.mlp.gate_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.mlp.up_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.post_attention_layernorm.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.self_attn.k_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.self_attn.o_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.self_attn.q_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.58.self_attn.v_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.input_layernorm.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.mlp.down_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.mlp.gate_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.mlp.up_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.post_attention_layernorm.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.self_attn.k_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.self_attn.o_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.self_attn.q_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.59.self_attn.v_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.mlp.down_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.mlp.gate_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.mlp.up_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.60.input_layernorm.weight": "model-00023-of-00030.safetensors",
+ "model.layers.60.mlp.down_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.60.mlp.gate_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.60.mlp.up_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.60.post_attention_layernorm.weight": "model-00023-of-00030.safetensors",
+ "model.layers.60.self_attn.k_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.60.self_attn.o_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.60.self_attn.q_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.60.self_attn.v_proj.weight": "model-00022-of-00030.safetensors",
+ "model.layers.61.input_layernorm.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.mlp.down_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.mlp.gate_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.mlp.up_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.post_attention_layernorm.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.self_attn.k_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.self_attn.o_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.self_attn.q_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.61.self_attn.v_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.input_layernorm.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.mlp.down_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.mlp.gate_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.mlp.up_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.post_attention_layernorm.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.self_attn.k_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.self_attn.o_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.self_attn.q_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.62.self_attn.v_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.63.input_layernorm.weight": "model-00024-of-00030.safetensors",
+ "model.layers.63.mlp.down_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.63.mlp.gate_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.63.mlp.up_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.63.post_attention_layernorm.weight": "model-00024-of-00030.safetensors",
+ "model.layers.63.self_attn.k_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.63.self_attn.o_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.63.self_attn.q_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.63.self_attn.v_proj.weight": "model-00023-of-00030.safetensors",
+ "model.layers.64.input_layernorm.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.mlp.down_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.mlp.gate_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.mlp.up_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.post_attention_layernorm.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.self_attn.k_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.self_attn.o_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.self_attn.q_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.64.self_attn.v_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.input_layernorm.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.mlp.down_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.mlp.gate_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.mlp.up_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.post_attention_layernorm.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.self_attn.k_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.self_attn.o_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.self_attn.q_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.65.self_attn.v_proj.weight": "model-00024-of-00030.safetensors",
+ "model.layers.66.input_layernorm.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.mlp.down_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.mlp.gate_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.mlp.up_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.post_attention_layernorm.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.self_attn.k_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.self_attn.o_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.self_attn.q_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.66.self_attn.v_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.input_layernorm.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.mlp.down_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.mlp.gate_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.mlp.up_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.post_attention_layernorm.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.self_attn.k_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.self_attn.o_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.self_attn.q_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.67.self_attn.v_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.68.input_layernorm.weight": "model-00026-of-00030.safetensors",
+ "model.layers.68.mlp.down_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.68.mlp.gate_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.68.mlp.up_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.68.post_attention_layernorm.weight": "model-00026-of-00030.safetensors",
+ "model.layers.68.self_attn.k_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.68.self_attn.o_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.68.self_attn.q_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.68.self_attn.v_proj.weight": "model-00025-of-00030.safetensors",
+ "model.layers.69.input_layernorm.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.mlp.down_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.mlp.gate_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.mlp.up_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.post_attention_layernorm.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.self_attn.k_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.self_attn.o_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.self_attn.q_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.69.self_attn.v_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00004-of-00030.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.7.mlp.gate_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.7.mlp.up_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00004-of-00030.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00003-of-00030.safetensors",
+ "model.layers.70.input_layernorm.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.mlp.down_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.mlp.gate_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.mlp.up_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.post_attention_layernorm.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.self_attn.k_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.self_attn.o_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.self_attn.q_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.70.self_attn.v_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.71.input_layernorm.weight": "model-00027-of-00030.safetensors",
+ "model.layers.71.mlp.down_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.71.mlp.gate_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.71.mlp.up_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.71.post_attention_layernorm.weight": "model-00027-of-00030.safetensors",
+ "model.layers.71.self_attn.k_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.71.self_attn.o_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.71.self_attn.q_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.71.self_attn.v_proj.weight": "model-00026-of-00030.safetensors",
+ "model.layers.72.input_layernorm.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.mlp.down_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.mlp.gate_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.mlp.up_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.post_attention_layernorm.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.self_attn.k_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.self_attn.o_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.self_attn.q_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.72.self_attn.v_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.input_layernorm.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.mlp.down_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.mlp.gate_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.mlp.up_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.post_attention_layernorm.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.self_attn.k_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.self_attn.o_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.self_attn.q_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.73.self_attn.v_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.74.input_layernorm.weight": "model-00028-of-00030.safetensors",
+ "model.layers.74.mlp.down_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.74.mlp.gate_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.74.mlp.up_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.74.post_attention_layernorm.weight": "model-00028-of-00030.safetensors",
+ "model.layers.74.self_attn.k_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.74.self_attn.o_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.74.self_attn.q_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.74.self_attn.v_proj.weight": "model-00027-of-00030.safetensors",
+ "model.layers.75.input_layernorm.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.mlp.down_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.mlp.gate_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.mlp.up_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.post_attention_layernorm.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.self_attn.k_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.self_attn.o_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.self_attn.q_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.75.self_attn.v_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.input_layernorm.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.mlp.down_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.mlp.gate_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.mlp.up_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.post_attention_layernorm.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.self_attn.k_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.self_attn.o_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.self_attn.q_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.76.self_attn.v_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.77.input_layernorm.weight": "model-00029-of-00030.safetensors",
+ "model.layers.77.mlp.down_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.77.mlp.gate_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.77.mlp.up_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.77.post_attention_layernorm.weight": "model-00029-of-00030.safetensors",
+ "model.layers.77.self_attn.k_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.77.self_attn.o_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.77.self_attn.q_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.77.self_attn.v_proj.weight": "model-00028-of-00030.safetensors",
+ "model.layers.78.input_layernorm.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.mlp.down_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.mlp.gate_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.mlp.up_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.post_attention_layernorm.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.self_attn.k_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.self_attn.o_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.self_attn.q_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.78.self_attn.v_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.input_layernorm.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.mlp.down_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.mlp.gate_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.mlp.up_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.post_attention_layernorm.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.self_attn.k_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.self_attn.o_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.self_attn.q_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.79.self_attn.v_proj.weight": "model-00029-of-00030.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00004-of-00030.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00004-of-00030.safetensors",
+ "model.norm.weight": "model-00029-of-00030.safetensors"
+ }
+}
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..278b7f0f84be865c4687700ee7b3c63d89a51e18
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..af2d22fff23798bea98b1730ae7cdacaee0b087a
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ade1dac458f86f9bea8bf35b713f14e1bbed24228429534038e9f7e54ea3e8b6
+size 17208712
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..809816119c8164bd275957569b7c8c708c8d1092
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,2064 @@
+{
+ "added_tokens_decoder": {
+ "128000": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128001": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128002": {
+ "content": "<|reserved_special_token_0|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128003": {
+ "content": "<|reserved_special_token_1|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128004": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128005": {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128006": {
+ "content": "<|start_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128007": {
+ "content": "<|end_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128008": {
+ "content": "<|eom_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128009": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128010": {
+ "content": "<|python_tag|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128011": {
+ "content": "<|reserved_special_token_3|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128012": {
+ "content": "<|reserved_special_token_4|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128013": {
+ "content": "<|reserved_special_token_5|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128014": {
+ "content": "<|reserved_special_token_6|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128015": {
+ "content": "<|reserved_special_token_7|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128016": {
+ "content": "<|reserved_special_token_8|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128017": {
+ "content": "<|reserved_special_token_9|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128018": {
+ "content": "<|reserved_special_token_10|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128019": {
+ "content": "<|reserved_special_token_11|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128020": {
+ "content": "<|reserved_special_token_12|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128021": {
+ "content": "<|reserved_special_token_13|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128022": {
+ "content": "<|reserved_special_token_14|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128023": {
+ "content": "<|reserved_special_token_15|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128024": {
+ "content": "<|reserved_special_token_16|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128025": {
+ "content": "<|reserved_special_token_17|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128026": {
+ "content": "<|reserved_special_token_18|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128027": {
+ "content": "<|reserved_special_token_19|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128028": {
+ "content": "<|reserved_special_token_20|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128029": {
+ "content": "<|reserved_special_token_21|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128030": {
+ "content": "<|reserved_special_token_22|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128031": {
+ "content": "<|reserved_special_token_23|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128032": {
+ "content": "<|reserved_special_token_24|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128033": {
+ "content": "<|reserved_special_token_25|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128034": {
+ "content": "<|reserved_special_token_26|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128035": {
+ "content": "<|reserved_special_token_27|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128036": {
+ "content": "<|reserved_special_token_28|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128037": {
+ "content": "<|reserved_special_token_29|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128038": {
+ "content": "<|reserved_special_token_30|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128039": {
+ "content": "<|reserved_special_token_31|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128040": {
+ "content": "<|reserved_special_token_32|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128041": {
+ "content": "<|reserved_special_token_33|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128042": {
+ "content": "<|reserved_special_token_34|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128043": {
+ "content": "<|reserved_special_token_35|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128044": {
+ "content": "<|reserved_special_token_36|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128045": {
+ "content": "<|reserved_special_token_37|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128046": {
+ "content": "<|reserved_special_token_38|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128047": {
+ "content": "<|reserved_special_token_39|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128048": {
+ "content": "<|reserved_special_token_40|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128049": {
+ "content": "<|reserved_special_token_41|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128050": {
+ "content": "<|reserved_special_token_42|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128051": {
+ "content": "<|reserved_special_token_43|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128052": {
+ "content": "<|reserved_special_token_44|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128053": {
+ "content": "<|reserved_special_token_45|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128054": {
+ "content": "<|reserved_special_token_46|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128055": {
+ "content": "<|reserved_special_token_47|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128056": {
+ "content": "<|reserved_special_token_48|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128057": {
+ "content": "<|reserved_special_token_49|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128058": {
+ "content": "<|reserved_special_token_50|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128059": {
+ "content": "<|reserved_special_token_51|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128060": {
+ "content": "<|reserved_special_token_52|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128061": {
+ "content": "<|reserved_special_token_53|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128062": {
+ "content": "<|reserved_special_token_54|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128063": {
+ "content": "<|reserved_special_token_55|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128064": {
+ "content": "<|reserved_special_token_56|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128065": {
+ "content": "<|reserved_special_token_57|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128066": {
+ "content": "<|reserved_special_token_58|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128067": {
+ "content": "<|reserved_special_token_59|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128068": {
+ "content": "<|reserved_special_token_60|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128069": {
+ "content": "<|reserved_special_token_61|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128070": {
+ "content": "<|reserved_special_token_62|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128071": {
+ "content": "<|reserved_special_token_63|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128072": {
+ "content": "<|reserved_special_token_64|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128073": {
+ "content": "<|reserved_special_token_65|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128074": {
+ "content": "<|reserved_special_token_66|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128075": {
+ "content": "<|reserved_special_token_67|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128076": {
+ "content": "<|reserved_special_token_68|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128077": {
+ "content": "<|reserved_special_token_69|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128078": {
+ "content": "<|reserved_special_token_70|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128079": {
+ "content": "<|reserved_special_token_71|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128080": {
+ "content": "<|reserved_special_token_72|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128081": {
+ "content": "<|reserved_special_token_73|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128082": {
+ "content": "<|reserved_special_token_74|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128083": {
+ "content": "<|reserved_special_token_75|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128084": {
+ "content": "<|reserved_special_token_76|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128085": {
+ "content": "<|reserved_special_token_77|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128086": {
+ "content": "<|reserved_special_token_78|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128087": {
+ "content": "<|reserved_special_token_79|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128088": {
+ "content": "<|reserved_special_token_80|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128089": {
+ "content": "<|reserved_special_token_81|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128090": {
+ "content": "<|reserved_special_token_82|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128091": {
+ "content": "<|reserved_special_token_83|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128092": {
+ "content": "<|reserved_special_token_84|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128093": {
+ "content": "<|reserved_special_token_85|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128094": {
+ "content": "<|reserved_special_token_86|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128095": {
+ "content": "<|reserved_special_token_87|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128096": {
+ "content": "<|reserved_special_token_88|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128097": {
+ "content": "<|reserved_special_token_89|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128098": {
+ "content": "<|reserved_special_token_90|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128099": {
+ "content": "<|reserved_special_token_91|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128100": {
+ "content": "<|reserved_special_token_92|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128101": {
+ "content": "<|reserved_special_token_93|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128102": {
+ "content": "<|reserved_special_token_94|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128103": {
+ "content": "<|reserved_special_token_95|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128104": {
+ "content": "<|reserved_special_token_96|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128105": {
+ "content": "<|reserved_special_token_97|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128106": {
+ "content": "<|reserved_special_token_98|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128107": {
+ "content": "<|reserved_special_token_99|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128108": {
+ "content": "<|reserved_special_token_100|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128109": {
+ "content": "<|reserved_special_token_101|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128110": {
+ "content": "<|reserved_special_token_102|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128111": {
+ "content": "<|reserved_special_token_103|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128112": {
+ "content": "<|reserved_special_token_104|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128113": {
+ "content": "<|reserved_special_token_105|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128114": {
+ "content": "<|reserved_special_token_106|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128115": {
+ "content": "<|reserved_special_token_107|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128116": {
+ "content": "<|reserved_special_token_108|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128117": {
+ "content": "<|reserved_special_token_109|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128118": {
+ "content": "<|reserved_special_token_110|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128119": {
+ "content": "<|reserved_special_token_111|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128120": {
+ "content": "<|reserved_special_token_112|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128121": {
+ "content": "<|reserved_special_token_113|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128122": {
+ "content": "<|reserved_special_token_114|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128123": {
+ "content": "<|reserved_special_token_115|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128124": {
+ "content": "<|reserved_special_token_116|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128125": {
+ "content": "<|reserved_special_token_117|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128126": {
+ "content": "<|reserved_special_token_118|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128127": {
+ "content": "<|reserved_special_token_119|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128128": {
+ "content": "<|reserved_special_token_120|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128129": {
+ "content": "<|reserved_special_token_121|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128130": {
+ "content": "<|reserved_special_token_122|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128131": {
+ "content": "<|reserved_special_token_123|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128132": {
+ "content": "<|reserved_special_token_124|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128133": {
+ "content": "<|reserved_special_token_125|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128134": {
+ "content": "<|reserved_special_token_126|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128135": {
+ "content": "<|reserved_special_token_127|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128136": {
+ "content": "<|reserved_special_token_128|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128137": {
+ "content": "<|reserved_special_token_129|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128138": {
+ "content": "<|reserved_special_token_130|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128139": {
+ "content": "<|reserved_special_token_131|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128140": {
+ "content": "<|reserved_special_token_132|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128141": {
+ "content": "<|reserved_special_token_133|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128142": {
+ "content": "<|reserved_special_token_134|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128143": {
+ "content": "<|reserved_special_token_135|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128144": {
+ "content": "<|reserved_special_token_136|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128145": {
+ "content": "<|reserved_special_token_137|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128146": {
+ "content": "<|reserved_special_token_138|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128147": {
+ "content": "<|reserved_special_token_139|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128148": {
+ "content": "<|reserved_special_token_140|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128149": {
+ "content": "<|reserved_special_token_141|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128150": {
+ "content": "<|reserved_special_token_142|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128151": {
+ "content": "<|reserved_special_token_143|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128152": {
+ "content": "<|reserved_special_token_144|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128153": {
+ "content": "<|reserved_special_token_145|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128154": {
+ "content": "<|reserved_special_token_146|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128155": {
+ "content": "<|reserved_special_token_147|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128156": {
+ "content": "<|reserved_special_token_148|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128157": {
+ "content": "<|reserved_special_token_149|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128158": {
+ "content": "<|reserved_special_token_150|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128159": {
+ "content": "<|reserved_special_token_151|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128160": {
+ "content": "<|reserved_special_token_152|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128161": {
+ "content": "<|reserved_special_token_153|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128162": {
+ "content": "<|reserved_special_token_154|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128163": {
+ "content": "<|reserved_special_token_155|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128164": {
+ "content": "<|reserved_special_token_156|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128165": {
+ "content": "<|reserved_special_token_157|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128166": {
+ "content": "<|reserved_special_token_158|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128167": {
+ "content": "<|reserved_special_token_159|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128168": {
+ "content": "<|reserved_special_token_160|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128169": {
+ "content": "<|reserved_special_token_161|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128170": {
+ "content": "<|reserved_special_token_162|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128171": {
+ "content": "<|reserved_special_token_163|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128172": {
+ "content": "<|reserved_special_token_164|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128173": {
+ "content": "<|reserved_special_token_165|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128174": {
+ "content": "<|reserved_special_token_166|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128175": {
+ "content": "<|reserved_special_token_167|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128176": {
+ "content": "<|reserved_special_token_168|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128177": {
+ "content": "<|reserved_special_token_169|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128178": {
+ "content": "<|reserved_special_token_170|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128179": {
+ "content": "<|reserved_special_token_171|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128180": {
+ "content": "<|reserved_special_token_172|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128181": {
+ "content": "<|reserved_special_token_173|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128182": {
+ "content": "<|reserved_special_token_174|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128183": {
+ "content": "<|reserved_special_token_175|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128184": {
+ "content": "<|reserved_special_token_176|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128185": {
+ "content": "<|reserved_special_token_177|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128186": {
+ "content": "<|reserved_special_token_178|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128187": {
+ "content": "<|reserved_special_token_179|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128188": {
+ "content": "<|reserved_special_token_180|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128189": {
+ "content": "<|reserved_special_token_181|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128190": {
+ "content": "<|reserved_special_token_182|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128191": {
+ "content": "<|reserved_special_token_183|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128192": {
+ "content": "<|reserved_special_token_184|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128193": {
+ "content": "<|reserved_special_token_185|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128194": {
+ "content": "<|reserved_special_token_186|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128195": {
+ "content": "<|reserved_special_token_187|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128196": {
+ "content": "<|reserved_special_token_188|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128197": {
+ "content": "<|reserved_special_token_189|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128198": {
+ "content": "<|reserved_special_token_190|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128199": {
+ "content": "<|reserved_special_token_191|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128200": {
+ "content": "<|reserved_special_token_192|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128201": {
+ "content": "<|reserved_special_token_193|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128202": {
+ "content": "<|reserved_special_token_194|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128203": {
+ "content": "<|reserved_special_token_195|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128204": {
+ "content": "<|reserved_special_token_196|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128205": {
+ "content": "<|reserved_special_token_197|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128206": {
+ "content": "<|reserved_special_token_198|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128207": {
+ "content": "<|reserved_special_token_199|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128208": {
+ "content": "<|reserved_special_token_200|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128209": {
+ "content": "<|reserved_special_token_201|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128210": {
+ "content": "<|reserved_special_token_202|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128211": {
+ "content": "<|reserved_special_token_203|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128212": {
+ "content": "<|reserved_special_token_204|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128213": {
+ "content": "<|reserved_special_token_205|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128214": {
+ "content": "<|reserved_special_token_206|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128215": {
+ "content": "<|reserved_special_token_207|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128216": {
+ "content": "<|reserved_special_token_208|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128217": {
+ "content": "<|reserved_special_token_209|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128218": {
+ "content": "<|reserved_special_token_210|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128219": {
+ "content": "<|reserved_special_token_211|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128220": {
+ "content": "<|reserved_special_token_212|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128221": {
+ "content": "<|reserved_special_token_213|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128222": {
+ "content": "<|reserved_special_token_214|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128223": {
+ "content": "<|reserved_special_token_215|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128224": {
+ "content": "<|reserved_special_token_216|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128225": {
+ "content": "<|reserved_special_token_217|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128226": {
+ "content": "<|reserved_special_token_218|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128227": {
+ "content": "<|reserved_special_token_219|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128228": {
+ "content": "<|reserved_special_token_220|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128229": {
+ "content": "<|reserved_special_token_221|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128230": {
+ "content": "<|reserved_special_token_222|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128231": {
+ "content": "<|reserved_special_token_223|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128232": {
+ "content": "<|reserved_special_token_224|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128233": {
+ "content": "<|reserved_special_token_225|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128234": {
+ "content": "<|reserved_special_token_226|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128235": {
+ "content": "<|reserved_special_token_227|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128236": {
+ "content": "<|reserved_special_token_228|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128237": {
+ "content": "<|reserved_special_token_229|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128238": {
+ "content": "<|reserved_special_token_230|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128239": {
+ "content": "<|reserved_special_token_231|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128240": {
+ "content": "<|reserved_special_token_232|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128241": {
+ "content": "<|reserved_special_token_233|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128242": {
+ "content": "<|reserved_special_token_234|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128243": {
+ "content": "<|reserved_special_token_235|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128244": {
+ "content": "<|reserved_special_token_236|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128245": {
+ "content": "<|reserved_special_token_237|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128246": {
+ "content": "<|reserved_special_token_238|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128247": {
+ "content": "<|reserved_special_token_239|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128248": {
+ "content": "<|reserved_special_token_240|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128249": {
+ "content": "<|reserved_special_token_241|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128250": {
+ "content": "<|reserved_special_token_242|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128251": {
+ "content": "<|reserved_special_token_243|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128252": {
+ "content": "<|reserved_special_token_244|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128253": {
+ "content": "<|reserved_special_token_245|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128254": {
+ "content": "<|reserved_special_token_246|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128255": {
+ "content": "<|reserved_special_token_247|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|begin_of_text|>",
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|eot_id|>",
+ "extra_special_tokens": {},
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 131072,
+ "pad_token": "<|end_of_text|>",
+ "tokenizer_class": "PreTrainedTokenizer"
+}
diff --git a/training_args.bin b/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1285b0a62e3c1abbe7883517f10388cc0c705fda
--- /dev/null
+++ b/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94aa0eaccf48363aa0ce66e0d66748ccb8a33089eb92307bbe1842f7894e4f55
+size 9539