diff --git a/Everything-LM/README.md b/Everything-LM/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3
--- /dev/null
+++ b/Everything-LM/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/Everything-LM/adapter_config.json b/Everything-LM/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbe4dfdbbcdf59d3381aba62eca599a276e3fb6e
--- /dev/null
+++ b/Everything-LM/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "down_proj",
+    "up_proj",
+    "gate_proj",
+    "k_proj",
+    "o_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Everything-LM/adapter_model.bin b/Everything-LM/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..52a3e0edeca0c2e370018dc5ffd37c1c1719de8d
--- /dev/null
+++ b/Everything-LM/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a991d74cf01bc5993d7d515e6c91abb9aacf2250cc5e32c7912ed38e3764b1d
+size 80114765
diff --git a/Everything-LM/added_tokens.json b/Everything-LM/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..9c16aa4be022f03ad001b006fba14dfb73a1929c
--- /dev/null
+++ b/Everything-LM/added_tokens.json
@@ -0,0 +1,3 @@
+{
+  "<pad>": 32000
+}
diff --git a/Everything-LM/checkpoint-2/README.md b/Everything-LM/checkpoint-2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3
--- /dev/null
+++ b/Everything-LM/checkpoint-2/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/Everything-LM/checkpoint-2/adapter_config.json b/Everything-LM/checkpoint-2/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbe4dfdbbcdf59d3381aba62eca599a276e3fb6e
--- /dev/null
+++ b/Everything-LM/checkpoint-2/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "down_proj",
+    "up_proj",
+    "gate_proj",
+    "k_proj",
+    "o_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Everything-LM/checkpoint-2/adapter_model.bin b/Everything-LM/checkpoint-2/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e3b372cf658c8c76dbabdc160d96e72aa3d7edcb
--- /dev/null
+++ b/Everything-LM/checkpoint-2/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a04a5ca22abdae32458bd108cafa031afc19b36fd26be501f6a79361c29dc8c
+size 80114765
diff --git a/Everything-LM/checkpoint-2/adapter_model/README.md b/Everything-LM/checkpoint-2/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3
--- /dev/null
+++ b/Everything-LM/checkpoint-2/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/Everything-LM/checkpoint-2/adapter_model/adapter_config.json b/Everything-LM/checkpoint-2/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbe4dfdbbcdf59d3381aba62eca599a276e3fb6e
--- /dev/null
+++ b/Everything-LM/checkpoint-2/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "down_proj",
+    "up_proj",
+    "gate_proj",
+    "k_proj",
+    "o_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Everything-LM/checkpoint-2/adapter_model/adapter_model.bin b/Everything-LM/checkpoint-2/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e3b372cf658c8c76dbabdc160d96e72aa3d7edcb
--- /dev/null
+++ b/Everything-LM/checkpoint-2/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a04a5ca22abdae32458bd108cafa031afc19b36fd26be501f6a79361c29dc8c
+size 80114765
diff --git a/Everything-LM/checkpoint-2/optimizer.pt b/Everything-LM/checkpoint-2/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1bec732c18aeed9058f045a30be5c91fe473f041
--- /dev/null
+++ b/Everything-LM/checkpoint-2/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3318079d84860039de45c144aa3fe2e9d0e56190bfc36e1c2b9e412d0598ee26
+size 40569887
diff --git a/Everything-LM/checkpoint-2/rng_state.pth b/Everything-LM/checkpoint-2/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a64f8db7f0eef7d8f6fd06fa05ca8ac7a972a822
--- /dev/null
+++ b/Everything-LM/checkpoint-2/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:984433fd7be740b2d6360cfe44e349d5a40ecb2285791768d183ed3afcfc48aa
+size 14575
diff --git a/Everything-LM/checkpoint-2/scheduler.pt b/Everything-LM/checkpoint-2/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4d74bc8aac574cbc99a3e87804fc277de9d7b9bf
--- /dev/null
+++ b/Everything-LM/checkpoint-2/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f69965cd123fba88348bcf5858148ecb2990698e61613f3f725965ea841e49de
+size 627
diff --git a/Everything-LM/checkpoint-2/trainer_state.json b/Everything-LM/checkpoint-2/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..87864cb8a5a8be865466a31011bdb461f7de87a0
--- /dev/null
+++ b/Everything-LM/checkpoint-2/trainer_state.json
@@ -0,0 +1,31 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.8648648648648649,
+  "eval_steps": 500,
+  "global_step": 2,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.43,
+      "learning_rate": 0.0002799038105676658,
+      "loss": 1.5099,
+      "step": 1
+    },
+    {
+      "epoch": 0.86,
+      "learning_rate": 0.000225,
+      "loss": 1.4484,
+      "step": 2
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 6,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 1.2089671264763904e+16,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/Everything-LM/checkpoint-2/training_args.bin b/Everything-LM/checkpoint-2/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c2985646bdb82b8066f0e57712fe3aee0e87a525
--- /dev/null
+++ b/Everything-LM/checkpoint-2/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5069800a33ca54bf43c3b93e1d5f640909151ea51562afd2230d8c868d36ff7b
+size 4155
diff --git a/Everything-LM/checkpoint-4/README.md b/Everything-LM/checkpoint-4/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3
--- /dev/null
+++ b/Everything-LM/checkpoint-4/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/Everything-LM/checkpoint-4/adapter_config.json b/Everything-LM/checkpoint-4/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbe4dfdbbcdf59d3381aba62eca599a276e3fb6e
--- /dev/null
+++ b/Everything-LM/checkpoint-4/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "down_proj",
+    "up_proj",
+    "gate_proj",
+    "k_proj",
+    "o_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Everything-LM/checkpoint-4/adapter_model.bin b/Everything-LM/checkpoint-4/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2711f0ff931bba5d086ec50f727c9de085513c46
--- /dev/null
+++ b/Everything-LM/checkpoint-4/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b9fe4007d8a741162ca6333b04b5e5df236fc4c410a30f8bde31fbda5ea0b4c
+size 80114765
diff --git a/Everything-LM/checkpoint-4/adapter_model/README.md b/Everything-LM/checkpoint-4/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3
--- /dev/null
+++ b/Everything-LM/checkpoint-4/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/Everything-LM/checkpoint-4/adapter_model/adapter_config.json b/Everything-LM/checkpoint-4/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbe4dfdbbcdf59d3381aba62eca599a276e3fb6e
--- /dev/null
+++ b/Everything-LM/checkpoint-4/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "down_proj",
+    "up_proj",
+    "gate_proj",
+    "k_proj",
+    "o_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Everything-LM/checkpoint-4/adapter_model/adapter_model.bin b/Everything-LM/checkpoint-4/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2711f0ff931bba5d086ec50f727c9de085513c46
--- /dev/null
+++ b/Everything-LM/checkpoint-4/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b9fe4007d8a741162ca6333b04b5e5df236fc4c410a30f8bde31fbda5ea0b4c
+size 80114765
diff --git a/Everything-LM/checkpoint-4/optimizer.pt b/Everything-LM/checkpoint-4/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c0a9e0acb404e1dfd319d48ef51f2b012916535b
--- /dev/null
+++ b/Everything-LM/checkpoint-4/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5487ddde84775df62200352f01ec1c66a2cb3960ede96aadbd22f95022927fc
+size 40569887
diff --git a/Everything-LM/checkpoint-4/rng_state.pth b/Everything-LM/checkpoint-4/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c34700ebf6c4e2c40f18678e00acc4a18319f64d
--- /dev/null
+++ b/Everything-LM/checkpoint-4/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82b85a7a337110f6a2d8109bdab3eec70d3e60c4e6b04e854ff5998d9e8f9f68
+size 14575
diff --git a/Everything-LM/checkpoint-4/scheduler.pt b/Everything-LM/checkpoint-4/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..03c921d9466f8c47148f689ba46c71631f0f8426
--- /dev/null
+++ b/Everything-LM/checkpoint-4/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c60fa183fd6ae9404348042f9312b80e0a1fb1bc4a4feda8a143c9f9c99975d7
+size 627
diff --git a/Everything-LM/checkpoint-4/trainer_state.json b/Everything-LM/checkpoint-4/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..a486c764f46d79a1706b23daf0b20e83a84e290d
--- /dev/null
+++ b/Everything-LM/checkpoint-4/trainer_state.json
@@ -0,0 +1,43 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.7297297297297298,
+  "eval_steps": 500,
+  "global_step": 4,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.43,
+      "learning_rate": 0.0002799038105676658,
+      "loss": 1.5099,
+      "step": 1
+    },
+    {
+      "epoch": 0.86,
+      "learning_rate": 0.000225,
+      "loss": 1.4484,
+      "step": 2
+    },
+    {
+      "epoch": 1.3,
+      "learning_rate": 0.00015,
+      "loss": 1.4938,
+      "step": 3
+    },
+    {
+      "epoch": 1.73,
+      "learning_rate": 7.500000000000002e-05,
+      "loss": 1.4088,
+      "step": 4
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 6,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 2.4157216960413696e+16,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/Everything-LM/checkpoint-4/training_args.bin b/Everything-LM/checkpoint-4/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c2985646bdb82b8066f0e57712fe3aee0e87a525
--- /dev/null
+++ b/Everything-LM/checkpoint-4/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5069800a33ca54bf43c3b93e1d5f640909151ea51562afd2230d8c868d36ff7b
+size 4155
diff --git a/Everything-LM/checkpoint-6/README.md b/Everything-LM/checkpoint-6/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3
--- /dev/null
+++ b/Everything-LM/checkpoint-6/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/Everything-LM/checkpoint-6/adapter_config.json b/Everything-LM/checkpoint-6/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbe4dfdbbcdf59d3381aba62eca599a276e3fb6e
--- /dev/null
+++ b/Everything-LM/checkpoint-6/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "down_proj",
+    "up_proj",
+    "gate_proj",
+    "k_proj",
+    "o_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Everything-LM/checkpoint-6/adapter_model.bin b/Everything-LM/checkpoint-6/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..52a3e0edeca0c2e370018dc5ffd37c1c1719de8d
--- /dev/null
+++ b/Everything-LM/checkpoint-6/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a991d74cf01bc5993d7d515e6c91abb9aacf2250cc5e32c7912ed38e3764b1d
+size 80114765
diff --git a/Everything-LM/checkpoint-6/adapter_model/README.md b/Everything-LM/checkpoint-6/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3
--- /dev/null
+++ b/Everything-LM/checkpoint-6/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/Everything-LM/checkpoint-6/adapter_model/adapter_config.json b/Everything-LM/checkpoint-6/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbe4dfdbbcdf59d3381aba62eca599a276e3fb6e
--- /dev/null
+++ b/Everything-LM/checkpoint-6/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "down_proj",
+    "up_proj",
+    "gate_proj",
+    "k_proj",
+    "o_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Everything-LM/checkpoint-6/adapter_model/adapter_model.bin b/Everything-LM/checkpoint-6/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..52a3e0edeca0c2e370018dc5ffd37c1c1719de8d
--- /dev/null
+++ b/Everything-LM/checkpoint-6/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a991d74cf01bc5993d7d515e6c91abb9aacf2250cc5e32c7912ed38e3764b1d
+size 80114765
diff --git a/Everything-LM/checkpoint-6/optimizer.pt b/Everything-LM/checkpoint-6/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..16baebd689faf52ebdd48501c1b29c6760740d26
--- /dev/null
+++ b/Everything-LM/checkpoint-6/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e6418f6ca59834adfc8c6238036b82d86a4f38a81210582f84a5693e45c8120
+size 40569887
diff --git a/Everything-LM/checkpoint-6/rng_state.pth b/Everything-LM/checkpoint-6/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dda35b16545baec766437929889f4b0c1338783e
--- /dev/null
+++ b/Everything-LM/checkpoint-6/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89ad21499347281d1b140497be2595dba5d75ce73b57d40d135e0fdd20c3f4c5
+size 14575
diff --git a/Everything-LM/checkpoint-6/scheduler.pt b/Everything-LM/checkpoint-6/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ada004f66f8383e0ea4230d1bb89ddcead6aa5b8
--- /dev/null
+++ b/Everything-LM/checkpoint-6/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a2141b0f152d878006263e11b37207f4c1e1d4252b202dbe2b127e504173c88
+size 627
diff --git a/Everything-LM/checkpoint-6/trainer_state.json b/Everything-LM/checkpoint-6/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..7ff70716e7d7632624a741fe5bb769bb49abce28
--- /dev/null
+++ b/Everything-LM/checkpoint-6/trainer_state.json
@@ -0,0 +1,55 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.5945945945945947,
+  "eval_steps": 500,
+  "global_step": 6,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.43,
+      "learning_rate": 0.0002799038105676658,
+      "loss": 1.5099,
+      "step": 1
+    },
+    {
+      "epoch": 0.86,
+      "learning_rate": 0.000225,
+      "loss": 1.4484,
+      "step": 2
+    },
+    {
+      "epoch": 1.3,
+      "learning_rate": 0.00015,
+      "loss": 1.4938,
+      "step": 3
+    },
+    {
+      "epoch": 1.73,
+      "learning_rate": 7.500000000000002e-05,
+      "loss": 1.4088,
+      "step": 4
+    },
+    {
+      "epoch": 2.16,
+      "learning_rate": 2.009618943233419e-05,
+      "loss": 1.3926,
+      "step": 5
+    },
+    {
+      "epoch": 2.59,
+      "learning_rate": 0.0,
+      "loss": 1.4358,
+      "step": 6
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 6,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 3.134282090623795e+16,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/Everything-LM/checkpoint-6/training_args.bin b/Everything-LM/checkpoint-6/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c2985646bdb82b8066f0e57712fe3aee0e87a525
--- /dev/null
+++ b/Everything-LM/checkpoint-6/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5069800a33ca54bf43c3b93e1d5f640909151ea51562afd2230d8c868d36ff7b
+size 4155
diff --git a/Everything-LM/special_tokens_map.json b/Everything-LM/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..3f58a5e115855c6ea3cec98accae196ad927222e
--- /dev/null
+++ b/Everything-LM/special_tokens_map.json
@@ -0,0 +1,6 @@
+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "[PAD]",
+  "unk_token": "<unk>"
+}
diff --git a/Everything-LM/tokenizer.model b/Everything-LM/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899
--- /dev/null
+++ b/Everything-LM/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
diff --git a/Everything-LM/tokenizer_config.json b/Everything-LM/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..65532c2074f7f407d2d801a559f3d90aa5137f26
--- /dev/null
+++ b/Everything-LM/tokenizer_config.json
@@ -0,0 +1,38 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": null,
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "trust_remote_code": false,
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "use_default_system_prompt": true,
+  "use_fast": true
+}
diff --git a/Puffin-7B/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/Puffin-7B/.ipynb_checkpoints/Untitled-checkpoint.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..363fcab7ed6e9634e198cf5555ceb88932c9a245
--- /dev/null
+++ b/Puffin-7B/.ipynb_checkpoints/Untitled-checkpoint.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Puffin-7B/Untitled.ipynb b/Puffin-7B/Untitled.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..ac30a892d35d10b408ec85910f00f169c7ffa5c0
--- /dev/null
+++ b/Puffin-7B/Untitled.ipynb
@@ -0,0 +1,125 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "670a4958-8306-4a10-a51c-01eb2764f6fe",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "009ab51eb1164706bca69f75874d064e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "adapter_model.bin:   0%|          | 0.00/80.1M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "aed7b179e3eb4de3bd96ba9f4a9b1fbc",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "cd685e7e249c45f891e0ea4f8457288b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "adapter_model.bin:   0%|          | 0.00/80.1M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b2293c2c58b0435f849a8d79087972c2",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "optimizer.pt:   0%|          | 0.00/40.6M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'https://huggingface.co/datasets/nRuaif/temp/tree/main/'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from huggingface_hub import HfApi\n",
+    "\n",
+    "api = HfApi()\n",
+    "\n",
+    "# Upload all the content from the local folder to your remote Space.\n",
+    "# By default, files are uploaded at the root of the repo\n",
+    "\n",
+    "api.upload_folder(\n",
+    "\n",
+    "    folder_path=\"/workspace/axolotl\",\n",
+    "\n",
+    "    repo_id=\"nRuaif/temp\",\n",
+    "\n",
+    "    repo_type=\"dataset\",\n",
+    "\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "88e74658-49b3-4c3b-a2a2-2ba7225cb3e1",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Puffin-7B/adapter_config.json b/Puffin-7B/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8552129c6bc617e943b5a1e7c1cbdde3de00e54c
--- /dev/null
+++ b/Puffin-7B/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": false,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "down_proj",
+    "up_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Puffin-7B/added_tokens.json b/Puffin-7B/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..9c16aa4be022f03ad001b006fba14dfb73a1929c
--- /dev/null
+++ b/Puffin-7B/added_tokens.json
@@ -0,0 +1,3 @@
+{
+  "<pad>": 32000
+}
diff --git a/Puffin-7B/checkpoint-20/README.md b/Puffin-7B/checkpoint-20/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6
--- /dev/null
+++ b/Puffin-7B/checkpoint-20/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/Puffin-7B/checkpoint-20/adapter_config.json b/Puffin-7B/checkpoint-20/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285
--- /dev/null
+++ b/Puffin-7B/checkpoint-20/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "down_proj",
+    "up_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Puffin-7B/checkpoint-20/adapter_model.bin b/Puffin-7B/checkpoint-20/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..87b48269fdd97f78341213d9f99b0b82afcf4ccb
--- /dev/null
+++ b/Puffin-7B/checkpoint-20/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b21e5f4dcceb76ea3181f0c5f052515f8d8733911db0785eb6c7ae5e10a7e796
+size 80114765
diff --git a/Puffin-7B/checkpoint-20/adapter_model/README.md b/Puffin-7B/checkpoint-20/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6
--- /dev/null
+++ b/Puffin-7B/checkpoint-20/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/Puffin-7B/checkpoint-20/adapter_model/adapter_config.json b/Puffin-7B/checkpoint-20/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285
--- /dev/null
+++ b/Puffin-7B/checkpoint-20/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "down_proj",
+    "up_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Puffin-7B/checkpoint-20/adapter_model/adapter_model.bin b/Puffin-7B/checkpoint-20/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..87b48269fdd97f78341213d9f99b0b82afcf4ccb
--- /dev/null
+++ b/Puffin-7B/checkpoint-20/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b21e5f4dcceb76ea3181f0c5f052515f8d8733911db0785eb6c7ae5e10a7e796
+size 80114765
diff --git a/Puffin-7B/checkpoint-20/optimizer.pt b/Puffin-7B/checkpoint-20/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a36d94dc4db5f4fb4c728c7ba291bbf71652e551
--- /dev/null
+++ b/Puffin-7B/checkpoint-20/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e6d3149d2f727915dab9db573761ad95065bb59ba1704fe9e52b6ba8e976f6e
+size 40569887
diff --git a/Puffin-7B/checkpoint-20/rng_state.pth b/Puffin-7B/checkpoint-20/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4925620c36ffe5fc57c2bd6064bae1c8d45e3c9b
--- /dev/null
+++ b/Puffin-7B/checkpoint-20/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:930027ef671facab47ddd6f3b220cff979254f311abb138cf064cb376f5d5918
+size 14575
diff --git a/Puffin-7B/checkpoint-20/scheduler.pt b/Puffin-7B/checkpoint-20/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c8925cc0ec3b18a7ee772557d3761b93ac696c91
--- /dev/null
+++ b/Puffin-7B/checkpoint-20/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a343d51a1518966727eef74c11b5d5eaa20f6293d7bc972d7025f1daada4663e
+size 627
diff --git a/Puffin-7B/checkpoint-20/trainer_state.json b/Puffin-7B/checkpoint-20/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5d7e88851863a922318c1fde683380479584d3f
--- /dev/null
+++ b/Puffin-7B/checkpoint-20/trainer_state.json
@@ -0,0 +1,147 @@
+{
+  "best_metric": 8.979241371154785,
+  "best_model_checkpoint": "./qlora-out-Puffin/checkpoint-20",
+  "epoch": 0.37735849056603776,
+  "eval_steps": 20,
+  "global_step": 20,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 0.9753,
+      "step": 1
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.9631,
+      "step": 2
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.952,
+      "step": 3
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.8687,
+      "step": 4
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0003,
+      "loss": 1.0306,
+      "step": 5
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.00029996878922838096,
+      "loss": 0.9029,
+      "step": 6
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.0002998751699016874,
+      "loss": 0.8488,
+      "step": 7
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.000299719180979005,
+      "loss": 0.8167,
+      "step": 8
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00029950088737412895,
+      "loss": 0.7058,
+      "step": 9
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0002992203799285506,
+      "loss": 0.8208,
+      "step": 10
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00029887777537365414,
+      "loss": 0.752,
+      "step": 11
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0002984732162821399,
+      "loss": 0.725,
+      "step": 12
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.0002980068710086933,
+      "loss": 0.7936,
+      "step": 13
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.0002974789336199254,
+      "loss": 0.7811,
+      "step": 14
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.0002968896238136131,
+      "loss": 0.7519,
+      "step": 15
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.0002962391868272735,
+      "loss": 0.7475,
+      "step": 16
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.00029552789333610964,
+      "loss": 0.8075,
+      "step": 17
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 0.00029475603934037094,
+      "loss": 0.7297,
+      "step": 18
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.0002939239460421746,
+      "loss": 0.7071,
+      "step": 19
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.0002930319597118391,
+      "loss": 0.7873,
+      "step": 20
+    },
+    {
+      "epoch": 0.38,
+      "eval_loss": 8.979241371154785,
+      "eval_runtime": 28.0141,
+      "eval_samples_per_second": 4.176,
+      "eval_steps_per_second": 1.071,
+      "step": 20
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 159,
+  "num_train_epochs": 3,
+  "save_steps": 20,
+  "total_flos": 2.5549826310537216e+16,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/Puffin-7B/checkpoint-20/training_args.bin b/Puffin-7B/checkpoint-20/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c04455771af5c94d4ad83dddf97f19e3f60cc49e
--- /dev/null
+++ b/Puffin-7B/checkpoint-20/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8cedb1881c5272bd443403b1508825590117bc6ccfd192e967d653552a0caa6a
+size 4219
diff --git a/Puffin-7B/checkpoint-40/README.md b/Puffin-7B/checkpoint-40/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6
--- /dev/null
+++ b/Puffin-7B/checkpoint-40/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/Puffin-7B/checkpoint-40/adapter_config.json b/Puffin-7B/checkpoint-40/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285
--- /dev/null
+++ b/Puffin-7B/checkpoint-40/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "down_proj",
+    "up_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Puffin-7B/checkpoint-40/adapter_model.bin b/Puffin-7B/checkpoint-40/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2b88672488afb1065f43e9d924a68d1bc1e94e85
--- /dev/null
+++ b/Puffin-7B/checkpoint-40/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3384337d94297fbe43261e2bb46308c88a16e31a50e2ca6ddfbfa95c354f29b
+size 80114765
diff --git a/Puffin-7B/checkpoint-40/adapter_model/README.md b/Puffin-7B/checkpoint-40/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6
--- /dev/null
+++ b/Puffin-7B/checkpoint-40/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/Puffin-7B/checkpoint-40/adapter_model/adapter_config.json b/Puffin-7B/checkpoint-40/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285
--- /dev/null
+++ b/Puffin-7B/checkpoint-40/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "down_proj",
+    "up_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Puffin-7B/checkpoint-40/adapter_model/adapter_model.bin b/Puffin-7B/checkpoint-40/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2b88672488afb1065f43e9d924a68d1bc1e94e85
--- /dev/null
+++ b/Puffin-7B/checkpoint-40/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3384337d94297fbe43261e2bb46308c88a16e31a50e2ca6ddfbfa95c354f29b
+size 80114765
diff --git a/Puffin-7B/checkpoint-40/optimizer.pt b/Puffin-7B/checkpoint-40/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..86a61e0b1ccd96bad6635c17852d122b2829b4ad
--- /dev/null
+++ b/Puffin-7B/checkpoint-40/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0d06227b84729f3d74f91c96021133a82e9e1f93e98bc39dd405b3583230d26
+size 40569887
diff --git a/Puffin-7B/checkpoint-40/rng_state.pth b/Puffin-7B/checkpoint-40/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..55d90be29d8c7acf13da3bd73cbab706536c5c3b
--- /dev/null
+++ b/Puffin-7B/checkpoint-40/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88442e1e885b1eeeceda92b84921b5637ebf6bdc3862d9116b67ed421cdba32e
+size 14575
diff --git a/Puffin-7B/checkpoint-40/scheduler.pt b/Puffin-7B/checkpoint-40/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..45258ed4e1219fb88c433f514845c3539d79db5b
--- /dev/null
+++ b/Puffin-7B/checkpoint-40/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e62b6523d0cf9d571fdfab3999f41d0efc40f6522e500e52bde40b3fa57de48
+size 627
diff --git a/Puffin-7B/checkpoint-40/trainer_state.json b/Puffin-7B/checkpoint-40/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..300f6c5ace9c143185775e83ee33097cb16295cd
--- /dev/null
+++ b/Puffin-7B/checkpoint-40/trainer_state.json
@@ -0,0 +1,275 @@
+{
+  "best_metric": 8.979241371154785,
+  "best_model_checkpoint": "./qlora-out-Puffin/checkpoint-20",
+  "epoch": 0.7547169811320755,
+  "eval_steps": 20,
+  "global_step": 40,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 0.9753,
+      "step": 1
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.9631,
+      "step": 2
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.952,
+      "step": 3
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.8687,
+      "step": 4
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0003,
+      "loss": 1.0306,
+      "step": 5
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.00029996878922838096,
+      "loss": 0.9029,
+      "step": 6
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.0002998751699016874,
+      "loss": 0.8488,
+      "step": 7
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.000299719180979005,
+      "loss": 0.8167,
+      "step": 8
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00029950088737412895,
+      "loss": 0.7058,
+      "step": 9
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0002992203799285506,
+      "loss": 0.8208,
+      "step": 10
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00029887777537365414,
+      "loss": 0.752,
+      "step": 11
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0002984732162821399,
+      "loss": 0.725,
+      "step": 12
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.0002980068710086933,
+      "loss": 0.7936,
+      "step": 13
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.0002974789336199254,
+      "loss": 0.7811,
+      "step": 14
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.0002968896238136131,
+      "loss": 0.7519,
+      "step": 15
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.0002962391868272735,
+      "loss": 0.7475,
+      "step": 16
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.00029552789333610964,
+      "loss": 0.8075,
+      "step": 17
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 0.00029475603934037094,
+      "loss": 0.7297,
+      "step": 18
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.0002939239460421746,
+      "loss": 0.7071,
+      "step": 19
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.0002930319597118391,
+      "loss": 0.7873,
+      "step": 20
+    },
+    {
+      "epoch": 0.38,
+      "eval_loss": 8.979241371154785,
+      "eval_runtime": 28.0141,
+      "eval_samples_per_second": 4.176,
+      "eval_steps_per_second": 1.071,
+      "step": 20
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 0.0002920804515437865,
+      "loss": 0.7057,
+      "step": 21
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 0.0002910698175020717,
+      "loss": 0.7695,
+      "step": 22
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 0.000290000478155605,
+      "loss": 0.7348,
+      "step": 23
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.0002888728785031347,
+      "loss": 0.72,
+      "step": 24
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 0.00028768748778806386,
+      "loss": 0.742,
+      "step": 25
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 0.00028644479930317775,
+      "loss": 0.7085,
+      "step": 26
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 0.0002851453301853628,
+      "loss": 0.7044,
+      "step": 27
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.00028378962120040405,
+      "loss": 0.7065,
+      "step": 28
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.00028237823651794814,
+      "loss": 0.7105,
+      "step": 29
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 0.0002809117634767284,
+      "loss": 0.6517,
+      "step": 30
+    },
+    {
+      "epoch": 0.58,
+      "learning_rate": 0.00027939081234014705,
+      "loss": 0.7091,
+      "step": 31
+    },
+    {
+      "epoch": 0.6,
+      "learning_rate": 0.00027781601604231847,
+      "loss": 0.7677,
+      "step": 32
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 0.0002761880299246772,
+      "loss": 0.6455,
+      "step": 33
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 0.0002745075314632621,
+      "loss": 0.7224,
+      "step": 34
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 0.000272775219986789,
+      "loss": 0.6218,
+      "step": 35
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 0.0002709918163856295,
+      "loss": 0.6965,
+      "step": 36
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 0.00026915806281181686,
+      "loss": 0.6512,
+      "step": 37
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 0.00026727472237020447,
+      "loss": 0.746,
+      "step": 38
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 0.0002653425788009043,
+      "loss": 0.7126,
+      "step": 39
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 0.00026336243615313873,
+      "loss": 0.6657,
+      "step": 40
+    },
+    {
+      "epoch": 0.75,
+      "eval_loss": 9.270380973815918,
+      "eval_runtime": 28.0073,
+      "eval_samples_per_second": 4.177,
+      "eval_steps_per_second": 1.071,
+      "step": 40
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 159,
+  "num_train_epochs": 3,
+  "save_steps": 20,
+  "total_flos": 5.050356375905894e+16,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/Puffin-7B/checkpoint-40/training_args.bin b/Puffin-7B/checkpoint-40/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c04455771af5c94d4ad83dddf97f19e3f60cc49e
--- /dev/null
+++ b/Puffin-7B/checkpoint-40/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8cedb1881c5272bd443403b1508825590117bc6ccfd192e967d653552a0caa6a
+size 4219
diff --git a/Puffin-7B/checkpoint-60/README.md b/Puffin-7B/checkpoint-60/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6
--- /dev/null
+++ b/Puffin-7B/checkpoint-60/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/Puffin-7B/checkpoint-60/adapter_config.json b/Puffin-7B/checkpoint-60/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285
--- /dev/null
+++ b/Puffin-7B/checkpoint-60/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "down_proj",
+    "up_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Puffin-7B/checkpoint-60/adapter_model.bin b/Puffin-7B/checkpoint-60/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..22af2cc9b8330c05882b0ad00588be8e0373c9d9
--- /dev/null
+++ b/Puffin-7B/checkpoint-60/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28eaf819a76beb056936cedc370545d758e16961dae5b4a0c8376b1b139996c8
+size 80114765
diff --git a/Puffin-7B/checkpoint-60/adapter_model/README.md b/Puffin-7B/checkpoint-60/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6
--- /dev/null
+++ b/Puffin-7B/checkpoint-60/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/Puffin-7B/checkpoint-60/adapter_model/adapter_config.json b/Puffin-7B/checkpoint-60/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285
--- /dev/null
+++ b/Puffin-7B/checkpoint-60/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "down_proj",
+    "up_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Puffin-7B/checkpoint-60/adapter_model/adapter_model.bin b/Puffin-7B/checkpoint-60/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..22af2cc9b8330c05882b0ad00588be8e0373c9d9
--- /dev/null
+++ b/Puffin-7B/checkpoint-60/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28eaf819a76beb056936cedc370545d758e16961dae5b4a0c8376b1b139996c8
+size 80114765
diff --git a/Puffin-7B/checkpoint-60/optimizer.pt b/Puffin-7B/checkpoint-60/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..580dfe06fb5978e8d1327d7e877bbbc6b197d95c
--- /dev/null
+++ b/Puffin-7B/checkpoint-60/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:695fc79fb9df7aaa302b9dc715ae5e30dc4e234feb66e4d818437bc9996eccdc
+size 40569887
diff --git a/Puffin-7B/checkpoint-60/rng_state.pth b/Puffin-7B/checkpoint-60/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3bdae08984eda60050aa921cc54e43ea292c8846
--- /dev/null
+++ b/Puffin-7B/checkpoint-60/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77cb4cbfb7b181923b75cbee905a10468e8293562c12029dd61d7d9f6c46f32c
+size 14575
diff --git a/Puffin-7B/checkpoint-60/scheduler.pt b/Puffin-7B/checkpoint-60/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fba444037085fcecc8bf1025f71f9e0212faa4fc
--- /dev/null
+++ b/Puffin-7B/checkpoint-60/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04135f2b8153c7b36bfd155069e4a5c7f8acfedd0e84a78c0f2a68acec1181c9
+size 627
diff --git a/Puffin-7B/checkpoint-60/trainer_state.json b/Puffin-7B/checkpoint-60/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..8bd5f6121d3bc973cc7aa56b26e66d8728d8ec56
--- /dev/null
+++ b/Puffin-7B/checkpoint-60/trainer_state.json
@@ -0,0 +1,403 @@
+{
+  "best_metric": 8.979241371154785,
+  "best_model_checkpoint": "./qlora-out-Puffin/checkpoint-20",
+  "epoch": 1.1320754716981132,
+  "eval_steps": 20,
+  "global_step": 60,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 0.9753,
+      "step": 1
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.9631,
+      "step": 2
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.952,
+      "step": 3
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.8687,
+      "step": 4
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0003,
+      "loss": 1.0306,
+      "step": 5
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.00029996878922838096,
+      "loss": 0.9029,
+      "step": 6
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.0002998751699016874,
+      "loss": 0.8488,
+      "step": 7
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.000299719180979005,
+      "loss": 0.8167,
+      "step": 8
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00029950088737412895,
+      "loss": 0.7058,
+      "step": 9
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0002992203799285506,
+      "loss": 0.8208,
+      "step": 10
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00029887777537365414,
+      "loss": 0.752,
+      "step": 11
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0002984732162821399,
+      "loss": 0.725,
+      "step": 12
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.0002980068710086933,
+      "loss": 0.7936,
+      "step": 13
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.0002974789336199254,
+      "loss": 0.7811,
+      "step": 14
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.0002968896238136131,
+      "loss": 0.7519,
+      "step": 15
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.0002962391868272735,
+      "loss": 0.7475,
+      "step": 16
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.00029552789333610964,
+      "loss": 0.8075,
+      "step": 17
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 0.00029475603934037094,
+      "loss": 0.7297,
+      "step": 18
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.0002939239460421746,
+      "loss": 0.7071,
+      "step": 19
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.0002930319597118391,
+      "loss": 0.7873,
+      "step": 20
+    },
+    {
+      "epoch": 0.38,
+      "eval_loss": 8.979241371154785,
+      "eval_runtime": 28.0141,
+      "eval_samples_per_second": 4.176,
+      "eval_steps_per_second": 1.071,
+      "step": 20
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 0.0002920804515437865,
+      "loss": 0.7057,
+      "step": 21
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 0.0002910698175020717,
+      "loss": 0.7695,
+      "step": 22
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 0.000290000478155605,
+      "loss": 0.7348,
+      "step": 23
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.0002888728785031347,
+      "loss": 0.72,
+      "step": 24
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 0.00028768748778806386,
+      "loss": 0.742,
+      "step": 25
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 0.00028644479930317775,
+      "loss": 0.7085,
+      "step": 26
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 0.0002851453301853628,
+      "loss": 0.7044,
+      "step": 27
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.00028378962120040405,
+      "loss": 0.7065,
+      "step": 28
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.00028237823651794814,
+      "loss": 0.7105,
+      "step": 29
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 0.0002809117634767284,
+      "loss": 0.6517,
+      "step": 30
+    },
+    {
+      "epoch": 0.58,
+      "learning_rate": 0.00027939081234014705,
+      "loss": 0.7091,
+      "step": 31
+    },
+    {
+      "epoch": 0.6,
+      "learning_rate": 0.00027781601604231847,
+      "loss": 0.7677,
+      "step": 32
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 0.0002761880299246772,
+      "loss": 0.6455,
+      "step": 33
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 0.0002745075314632621,
+      "loss": 0.7224,
+      "step": 34
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 0.000272775219986789,
+      "loss": 0.6218,
+      "step": 35
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 0.0002709918163856295,
+      "loss": 0.6965,
+      "step": 36
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 0.00026915806281181686,
+      "loss": 0.6512,
+      "step": 37
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 0.00026727472237020447,
+      "loss": 0.746,
+      "step": 38
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 0.0002653425788009043,
+      "loss": 0.7126,
+      "step": 39
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 0.00026336243615313873,
+      "loss": 0.6657,
+      "step": 40
+    },
+    {
+      "epoch": 0.75,
+      "eval_loss": 9.270380973815918,
+      "eval_runtime": 28.0073,
+      "eval_samples_per_second": 4.177,
+      "eval_steps_per_second": 1.071,
+      "step": 40
+    },
+    {
+      "epoch": 0.77,
+      "learning_rate": 0.0002613351184506405,
+      "loss": 0.6897,
+      "step": 41
+    },
+    {
+      "epoch": 0.79,
+      "learning_rate": 0.00025926146934874037,
+      "loss": 0.6771,
+      "step": 42
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 0.0002571423517832855,
+      "loss": 0.6954,
+      "step": 43
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 0.0002549786476115343,
+      "loss": 0.7264,
+      "step": 44
+    },
+    {
+      "epoch": 0.85,
+      "learning_rate": 0.0002527712572451766,
+      "loss": 0.6308,
+      "step": 45
+    },
+    {
+      "epoch": 0.87,
+      "learning_rate": 0.0002505210992756339,
+      "loss": 0.6424,
+      "step": 46
+    },
+    {
+      "epoch": 0.89,
+      "learning_rate": 0.00024822911009179276,
+      "loss": 0.6904,
+      "step": 47
+    },
+    {
+      "epoch": 0.91,
+      "learning_rate": 0.0002458962434903327,
+      "loss": 0.8218,
+      "step": 48
+    },
+    {
+      "epoch": 0.92,
+      "learning_rate": 0.00024352347027881003,
+      "loss": 0.6698,
+      "step": 49
+    },
+    {
+      "epoch": 0.94,
+      "learning_rate": 0.00024111177787166212,
+      "loss": 0.7299,
+      "step": 50
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 0.0002386621698793015,
+      "loss": 0.6973,
+      "step": 51
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 0.0002361756656904695,
+      "loss": 0.7159,
+      "step": 52
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 0.0002336533000480244,
+      "loss": 0.703,
+      "step": 53
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 0.00023109612261833963,
+      "loss": 0.7033,
+      "step": 54
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 0.0002285051975544918,
+      "loss": 0.6758,
+      "step": 55
+    },
+    {
+      "epoch": 1.06,
+      "learning_rate": 0.00022588160305342023,
+      "loss": 0.6828,
+      "step": 56
+    },
+    {
+      "epoch": 1.08,
+      "learning_rate": 0.00022322643090724216,
+      "loss": 0.6433,
+      "step": 57
+    },
+    {
+      "epoch": 1.09,
+      "learning_rate": 0.0002205407860489105,
+      "loss": 0.6495,
+      "step": 58
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 0.00021782578609240284,
+      "loss": 0.654,
+      "step": 59
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 0.00021508256086763368,
+      "loss": 0.6364,
+      "step": 60
+    },
+    {
+      "epoch": 1.13,
+      "eval_loss": 9.256454467773438,
+      "eval_runtime": 28.0034,
+      "eval_samples_per_second": 4.178,
+      "eval_steps_per_second": 1.071,
+      "step": 60
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 159,
+  "num_train_epochs": 3,
+  "save_steps": 20,
+  "total_flos": 7.573191856540877e+16,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/Puffin-7B/checkpoint-60/training_args.bin b/Puffin-7B/checkpoint-60/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c04455771af5c94d4ad83dddf97f19e3f60cc49e
--- /dev/null
+++ b/Puffin-7B/checkpoint-60/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8cedb1881c5272bd443403b1508825590117bc6ccfd192e967d653552a0caa6a
+size 4219
diff --git a/Puffin-7B/checkpoint-80/README.md b/Puffin-7B/checkpoint-80/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6
--- /dev/null
+++ b/Puffin-7B/checkpoint-80/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/Puffin-7B/checkpoint-80/adapter_config.json b/Puffin-7B/checkpoint-80/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285
--- /dev/null
+++ b/Puffin-7B/checkpoint-80/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "down_proj",
+    "up_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Puffin-7B/checkpoint-80/adapter_model.bin b/Puffin-7B/checkpoint-80/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..aed06167ab488d62bd44dfe43529b4be650bc5ec
--- /dev/null
+++ b/Puffin-7B/checkpoint-80/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3534aa08e9f113c39c8290b11f3113c96d9537bd94a2d545ee2617cab7fb21b
+size 80114765
diff --git a/Puffin-7B/checkpoint-80/adapter_model/README.md b/Puffin-7B/checkpoint-80/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6
--- /dev/null
+++ b/Puffin-7B/checkpoint-80/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/Puffin-7B/checkpoint-80/adapter_model/adapter_config.json b/Puffin-7B/checkpoint-80/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285
--- /dev/null
+++ b/Puffin-7B/checkpoint-80/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "down_proj",
+    "up_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/Puffin-7B/checkpoint-80/adapter_model/adapter_model.bin b/Puffin-7B/checkpoint-80/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..aed06167ab488d62bd44dfe43529b4be650bc5ec
--- /dev/null
+++ b/Puffin-7B/checkpoint-80/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3534aa08e9f113c39c8290b11f3113c96d9537bd94a2d545ee2617cab7fb21b
+size 80114765
diff --git a/Puffin-7B/checkpoint-80/optimizer.pt b/Puffin-7B/checkpoint-80/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4ee03c93b90bd36f39f25b90aeb83416b976f938
--- /dev/null
+++ b/Puffin-7B/checkpoint-80/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d96f18bc030cc5de4b5f67fd24e51077b6f36a002049405a7aae8c1a8412eef9
+size 40569887
diff --git a/Puffin-7B/checkpoint-80/rng_state.pth b/Puffin-7B/checkpoint-80/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7e2c37f5d3b5a7da301c5f21cff14af000c24460
--- /dev/null
+++ b/Puffin-7B/checkpoint-80/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b25d04b74f535bace2746d0e525fef4336cb3eafa1df5d97916bc3a61217f08a
+size 14575
diff --git a/Puffin-7B/checkpoint-80/scheduler.pt b/Puffin-7B/checkpoint-80/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..098ed5bd5a6302861b68d6f63a05067f39a9ee71
--- /dev/null
+++ b/Puffin-7B/checkpoint-80/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d834acafa9bc3ca9adcc7c685156c02a8dd422cea7a30a83d3ec21d63724de75
+size 627
diff --git a/Puffin-7B/checkpoint-80/trainer_state.json b/Puffin-7B/checkpoint-80/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..aacf30e1ecf737a9cdaa911364fe06bd7b95b2c1
--- /dev/null
+++ b/Puffin-7B/checkpoint-80/trainer_state.json
@@ -0,0 +1,531 @@
+{
+  "best_metric": 8.979241371154785,
+  "best_model_checkpoint": "./qlora-out-Puffin/checkpoint-20",
+  "epoch": 1.509433962264151,
+  "eval_steps": 20,
+  "global_step": 80,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 0.9753,
+      "step": 1
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 0.9631,
+      "step": 2
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 0.952,
+      "step": 3
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.8687,
+      "step": 4
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.0003,
+      "loss": 1.0306,
+      "step": 5
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.00029996878922838096,
+      "loss": 0.9029,
+      "step": 6
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 0.0002998751699016874,
+      "loss": 0.8488,
+      "step": 7
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.000299719180979005,
+      "loss": 0.8167,
+      "step": 8
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.00029950088737412895,
+      "loss": 0.7058,
+      "step": 9
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 0.0002992203799285506,
+      "loss": 0.8208,
+      "step": 10
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00029887777537365414,
+      "loss": 0.752,
+      "step": 11
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0002984732162821399,
+      "loss": 0.725,
+      "step": 12
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 0.0002980068710086933,
+      "loss": 0.7936,
+      "step": 13
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.0002974789336199254,
+      "loss": 0.7811,
+      "step": 14
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 0.0002968896238136131,
+      "loss": 0.7519,
+      "step": 15
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.0002962391868272735,
+      "loss": 0.7475,
+      "step": 16
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.00029552789333610964,
+      "loss": 0.8075,
+      "step": 17
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 0.00029475603934037094,
+      "loss": 0.7297,
+      "step": 18
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.0002939239460421746,
+      "loss": 0.7071,
+      "step": 19
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.0002930319597118391,
+      "loss": 0.7873,
+      "step": 20
+    },
+    {
+      "epoch": 0.38,
+      "eval_loss": 8.979241371154785,
+      "eval_runtime": 28.0141,
+      "eval_samples_per_second": 4.176,
+      "eval_steps_per_second": 1.071,
+      "step": 20
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 0.0002920804515437865,
+      "loss": 0.7057,
+      "step": 21
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 0.0002910698175020717,
+      "loss": 0.7695,
+      "step": 22
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 0.000290000478155605,
+      "loss": 0.7348,
+      "step": 23
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.0002888728785031347,
+      "loss": 0.72,
+      "step": 24
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 0.00028768748778806386,
+      "loss": 0.742,
+      "step": 25
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 0.00028644479930317775,
+      "loss": 0.7085,
+      "step": 26
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 0.0002851453301853628,
+      "loss": 0.7044,
+      "step": 27
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.00028378962120040405,
+      "loss": 0.7065,
+      "step": 28
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 0.00028237823651794814,
+      "loss": 0.7105,
+      "step": 29
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 0.0002809117634767284,
+      "loss": 0.6517,
+      "step": 30
+    },
+    {
+      "epoch": 0.58,
+      "learning_rate": 0.00027939081234014705,
+      "loss": 0.7091,
+      "step": 31
+    },
+    {
+      "epoch": 0.6,
+      "learning_rate": 0.00027781601604231847,
+      "loss": 0.7677,
+      "step": 32
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 0.0002761880299246772,
+      "loss": 0.6455,
+      "step": 33
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 0.0002745075314632621,
+      "loss": 0.7224,
+      "step": 34
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 0.000272775219986789,
+      "loss": 0.6218,
+      "step": 35
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 0.0002709918163856295,
+      "loss": 0.6965,
+      "step": 36
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 0.00026915806281181686,
+      "loss": 0.6512,
+      "step": 37
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 0.00026727472237020447,
+      "loss": 0.746,
+      "step": 38
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 0.0002653425788009043,
+      "loss": 0.7126,
+      "step": 39
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 0.00026336243615313873,
+      "loss": 0.6657,
+      "step": 40
+    },
+    {
+      "epoch": 0.75,
+      "eval_loss": 9.270380973815918,
+      "eval_runtime": 28.0073,
+      "eval_samples_per_second": 4.177,
+      "eval_steps_per_second": 1.071,
+      "step": 40
+    },
+    {
+      "epoch": 0.77,
+      "learning_rate": 0.0002613351184506405,
+      "loss": 0.6897,
+      "step": 41
+    },
+    {
+      "epoch": 0.79,
+      "learning_rate": 0.00025926146934874037,
+      "loss": 0.6771,
+      "step": 42
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 0.0002571423517832855,
+      "loss": 0.6954,
+      "step": 43
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 0.0002549786476115343,
+      "loss": 0.7264,
+      "step": 44
+    },
+    {
+      "epoch": 0.85,
+      "learning_rate": 0.0002527712572451766,
+      "loss": 0.6308,
+      "step": 45
+    },
+    {
+      "epoch": 0.87,
+      "learning_rate": 0.0002505210992756339,
+      "loss": 0.6424,
+      "step": 46
+    },
+    {
+      "epoch": 0.89,
+      "learning_rate": 0.00024822911009179276,
+      "loss": 0.6904,
+      "step": 47
+    },
+    {
+      "epoch": 0.91,
+      "learning_rate": 0.0002458962434903327,
+      "loss": 0.8218,
+      "step": 48
+    },
+    {
+      "epoch": 0.92,
+      "learning_rate": 0.00024352347027881003,
+      "loss": 0.6698,
+      "step": 49
+    },
+    {
+      "epoch": 0.94,
+      "learning_rate": 0.00024111177787166212,
+      "loss": 0.7299,
+      "step": 50
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 0.0002386621698793015,
+      "loss": 0.6973,
+      "step": 51
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 0.0002361756656904695,
+      "loss": 0.7159,
+      "step": 52
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 0.0002336533000480244,
+      "loss": 0.703,
+      "step": 53
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 0.00023109612261833963,
+      "loss": 0.7033,
+      "step": 54
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 0.0002285051975544918,
+      "loss": 0.6758,
+      "step": 55
+    },
+    {
+      "epoch": 1.06,
+      "learning_rate": 0.00022588160305342023,
+      "loss": 0.6828,
+      "step": 56
+    },
+    {
+      "epoch": 1.08,
+      "learning_rate": 0.00022322643090724216,
+      "loss": 0.6433,
+      "step": 57
+    },
+    {
+      "epoch": 1.09,
+      "learning_rate": 0.0002205407860489105,
+      "loss": 0.6495,
+      "step": 58
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 0.00021782578609240284,
+      "loss": 0.654,
+      "step": 59
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 0.00021508256086763368,
+      "loss": 0.6364,
+      "step": 60
+    },
+    {
+      "epoch": 1.13,
+      "eval_loss": 9.256454467773438,
+      "eval_runtime": 28.0034,
+      "eval_samples_per_second": 4.178,
+      "eval_steps_per_second": 1.071,
+      "step": 60
+    },
+    {
+      "epoch": 1.15,
+      "learning_rate": 0.00021231225195028297,
+      "loss": 0.6944,
+      "step": 61
+    },
+    {
+      "epoch": 1.17,
+      "learning_rate": 0.00020951601218673635,
+      "loss": 0.6534,
+      "step": 62
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 0.00020669500521433597,
+      "loss": 0.7002,
+      "step": 63
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 0.00020385040497713976,
+      "loss": 0.6381,
+      "step": 64
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 0.00020098339523739247,
+      "loss": 0.6451,
+      "step": 65
+    },
+    {
+      "epoch": 1.25,
+      "learning_rate": 0.0001980951690829103,
+      "loss": 0.6308,
+      "step": 66
+    },
+    {
+      "epoch": 1.26,
+      "learning_rate": 0.00019518692843058512,
+      "loss": 0.6586,
+      "step": 67
+    },
+    {
+      "epoch": 1.28,
+      "learning_rate": 0.00019225988352621445,
+      "loss": 0.7115,
+      "step": 68
+    },
+    {
+      "epoch": 1.3,
+      "learning_rate": 0.0001893152524408653,
+      "loss": 0.5739,
+      "step": 69
+    },
+    {
+      "epoch": 1.32,
+      "learning_rate": 0.00018635426056398186,
+      "loss": 0.6538,
+      "step": 70
+    },
+    {
+      "epoch": 1.34,
+      "learning_rate": 0.00018337814009344714,
+      "loss": 0.5868,
+      "step": 71
+    },
+    {
+      "epoch": 1.36,
+      "learning_rate": 0.00018038812952281212,
+      "loss": 0.6573,
+      "step": 72
+    },
+    {
+      "epoch": 1.38,
+      "learning_rate": 0.00017738547312590424,
+      "loss": 0.6931,
+      "step": 73
+    },
+    {
+      "epoch": 1.4,
+      "learning_rate": 0.0001743714204390309,
+      "loss": 0.6873,
+      "step": 74
+    },
+    {
+      "epoch": 1.42,
+      "learning_rate": 0.00017134722574099276,
+      "loss": 0.6295,
+      "step": 75
+    },
+    {
+      "epoch": 1.43,
+      "learning_rate": 0.00016831414753112398,
+      "loss": 0.6832,
+      "step": 76
+    },
+    {
+      "epoch": 1.45,
+      "learning_rate": 0.00016527344800557533,
+      "loss": 0.646,
+      "step": 77
+    },
+    {
+      "epoch": 1.47,
+      "learning_rate": 0.00016222639253205947,
+      "loss": 0.6658,
+      "step": 78
+    },
+    {
+      "epoch": 1.49,
+      "learning_rate": 0.00015917424912327641,
+      "loss": 0.6123,
+      "step": 79
+    },
+    {
+      "epoch": 1.51,
+      "learning_rate": 0.00015611828790923786,
+      "loss": 0.6698,
+      "step": 80
+    },
+    {
+      "epoch": 1.51,
+      "eval_loss": 9.175628662109375,
+      "eval_runtime": 28.001,
+      "eval_samples_per_second": 4.178,
+      "eval_steps_per_second": 1.071,
+      "step": 80
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 159,
+  "num_train_epochs": 3,
+  "save_steps": 20,
+  "total_flos": 1.0081710792454963e+17,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/Puffin-7B/checkpoint-80/training_args.bin b/Puffin-7B/checkpoint-80/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c04455771af5c94d4ad83dddf97f19e3f60cc49e
--- /dev/null
+++ b/Puffin-7B/checkpoint-80/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8cedb1881c5272bd443403b1508825590117bc6ccfd192e967d653552a0caa6a
+size 4219
diff --git a/Puffin-7B/special_tokens_map.json b/Puffin-7B/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..3f58a5e115855c6ea3cec98accae196ad927222e
--- /dev/null
+++ b/Puffin-7B/special_tokens_map.json
@@ -0,0 +1,6 @@
+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "[PAD]",
+  "unk_token": "<unk>"
+}
diff --git a/Puffin-7B/tokenizer.model b/Puffin-7B/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899
--- /dev/null
+++ b/Puffin-7B/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
diff --git a/Puffin-7B/tokenizer_config.json b/Puffin-7B/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..65532c2074f7f407d2d801a559f3d90aa5137f26
--- /dev/null
+++ b/Puffin-7B/tokenizer_config.json
@@ -0,0 +1,38 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": null,
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "trust_remote_code": false,
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "use_default_system_prompt": true,
+  "use_fast": true
+}
diff --git a/ShareGPT-cleaned/README.md b/ShareGPT-cleaned/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3
--- /dev/null
+++ b/ShareGPT-cleaned/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/ShareGPT-cleaned/adapter_config.json b/ShareGPT-cleaned/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f035b69c0f76b657a7ea6a70e8e929f6301cea47
--- /dev/null
+++ b/ShareGPT-cleaned/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "q_proj",
+    "up_proj",
+    "o_proj",
+    "gate_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ShareGPT-cleaned/adapter_model.bin b/ShareGPT-cleaned/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..15489d429b58fad9a4354febce043a14fd1d094a
--- /dev/null
+++ b/ShareGPT-cleaned/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c45f3436fbeb782012ba6c0a7e855a1a891e5939a2279bf712d192d1d8c83a53
+size 80114765
diff --git a/ShareGPT-cleaned/added_tokens.json b/ShareGPT-cleaned/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..9c16aa4be022f03ad001b006fba14dfb73a1929c
--- /dev/null
+++ b/ShareGPT-cleaned/added_tokens.json
@@ -0,0 +1,3 @@
+{
+  "<pad>": 32000
+}
diff --git a/ShareGPT-cleaned/checkpoint-12/README.md b/ShareGPT-cleaned/checkpoint-12/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-12/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/ShareGPT-cleaned/checkpoint-12/adapter_config.json b/ShareGPT-cleaned/checkpoint-12/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f035b69c0f76b657a7ea6a70e8e929f6301cea47
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-12/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "q_proj",
+    "up_proj",
+    "o_proj",
+    "gate_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ShareGPT-cleaned/checkpoint-12/adapter_model.bin b/ShareGPT-cleaned/checkpoint-12/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9fb11ccb8c8d8ef1303e6963928ae852a3e624ad
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-12/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d07c1b713f6eba443b872849b924f9c77fe69485dbedeed2095c4c74f3f1e5c8
+size 80114765
diff --git a/ShareGPT-cleaned/checkpoint-12/adapter_model/README.md b/ShareGPT-cleaned/checkpoint-12/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-12/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/ShareGPT-cleaned/checkpoint-12/adapter_model/adapter_config.json b/ShareGPT-cleaned/checkpoint-12/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f035b69c0f76b657a7ea6a70e8e929f6301cea47
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-12/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "q_proj",
+    "up_proj",
+    "o_proj",
+    "gate_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ShareGPT-cleaned/checkpoint-12/adapter_model/adapter_model.bin b/ShareGPT-cleaned/checkpoint-12/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9fb11ccb8c8d8ef1303e6963928ae852a3e624ad
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-12/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d07c1b713f6eba443b872849b924f9c77fe69485dbedeed2095c4c74f3f1e5c8
+size 80114765
diff --git a/ShareGPT-cleaned/checkpoint-12/optimizer.pt b/ShareGPT-cleaned/checkpoint-12/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..72d5178ec523b7438ade0202541e2dc86a2ad0c4
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-12/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75261ac468437b0174757c6bced7a51cf80bcc4ee928cebf31ca3891deaddd49
+size 40569887
diff --git a/ShareGPT-cleaned/checkpoint-12/rng_state.pth b/ShareGPT-cleaned/checkpoint-12/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7e96a293c9bbccc9bdfd0dce6f567bf271db959f
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-12/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3106e70c084cf6d9b3133067f57d635d31ce5e67e314582db71d4b3ce4ec446c
+size 14575
diff --git a/ShareGPT-cleaned/checkpoint-12/scheduler.pt b/ShareGPT-cleaned/checkpoint-12/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..542d577812371b57a2b87f48bd64167ec8959ef7
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-12/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25262a79f04fd972af27a73324b2eb39f5732b3adbb3489d877e0cbceab7224e
+size 627
diff --git a/ShareGPT-cleaned/checkpoint-12/trainer_state.json b/ShareGPT-cleaned/checkpoint-12/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..4a36aa0f56046181dd3bd4c77cb4360f85a5fab4
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-12/trainer_state.json
@@ -0,0 +1,91 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9795918367346939,
+  "eval_steps": 500,
+  "global_step": 12,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.0003,
+      "loss": 1.0275,
+      "step": 1
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00029939614409928584,
+      "loss": 1.0557,
+      "step": 2
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.00029758943828979444,
+      "loss": 1.0341,
+      "step": 3
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 0.00029459442910437797,
+      "loss": 1.0866,
+      "step": 4
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00029043523059596053,
+      "loss": 1.0544,
+      "step": 5
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 0.0002851453301853628,
+      "loss": 0.9875,
+      "step": 6
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 0.0002787673190402799,
+      "loss": 1.0672,
+      "step": 7
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 0.0002713525491562421,
+      "loss": 1.0344,
+      "step": 8
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 0.00026296071990054165,
+      "loss": 1.0481,
+      "step": 9
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 0.0002536593973480297,
+      "loss": 0.8523,
+      "step": 10
+    },
+    {
+      "epoch": 0.9,
+      "learning_rate": 0.00024352347027881003,
+      "loss": 1.019,
+      "step": 11
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 0.00023263454721781537,
+      "loss": 0.9123,
+      "step": 12
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 36,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 1.5882254111735808e+16,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/ShareGPT-cleaned/checkpoint-12/training_args.bin b/ShareGPT-cleaned/checkpoint-12/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..00e89e1c931b56d90542ad868dbc0e31f257ec1a
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-12/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44a629ad453bf2898430a28c0829ede2a81c7366c7bf8749dc4057027d66b023
+size 4219
diff --git a/ShareGPT-cleaned/checkpoint-24/README.md b/ShareGPT-cleaned/checkpoint-24/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-24/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/ShareGPT-cleaned/checkpoint-24/adapter_config.json b/ShareGPT-cleaned/checkpoint-24/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f035b69c0f76b657a7ea6a70e8e929f6301cea47
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-24/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "q_proj",
+    "up_proj",
+    "o_proj",
+    "gate_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ShareGPT-cleaned/checkpoint-24/adapter_model.bin b/ShareGPT-cleaned/checkpoint-24/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3e3f09d2aa3f9c98c1a43049a5a7a69576362884
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-24/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc64c7985ff45ee7f80d6cb8dd7eb22ebaa66ad796f2ebb5c9b59181b699e997
+size 80114765
diff --git a/ShareGPT-cleaned/checkpoint-24/adapter_model/README.md b/ShareGPT-cleaned/checkpoint-24/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-24/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/ShareGPT-cleaned/checkpoint-24/adapter_model/adapter_config.json b/ShareGPT-cleaned/checkpoint-24/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f035b69c0f76b657a7ea6a70e8e929f6301cea47
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-24/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "q_proj",
+    "up_proj",
+    "o_proj",
+    "gate_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ShareGPT-cleaned/checkpoint-24/adapter_model/adapter_model.bin b/ShareGPT-cleaned/checkpoint-24/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3e3f09d2aa3f9c98c1a43049a5a7a69576362884
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-24/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc64c7985ff45ee7f80d6cb8dd7eb22ebaa66ad796f2ebb5c9b59181b699e997
+size 80114765
diff --git a/ShareGPT-cleaned/checkpoint-24/optimizer.pt b/ShareGPT-cleaned/checkpoint-24/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c0d3aafc5b6595dc9d682b32d43bbbb2a47b91b9
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-24/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e5b505beeefeeec841267f568c495e8f92a3771e5f5b41903be4d415fb47efa
+size 40569887
diff --git a/ShareGPT-cleaned/checkpoint-24/rng_state.pth b/ShareGPT-cleaned/checkpoint-24/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ed769759652568720df6202b19479ab0d8b02493
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-24/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3e20b6e5968ea92d3ef28f695a6de364e7d3ba9d5545e08a5e373a05a7fee4c
+size 14575
diff --git a/ShareGPT-cleaned/checkpoint-24/scheduler.pt b/ShareGPT-cleaned/checkpoint-24/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..474c893b363aeaf8ea20ccfc6808ed4bacc2adfa
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-24/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9083b16933f40bfbf9b5ea98facc24202d2fd767e5924b71286e71821404c5d6
+size 627
diff --git a/ShareGPT-cleaned/checkpoint-24/trainer_state.json b/ShareGPT-cleaned/checkpoint-24/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..0b28c0b9a21e66c72b8811c52754dc955903d4a0
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-24/trainer_state.json
@@ -0,0 +1,163 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.9591836734693877,
+  "eval_steps": 500,
+  "global_step": 24,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.0003,
+      "loss": 1.0275,
+      "step": 1
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00029939614409928584,
+      "loss": 1.0557,
+      "step": 2
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.00029758943828979444,
+      "loss": 1.0341,
+      "step": 3
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 0.00029459442910437797,
+      "loss": 1.0866,
+      "step": 4
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00029043523059596053,
+      "loss": 1.0544,
+      "step": 5
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 0.0002851453301853628,
+      "loss": 0.9875,
+      "step": 6
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 0.0002787673190402799,
+      "loss": 1.0672,
+      "step": 7
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 0.0002713525491562421,
+      "loss": 1.0344,
+      "step": 8
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 0.00026296071990054165,
+      "loss": 1.0481,
+      "step": 9
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 0.0002536593973480297,
+      "loss": 0.8523,
+      "step": 10
+    },
+    {
+      "epoch": 0.9,
+      "learning_rate": 0.00024352347027881003,
+      "loss": 1.019,
+      "step": 11
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 0.00023263454721781537,
+      "loss": 0.9123,
+      "step": 12
+    },
+    {
+      "epoch": 1.06,
+      "learning_rate": 0.0002210802993709498,
+      "loss": 0.9131,
+      "step": 13
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 0.00020895375474808852,
+      "loss": 0.9333,
+      "step": 14
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 0.0001963525491562421,
+      "loss": 0.84,
+      "step": 15
+    },
+    {
+      "epoch": 1.31,
+      "learning_rate": 0.00018337814009344714,
+      "loss": 0.9898,
+      "step": 16
+    },
+    {
+      "epoch": 1.39,
+      "learning_rate": 0.00017013498987264832,
+      "loss": 0.9718,
+      "step": 17
+    },
+    {
+      "epoch": 1.47,
+      "learning_rate": 0.00015672972455257723,
+      "loss": 0.9909,
+      "step": 18
+    },
+    {
+      "epoch": 1.55,
+      "learning_rate": 0.0001432702754474228,
+      "loss": 0.8745,
+      "step": 19
+    },
+    {
+      "epoch": 1.63,
+      "learning_rate": 0.0001298650101273517,
+      "loss": 0.8947,
+      "step": 20
+    },
+    {
+      "epoch": 1.71,
+      "learning_rate": 0.00011662185990655284,
+      "loss": 0.9917,
+      "step": 21
+    },
+    {
+      "epoch": 1.8,
+      "learning_rate": 0.0001036474508437579,
+      "loss": 0.9463,
+      "step": 22
+    },
+    {
+      "epoch": 1.88,
+      "learning_rate": 9.104624525191145e-05,
+      "loss": 0.9384,
+      "step": 23
+    },
+    {
+      "epoch": 1.96,
+      "learning_rate": 7.89197006290502e-05,
+      "loss": 0.89,
+      "step": 24
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 36,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 3.1718955581177856e+16,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/ShareGPT-cleaned/checkpoint-24/training_args.bin b/ShareGPT-cleaned/checkpoint-24/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..00e89e1c931b56d90542ad868dbc0e31f257ec1a
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-24/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44a629ad453bf2898430a28c0829ede2a81c7366c7bf8749dc4057027d66b023
+size 4219
diff --git a/ShareGPT-cleaned/checkpoint-36/README.md b/ShareGPT-cleaned/checkpoint-36/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-36/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/ShareGPT-cleaned/checkpoint-36/adapter_config.json b/ShareGPT-cleaned/checkpoint-36/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f035b69c0f76b657a7ea6a70e8e929f6301cea47
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-36/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "q_proj",
+    "up_proj",
+    "o_proj",
+    "gate_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ShareGPT-cleaned/checkpoint-36/adapter_model.bin b/ShareGPT-cleaned/checkpoint-36/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..15489d429b58fad9a4354febce043a14fd1d094a
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-36/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c45f3436fbeb782012ba6c0a7e855a1a891e5939a2279bf712d192d1d8c83a53
+size 80114765
diff --git a/ShareGPT-cleaned/checkpoint-36/adapter_model/README.md b/ShareGPT-cleaned/checkpoint-36/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-36/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/ShareGPT-cleaned/checkpoint-36/adapter_model/adapter_config.json b/ShareGPT-cleaned/checkpoint-36/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f035b69c0f76b657a7ea6a70e8e929f6301cea47
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-36/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "q_proj",
+    "up_proj",
+    "o_proj",
+    "gate_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/ShareGPT-cleaned/checkpoint-36/adapter_model/adapter_model.bin b/ShareGPT-cleaned/checkpoint-36/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..15489d429b58fad9a4354febce043a14fd1d094a
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-36/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c45f3436fbeb782012ba6c0a7e855a1a891e5939a2279bf712d192d1d8c83a53
+size 80114765
diff --git a/ShareGPT-cleaned/checkpoint-36/optimizer.pt b/ShareGPT-cleaned/checkpoint-36/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c91e27e179e85bb902f05e4c69c411abe6776700
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-36/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff220621b882760c63f76d483365c31639b794b48befa77de9de572fcce64b47
+size 40569887
diff --git a/ShareGPT-cleaned/checkpoint-36/rng_state.pth b/ShareGPT-cleaned/checkpoint-36/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8e04fdea8e021a54d1b0efb8045bf8cb251bcb30
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-36/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d11ee642e6d300c4abb8c42b5efb5b0fcb1863dd61632b68c2a149bd1b50af5
+size 14575
diff --git a/ShareGPT-cleaned/checkpoint-36/scheduler.pt b/ShareGPT-cleaned/checkpoint-36/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e3ef78fa129be9ba4ff9c23d2297f6d63d3af2a0
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-36/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e465c4ecfe5cb05ad694da4e0166e9c5a9e2ca60e69ac108c0469c49a9b6b3ca
+size 627
diff --git a/ShareGPT-cleaned/checkpoint-36/trainer_state.json b/ShareGPT-cleaned/checkpoint-36/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..47dca7b011ce07e5cce1b53e8e7af91e945f9b67
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-36/trainer_state.json
@@ -0,0 +1,235 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.938775510204082,
+  "eval_steps": 500,
+  "global_step": 36,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.0003,
+      "loss": 1.0275,
+      "step": 1
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 0.00029939614409928584,
+      "loss": 1.0557,
+      "step": 2
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.00029758943828979444,
+      "loss": 1.0341,
+      "step": 3
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 0.00029459442910437797,
+      "loss": 1.0866,
+      "step": 4
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00029043523059596053,
+      "loss": 1.0544,
+      "step": 5
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 0.0002851453301853628,
+      "loss": 0.9875,
+      "step": 6
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 0.0002787673190402799,
+      "loss": 1.0672,
+      "step": 7
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 0.0002713525491562421,
+      "loss": 1.0344,
+      "step": 8
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 0.00026296071990054165,
+      "loss": 1.0481,
+      "step": 9
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 0.0002536593973480297,
+      "loss": 0.8523,
+      "step": 10
+    },
+    {
+      "epoch": 0.9,
+      "learning_rate": 0.00024352347027881003,
+      "loss": 1.019,
+      "step": 11
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 0.00023263454721781537,
+      "loss": 0.9123,
+      "step": 12
+    },
+    {
+      "epoch": 1.06,
+      "learning_rate": 0.0002210802993709498,
+      "loss": 0.9131,
+      "step": 13
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 0.00020895375474808852,
+      "loss": 0.9333,
+      "step": 14
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 0.0001963525491562421,
+      "loss": 0.84,
+      "step": 15
+    },
+    {
+      "epoch": 1.31,
+      "learning_rate": 0.00018337814009344714,
+      "loss": 0.9898,
+      "step": 16
+    },
+    {
+      "epoch": 1.39,
+      "learning_rate": 0.00017013498987264832,
+      "loss": 0.9718,
+      "step": 17
+    },
+    {
+      "epoch": 1.47,
+      "learning_rate": 0.00015672972455257723,
+      "loss": 0.9909,
+      "step": 18
+    },
+    {
+      "epoch": 1.55,
+      "learning_rate": 0.0001432702754474228,
+      "loss": 0.8745,
+      "step": 19
+    },
+    {
+      "epoch": 1.63,
+      "learning_rate": 0.0001298650101273517,
+      "loss": 0.8947,
+      "step": 20
+    },
+    {
+      "epoch": 1.71,
+      "learning_rate": 0.00011662185990655284,
+      "loss": 0.9917,
+      "step": 21
+    },
+    {
+      "epoch": 1.8,
+      "learning_rate": 0.0001036474508437579,
+      "loss": 0.9463,
+      "step": 22
+    },
+    {
+      "epoch": 1.88,
+      "learning_rate": 9.104624525191145e-05,
+      "loss": 0.9384,
+      "step": 23
+    },
+    {
+      "epoch": 1.96,
+      "learning_rate": 7.89197006290502e-05,
+      "loss": 0.89,
+      "step": 24
+    },
+    {
+      "epoch": 2.04,
+      "learning_rate": 6.736545278218463e-05,
+      "loss": 0.9943,
+      "step": 25
+    },
+    {
+      "epoch": 2.12,
+      "learning_rate": 5.6476529721189974e-05,
+      "loss": 0.8828,
+      "step": 26
+    },
+    {
+      "epoch": 2.2,
+      "learning_rate": 4.63406026519703e-05,
+      "loss": 0.884,
+      "step": 27
+    },
+    {
+      "epoch": 2.29,
+      "learning_rate": 3.7039280099458366e-05,
+      "loss": 0.8695,
+      "step": 28
+    },
+    {
+      "epoch": 2.37,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 0.9088,
+      "step": 29
+    },
+    {
+      "epoch": 2.45,
+      "learning_rate": 2.1232680959720082e-05,
+      "loss": 0.9084,
+      "step": 30
+    },
+    {
+      "epoch": 2.53,
+      "learning_rate": 1.4854669814637143e-05,
+      "loss": 0.8754,
+      "step": 31
+    },
+    {
+      "epoch": 2.61,
+      "learning_rate": 9.564769404039419e-06,
+      "loss": 0.8326,
+      "step": 32
+    },
+    {
+      "epoch": 2.69,
+      "learning_rate": 5.405570895622013e-06,
+      "loss": 0.8982,
+      "step": 33
+    },
+    {
+      "epoch": 2.78,
+      "learning_rate": 2.4105617102055496e-06,
+      "loss": 1.0135,
+      "step": 34
+    },
+    {
+      "epoch": 2.86,
+      "learning_rate": 6.038559007141397e-07,
+      "loss": 0.9641,
+      "step": 35
+    },
+    {
+      "epoch": 2.94,
+      "learning_rate": 0.0,
+      "loss": 0.8868,
+      "step": 36
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 36,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 4.666672977385882e+16,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/ShareGPT-cleaned/checkpoint-36/training_args.bin b/ShareGPT-cleaned/checkpoint-36/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..00e89e1c931b56d90542ad868dbc0e31f257ec1a
--- /dev/null
+++ b/ShareGPT-cleaned/checkpoint-36/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44a629ad453bf2898430a28c0829ede2a81c7366c7bf8749dc4057027d66b023
+size 4219
diff --git a/ShareGPT-cleaned/special_tokens_map.json b/ShareGPT-cleaned/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..3f58a5e115855c6ea3cec98accae196ad927222e
--- /dev/null
+++ b/ShareGPT-cleaned/special_tokens_map.json
@@ -0,0 +1,6 @@
+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "[PAD]",
+  "unk_token": "<unk>"
+}
diff --git a/ShareGPT-cleaned/tokenizer.model b/ShareGPT-cleaned/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899
--- /dev/null
+++ b/ShareGPT-cleaned/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
diff --git a/ShareGPT-cleaned/tokenizer_config.json b/ShareGPT-cleaned/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..65532c2074f7f407d2d801a559f3d90aa5137f26
--- /dev/null
+++ b/ShareGPT-cleaned/tokenizer_config.json
@@ -0,0 +1,38 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": null,
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "trust_remote_code": false,
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "use_default_system_prompt": true,
+  "use_fast": true
+}