diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..ba58ce7ac27cf656a0c2a6f16aee5e73678fff2c 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint-138/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-276/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-414/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-552/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-690/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-828/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..dfd85d29159e61f8c090feb3d219c62a0c01c56b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,206 @@
+---
+base_model: microsoft/Phi-4-mini-instruct
+library_name: peft
+tags:
+- base_model:adapter:microsoft/Phi-4-mini-instruct
+- lora
+- transformers
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** LoRA adapter (PEFT) for sequence classification
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** microsoft/Phi-4-mini-instruct
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/adapter_config.json b/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..675b31bd9d8a4c2efda38f5392b48daf033fe669
--- /dev/null
+++ b/adapter_config.json
@@ -0,0 +1,48 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "microsoft/Phi-4-mini-instruct",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "SEQ_CLS",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/adapter_model.safetensors b/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c1ecb1cb90b4c9a55e6948a3d7e811dd5f8bb8f7
--- /dev/null
+++ b/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d075579a534a1d4a45a32603660edecb6740758c1bbc080e4012de433623d5e4
+size 12603848
diff --git a/chat_template.jinja b/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..a9c00dd9bbd97e117371168e9d62af65b9f0e725
--- /dev/null
+++ b/chat_template.jinja
@@ -0,0 +1 @@
+{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}
\ No newline at end of file
diff --git a/checkpoint-138/README.md b/checkpoint-138/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..dfd85d29159e61f8c090feb3d219c62a0c01c56b
--- /dev/null
+++ b/checkpoint-138/README.md
@@ -0,0 +1,206 @@
+---
+base_model: microsoft/Phi-4-mini-instruct
+library_name: peft
+tags:
+- base_model:adapter:microsoft/Phi-4-mini-instruct
+- lora
+- transformers
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/checkpoint-138/adapter_config.json b/checkpoint-138/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..675b31bd9d8a4c2efda38f5392b48daf033fe669
--- /dev/null
+++ b/checkpoint-138/adapter_config.json
@@ -0,0 +1,48 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "microsoft/Phi-4-mini-instruct",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "SEQ_CLS",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-138/adapter_model.safetensors b/checkpoint-138/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7721e2fd58c85bf6fb2db918511750fffe441589
--- /dev/null
+++ b/checkpoint-138/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10a7755c03037c9f31df4903fcb8e90efa96bc47d5ce6af67acda272d880df40
+size 12603848
diff --git a/checkpoint-138/chat_template.jinja b/checkpoint-138/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..a9c00dd9bbd97e117371168e9d62af65b9f0e725
--- /dev/null
+++ b/checkpoint-138/chat_template.jinja
@@ -0,0 +1 @@
+{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}
\ No newline at end of file
diff --git a/checkpoint-138/optimizer.pt b/checkpoint-138/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..af9392c26a580539073b1433ea504c7d56413777
--- /dev/null
+++ b/checkpoint-138/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f9e8658919ec83d372c14a1f908a820529ee7b5052631dfb8d597f18bd417b4
+size 25246667
diff --git a/checkpoint-138/rng_state.pth b/checkpoint-138/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b79e0a333286a4cff79a730d488bdd3226db5dc0
--- /dev/null
+++ b/checkpoint-138/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:097fc9e407a419ddf28f3b853d450a43384d68e36e08e05b71f4c0a434df65b0
+size 14645
diff --git a/checkpoint-138/scheduler.pt b/checkpoint-138/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7441c727f9cb1c4968a8083ba627841ec358be0f
--- /dev/null
+++ b/checkpoint-138/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93eb7fd4a08aff2d138b02664f700445517a5d28c2e4178bd23a13a1600e1dd2
+size 1465
diff --git a/checkpoint-138/tokenizer.json b/checkpoint-138/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..8655fee00020e3140fc51416dac7cb0b9a8e4c45
--- /dev/null
+++ b/checkpoint-138/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ea8bdf68c3e7549a3fb4342523288ce628f6ab56a618f9a4dfb234a0b4d46a8
+size 15524476
diff --git a/checkpoint-138/tokenizer_config.json b/checkpoint-138/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..007b4fee54acc99b382393f6900ba96e673f8b49
--- /dev/null
+++ b/checkpoint-138/tokenizer_config.json
@@ -0,0 +1,12 @@
+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "is_local": false,
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "TokenizersBackend",
+  "unk_token": "<|endoftext|>"
+}
diff --git a/checkpoint-138/trainer_state.json b/checkpoint-138/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..e9e07b3bc9b205b5ad8467282f0f2dafeab57458
--- /dev/null
+++ b/checkpoint-138/trainer_state.json
@@ -0,0 +1,56 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 138,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.36231884057971014,
+      "grad_norm": 11.473546981811523,
+      "learning_rate": 9.408212560386473e-06,
+      "loss": 2.518828125,
+      "step": 50
+    },
+    {
+      "epoch": 0.7246376811594203,
+      "grad_norm": 19.91433334350586,
+      "learning_rate": 8.804347826086957e-06,
+      "loss": 2.4250390625,
+      "step": 100
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5803093314170837,
+      "eval_runtime": 29.4101,
+      "eval_samples_per_second": 16.661,
+      "eval_steps_per_second": 2.108,
+      "step": 138
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 828,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4977144119808000.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-138/training_args.bin b/checkpoint-138/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..41879da9e32eb23f5013e1a62452ec18a7e8995e
--- /dev/null
+++ b/checkpoint-138/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:033534501afcab0521199d3c0685a5e811f2a297a318bad85be0999d18aac32c
+size 5137
diff --git a/checkpoint-276/README.md b/checkpoint-276/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..dfd85d29159e61f8c090feb3d219c62a0c01c56b
--- /dev/null
+++ b/checkpoint-276/README.md
@@ -0,0 +1,206 @@
+---
+base_model: microsoft/Phi-4-mini-instruct
+library_name: peft
+tags:
+- base_model:adapter:microsoft/Phi-4-mini-instruct
+- lora
+- transformers
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/checkpoint-276/adapter_config.json b/checkpoint-276/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..675b31bd9d8a4c2efda38f5392b48daf033fe669
--- /dev/null
+++ b/checkpoint-276/adapter_config.json
@@ -0,0 +1,48 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "microsoft/Phi-4-mini-instruct",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "SEQ_CLS",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-276/adapter_model.safetensors b/checkpoint-276/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..315919641c869215a91452520d8161c9b6f2a11d
--- /dev/null
+++ b/checkpoint-276/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0c6ded5ba416b29a39409dadce0593ae89ad8745ffd49e026eebac7fd6a2418
+size 12603848
diff --git a/checkpoint-276/chat_template.jinja b/checkpoint-276/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..a9c00dd9bbd97e117371168e9d62af65b9f0e725
--- /dev/null
+++ b/checkpoint-276/chat_template.jinja
@@ -0,0 +1 @@
+{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}
\ No newline at end of file
diff --git a/checkpoint-276/optimizer.pt b/checkpoint-276/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5fe093a6353c6cf08bc7a82559a70dfb7968797e
--- /dev/null
+++ b/checkpoint-276/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce21ffd5a7c5857b062193fe0933bfffa42a995c930787eb31aac23a793c7017
+size 25246667
diff --git a/checkpoint-276/rng_state.pth b/checkpoint-276/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..46fb50fe41dae7cad48db337b3a96ba480974f58
--- /dev/null
+++ b/checkpoint-276/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60ba65ae9c9e67ea17aba4e7a084c9af58ba8127153fbbf45dc7355c9e1bfa49
+size 14645
diff --git a/checkpoint-276/scheduler.pt b/checkpoint-276/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..431ec5e25d388299b9df2f00d5facde327d9d266
--- /dev/null
+++ b/checkpoint-276/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d1e711516c1ca8ebd1dc2ed174db5a989057bb5fee53204461ae96fd052b31d
+size 1465
diff --git a/checkpoint-276/tokenizer.json b/checkpoint-276/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..8655fee00020e3140fc51416dac7cb0b9a8e4c45
--- /dev/null
+++ b/checkpoint-276/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ea8bdf68c3e7549a3fb4342523288ce628f6ab56a618f9a4dfb234a0b4d46a8
+size 15524476
diff --git a/checkpoint-276/tokenizer_config.json b/checkpoint-276/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..007b4fee54acc99b382393f6900ba96e673f8b49
--- /dev/null
+++ b/checkpoint-276/tokenizer_config.json
@@ -0,0 +1,12 @@
+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "is_local": false,
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "TokenizersBackend",
+  "unk_token": "<|endoftext|>"
+}
diff --git a/checkpoint-276/trainer_state.json b/checkpoint-276/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..1482c2306484dd8b550fb15e2d75b53d9c7408b8
--- /dev/null
+++ b/checkpoint-276/trainer_state.json
@@ -0,0 +1,85 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 276,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.36231884057971014,
+      "grad_norm": 11.473546981811523,
+      "learning_rate": 9.408212560386473e-06,
+      "loss": 2.518828125,
+      "step": 50
+    },
+    {
+      "epoch": 0.7246376811594203,
+      "grad_norm": 19.91433334350586,
+      "learning_rate": 8.804347826086957e-06,
+      "loss": 2.4250390625,
+      "step": 100
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5803093314170837,
+      "eval_runtime": 29.4101,
+      "eval_samples_per_second": 16.661,
+      "eval_steps_per_second": 2.108,
+      "step": 138
+    },
+    {
+      "epoch": 1.0869565217391304,
+      "grad_norm": 16.91636085510254,
+      "learning_rate": 8.20048309178744e-06,
+      "loss": 2.2990234375,
+      "step": 150
+    },
+    {
+      "epoch": 1.4492753623188406,
+      "grad_norm": 13.255877494812012,
+      "learning_rate": 7.596618357487924e-06,
+      "loss": 2.378984375,
+      "step": 200
+    },
+    {
+      "epoch": 1.8115942028985508,
+      "grad_norm": 18.71368980407715,
+      "learning_rate": 6.992753623188407e-06,
+      "loss": 2.15296875,
+      "step": 250
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5362404584884644,
+      "eval_runtime": 27.8676,
+      "eval_samples_per_second": 17.583,
+      "eval_steps_per_second": 2.225,
+      "step": 276
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 828,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.0001921782173696e+16,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-276/training_args.bin b/checkpoint-276/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..41879da9e32eb23f5013e1a62452ec18a7e8995e
--- /dev/null
+++ b/checkpoint-276/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:033534501afcab0521199d3c0685a5e811f2a297a318bad85be0999d18aac32c
+size 5137
diff --git a/checkpoint-414/README.md b/checkpoint-414/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..dfd85d29159e61f8c090feb3d219c62a0c01c56b
--- /dev/null
+++ b/checkpoint-414/README.md
@@ -0,0 +1,206 @@
+---
+base_model: microsoft/Phi-4-mini-instruct
+library_name: peft
+tags:
+- base_model:adapter:microsoft/Phi-4-mini-instruct
+- lora
+- transformers
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/checkpoint-414/adapter_config.json b/checkpoint-414/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..675b31bd9d8a4c2efda38f5392b48daf033fe669
--- /dev/null
+++ b/checkpoint-414/adapter_config.json
@@ -0,0 +1,48 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "microsoft/Phi-4-mini-instruct",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "SEQ_CLS",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-414/adapter_model.safetensors b/checkpoint-414/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..759d11e8175bc631812febac5b3cb1a7b6235ef0
--- /dev/null
+++ b/checkpoint-414/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b41017ab6b0d4427509ea94161f0af3040548b002e820f30b9335297f1016b8
+size 12603848
diff --git a/checkpoint-414/chat_template.jinja b/checkpoint-414/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..a9c00dd9bbd97e117371168e9d62af65b9f0e725
--- /dev/null
+++ b/checkpoint-414/chat_template.jinja
@@ -0,0 +1 @@
+{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}
\ No newline at end of file
diff --git a/checkpoint-414/optimizer.pt b/checkpoint-414/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..921ecf6cb835d91e9eb48d50bbddf4dd77b38a72
--- /dev/null
+++ b/checkpoint-414/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b12f5c97844b923904162a7022aab604e03843bf1564c70629e3d3a4021f49f
+size 25246667
diff --git a/checkpoint-414/rng_state.pth b/checkpoint-414/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..032bbe0eec0ad0ddbda241143152779be39851eb
--- /dev/null
+++ b/checkpoint-414/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6bc5073c8303a0cad198d2112cdec53793d9b4bb882654a273cfd73c3944d75
+size 14645
diff --git a/checkpoint-414/scheduler.pt b/checkpoint-414/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6488b890c67d4280f950ac35e5b456f236192f25
--- /dev/null
+++ b/checkpoint-414/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb5f5537b0c69c2f67d8b43747fec7176271cf94315dbef082100a0915554593
+size 1465
diff --git a/checkpoint-414/tokenizer.json b/checkpoint-414/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..8655fee00020e3140fc51416dac7cb0b9a8e4c45
--- /dev/null
+++ b/checkpoint-414/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ea8bdf68c3e7549a3fb4342523288ce628f6ab56a618f9a4dfb234a0b4d46a8
+size 15524476
diff --git a/checkpoint-414/tokenizer_config.json b/checkpoint-414/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..007b4fee54acc99b382393f6900ba96e673f8b49
--- /dev/null
+++ b/checkpoint-414/tokenizer_config.json
@@ -0,0 +1,12 @@
+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "is_local": false,
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "TokenizersBackend",
+  "unk_token": "<|endoftext|>"
+}
diff --git a/checkpoint-414/trainer_state.json b/checkpoint-414/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..135ea9fa3a817ad0fb73224a0aae412d83eee78a
--- /dev/null
+++ b/checkpoint-414/trainer_state.json
@@ -0,0 +1,114 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 414,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.36231884057971014,
+      "grad_norm": 11.473546981811523,
+      "learning_rate": 9.408212560386473e-06,
+      "loss": 2.518828125,
+      "step": 50
+    },
+    {
+      "epoch": 0.7246376811594203,
+      "grad_norm": 19.91433334350586,
+      "learning_rate": 8.804347826086957e-06,
+      "loss": 2.4250390625,
+      "step": 100
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5803093314170837,
+      "eval_runtime": 29.4101,
+      "eval_samples_per_second": 16.661,
+      "eval_steps_per_second": 2.108,
+      "step": 138
+    },
+    {
+      "epoch": 1.0869565217391304,
+      "grad_norm": 16.91636085510254,
+      "learning_rate": 8.20048309178744e-06,
+      "loss": 2.2990234375,
+      "step": 150
+    },
+    {
+      "epoch": 1.4492753623188406,
+      "grad_norm": 13.255877494812012,
+      "learning_rate": 7.596618357487924e-06,
+      "loss": 2.378984375,
+      "step": 200
+    },
+    {
+      "epoch": 1.8115942028985508,
+      "grad_norm": 18.71368980407715,
+      "learning_rate": 6.992753623188407e-06,
+      "loss": 2.15296875,
+      "step": 250
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5362404584884644,
+      "eval_runtime": 27.8676,
+      "eval_samples_per_second": 17.583,
+      "eval_steps_per_second": 2.225,
+      "step": 276
+    },
+    {
+      "epoch": 2.1739130434782608,
+      "grad_norm": 9.45117473602295,
+      "learning_rate": 6.3888888888888885e-06,
+      "loss": 2.009921875,
+      "step": 300
+    },
+    {
+      "epoch": 2.536231884057971,
+      "grad_norm": 8.40969467163086,
+      "learning_rate": 5.785024154589373e-06,
+      "loss": 2.07037109375,
+      "step": 350
+    },
+    {
+      "epoch": 2.898550724637681,
+      "grad_norm": 10.270116806030273,
+      "learning_rate": 5.181159420289855e-06,
+      "loss": 1.952861328125,
+      "step": 400
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.47528699040412903,
+      "eval_runtime": 27.7881,
+      "eval_samples_per_second": 17.633,
+      "eval_steps_per_second": 2.231,
+      "step": 414
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 828,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.4994156951207936e+16,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-414/training_args.bin b/checkpoint-414/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..41879da9e32eb23f5013e1a62452ec18a7e8995e
--- /dev/null
+++ b/checkpoint-414/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:033534501afcab0521199d3c0685a5e811f2a297a318bad85be0999d18aac32c
+size 5137
diff --git a/checkpoint-552/README.md b/checkpoint-552/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..dfd85d29159e61f8c090feb3d219c62a0c01c56b
--- /dev/null
+++ b/checkpoint-552/README.md
@@ -0,0 +1,206 @@
+---
+base_model: microsoft/Phi-4-mini-instruct
+library_name: peft
+tags:
+- base_model:adapter:microsoft/Phi-4-mini-instruct
+- lora
+- transformers
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/checkpoint-552/adapter_config.json b/checkpoint-552/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..675b31bd9d8a4c2efda38f5392b48daf033fe669
--- /dev/null
+++ b/checkpoint-552/adapter_config.json
@@ -0,0 +1,48 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "microsoft/Phi-4-mini-instruct",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "SEQ_CLS",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-552/adapter_model.safetensors b/checkpoint-552/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..29ab7a87cc48bf102d05767ed44f93ce481045e4
--- /dev/null
+++ b/checkpoint-552/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dacd0d6979261464b07e15948cc6fdd581bd750fb7d7c57aca0ad0e4b950ba7e
+size 12603848
diff --git a/checkpoint-552/chat_template.jinja b/checkpoint-552/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..a9c00dd9bbd97e117371168e9d62af65b9f0e725
--- /dev/null
+++ b/checkpoint-552/chat_template.jinja
@@ -0,0 +1 @@
+{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}
\ No newline at end of file
diff --git a/checkpoint-552/optimizer.pt b/checkpoint-552/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..897d62c40522b0549f2d6cf44ee4e61c0900915a
--- /dev/null
+++ b/checkpoint-552/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecfa1c3281277e1473512d0dc93126f4d0329444a30025f98e736b3ad571ddf8
+size 25246667
diff --git a/checkpoint-552/rng_state.pth b/checkpoint-552/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d7bb4799624878f28ee0b4457a40c1e926363e37
--- /dev/null
+++ b/checkpoint-552/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd750b00e420d79940704088c0b007122910045476fa6ec2e20d1312d1dce295
+size 14645
diff --git a/checkpoint-552/scheduler.pt b/checkpoint-552/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..55bb689ddb493920d687c0336df0aac14e07ffd6
--- /dev/null
+++ b/checkpoint-552/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6eedc4cfb80ab99aa1713d3d8b96c4e095ada96a544f8ee08a1cc99f6b1a8c4
+size 1465
diff --git a/checkpoint-552/tokenizer.json b/checkpoint-552/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..8655fee00020e3140fc51416dac7cb0b9a8e4c45
--- /dev/null
+++ b/checkpoint-552/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ea8bdf68c3e7549a3fb4342523288ce628f6ab56a618f9a4dfb234a0b4d46a8
+size 15524476
diff --git a/checkpoint-552/tokenizer_config.json b/checkpoint-552/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..007b4fee54acc99b382393f6900ba96e673f8b49
--- /dev/null
+++ b/checkpoint-552/tokenizer_config.json
@@ -0,0 +1,12 @@
+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "is_local": false,
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "TokenizersBackend",
+  "unk_token": "<|endoftext|>"
+}
diff --git a/checkpoint-552/trainer_state.json b/checkpoint-552/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..45a3ed33a1b0947af957936656d43b39da14b11f
--- /dev/null
+++ b/checkpoint-552/trainer_state.json
@@ -0,0 +1,143 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 552,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.36231884057971014,
+      "grad_norm": 11.473546981811523,
+      "learning_rate": 9.408212560386473e-06,
+      "loss": 2.518828125,
+      "step": 50
+    },
+    {
+      "epoch": 0.7246376811594203,
+      "grad_norm": 19.91433334350586,
+      "learning_rate": 8.804347826086957e-06,
+      "loss": 2.4250390625,
+      "step": 100
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5803093314170837,
+      "eval_runtime": 29.4101,
+      "eval_samples_per_second": 16.661,
+      "eval_steps_per_second": 2.108,
+      "step": 138
+    },
+    {
+      "epoch": 1.0869565217391304,
+      "grad_norm": 16.91636085510254,
+      "learning_rate": 8.20048309178744e-06,
+      "loss": 2.2990234375,
+      "step": 150
+    },
+    {
+      "epoch": 1.4492753623188406,
+      "grad_norm": 13.255877494812012,
+      "learning_rate": 7.596618357487924e-06,
+      "loss": 2.378984375,
+      "step": 200
+    },
+    {
+      "epoch": 1.8115942028985508,
+      "grad_norm": 18.71368980407715,
+      "learning_rate": 6.992753623188407e-06,
+      "loss": 2.15296875,
+      "step": 250
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5362404584884644,
+      "eval_runtime": 27.8676,
+      "eval_samples_per_second": 17.583,
+      "eval_steps_per_second": 2.225,
+      "step": 276
+    },
+    {
+      "epoch": 2.1739130434782608,
+      "grad_norm": 9.45117473602295,
+      "learning_rate": 6.3888888888888885e-06,
+      "loss": 2.009921875,
+      "step": 300
+    },
+    {
+      "epoch": 2.536231884057971,
+      "grad_norm": 8.40969467163086,
+      "learning_rate": 5.785024154589373e-06,
+      "loss": 2.07037109375,
+      "step": 350
+    },
+    {
+      "epoch": 2.898550724637681,
+      "grad_norm": 10.270116806030273,
+      "learning_rate": 5.181159420289855e-06,
+      "loss": 1.952861328125,
+      "step": 400
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.47528699040412903,
+      "eval_runtime": 27.7881,
+      "eval_samples_per_second": 17.633,
+      "eval_steps_per_second": 2.231,
+      "step": 414
+    },
+    {
+      "epoch": 3.260869565217391,
+      "grad_norm": 17.692113876342773,
+      "learning_rate": 4.5772946859903385e-06,
+      "loss": 1.79087890625,
+      "step": 450
+    },
+    {
+      "epoch": 3.6231884057971016,
+      "grad_norm": 13.763245582580566,
+      "learning_rate": 3.973429951690821e-06,
+      "loss": 1.650498046875,
+      "step": 500
+    },
+    {
+      "epoch": 3.9855072463768115,
+      "grad_norm": 33.43427276611328,
+      "learning_rate": 3.3695652173913045e-06,
+      "loss": 1.610087890625,
+      "step": 550
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.410451203584671,
+      "eval_runtime": 27.8228,
+      "eval_samples_per_second": 17.611,
+      "eval_steps_per_second": 2.228,
+      "step": 552
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 828,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.003460608777011e+16,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-552/training_args.bin b/checkpoint-552/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..41879da9e32eb23f5013e1a62452ec18a7e8995e
--- /dev/null
+++ b/checkpoint-552/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:033534501afcab0521199d3c0685a5e811f2a297a318bad85be0999d18aac32c
+size 5137
diff --git a/checkpoint-690/README.md b/checkpoint-690/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..dfd85d29159e61f8c090feb3d219c62a0c01c56b
--- /dev/null
+++ b/checkpoint-690/README.md
@@ -0,0 +1,206 @@
+---
+base_model: microsoft/Phi-4-mini-instruct
+library_name: peft
+tags:
+- base_model:adapter:microsoft/Phi-4-mini-instruct
+- lora
+- transformers
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/checkpoint-690/adapter_config.json b/checkpoint-690/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..675b31bd9d8a4c2efda38f5392b48daf033fe669
--- /dev/null
+++ b/checkpoint-690/adapter_config.json
@@ -0,0 +1,48 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "microsoft/Phi-4-mini-instruct",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "SEQ_CLS",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-690/adapter_model.safetensors b/checkpoint-690/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..70b3df0b36cba5b5e1cb037ab97a462e91cb8a73
--- /dev/null
+++ b/checkpoint-690/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2794d903c1cbd94e2bba0319f8192757a96d582c707a5c961fea609b52020a9f
+size 12603848
diff --git a/checkpoint-690/chat_template.jinja b/checkpoint-690/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..a9c00dd9bbd97e117371168e9d62af65b9f0e725
--- /dev/null
+++ b/checkpoint-690/chat_template.jinja
@@ -0,0 +1 @@
+{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}
\ No newline at end of file
diff --git a/checkpoint-690/optimizer.pt b/checkpoint-690/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a698feb4705abca25e0d9b92d41f5491e3813f54
--- /dev/null
+++ b/checkpoint-690/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b38e5374d04c8a42750eebdc061404656338af5ce33418e0850c75a16cc54f38
+size 25246667
diff --git a/checkpoint-690/rng_state.pth b/checkpoint-690/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..502a66d35a9e33abf08a2d1145499dfc89ff9a92
--- /dev/null
+++ b/checkpoint-690/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55d1a6531f1cad7cbc10ca169a19e64db822406a42e3eb0d1c0dbcb2171b791c
+size 14645
diff --git a/checkpoint-690/scheduler.pt b/checkpoint-690/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..43d28ae4f70d721578ba73bd239abe986d84bf96
--- /dev/null
+++ b/checkpoint-690/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0943a79c4d72f7d717a68cee1dbf9a58982caea14f8a3dafb158b8c83312b56
+size 1465
diff --git a/checkpoint-690/tokenizer.json b/checkpoint-690/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..8655fee00020e3140fc51416dac7cb0b9a8e4c45
--- /dev/null
+++ b/checkpoint-690/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ea8bdf68c3e7549a3fb4342523288ce628f6ab56a618f9a4dfb234a0b4d46a8
+size 15524476
diff --git a/checkpoint-690/tokenizer_config.json b/checkpoint-690/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..007b4fee54acc99b382393f6900ba96e673f8b49
--- /dev/null
+++ b/checkpoint-690/tokenizer_config.json
@@ -0,0 +1,12 @@
+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "is_local": false,
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "TokenizersBackend",
+  "unk_token": "<|endoftext|>"
+}
diff --git a/checkpoint-690/trainer_state.json b/checkpoint-690/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..1572b06bdbb516ae2e09287f103939594b005d26
--- /dev/null
+++ b/checkpoint-690/trainer_state.json
@@ -0,0 +1,165 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 690,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.36231884057971014,
+      "grad_norm": 11.473546981811523,
+      "learning_rate": 9.408212560386473e-06,
+      "loss": 2.518828125,
+      "step": 50
+    },
+    {
+      "epoch": 0.7246376811594203,
+      "grad_norm": 19.91433334350586,
+      "learning_rate": 8.804347826086957e-06,
+      "loss": 2.4250390625,
+      "step": 100
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5803093314170837,
+      "eval_runtime": 29.4101,
+      "eval_samples_per_second": 16.661,
+      "eval_steps_per_second": 2.108,
+      "step": 138
+    },
+    {
+      "epoch": 1.0869565217391304,
+      "grad_norm": 16.91636085510254,
+      "learning_rate": 8.20048309178744e-06,
+      "loss": 2.2990234375,
+      "step": 150
+    },
+    {
+      "epoch": 1.4492753623188406,
+      "grad_norm": 13.255877494812012,
+      "learning_rate": 7.596618357487924e-06,
+      "loss": 2.378984375,
+      "step": 200
+    },
+    {
+      "epoch": 1.8115942028985508,
+      "grad_norm": 18.71368980407715,
+      "learning_rate": 6.992753623188407e-06,
+      "loss": 2.15296875,
+      "step": 250
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5362404584884644,
+      "eval_runtime": 27.8676,
+      "eval_samples_per_second": 17.583,
+      "eval_steps_per_second": 2.225,
+      "step": 276
+    },
+    {
+      "epoch": 2.1739130434782608,
+      "grad_norm": 9.45117473602295,
+      "learning_rate": 6.3888888888888885e-06,
+      "loss": 2.009921875,
+      "step": 300
+    },
+    {
+      "epoch": 2.536231884057971,
+      "grad_norm": 8.40969467163086,
+      "learning_rate": 5.785024154589373e-06,
+      "loss": 2.07037109375,
+      "step": 350
+    },
+    {
+      "epoch": 2.898550724637681,
+      "grad_norm": 10.270116806030273,
+      "learning_rate": 5.181159420289855e-06,
+      "loss": 1.952861328125,
+      "step": 400
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.47528699040412903,
+      "eval_runtime": 27.7881,
+      "eval_samples_per_second": 17.633,
+      "eval_steps_per_second": 2.231,
+      "step": 414
+    },
+    {
+      "epoch": 3.260869565217391,
+      "grad_norm": 17.692113876342773,
+      "learning_rate": 4.5772946859903385e-06,
+      "loss": 1.79087890625,
+      "step": 450
+    },
+    {
+      "epoch": 3.6231884057971016,
+      "grad_norm": 13.763245582580566,
+      "learning_rate": 3.973429951690821e-06,
+      "loss": 1.650498046875,
+      "step": 500
+    },
+    {
+      "epoch": 3.9855072463768115,
+      "grad_norm": 33.43427276611328,
+      "learning_rate": 3.3695652173913045e-06,
+      "loss": 1.610087890625,
+      "step": 550
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.410451203584671,
+      "eval_runtime": 27.8228,
+      "eval_samples_per_second": 17.611,
+      "eval_steps_per_second": 2.228,
+      "step": 552
+    },
+    {
+      "epoch": 4.3478260869565215,
+      "grad_norm": 8.163750648498535,
+      "learning_rate": 2.7657004830917877e-06,
+      "loss": 1.44470703125,
+      "step": 600
+    },
+    {
+      "epoch": 4.710144927536232,
+      "grad_norm": 17.915592193603516,
+      "learning_rate": 2.1618357487922704e-06,
+      "loss": 1.423818359375,
+      "step": 650
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 0.3703005313873291,
+      "eval_runtime": 27.9318,
+      "eval_samples_per_second": 17.543,
+      "eval_steps_per_second": 2.22,
+      "step": 690
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 828,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.493846188133581e+16,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-690/training_args.bin b/checkpoint-690/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..41879da9e32eb23f5013e1a62452ec18a7e8995e
--- /dev/null
+++ b/checkpoint-690/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:033534501afcab0521199d3c0685a5e811f2a297a318bad85be0999d18aac32c
+size 5137
diff --git a/checkpoint-828/README.md b/checkpoint-828/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..dfd85d29159e61f8c090feb3d219c62a0c01c56b
--- /dev/null
+++ b/checkpoint-828/README.md
@@ -0,0 +1,206 @@
+---
+base_model: microsoft/Phi-4-mini-instruct
+library_name: peft
+tags:
+- base_model:adapter:microsoft/Phi-4-mini-instruct
+- lora
+- transformers
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.19.1
\ No newline at end of file
diff --git a/checkpoint-828/adapter_config.json b/checkpoint-828/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..675b31bd9d8a4c2efda38f5392b48daf033fe669
--- /dev/null
+++ b/checkpoint-828/adapter_config.json
@@ -0,0 +1,48 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "microsoft/Phi-4-mini-instruct",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "SEQ_CLS",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-828/adapter_model.safetensors b/checkpoint-828/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c1ecb1cb90b4c9a55e6948a3d7e811dd5f8bb8f7
--- /dev/null
+++ b/checkpoint-828/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d075579a534a1d4a45a32603660edecb6740758c1bbc080e4012de433623d5e4
+size 12603848
diff --git a/checkpoint-828/chat_template.jinja b/checkpoint-828/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..a9c00dd9bbd97e117371168e9d62af65b9f0e725
--- /dev/null
+++ b/checkpoint-828/chat_template.jinja
@@ -0,0 +1 @@
+{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}
\ No newline at end of file
diff --git a/checkpoint-828/optimizer.pt b/checkpoint-828/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2ac4eb43ea0243a1a36a651e333feccf4c122412
--- /dev/null
+++ b/checkpoint-828/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f1025419199037fd5dba875df2ed69253e370e6e7cf57e0725477a29bb3d4eb
+size 25246667
diff --git a/checkpoint-828/rng_state.pth b/checkpoint-828/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..93ef028e9ee8dc1648cf817282962d8a2d185348
--- /dev/null
+++ b/checkpoint-828/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a7fe73d179f99ae6dc92c440369719ab7067c11bcc01d4e90c49866b2d2eea7
+size 14645
diff --git a/checkpoint-828/scheduler.pt b/checkpoint-828/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..72ea129a1cc8fd7cab5742cb5e2005ea9a1219f9
--- /dev/null
+++ b/checkpoint-828/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5dd0e92d55cdd722e4de685551b7dc9c6e87bd7ff9ce03d5b98abc8046dc40d4
+size 1465
diff --git a/checkpoint-828/tokenizer.json b/checkpoint-828/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..8655fee00020e3140fc51416dac7cb0b9a8e4c45
--- /dev/null
+++ b/checkpoint-828/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ea8bdf68c3e7549a3fb4342523288ce628f6ab56a618f9a4dfb234a0b4d46a8
+size 15524476
diff --git a/checkpoint-828/tokenizer_config.json b/checkpoint-828/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..007b4fee54acc99b382393f6900ba96e673f8b49
--- /dev/null
+++ b/checkpoint-828/tokenizer_config.json
@@ -0,0 +1,12 @@
+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "is_local": false,
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "TokenizersBackend",
+  "unk_token": "<|endoftext|>"
+}
diff --git a/checkpoint-828/trainer_state.json b/checkpoint-828/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..263a7cc3a68a7d36c3a4c7a1d5bbf4de273d1980
--- /dev/null
+++ b/checkpoint-828/trainer_state.json
@@ -0,0 +1,194 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 828,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.36231884057971014,
+      "grad_norm": 11.473546981811523,
+      "learning_rate": 9.408212560386473e-06,
+      "loss": 2.518828125,
+      "step": 50
+    },
+    {
+      "epoch": 0.7246376811594203,
+      "grad_norm": 19.91433334350586,
+      "learning_rate": 8.804347826086957e-06,
+      "loss": 2.4250390625,
+      "step": 100
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5803093314170837,
+      "eval_runtime": 29.4101,
+      "eval_samples_per_second": 16.661,
+      "eval_steps_per_second": 2.108,
+      "step": 138
+    },
+    {
+      "epoch": 1.0869565217391304,
+      "grad_norm": 16.91636085510254,
+      "learning_rate": 8.20048309178744e-06,
+      "loss": 2.2990234375,
+      "step": 150
+    },
+    {
+      "epoch": 1.4492753623188406,
+      "grad_norm": 13.255877494812012,
+      "learning_rate": 7.596618357487924e-06,
+      "loss": 2.378984375,
+      "step": 200
+    },
+    {
+      "epoch": 1.8115942028985508,
+      "grad_norm": 18.71368980407715,
+      "learning_rate": 6.992753623188407e-06,
+      "loss": 2.15296875,
+      "step": 250
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5362404584884644,
+      "eval_runtime": 27.8676,
+      "eval_samples_per_second": 17.583,
+      "eval_steps_per_second": 2.225,
+      "step": 276
+    },
+    {
+      "epoch": 2.1739130434782608,
+      "grad_norm": 9.45117473602295,
+      "learning_rate": 6.3888888888888885e-06,
+      "loss": 2.009921875,
+      "step": 300
+    },
+    {
+      "epoch": 2.536231884057971,
+      "grad_norm": 8.40969467163086,
+      "learning_rate": 5.785024154589373e-06,
+      "loss": 2.07037109375,
+      "step": 350
+    },
+    {
+      "epoch": 2.898550724637681,
+      "grad_norm": 10.270116806030273,
+      "learning_rate": 5.181159420289855e-06,
+      "loss": 1.952861328125,
+      "step": 400
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.47528699040412903,
+      "eval_runtime": 27.7881,
+      "eval_samples_per_second": 17.633,
+      "eval_steps_per_second": 2.231,
+      "step": 414
+    },
+    {
+      "epoch": 3.260869565217391,
+      "grad_norm": 17.692113876342773,
+      "learning_rate": 4.5772946859903385e-06,
+      "loss": 1.79087890625,
+      "step": 450
+    },
+    {
+      "epoch": 3.6231884057971016,
+      "grad_norm": 13.763245582580566,
+      "learning_rate": 3.973429951690821e-06,
+      "loss": 1.650498046875,
+      "step": 500
+    },
+    {
+      "epoch": 3.9855072463768115,
+      "grad_norm": 33.43427276611328,
+      "learning_rate": 3.3695652173913045e-06,
+      "loss": 1.610087890625,
+      "step": 550
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.410451203584671,
+      "eval_runtime": 27.8228,
+      "eval_samples_per_second": 17.611,
+      "eval_steps_per_second": 2.228,
+      "step": 552
+    },
+    {
+      "epoch": 4.3478260869565215,
+      "grad_norm": 8.163750648498535,
+      "learning_rate": 2.7657004830917877e-06,
+      "loss": 1.44470703125,
+      "step": 600
+    },
+    {
+      "epoch": 4.710144927536232,
+      "grad_norm": 17.915592193603516,
+      "learning_rate": 2.1618357487922704e-06,
+      "loss": 1.423818359375,
+      "step": 650
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 0.3703005313873291,
+      "eval_runtime": 27.9318,
+      "eval_samples_per_second": 17.543,
+      "eval_steps_per_second": 2.22,
+      "step": 690
+    },
+    {
+      "epoch": 5.072463768115942,
+      "grad_norm": 22.16204071044922,
+      "learning_rate": 1.5579710144927536e-06,
+      "loss": 1.38701171875,
+      "step": 700
+    },
+    {
+      "epoch": 5.434782608695652,
+      "grad_norm": 23.789710998535156,
+      "learning_rate": 9.541062801932368e-07,
+      "loss": 1.331396484375,
+      "step": 750
+    },
+    {
+      "epoch": 5.797101449275362,
+      "grad_norm": 29.811561584472656,
+      "learning_rate": 3.5024154589371985e-07,
+      "loss": 1.2618359375,
+      "step": 800
+    },
+    {
+      "epoch": 6.0,
+      "eval_loss": 0.35662469267845154,
+      "eval_runtime": 27.9183,
+      "eval_samples_per_second": 17.551,
+      "eval_steps_per_second": 2.221,
+      "step": 828
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 828,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.9920558960889856e+16,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-828/training_args.bin b/checkpoint-828/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..41879da9e32eb23f5013e1a62452ec18a7e8995e
--- /dev/null
+++ b/checkpoint-828/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:033534501afcab0521199d3c0685a5e811f2a297a318bad85be0999d18aac32c
+size 5137
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..8655fee00020e3140fc51416dac7cb0b9a8e4c45
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ea8bdf68c3e7549a3fb4342523288ce628f6ab56a618f9a4dfb234a0b4d46a8
+size 15524476
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..007b4fee54acc99b382393f6900ba96e673f8b49
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,12 @@
+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "is_local": false,
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "TokenizersBackend",
+  "unk_token": "<|endoftext|>"
+}