diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..9ccb51c7c3f28720a9ca287b401244b61056aea7 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint-24/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-48/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-72/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-96/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..416b8c5a23fc5e2433a610a21af2de076125a03f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,159 @@
+---
+library_name: peft
+license: apache-2.0
+base_model: mistralai/Mistral-Small-24B-Base-2501
+tags:
+- generated_from_trainer
+datasets:
+- david-ar/synthetic-irc-data
+model-index:
+- name: outputs/public-irc-mistral-24b
+ results: []
+---
+
+
+
+[
](https://github.com/axolotl-ai-cloud/axolotl)
+See axolotl config
+
+axolotl version: `0.8.0.dev0`
+```yaml
+# Base model configuration
+base_model: mistralai/Mistral-Small-24B-Base-2501
+model_type: MistralForCausalLM
+tokenizer_type: AutoTokenizer
+trust_remote_code: true
+tokenizer_use_fast: true
+
+# Device mapping for multi-GPU
+device_map: "balanced"
+
+# Memory settings
+load_in_4bit: true
+load_in_8bit: false
+bf16: true
+low_cpu_mem_usage: true
+
+# Advanced optimizations
+flash_attention: true
+gradient_checkpointing: true
+
+# Dataset configuration
+datasets:
+ - path: david-ar/synthetic-irc-data
+ type: completion
+
+# Output directory
+output_dir: ./outputs/public-irc-mistral-24b
+val_set_size: 0.05 # 75 conversations for validation
+dataset_prepared_path: last_run_prepared
+
+# Sequence settings
+sequence_len: 4096
+sample_packing: true
+pad_to_sequence_len: true
+train_on_inputs: true
+eval_sample_packing: false
+
+# LoRA configuration
+adapter: lora
+lora_r: 128
+lora_alpha: 256
+lora_dropout: 0.1
+lora_target_modules:
+ - q_proj
+ - v_proj
+ - k_proj
+ - o_proj
+ - gate_proj
+ - down_proj
+ - up_proj
+
+# Training hyperparameters - adjusted for smaller dataset
+micro_batch_size: 1
+gradient_accumulation_steps: 16
+num_epochs: 4 # Increased from 2, but with careful monitoring
+optimizer: adamw_torch
+lr_scheduler: cosine
+learning_rate: 0.00008 # Same conservative LR
+weight_decay: 0.01
+warmup_ratio: 0.05
+
+# Performance monitoring
+group_by_length: true
+shuffle_merged_datasets: true
+include_tokens_per_second: true
+
+# Weights & Biases - public project
+wandb_project: public-irc-mistral-24b
+wandb_entity: davidar
+wandb_name: synthetic-irc-data
+wandb_log_model: "false"
+
+# Mistral model configuration
+is_mistral_derived_model: true
+
+# Early stopping
+load_best_model_at_end: true
+metric_for_best_model: "loss"
+greater_is_better: false
+
+```
+
+</details><br>
+
+# outputs/public-irc-mistral-24b
+
+This model is a fine-tuned version of [mistralai/Mistral-Small-24B-Base-2501](https://huggingface.co/mistralai/Mistral-Small-24B-Base-2501) on the david-ar/synthetic-irc-data dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.9871
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 8e-05
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 2
+- gradient_accumulation_steps: 16
+- total_train_batch_size: 32
+- total_eval_batch_size: 2
+- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 4
+- num_epochs: 4.0
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 0.9145 | 0.9746 | 24 | 0.9128 |
+| 0.6565 | 1.9746 | 48 | 0.8936 |
+| 0.4671 | 2.9746 | 72 | 0.9503 |
+| 0.3594 | 3.9746 | 96 | 0.9871 |
+
+
+### Framework versions
+
+- PEFT 0.14.0
+- Transformers 4.49.0
+- Pytorch 2.5.1+cu124
+- Datasets 3.2.0
+- Tokenizers 0.21.0
\ No newline at end of file
diff --git a/adapter_config.json b/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5605d559a119e214b580feac2cacba0cc13245d
--- /dev/null
+++ b/adapter_config.json
@@ -0,0 +1,37 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "mistralai/Mistral-Small-24B-Base-2501",
+ "bias": "none",
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 256,
+ "lora_bias": false,
+ "lora_dropout": 0.1,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 128,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "gate_proj",
+ "down_proj",
+ "o_proj",
+ "up_proj",
+ "v_proj",
+ "k_proj",
+ "q_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/adapter_model.safetensors b/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..641fd291dc50d65ba477a60184456450ccbba313
--- /dev/null
+++ b/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a40f8bf86a0de08eeb1d2dfa37d2cda343578fbdfd517283ea288c20ede8c267
+size 2957061448
diff --git a/checkpoint-24/README.md b/checkpoint-24/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f1f0b6764bb4d115aacb1db310b2f88aba10f832
--- /dev/null
+++ b/checkpoint-24/README.md
@@ -0,0 +1,202 @@
+---
+base_model: mistralai/Mistral-Small-24B-Base-2501
+library_name: peft
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.14.0
\ No newline at end of file
diff --git a/checkpoint-24/adapter_config.json b/checkpoint-24/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5605d559a119e214b580feac2cacba0cc13245d
--- /dev/null
+++ b/checkpoint-24/adapter_config.json
@@ -0,0 +1,37 @@
+{
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "mistralai/Mistral-Small-24B-Base-2501",
+ "bias": "none",
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 256,
+ "lora_bias": false,
+ "lora_dropout": 0.1,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 128,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "gate_proj",
+ "down_proj",
+ "o_proj",
+ "up_proj",
+ "v_proj",
+ "k_proj",
+ "q_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-24/adapter_model.safetensors b/checkpoint-24/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2effc0d76fe83ec6d02308f19c097b9c4bcd37a5
--- /dev/null
+++ b/checkpoint-24/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5074e95805f0b37f31131dba7d445de5db604b6c86bb6d443c2308929c07e4f5
+size 2957061448
diff --git a/checkpoint-24/optimizer.pt b/checkpoint-24/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..673c31a4cba7f0802cbc003fc0d464a64d556e83
--- /dev/null
+++ b/checkpoint-24/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65cce2c4fe04903b8c5a696968227c4243e865e5a912da5f0fc0e417cbd7823c
+size 5914447682
diff --git a/checkpoint-24/rng_state_0.pth b/checkpoint-24/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..25ec1e4990b19a468f6014e8e5c81afe76b31eae
--- /dev/null
+++ b/checkpoint-24/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d379b2b80d4023ddcadc1de68d5109043a0dc7054db9cfbb511c20faedddf7e
+size 14512
diff --git a/checkpoint-24/rng_state_1.pth b/checkpoint-24/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..216d67e09becc0b44aac775f810391984accffff
--- /dev/null
+++ b/checkpoint-24/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:861e7c8e76c728e9e164a4fc0ab62b8a92d4c749e4d7edc00ee247319dd19c19
+size 14512
diff --git a/checkpoint-24/scheduler.pt b/checkpoint-24/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f2e3ea72f730d67f6c20eaedc63f5082d40ac8e2
--- /dev/null
+++ b/checkpoint-24/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5cee88425e2280e0ac8f9decd347f92d7d0def25a62dfb80ad614ae265c032a0
+size 1064
diff --git a/checkpoint-24/special_tokens_map.json b/checkpoint-24/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..29107fe277e638146951d661c799e6209982a4ee
--- /dev/null
+++ b/checkpoint-24/special_tokens_map.json
@@ -0,0 +1,1026 @@
+{
+ "additional_special_tokens": [
+ "",
+ "",
+ "",
+ "[INST]",
+ "[/INST]",
+ "[AVAILABLE_TOOLS]",
+ "[/AVAILABLE_TOOLS]",
+ "[TOOL_RESULTS]",
+ "[/TOOL_RESULTS]",
+ "[TOOL_CALLS]",
+ "[IMG]",
+ "",
+ "[IMG_BREAK]",
+ "[IMG_END]",
+ "[PREFIX]",
+ "[MIDDLE]",
+ "[SUFFIX]",
+ "[SYSTEM_PROMPT]",
+ "[/SYSTEM_PROMPT]",
+ "[TOOL_CONTENT]",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "