diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5f1291caa259e5f4aa8c4d78e98a7188c319c882
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,79 @@
+base_model: mistralai/Mistral-7B-v0.1  # axolotl QLoRA fine-tuning config; this is the base checkpoint being adapted
+model_type: MistralForCausalLM
+tokenizer_type: LlamaTokenizer  # run log reports BOS <s>, EOS </s>, UNK <unk>, and PAD reusing </s>
+
+load_in_8bit: false
+load_in_4bit: true  # 4-bit loading of the base model, paired with the qlora adapter below
+strict: false
+
+datasets:
+  - path: caffeinatedcherrychic/cidds-agg-balanced
+    type: alpaca  # presumably alpaca-style instruction/input/output records; verify against the dataset
+dataset_prepared_path: last_run_prepared  # tokenized dataset is cached under this directory (per run log)
+val_set_size: 0.1  # presumably the fraction held out for evaluation; confirm against axolotl docs
+output_dir: ./qlora-out  # run log shows the adapter config being pre-saved here
+
+adapter: qlora
+lora_model_dir:  # empty (null): no existing adapter directory is supplied
+
+sequence_len: 256
+sample_packing: false
+pad_to_sequence_len: true
+
+lora_r: 32
+lora_alpha: 64  # alpha / r = 2
+lora_dropout: 0.05
+lora_target_linear: true  # NOTE(review): run log's "found linear modules" lists the same seven modules named below, so the two settings overlap
+lora_fan_in_fan_out:  # empty (null)
+lora_target_modules:  # explicit projection layers to adapt
+  - gate_proj
+  - down_proj
+  - up_proj
+  - q_proj
+  - v_proj
+  - k_proj
+  - o_proj
+
+wandb_project:  # every wandb_* key in this group is empty (null)
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 4  # with micro_batch_size 2 this gives 4 x 2 = 8 samples per optimizer step per process
+micro_batch_size: 2
+num_epochs: 5
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002  # peak value; the run log reaches 0.0002 at step 10 and decays afterwards
+
+train_on_inputs: false  # presumably masks prompt tokens from the loss (log: 259 supervised of 21468 total tokens); confirm
+group_by_length: false
+bf16: true
+fp16: false
+tf32: false
+
+gradient_checkpointing: true
+early_stopping_patience:  # empty (null)
+resume_from_checkpoint:  # empty (null)
+local_rank:  # empty (null)
+logging_steps: 1  # the run log contains one loss entry per training step
+xformers_attention:  # empty (null)
+flash_attention: true  # run log: "converting modules to torch.bfloat16 for flash attention"
+
+loss_watchdog_threshold: 5.0  # NOTE(review): first logged training losses (6.64, 7.06) exceed this value; confirm the threshold/patience pair is intended
+loss_watchdog_patience: 3
+
+max_steps: 500  # NOTE(review): run log reports "Maximum number of steps set at 62", so this cap did not bind for that run
+warmup_steps: 10  # logged learning rate climbs from 2e-05 to 0.0002 over the first 10 steps
+evals_per_epoch: 4  # the run log shows an evaluation roughly every 4 training steps
+eval_table_size:  # empty (null)
+eval_max_new_tokens: 1  # NOTE(review): presumably caps eval-time generation at one token; confirm this is intended
+saves_per_epoch: 1
+debug:  # empty (null)
+deepspeed:  # empty (null)
+weight_decay: 0.001
+fsdp:  # empty (null)
+fsdp_config:  # empty (null)
+special_tokens:  # empty (null): no special-token overrides are given
+
diff --git a/dmog/axolotl-test-outputs/test.output b/dmog/axolotl-test-outputs/test.output
new file mode 100644
index 0000000000000000000000000000000000000000..349b80c73b3597f8c3f587eccbe02ad6a0e2a966
--- /dev/null
+++ b/dmog/axolotl-test-outputs/test.output
@@ -0,0 +1,5 @@
+Hello, dhruti
+#######
+Finetuning
+/mnt/scratch/users/dhd2000/ft14
+#######
diff --git a/dmog/job.error b/dmog/job.error
new file mode 100644
index 0000000000000000000000000000000000000000..edf84cacd71f51b8df914b524feaef7613361c1a
--- /dev/null
+++ b/dmog/job.error
@@ -0,0 +1,162 @@
+[38;5;127mmpi[0m/[38;5;172mopenmpi[0m/[38;5;67m4.1.5[0m/[38;5;68mgcc-4.8.5[0m
+ | -- [38;5;127mlibs[0m/[38;5;172mgcc[0m/[38;5;67msystem[0m
+ |    * --> [0;32mOK[0m
+ |
+ [0;32mOK[0m
+             [38;5;127mmpi[0m/[38;5;172mopenmpi[0m/[38;5;67m4.1.5[0m/[38;5;68mgcc-4.8.5[0m ... UNLOADING --> [0;32mOK[0m
+                         [38;5;127mlibs[0m/[38;5;172mgcc[0m/[38;5;67msystem[0m ... UNLOADING --> [0;32mOK[0m
+The following values were not passed to `accelerate launch` and had defaults used instead:
+	`--num_processes` was set to a value of `1`
+	`--num_machines` was set to a value of `1`
+	`--mixed_precision` was set to a value of `'no'`
+	`--dynamo_backend` was set to a value of `'no'`
+To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.
+Tokenizing Prompts (num_proc=64):   0%|          | 0/111 [00:00<?, ? examples/s]Tokenizing Prompts (num_proc=64):   2%|▏         | 2/111 [00:00<00:20,  5.25 examples/s]Tokenizing Prompts (num_proc=64):   5%|▌         | 6/111 [00:00<00:07, 13.91 examples/s]Tokenizing Prompts (num_proc=64):   9%|▉         | 10/111 [00:00<00:05, 17.30 examples/s]Tokenizing Prompts (num_proc=64):  13%|█▎        | 14/111 [00:00<00:04, 20.80 examples/s]Tokenizing Prompts (num_proc=64):  18%|█▊        | 20/111 [00:00<00:03, 29.55 examples/s]Tokenizing Prompts (num_proc=64):  22%|██▏       | 24/111 [00:01<00:02, 30.95 examples/s]Tokenizing Prompts (num_proc=64):  25%|██▌       | 28/111 [00:01<00:02, 32.54 examples/s]Tokenizing Prompts (num_proc=64):  29%|██▉       | 32/111 [00:01<00:02, 32.69 examples/s]Tokenizing Prompts (num_proc=64):  32%|███▏      | 36/111 [00:01<00:02, 33.43 examples/s]Tokenizing Prompts (num_proc=64):  36%|███▌      | 40/111 [00:01<00:02, 33.16 examples/s]Tokenizing Prompts (num_proc=64):  40%|███▉      | 44/111 [00:01<00:01, 34.80 examples/s]Tokenizing Prompts (num_proc=64):  58%|█████▊    | 64/111 [00:01<00:00, 78.99 examples/s]Tokenizing Prompts (num_proc=64):  79%|███████▉  | 88/111 [00:01<00:00, 122.86 examples/s]Tokenizing Prompts (num_proc=64):  93%|█████████▎| 103/111 [00:01<00:00, 127.47 examples/s]Tokenizing Prompts (num_proc=64): 100%|██████████| 111/111 [00:02<00:00, 53.99 examples/s] 
+Dropping Long Sequences (num_proc=64):   0%|          | 0/111 [00:00<?, ? examples/s]Dropping Long Sequences (num_proc=64):   2%|▏         | 2/111 [00:00<00:17,  6.28 examples/s]Dropping Long Sequences (num_proc=64):   7%|▋         | 8/111 [00:00<00:05, 20.12 examples/s]Dropping Long Sequences (num_proc=64):  22%|██▏       | 24/111 [00:00<00:01, 58.49 examples/s]Dropping Long Sequences (num_proc=64):  76%|███████▌  | 84/111 [00:00<00:00, 211.25 examples/s]Dropping Long Sequences (num_proc=64): 100%|██████████| 111/111 [00:00<00:00, 136.77 examples/s]
+Saving the dataset (0/1 shards):   0%|          | 0/111 [00:00<?, ? examples/s]Saving the dataset (1/1 shards): 100%|██████████| 111/111 [00:00<00:00, 8411.95 examples/s]Saving the dataset (1/1 shards): 100%|██████████| 111/111 [00:00<00:00, 8309.70 examples/s]
+Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]Loading checkpoint shards:  50%|█████     | 1/2 [00:09<00:09,  9.93s/it]Loading checkpoint shards: 100%|██████████| 2/2 [00:14<00:00,  6.77s/it]Loading checkpoint shards: 100%|██████████| 2/2 [00:14<00:00,  7.24s/it]
+  0%|          | 0/62 [00:00<?, ?it/s]  2%|▏         | 1/62 [00:03<03:15,  3.20s/it]                                                2%|▏         | 1/62 [00:03<03:15,  3.20s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.95it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.32it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.47it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  5.07it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.86it/s]                                              
+                                               2%|▏         | 1/62 [00:04<03:15,  3.20s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.86it/s]
+                                               3%|▎         | 2/62 [00:07<03:49,  3.82s/it]                                                3%|▎         | 2/62 [00:07<03:49,  3.82s/it]  5%|▍         | 3/62 [00:10<03:20,  3.40s/it]                                                5%|▍         | 3/62 [00:10<03:20,  3.40s/it]  6%|▋         | 4/62 [00:13<03:05,  3.20s/it]                                                6%|▋         | 4/62 [00:13<03:05,  3.20s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.91it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.29it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.45it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  5.06it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.84it/s]                                              
+                                               6%|▋         | 4/62 [00:14<03:05,  3.20s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.84it/s]
+                                               8%|▊         | 5/62 [00:17<03:24,  3.58s/it]                                                8%|▊         | 5/62 [00:17<03:24,  3.58s/it] 10%|▉         | 6/62 [00:20<03:07,  3.35s/it]                                               10%|▉         | 6/62 [00:20<03:07,  3.35s/it] 11%|█▏        | 7/62 [00:23<02:56,  3.20s/it]                                               11%|█▏        | 7/62 [00:23<02:56,  3.20s/it] 13%|█▎        | 8/62 [00:26<02:47,  3.10s/it]                                               13%|█▎        | 8/62 [00:26<02:47,  3.10s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.92it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.28it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.44it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  5.04it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.83it/s]                                              
+                                              13%|█▎        | 8/62 [00:27<02:47,  3.10s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.83it/s]
+                                              15%|█▍        | 9/62 [00:30<03:03,  3.47s/it]                                               15%|█▍        | 9/62 [00:30<03:03,  3.47s/it] 16%|█▌        | 10/62 [00:33<02:51,  3.29s/it]                                                16%|█▌        | 10/62 [00:33<02:51,  3.29s/it] 18%|█▊        | 11/62 [00:36<02:41,  3.17s/it]                                                18%|█▊        | 11/62 [00:36<02:41,  3.17s/it] 19%|█▉        | 12/62 [00:39<02:34,  3.09s/it]                                                19%|█▉        | 12/62 [00:39<02:34,  3.09s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.89it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.27it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.43it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  5.03it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.82it/s]                                               
+                                              19%|█▉        | 12/62 [00:40<02:34,  3.09s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.82it/s]
+                                              21%|██        | 13/62 [00:43<02:46,  3.40s/it]                                                21%|██        | 13/62 [00:43<02:46,  3.40s/it] 23%|██▎       | 14/62 [00:47<02:49,  3.53s/it]                                                23%|██▎       | 14/62 [00:47<02:49,  3.53s/it] 24%|██▍       | 15/62 [00:50<02:37,  3.35s/it]                                                24%|██▍       | 15/62 [00:50<02:37,  3.35s/it] 26%|██▌       | 16/62 [00:52<02:27,  3.21s/it]                                                26%|██▌       | 16/62 [00:52<02:27,  3.21s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.86it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.24it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.41it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  5.02it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.81it/s]                                               
+                                              26%|██▌       | 16/62 [00:54<02:27,  3.21s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.81it/s]
+                                              27%|██▋       | 17/62 [00:57<02:39,  3.54s/it]                                                27%|██▋       | 17/62 [00:57<02:39,  3.54s/it] 29%|██▉       | 18/62 [01:00<02:27,  3.35s/it]                                                29%|██▉       | 18/62 [01:00<02:27,  3.35s/it] 31%|███       | 19/62 [01:03<02:18,  3.22s/it]                                                31%|███       | 19/62 [01:03<02:18,  3.22s/it] 32%|███▏      | 20/62 [01:05<02:11,  3.13s/it]                                                32%|███▏      | 20/62 [01:05<02:11,  3.13s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.83it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.24it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.41it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  5.01it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.80it/s]                                               
+                                              32%|███▏      | 20/62 [01:07<02:11,  3.13s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.80it/s]
+                                              34%|███▍      | 21/62 [01:10<02:22,  3.48s/it]                                                34%|███▍      | 21/62 [01:10<02:22,  3.48s/it] 35%|███▌      | 22/62 [01:13<02:12,  3.31s/it]                                                35%|███▌      | 22/62 [01:13<02:12,  3.31s/it] 37%|███▋      | 23/62 [01:16<02:04,  3.19s/it]                                                37%|███▋      | 23/62 [01:16<02:04,  3.19s/it] 39%|███▊      | 24/62 [01:18<01:58,  3.11s/it]                                                39%|███▊      | 24/62 [01:19<01:58,  3.11s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.83it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.23it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.39it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  5.00it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.79it/s]                                               
+                                              39%|███▊      | 24/62 [01:20<01:58,  3.11s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.79it/s]
+                                              40%|████      | 25/62 [01:23<02:06,  3.42s/it]                                                40%|████      | 25/62 [01:23<02:06,  3.42s/it] 42%|████▏     | 26/62 [01:26<01:57,  3.27s/it]                                                42%|████▏     | 26/62 [01:26<01:57,  3.27s/it] 44%|████▎     | 27/62 [01:29<01:58,  3.39s/it]                                                44%|████▎     | 27/62 [01:29<01:58,  3.39s/it] 45%|████▌     | 28/62 [01:32<01:50,  3.25s/it]                                                45%|████▌     | 28/62 [01:32<01:50,  3.25s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.84it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.23it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.40it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  5.00it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.79it/s]                                               
+                                              45%|████▌     | 28/62 [01:34<01:50,  3.25s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.79it/s]
+                                              47%|████▋     | 29/62 [01:36<01:57,  3.57s/it]                                                47%|████▋     | 29/62 [01:36<01:57,  3.57s/it] 48%|████▊     | 30/62 [01:39<01:47,  3.37s/it]                                                48%|████▊     | 30/62 [01:39<01:47,  3.37s/it] 50%|█████     | 31/62 [01:42<01:40,  3.24s/it]                                                50%|█████     | 31/62 [01:42<01:40,  3.24s/it] 52%|█████▏    | 32/62 [01:45<01:34,  3.14s/it]                                                52%|█████▏    | 32/62 [01:45<01:34,  3.14s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.81it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.21it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.39it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  5.00it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.79it/s]                                               
+                                              52%|█████▏    | 32/62 [01:47<01:34,  3.14s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.79it/s]
+                                              53%|█████▎    | 33/62 [01:50<01:41,  3.49s/it]                                                53%|█████▎    | 33/62 [01:50<01:41,  3.49s/it] 55%|█████▍    | 34/62 [01:52<01:32,  3.32s/it]                                                55%|█████▍    | 34/62 [01:52<01:32,  3.32s/it] 56%|█████▋    | 35/62 [01:55<01:26,  3.20s/it]                                                56%|█████▋    | 35/62 [01:55<01:26,  3.20s/it] 58%|█████▊    | 36/62 [01:58<01:20,  3.11s/it]                                                58%|█████▊    | 36/62 [01:58<01:20,  3.11s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.82it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.22it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.39it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  5.00it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.78it/s]                                               
+                                              58%|█████▊    | 36/62 [02:00<01:20,  3.11s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.78it/s]
+                                              60%|█████▉    | 37/62 [02:03<01:26,  3.47s/it]                                                60%|█████▉    | 37/62 [02:03<01:26,  3.47s/it] 61%|██████▏   | 38/62 [02:05<01:18,  3.26s/it]                                                61%|██████▏   | 38/62 [02:05<01:18,  3.26s/it] 63%|██████▎   | 39/62 [02:08<01:12,  3.15s/it]                                                63%|██████▎   | 39/62 [02:08<01:12,  3.15s/it] 65%|██████▍   | 40/62 [02:12<01:12,  3.32s/it]                                                65%|██████▍   | 40/62 [02:12<01:12,  3.32s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.84it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.23it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.40it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  5.00it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.79it/s]                                               
+                                              65%|██████▍   | 40/62 [02:13<01:12,  3.32s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.79it/s]
+                                              66%|██████▌   | 41/62 [02:16<01:15,  3.61s/it]                                                66%|██████▌   | 41/62 [02:16<01:15,  3.61s/it] 68%|██████▊   | 42/62 [02:19<01:08,  3.40s/it]                                                68%|██████▊   | 42/62 [02:19<01:08,  3.40s/it] 69%|██████▉   | 43/62 [02:22<01:01,  3.26s/it]                                                69%|██████▉   | 43/62 [02:22<01:01,  3.26s/it] 71%|███████   | 44/62 [02:25<00:56,  3.16s/it]                                                71%|███████   | 44/62 [02:25<00:56,  3.16s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.80it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.21it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.38it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  4.99it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.78it/s]                                               
+                                              71%|███████   | 44/62 [02:26<00:56,  3.16s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.78it/s]
+                                              73%|███████▎  | 45/62 [02:29<00:59,  3.50s/it]                                                73%|███████▎  | 45/62 [02:29<00:59,  3.50s/it] 74%|███████▍  | 46/62 [02:32<00:53,  3.33s/it]                                                74%|███████▍  | 46/62 [02:32<00:53,  3.33s/it] 76%|███████▌  | 47/62 [02:35<00:48,  3.20s/it]                                                76%|███████▌  | 47/62 [02:35<00:48,  3.20s/it] 77%|███████▋  | 48/62 [02:38<00:43,  3.12s/it]                                                77%|███████▋  | 48/62 [02:38<00:43,  3.12s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.80it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.21it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.38it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  4.98it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.77it/s]                                               
+                                              77%|███████▋  | 48/62 [02:39<00:43,  3.12s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.77it/s]
+                                              79%|███████▉  | 49/62 [02:42<00:45,  3.48s/it]                                                79%|███████▉  | 49/62 [02:42<00:45,  3.48s/it] 81%|████████  | 50/62 [02:45<00:39,  3.26s/it]                                                81%|████████  | 50/62 [02:45<00:39,  3.26s/it] 82%|████████▏ | 51/62 [02:48<00:34,  3.16s/it]                                                82%|████████▏ | 51/62 [02:48<00:34,  3.16s/it] 84%|████████▍ | 52/62 [02:51<00:30,  3.09s/it]                                                84%|████████▍ | 52/62 [02:51<00:30,  3.09s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.78it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.18it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.37it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  4.98it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.77it/s]                                               
+                                              84%|████████▍ | 52/62 [02:52<00:30,  3.09s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.77it/s]
+                                              85%|████████▌ | 53/62 [02:56<00:33,  3.74s/it]                                                85%|████████▌ | 53/62 [02:56<00:33,  3.74s/it] 87%|████████▋ | 54/62 [02:59<00:27,  3.49s/it]                                                87%|████████▋ | 54/62 [02:59<00:27,  3.49s/it] 89%|████████▊ | 55/62 [03:02<00:23,  3.32s/it]                                                89%|████████▊ | 55/62 [03:02<00:23,  3.32s/it] 90%|█████████ | 56/62 [03:05<00:19,  3.20s/it]                                                90%|█████████ | 56/62 [03:05<00:19,  3.20s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.79it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.19it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.37it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  4.99it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.78it/s]                                               
+                                              90%|█████████ | 56/62 [03:06<00:19,  3.20s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.78it/s]
+                                              92%|█████████▏| 57/62 [03:09<00:17,  3.53s/it]                                                92%|█████████▏| 57/62 [03:09<00:17,  3.53s/it] 94%|█████████▎| 58/62 [03:12<00:13,  3.35s/it]                                                94%|█████████▎| 58/62 [03:12<00:13,  3.35s/it] 95%|█████████▌| 59/62 [03:15<00:09,  3.22s/it]                                                95%|█████████▌| 59/62 [03:15<00:09,  3.22s/it] 97%|█████████▋| 60/62 [03:18<00:06,  3.13s/it]                                                97%|█████████▋| 60/62 [03:18<00:06,  3.13s/it]
+  0%|          | 0/6 [00:00<?, ?it/s]
+ 33%|███▎      | 2/6 [00:00<00:00,  8.81it/s]
+ 50%|█████     | 3/6 [00:00<00:00,  6.22it/s]
+ 67%|██████▋   | 4/6 [00:00<00:00,  5.39it/s]
+ 83%|████████▎ | 5/6 [00:00<00:00,  4.99it/s]
+100%|██████████| 6/6 [00:01<00:00,  4.77it/s]                                               
+                                              97%|█████████▋| 60/62 [03:19<00:06,  3.13s/it]
+100%|██████████| 6/6 [00:01<00:00,  4.77it/s]
+                                              98%|█████████▊| 61/62 [03:22<00:03,  3.49s/it]                                                98%|█████████▊| 61/62 [03:22<00:03,  3.49s/it]100%|██████████| 62/62 [03:25<00:00,  3.32s/it]                                               100%|██████████| 62/62 [03:25<00:00,  3.32s/it]                                               100%|██████████| 62/62 [03:25<00:00,  3.32s/it]100%|██████████| 62/62 [03:25<00:00,  3.32s/it]
diff --git a/dmog/job.output b/dmog/job.output
new file mode 100644
index 0000000000000000000000000000000000000000..bf28bf8aff1ba98e813df0bbb5adc0332263cf06
--- /dev/null
+++ b/dmog/job.output
@@ -0,0 +1,131 @@
+Your results will be stored in: /mnt/scratch/users/dhd2000/ft14/dmog/axolotl-test-outputs
+Executing job commands, current working directory is /mnt/scratch/users/dhd2000/ft14/dmog
+/mnt/scratch/users/dhd2000/ft14
+[2024-04-09 08:29:06,912] [INFO] [datasets.<module>:58] [PID:30736] PyTorch version 2.1.2 available.
+[2024-04-09 08:29:08,482] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+[2024-04-09 08:29:10,330] [INFO] [axolotl.normalize_config:178] [PID:30736] [RANK:0] GPU memory usage baseline: 0.000GB (+0.640GB misc)[39m
+                                 dP            dP   dP 
+                                 88            88   88 
+      .d8888b. dP.  .dP .d8888b. 88 .d8888b. d8888P 88 
+      88'  `88  `8bd8'  88'  `88 88 88'  `88   88   88 
+      88.  .88  .d88b.  88.  .88 88 88.  .88   88   88 
+      `88888P8 dP'  `dP `88888P' dP `88888P'   dP   dP 
+                                                       
+                                                       
+
+[2024-04-09 08:29:10,707] [DEBUG] [axolotl.load_tokenizer:245] [PID:30736] [RANK:0] EOS: 2 / </s>[39m
+[2024-04-09 08:29:10,708] [DEBUG] [axolotl.load_tokenizer:246] [PID:30736] [RANK:0] BOS: 1 / <s>[39m
+[2024-04-09 08:29:10,708] [DEBUG] [axolotl.load_tokenizer:247] [PID:30736] [RANK:0] PAD: 2 / </s>[39m
+[2024-04-09 08:29:10,708] [DEBUG] [axolotl.load_tokenizer:248] [PID:30736] [RANK:0] UNK: 0 / <unk>[39m
+[2024-04-09 08:29:10,708] [INFO] [axolotl.load_tokenizer:259] [PID:30736] [RANK:0] No Chat template selected. Consider adding a chat template for easier inference.[39m
+[2024-04-09 08:29:10,708] [INFO] [axolotl.load_tokenized_prepared_datasets:191] [PID:30736] [RANK:0] Unable to find prepared dataset in last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e[39m
+[2024-04-09 08:29:10,708] [INFO] [axolotl.load_tokenized_prepared_datasets:192] [PID:30736] [RANK:0] Loading raw datasets...[39m
+[33m[2024-04-09 08:29:10,709] [WARNING] [axolotl.load_tokenized_prepared_datasets:194] [PID:30736] [RANK:0] Processing datasets during training can lead to VRAM instability. Please pre-process your dataset.[39m
+[2024-04-09 08:29:10,709] [INFO] [axolotl.load_tokenized_prepared_datasets:201] [PID:30736] [RANK:0] No seed provided, using default seed of 42[39m
+[2024-04-09 08:29:17,092] [INFO] [axolotl.load_tokenized_prepared_datasets:414] [PID:30736] [RANK:0] merging datasets[39m
+[2024-04-09 08:29:17,096] [INFO] [axolotl.log:61] [PID:30736] [RANK:0] dropping attention_mask column[39m
+[2024-04-09 08:29:18,698] [INFO] [axolotl.load_tokenized_prepared_datasets:424] [PID:30736] [RANK:0] Saving merged prepared dataset to disk... last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e[39m
+[2024-04-09 08:29:18,755] [DEBUG] [axolotl.log:61] [PID:30736] [RANK:0] total_num_tokens: 21468[39m
+[2024-04-09 08:29:18,756] [DEBUG] [axolotl.log:61] [PID:30736] [RANK:0] `total_supervised_tokens: 259`[39m
+[2024-04-09 08:29:18,756] [DEBUG] [axolotl.log:61] [PID:30736] [RANK:0] total_num_steps: 62[39m
+[2024-04-09 08:29:18,756] [INFO] [axolotl.prepare_dataset:124] [PID:30736] [RANK:0] Maximum number of steps set at 62[39m
+[2024-04-09 08:29:18,759] [DEBUG] [axolotl.train.log:61] [PID:30736] [RANK:0] loading tokenizer... mistralai/Mistral-7B-v0.1[39m
+[2024-04-09 08:29:19,028] [DEBUG] [axolotl.load_tokenizer:245] [PID:30736] [RANK:0] EOS: 2 / </s>[39m
+[2024-04-09 08:29:19,028] [DEBUG] [axolotl.load_tokenizer:246] [PID:30736] [RANK:0] BOS: 1 / <s>[39m
+[2024-04-09 08:29:19,028] [DEBUG] [axolotl.load_tokenizer:247] [PID:30736] [RANK:0] PAD: 2 / </s>[39m
+[2024-04-09 08:29:19,029] [DEBUG] [axolotl.load_tokenizer:248] [PID:30736] [RANK:0] UNK: 0 / <unk>[39m
+[2024-04-09 08:29:19,029] [INFO] [axolotl.load_tokenizer:259] [PID:30736] [RANK:0] No Chat template selected. Consider adding a chat template for easier inference.[39m
+[2024-04-09 08:29:19,029] [DEBUG] [axolotl.train.log:61] [PID:30736] [RANK:0] loading model and peft_config...[39m
+[2024-04-09 08:29:35,702] [INFO] [axolotl.load_model:660] [PID:30736] [RANK:0] GPU memory usage after model load: 4.342GB (+0.138GB cache, +0.942GB misc)[39m
+[2024-04-09 08:29:35,711] [INFO] [axolotl.load_model:701] [PID:30736] [RANK:0] converting PEFT model w/ prepare_model_for_kbit_training[39m
+[2024-04-09 08:29:35,713] [INFO] [axolotl.load_model:710] [PID:30736] [RANK:0] converting modules to torch.bfloat16 for flash attention[39m
+[2024-04-09 08:29:35,715] [INFO] [axolotl.load_lora:825] [PID:30736] [RANK:0] found linear modules: ['up_proj', 'q_proj', 'k_proj', 'gate_proj', 'down_proj', 'o_proj', 'v_proj'][39m
+trainable params: 83,886,080 || all params: 7,325,618,176 || trainable%: 1.1451058188485088
+[2024-04-09 08:29:36,348] [INFO] [axolotl.load_model:750] [PID:30736] [RANK:0] GPU memory usage after adapters: 4.670GB (+0.935GB cache, +0.942GB misc)[39m
+[2024-04-09 08:29:36,446] [INFO] [axolotl.train.log:61] [PID:30736] [RANK:0] Pre-saving adapter config to ./qlora-out[39m
+[2024-04-09 08:29:36,459] [INFO] [axolotl.train.log:61] [PID:30736] [RANK:0] Starting trainer...[39m
+{'loss': 6.6367, 'grad_norm': 102.28898620605469, 'learning_rate': 2e-05, 'epoch': 0.08}
+{'eval_loss': 7.300913333892822, 'eval_runtime': 1.3523, 'eval_samples_per_second': 8.873, 'eval_steps_per_second': 4.437, 'epoch': 0.08}
+[2024-04-09 08:29:44,573] [INFO] [axolotl.callbacks.on_step_end:123] [PID:30736] [RANK:0] GPU memory usage while training: 4.843GB (+1.177GB cache, +0.965GB misc)[39m
+{'loss': 7.0616, 'grad_norm': 103.4541015625, 'learning_rate': 4e-05, 'epoch': 0.16}
+{'loss': 4.686, 'grad_norm': 67.47515869140625, 'learning_rate': 6e-05, 'epoch': 0.24}
+{'loss': 2.3866, 'grad_norm': 72.36919403076172, 'learning_rate': 8e-05, 'epoch': 0.32}
+{'eval_loss': 0.7137572169303894, 'eval_runtime': 1.3532, 'eval_samples_per_second': 8.868, 'eval_steps_per_second': 4.434, 'epoch': 0.32}
+{'loss': 0.6844, 'grad_norm': 16.83085060119629, 'learning_rate': 0.0001, 'epoch': 0.4}
+{'loss': 0.914, 'grad_norm': 25.897714614868164, 'learning_rate': 0.00012, 'epoch': 0.48}
+{'loss': 0.63, 'grad_norm': 18.89151382446289, 'learning_rate': 0.00014, 'epoch': 0.56}
+{'loss': 0.948, 'grad_norm': 27.15555763244629, 'learning_rate': 0.00016, 'epoch': 0.64}
+{'eval_loss': 1.0445994138717651, 'eval_runtime': 1.356, 'eval_samples_per_second': 8.85, 'eval_steps_per_second': 4.425, 'epoch': 0.64}
+{'loss': 1.0285, 'grad_norm': 20.812381744384766, 'learning_rate': 0.00018, 'epoch': 0.72}
+{'loss': 1.3756, 'grad_norm': 56.3886604309082, 'learning_rate': 0.0002, 'epoch': 0.8}
+{'loss': 0.5178, 'grad_norm': 6.24803352355957, 'learning_rate': 0.00019981755542233177, 'epoch': 0.88}
+{'loss': 0.6822, 'grad_norm': 8.379430770874023, 'learning_rate': 0.0001992708874098054, 'epoch': 0.96}
+{'eval_loss': 1.3959709405899048, 'eval_runtime': 1.3583, 'eval_samples_per_second': 8.835, 'eval_steps_per_second': 4.417, 'epoch': 0.96}
+{'loss': 1.3762, 'grad_norm': 20.744348526000977, 'learning_rate': 0.00019836199069471437, 'epoch': 1.04}
+{'loss': 0.5248, 'grad_norm': 4.800480842590332, 'learning_rate': 0.0001970941817426052, 'epoch': 1.12}
+{'loss': 0.8094, 'grad_norm': 11.284302711486816, 'learning_rate': 0.00019547208665085457, 'epoch': 1.2}
+{'loss': 0.5222, 'grad_norm': 5.787976264953613, 'learning_rate': 0.0001935016242685415, 'epoch': 1.28}
+{'eval_loss': 0.9023411870002747, 'eval_runtime': 1.3623, 'eval_samples_per_second': 8.808, 'eval_steps_per_second': 4.404, 'epoch': 1.28}
+{'loss': 0.8027, 'grad_norm': 21.48629379272461, 'learning_rate': 0.00019118998459920902, 'epoch': 1.36}
+{'loss': 1.7772, 'grad_norm': 38.0982666015625, 'learning_rate': 0.000188545602565321, 'epoch': 1.44}
+{'loss': 0.7737, 'grad_norm': 10.824837684631348, 'learning_rate': 0.00018557812723014476, 'epoch': 1.52}
+{'loss': 0.534, 'grad_norm': 9.1353120803833, 'learning_rate': 0.00018229838658936564, 'epoch': 1.6}
+{'eval_loss': 0.4847445785999298, 'eval_runtime': 1.3637, 'eval_samples_per_second': 8.799, 'eval_steps_per_second': 4.4, 'epoch': 1.6}
+{'loss': 0.3201, 'grad_norm': 3.8411033153533936, 'learning_rate': 0.00017871834806090501, 'epoch': 1.68}
+{'loss': 2.2541, 'grad_norm': 23.888507843017578, 'learning_rate': 0.00017485107481711012, 'epoch': 1.76}
+{'loss': 0.8177, 'grad_norm': 8.5956392288208, 'learning_rate': 0.00017071067811865476, 'epoch': 1.84}
+{'loss': 0.4624, 'grad_norm': 3.825141191482544, 'learning_rate': 0.00016631226582407952, 'epoch': 1.92}
+{'eval_loss': 0.5740255117416382, 'eval_runtime': 1.3655, 'eval_samples_per_second': 8.788, 'eval_steps_per_second': 4.394, 'epoch': 1.92}
+{'loss': 0.3714, 'grad_norm': 3.558993101119995, 'learning_rate': 0.00016167188726285434, 'epoch': 2.0}
+{'loss': 0.6562, 'grad_norm': 11.759211540222168, 'learning_rate': 0.00015680647467311557, 'epoch': 2.08}
+{'loss': 1.5141, 'grad_norm': 96.2179183959961, 'learning_rate': 0.00015173378141776568, 'epoch': 2.16}
+{'loss': 0.7753, 'grad_norm': 31.022045135498047, 'learning_rate': 0.00014647231720437686, 'epoch': 2.24}
+{'eval_loss': 0.3771994113922119, 'eval_runtime': 1.3676, 'eval_samples_per_second': 8.775, 'eval_steps_per_second': 4.387, 'epoch': 2.24}
+{'loss': 0.2649, 'grad_norm': 3.5004501342773438, 'learning_rate': 0.0001410412805452757, 'epoch': 2.32}
+{'loss': 0.171, 'grad_norm': 5.16464376449585, 'learning_rate': 0.00013546048870425356, 'epoch': 2.4}
+{'loss': 0.9172, 'grad_norm': 25.634010314941406, 'learning_rate': 0.00012975030538552032, 'epoch': 2.48}
+{'loss': 0.3324, 'grad_norm': 7.102908134460449, 'learning_rate': 0.0001239315664287558, 'epoch': 2.56}
+{'eval_loss': 0.29374203085899353, 'eval_runtime': 1.3678, 'eval_samples_per_second': 8.773, 'eval_steps_per_second': 4.387, 'epoch': 2.56}
+{'loss': 0.4932, 'grad_norm': 6.236325263977051, 'learning_rate': 0.0001180255037813906, 'epoch': 2.64}
+{'loss': 0.1284, 'grad_norm': 4.445058345794678, 'learning_rate': 0.0001120536680255323, 'epoch': 2.72}
+{'loss': 0.1547, 'grad_norm': 6.94170618057251, 'learning_rate': 0.00010603784974222861, 'epoch': 2.8}
+{'loss': 0.1973, 'grad_norm': 5.656033039093018, 'learning_rate': 0.0001, 'epoch': 2.88}
+{'eval_loss': 0.5674905180931091, 'eval_runtime': 1.3681, 'eval_samples_per_second': 8.771, 'eval_steps_per_second': 4.386, 'epoch': 2.88}
+{'loss': 0.4884, 'grad_norm': 18.19667625427246, 'learning_rate': 9.396215025777139e-05, 'epoch': 2.96}
+{'loss': 0.5526, 'grad_norm': 17.964893341064453, 'learning_rate': 8.79463319744677e-05, 'epoch': 3.04}
+{'loss': 0.2116, 'grad_norm': 5.015590190887451, 'learning_rate': 8.197449621860943e-05, 'epoch': 3.12}
+{'loss': 0.0843, 'grad_norm': 5.6883225440979, 'learning_rate': 7.606843357124426e-05, 'epoch': 3.2}
+{'eval_loss': 0.2360386848449707, 'eval_runtime': 1.3667, 'eval_samples_per_second': 8.78, 'eval_steps_per_second': 4.39, 'epoch': 3.2}
+{'loss': 0.1158, 'grad_norm': 6.636446475982666, 'learning_rate': 7.024969461447972e-05, 'epoch': 3.28}
+{'loss': 0.2755, 'grad_norm': 4.405576229095459, 'learning_rate': 6.453951129574644e-05, 'epoch': 3.36}
+{'loss': 0.0186, 'grad_norm': 1.6179524660110474, 'learning_rate': 5.8958719454724346e-05, 'epoch': 3.44}
+{'loss': 0.3836, 'grad_norm': 8.783114433288574, 'learning_rate': 5.3527682795623146e-05, 'epoch': 3.52}
+{'eval_loss': 0.13969357311725616, 'eval_runtime': 1.3687, 'eval_samples_per_second': 8.767, 'eval_steps_per_second': 4.384, 'epoch': 3.52}
+{'loss': 0.0141, 'grad_norm': 0.8835445046424866, 'learning_rate': 4.826621858223431e-05, 'epoch': 3.6}
+{'loss': 0.6196, 'grad_norm': 12.678099632263184, 'learning_rate': 4.3193525326884435e-05, 'epoch': 3.68}
+{'loss': 0.0948, 'grad_norm': 5.320870876312256, 'learning_rate': 3.832811273714569e-05, 'epoch': 3.76}
+{'loss': 0.0449, 'grad_norm': 2.7501108646392822, 'learning_rate': 3.36877341759205e-05, 'epoch': 3.84}
+{'eval_loss': 0.2801015079021454, 'eval_runtime': 1.3706, 'eval_samples_per_second': 8.755, 'eval_steps_per_second': 4.378, 'epoch': 3.84}
+{'loss': 0.3026, 'grad_norm': 4.41072940826416, 'learning_rate': 2.9289321881345254e-05, 'epoch': 3.92}
+{'loss': 0.0152, 'grad_norm': 1.2105910778045654, 'learning_rate': 2.514892518288988e-05, 'epoch': 4.0}
+{'loss': 0.0629, 'grad_norm': 4.502895355224609, 'learning_rate': 2.1281651939094992e-05, 'epoch': 4.08}
+{'loss': 0.2246, 'grad_norm': 6.058006286621094, 'learning_rate': 1.7701613410634365e-05, 'epoch': 4.16}
+{'eval_loss': 0.19463467597961426, 'eval_runtime': 1.3725, 'eval_samples_per_second': 8.743, 'eval_steps_per_second': 4.372, 'epoch': 4.16}
+{'loss': 0.0093, 'grad_norm': 0.5118169784545898, 'learning_rate': 1.442187276985526e-05, 'epoch': 4.24}
+{'loss': 0.0148, 'grad_norm': 0.8497004508972168, 'learning_rate': 1.1454397434679021e-05, 'epoch': 4.32}
+{'loss': 0.0392, 'grad_norm': 1.752151608467102, 'learning_rate': 8.810015400790994e-06, 'epoch': 4.4}
+{'loss': 0.229, 'grad_norm': 3.6673429012298584, 'learning_rate': 6.498375731458528e-06, 'epoch': 4.48}
+{'eval_loss': 0.16181980073451996, 'eval_runtime': 1.3705, 'eval_samples_per_second': 8.756, 'eval_steps_per_second': 4.378, 'epoch': 4.48}
+{'loss': 0.1722, 'grad_norm': 2.9522616863250732, 'learning_rate': 4.527913349145441e-06, 'epoch': 4.56}
+{'loss': 0.0295, 'grad_norm': 1.5037487745285034, 'learning_rate': 2.905818257394799e-06, 'epoch': 4.64}
+{'loss': 0.03, 'grad_norm': 1.4181660413742065, 'learning_rate': 1.6380093052856483e-06, 'epoch': 4.72}
+{'loss': 0.3073, 'grad_norm': 9.207091331481934, 'learning_rate': 7.291125901946027e-07, 'epoch': 4.8}
+{'eval_loss': 0.14654164016246796, 'eval_runtime': 1.3704, 'eval_samples_per_second': 8.756, 'eval_steps_per_second': 4.378, 'epoch': 4.8}
+{'loss': 0.032, 'grad_norm': 1.5023337602615356, 'learning_rate': 1.824445776682504e-07, 'epoch': 4.88}
+{'loss': 0.1144, 'grad_norm': 2.882874011993408, 'learning_rate': 0.0, 'epoch': 4.96}
+{'train_runtime': 206.4235, 'train_samples_per_second': 2.403, 'train_steps_per_second': 0.3, 'train_loss': 0.7901421915739775, 'epoch': 4.96}
+[2024-04-09 08:33:03,093] [INFO] [axolotl.train.log:61] [PID:30736] [RANK:0] Training Completed!!! Saving pre-trained model to ./qlora-out[39m
+(PeftModelForCausalLM(   (base_model): LoraModel(     (model): MistralForCausalLM(       (model): MistralModel(         (embed_tokens): Embedding(32000, 4096)         (layers): ModuleList(           (0-31): 32 x MistralDecoderLayer(             (self_attn): MistralFlashAttention2(               (q_proj): lora.Linear4bit(                 (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)                 (lora_dropout): ModuleDict(                   (default): Dropout(p=0.05, inplace=False)                 )                 (lora_A): ModuleDict(                   (default): Linear(in_features=4096, out_features=32, bias=False)                 )                 (lora_B): ModuleDict(                   (default): Linear(in_features=32, out_features=4096, bias=False)                 )                 (lora_embedding_A): ParameterDict()                 (lora_embedding_B): ParameterDict()               )               (k_proj): lora.Linear4bit(                 (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False)                 (lora_dropout): ModuleDict(                   (default): Dropout(p=0.05, inplace=False)                 )                 (lora_A): ModuleDict(                   (default): Linear(in_features=4096, out_features=32, bias=False)                 )                 (lora_B): ModuleDict(                   (default): Linear(in_features=32, out_features=1024, bias=False)                 )                 (lora_embedding_A): ParameterDict()                 (lora_embedding_B): ParameterDict()               )               (v_proj): lora.Linear4bit(                 (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False)                 (lora_dropout): ModuleDict(                   (default): Dropout(p=0.05, inplace=False)                 )                 (lora_A): ModuleDict(                   (default): Linear(in_features=4096, out_features=32, bias=False)                 )                 
(lora_B): ModuleDict(                   (default): Linear(in_features=32, out_features=1024, bias=False)                 )                 (lora_embedding_A): ParameterDict()                 (lora_embedding_B): ParameterDict()               )               (o_proj): lora.Linear4bit(                 (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)                 (lora_dropout): ModuleDict(                   (default): Dropout(p=0.05, inplace=False)                 )                 (lora_A): ModuleDict(                   (default): Linear(in_features=4096, out_features=32, bias=False)                 )                 (lora_B): ModuleDict(                   (default): Linear(in_features=32, out_features=4096, bias=False)                 )                 (lora_embedding_A): ParameterDict()                 (lora_embedding_B): ParameterDict()               )               (rotary_emb): MistralRotaryEmbedding()             )             (mlp): MistralMLP(               (gate_proj): lora.Linear4bit(                 (base_layer): Linear4bit(in_features=4096, out_features=14336, bias=False)                 (lora_dropout): ModuleDict(                   (default): Dropout(p=0.05, inplace=False)                 )                 (lora_A): ModuleDict(                   (default): Linear(in_features=4096, out_features=32, bias=False)                 )                 (lora_B): ModuleDict(                   (default): Linear(in_features=32, out_features=14336, bias=False)                 )                 (lora_embedding_A): ParameterDict()                 (lora_embedding_B): ParameterDict()               )               (up_proj): lora.Linear4bit(                 (base_layer): Linear4bit(in_features=4096, out_features=14336, bias=False)                 (lora_dropout): ModuleDict(                   (default): Dropout(p=0.05, inplace=False)                 )                 (lora_A): ModuleDict(                   (default): Linear(in_features=4096, 
out_features=32, bias=False)                 )                 (lora_B): ModuleDict(                   (default): Linear(in_features=32, out_features=14336, bias=False)                 )                 (lora_embedding_A): ParameterDict()                 (lora_embedding_B): ParameterDict()               )               (down_proj): lora.Linear4bit(                 (base_layer): Linear4bit(in_features=14336, out_features=4096, bias=False)                 (lora_dropout): ModuleDict(                   (default): Dropout(p=0.05, inplace=False)                 )                 (lora_A): ModuleDict(                   (default): Linear(in_features=14336, out_features=32, bias=False)                 )                 (lora_B): ModuleDict(                   (default): Linear(in_features=32, out_features=4096, bias=False)                 )                 (lora_embedding_A): ParameterDict()                 (lora_embedding_B): ParameterDict()               )               (act_fn): SiLU()             )             (input_layernorm): MistralRMSNorm()             (post_attention_layernorm): MistralRMSNorm()           )         )         (norm): MistralRMSNorm()       )       (lm_head): Linear(in_features=4096, out_features=32000, bias=False)     )   ) ), LlamaTokenizer(name_or_path='mistralai/Mistral-7B-v0.1', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=False, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={ 	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), 	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), 	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), })
+
+End of job
+Output file has been generated, please check /mnt/scratch/users/dhd2000/ft14/dmog/axolotl-test-outputs/test.output
diff --git a/finetune-test.py b/finetune-test.py
new file mode 100644
index 0000000000000000000000000000000000000000..74209cc87c69bbdec83674e631ada945646fe780
--- /dev/null
+++ b/finetune-test.py
@@ -0,0 +1,72 @@
+# Evaluates the fine-tuned adapter in ./qlora-out on a JSONL dataset and prints running TP/TN/FP/FN counts.
+
+# Import the necessary libraries
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from langchain.memory import ConversationBufferWindowMemory
+from peft import PeftModel
+import torch
+
+import json
+import sys
+
+# Check if the correct number of arguments are provided
+if len(sys.argv) != 2:
+    print("Usage: python finetune.py <jsonl_file>")
+    sys.exit(1)
+
+# Get the file path from the command-line argument
+jsonl_file_path = sys.argv[1]
+
+# Load the model and tokenizer
+base_model = "mistralai/Mistral-7B-Instruct-v0.2"
+tokenizer = AutoTokenizer.from_pretrained(base_model)
+tokenizer.add_special_tokens({"pad_token": "[PAD]"})
+base_model = AutoModelForCausalLM.from_pretrained(base_model)
+
+
+ft_model = PeftModel.from_pretrained(base_model, "./qlora-out") 
+# ft_model = ft_model.merge_and_unload()
+ft_model.eval()
+
+# Set the device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ft_model.to(device)
+
+# Read the JSONL file
+with open(jsonl_file_path, "r") as f:
+    tp, tn, fp, fn = 0, 0, 0, 0
+    for line in f:
+        data = json.loads(line)
+        user_in = data["input"]
+        user_input = f"[INST] ###instruction: Check if the given traffic flow is normal or of an attacker or a victim\n###input: {user_in}\n#output: [/INST]"
+        encodings = tokenizer(user_input, return_tensors="pt", padding=True).to(device)
+        input_ids = encodings["input_ids"]
+        attention_mask = encodings["attention_mask"]
+
+        output_ids = ft_model.generate(input_ids, attention_mask = attention_mask, max_new_tokens=1000, num_return_sequences=1, do_sample=True, temperature=0.1, top_p=0.9)
+
+        generated_ids = output_ids[0, input_ids.shape[-1]:]
+
+        # Decode the output
+        response = tokenizer.decode(generated_ids, skip_special_tokens=True).lower()
+
+        # calculate true positive, true negative, false positive, false negative
+        if "normal" not in response and data["output"] == response:
+            tp += 1
+        elif "normal" in response and data["output"] == response:
+            tn += 1
+        elif "normal" in response and data["output"] != response:
+            fp += 1
+        elif "normal" not in response and data["output"] != response:
+            fn += 1
+        else: 
+            print(f"Error: {response}, {data[output]}")
+            print(f"User input: {user_in}")
+            print(f"Generated response: {response}")
+            print(f"Expected response: {data[output]}")
+            print()
+
+        print(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
+            
+
+
diff --git a/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/data-00000-of-00001.arrow b/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/data-00000-of-00001.arrow
new file mode 100644
index 0000000000000000000000000000000000000000..ff851bfb1c72204dcf880903821ff82e61b24cb7
--- /dev/null
+++ b/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/data-00000-of-00001.arrow
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57a8a6b98277d114990fb441a27d2f777773005e6b7cf57a0ec219fe3bae40b1
+size 259336
diff --git a/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/dataset_info.json b/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/dataset_info.json
new file mode 100644
index 0000000000000000000000000000000000000000..a5ae2ce8394d91f7bafa3b7fbfd4bc2c8915a991
--- /dev/null
+++ b/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/dataset_info.json
@@ -0,0 +1,22 @@
+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "input_ids": {
+      "feature": {
+        "dtype": "int32",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    },
+    "labels": {
+      "feature": {
+        "dtype": "int64",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    }
+  },
+  "homepage": "",
+  "license": ""
+}
\ No newline at end of file
diff --git a/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/state.json b/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/state.json
new file mode 100644
index 0000000000000000000000000000000000000000..6da163c36a5f5f2d39bd4e6261f547e9e0b68adc
--- /dev/null
+++ b/last_run_prepared/c62a45f4c109d3ed049f89406e7ab97e/state.json
@@ -0,0 +1,16 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "992b9317aa372e8e",
+  "_format_columns": [
+    "input_ids",
+    "labels"
+  ],
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/meta.yaml b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..22d9925a1b7e16b24e211b3a7c199c8f72785516
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/meta.yaml
@@ -0,0 +1,15 @@
+artifact_uri: file:///mnt/scratch/users/dhd2000/ft14/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/artifacts
+end_time: 1712647983089
+entry_point_name: ''
+experiment_id: '0'
+lifecycle_stage: active
+run_id: 7e75ece8e18e485db64e4e2d9196e738
+run_name: ./qlora-out
+run_uuid: 7e75ece8e18e485db64e4e2d9196e738
+source_name: ''
+source_type: 4
+source_version: ''
+start_time: 1712647776681
+status: 3
+tags: []
+user_id: dhd2000
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/epoch b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/epoch
new file mode 100644
index 0000000000000000000000000000000000000000..f432878f3791e0923ec15be54205a5aaf4841476
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/epoch
@@ -0,0 +1,79 @@
+1712647780322 0.08 1
+1712647781687 0.08 1
+1712647784574 0.16 2
+1712647787465 0.24 3
+1712647790359 0.32 4
+1712647791728 0.32 4
+1712647794625 0.4 5
+1712647797520 0.48 6
+1712647800416 0.56 7
+1712647803312 0.64 8
+1712647804683 0.64 8
+1712647807578 0.72 9
+1712647810474 0.8 10
+1712647813372 0.88 11
+1712647816270 0.96 12
+1712647817642 0.96 12
+1712647820386 1.04 13
+1712647824221 1.12 14
+1712647827138 1.2 15
+1712647830042 1.28 16
+1712647831420 1.28 16
+1712647834333 1.36 17
+1712647837242 1.44 18
+1712647840147 1.52 19
+1712647843070 1.6 20
+1712647844448 1.6 20
+1712647847363 1.68 21
+1712647850290 1.76 22
+1712647853203 1.84 23
+1712647856116 1.92 24
+1712647857496 1.92 24
+1712647860252 2.0 25
+1712647863163 2.08 26
+1712647866855 2.16 27
+1712647869769 2.24 28
+1712647871150 2.24 28
+1712647874087 2.32 29
+1712647877006 2.4 30
+1712647879921 2.48 31
+1712647882836 2.56 32
+1712647884219 2.56 32
+1712647887139 2.64 33
+1712647890053 2.72 34
+1712647892967 2.8 35
+1712647895882 2.88 36
+1712647897265 2.88 36
+1712647900187 2.96 37
+1712647902945 3.04 38
+1712647905861 3.12 39
+1712647909552 3.2 40
+1712647910933 3.2 40
+1712647913855 3.28 41
+1712647916777 3.36 42
+1712647919694 3.44 43
+1712647922609 3.52 44
+1712647923992 3.52 44
+1712647926911 3.6 45
+1712647929833 3.68 46
+1712647932754 3.76 47
+1712647935680 3.84 48
+1712647937065 3.84 48
+1712647939993 3.92 49
+1712647942758 4.0 50
+1712647945681 4.08 51
+1712647948606 4.16 52
+1712647949993 4.16 52
+1712647953856 4.24 53
+1712647956779 4.32 54
+1712647959701 4.4 55
+1712647962622 4.48 56
+1712647964007 4.48 56
+1712647966930 4.56 57
+1712647969855 4.64 58
+1712647972779 4.72 59
+1712647975702 4.8 60
+1712647977087 4.8 60
+1712647980013 4.88 61
+1712647982936 4.96 62
+1712647983084 4.96 62
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_loss b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_loss
new file mode 100644
index 0000000000000000000000000000000000000000..d1a66dddbc3ca9a74013351553978d43683b213e
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_loss
@@ -0,0 +1,16 @@
+1712647781687 7.300913333892822 1
+1712647791728 0.7137572169303894 4
+1712647804683 1.0445994138717651 8
+1712647817642 1.3959709405899048 12
+1712647831420 0.9023411870002747 16
+1712647844448 0.4847445785999298 20
+1712647857496 0.5740255117416382 24
+1712647871150 0.3771994113922119 28
+1712647884219 0.29374203085899353 32
+1712647897265 0.5674905180931091 36
+1712647910933 0.2360386848449707 40
+1712647923992 0.13969357311725616 44
+1712647937065 0.2801015079021454 48
+1712647949993 0.19463467597961426 52
+1712647964007 0.16181980073451996 56
+1712647977087 0.14654164016246796 60
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_runtime b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_runtime
new file mode 100644
index 0000000000000000000000000000000000000000..ef189ef6fa07aab3fa46dbc931de687ffe239493
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_runtime
@@ -0,0 +1,16 @@
+1712647781687 1.3523 1
+1712647791728 1.3532 4
+1712647804683 1.356 8
+1712647817642 1.3583 12
+1712647831420 1.3623 16
+1712647844448 1.3637 20
+1712647857496 1.3655 24
+1712647871150 1.3676 28
+1712647884219 1.3678 32
+1712647897265 1.3681 36
+1712647910933 1.3667 40
+1712647923992 1.3687 44
+1712647937065 1.3706 48
+1712647949993 1.3725 52
+1712647964007 1.3705 56
+1712647977087 1.3704 60
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_samples_per_second b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_samples_per_second
new file mode 100644
index 0000000000000000000000000000000000000000..0a5b33b314313207171558aa0073e4afa518c935
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_samples_per_second
@@ -0,0 +1,16 @@
+1712647781687 8.873 1
+1712647791728 8.868 4
+1712647804683 8.85 8
+1712647817642 8.835 12
+1712647831420 8.808 16
+1712647844448 8.799 20
+1712647857496 8.788 24
+1712647871150 8.775 28
+1712647884219 8.773 32
+1712647897265 8.771 36
+1712647910933 8.78 40
+1712647923992 8.767 44
+1712647937065 8.755 48
+1712647949993 8.743 52
+1712647964007 8.756 56
+1712647977087 8.756 60
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_steps_per_second b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_steps_per_second
new file mode 100644
index 0000000000000000000000000000000000000000..e39c6f2c898cf08fa4d5b51b4bed95021f37aa7a
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/eval_steps_per_second
@@ -0,0 +1,16 @@
+1712647781687 4.437 1
+1712647791728 4.434 4
+1712647804683 4.425 8
+1712647817642 4.417 12
+1712647831420 4.404 16
+1712647844448 4.4 20
+1712647857496 4.394 24
+1712647871150 4.387 28
+1712647884219 4.387 32
+1712647897265 4.386 36
+1712647910933 4.39 40
+1712647923992 4.384 44
+1712647937065 4.378 48
+1712647949993 4.372 52
+1712647964007 4.378 56
+1712647977087 4.378 60
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/grad_norm b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/grad_norm
new file mode 100644
index 0000000000000000000000000000000000000000..16b0d51cc03f1651d0d04c2865800e79fd25e238
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/grad_norm
@@ -0,0 +1,62 @@
+1712647780322 102.28898620605469 1
+1712647784574 103.4541015625 2
+1712647787465 67.47515869140625 3
+1712647790359 72.36919403076172 4
+1712647794625 16.83085060119629 5
+1712647797520 25.897714614868164 6
+1712647800416 18.89151382446289 7
+1712647803312 27.15555763244629 8
+1712647807578 20.812381744384766 9
+1712647810474 56.3886604309082 10
+1712647813372 6.24803352355957 11
+1712647816270 8.379430770874023 12
+1712647820386 20.744348526000977 13
+1712647824221 4.800480842590332 14
+1712647827138 11.284302711486816 15
+1712647830042 5.787976264953613 16
+1712647834333 21.48629379272461 17
+1712647837242 38.0982666015625 18
+1712647840147 10.824837684631348 19
+1712647843070 9.1353120803833 20
+1712647847363 3.8411033153533936 21
+1712647850290 23.888507843017578 22
+1712647853203 8.5956392288208 23
+1712647856116 3.825141191482544 24
+1712647860252 3.558993101119995 25
+1712647863163 11.759211540222168 26
+1712647866855 96.2179183959961 27
+1712647869769 31.022045135498047 28
+1712647874087 3.5004501342773438 29
+1712647877006 5.16464376449585 30
+1712647879921 25.634010314941406 31
+1712647882836 7.102908134460449 32
+1712647887139 6.236325263977051 33
+1712647890053 4.445058345794678 34
+1712647892967 6.94170618057251 35
+1712647895882 5.656033039093018 36
+1712647900187 18.19667625427246 37
+1712647902945 17.964893341064453 38
+1712647905861 5.015590190887451 39
+1712647909552 5.6883225440979 40
+1712647913855 6.636446475982666 41
+1712647916777 4.405576229095459 42
+1712647919694 1.6179524660110474 43
+1712647922609 8.783114433288574 44
+1712647926911 0.8835445046424866 45
+1712647929833 12.678099632263184 46
+1712647932754 5.320870876312256 47
+1712647935680 2.7501108646392822 48
+1712647939993 4.41072940826416 49
+1712647942758 1.2105910778045654 50
+1712647945681 4.502895355224609 51
+1712647948606 6.058006286621094 52
+1712647953856 0.5118169784545898 53
+1712647956779 0.8497004508972168 54
+1712647959701 1.752151608467102 55
+1712647962622 3.6673429012298584 56
+1712647966930 2.9522616863250732 57
+1712647969855 1.5037487745285034 58
+1712647972779 1.4181660413742065 59
+1712647975702 9.207091331481934 60
+1712647980013 1.5023337602615356 61
+1712647982936 2.882874011993408 62
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/learning_rate b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/learning_rate
new file mode 100644
index 0000000000000000000000000000000000000000..9ab174ff7c3a8e1b7bc8aef1e35c90f9f4ed54fa
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/learning_rate
@@ -0,0 +1,62 @@
+1712647780322 2e-05 1
+1712647784574 4e-05 2
+1712647787465 6e-05 3
+1712647790359 8e-05 4
+1712647794625 0.0001 5
+1712647797520 0.00012 6
+1712647800416 0.00014 7
+1712647803312 0.00016 8
+1712647807578 0.00018 9
+1712647810474 0.0002 10
+1712647813372 0.00019981755542233177 11
+1712647816270 0.0001992708874098054 12
+1712647820386 0.00019836199069471437 13
+1712647824221 0.0001970941817426052 14
+1712647827138 0.00019547208665085457 15
+1712647830042 0.0001935016242685415 16
+1712647834333 0.00019118998459920902 17
+1712647837242 0.000188545602565321 18
+1712647840147 0.00018557812723014476 19
+1712647843070 0.00018229838658936564 20
+1712647847363 0.00017871834806090501 21
+1712647850290 0.00017485107481711012 22
+1712647853203 0.00017071067811865476 23
+1712647856116 0.00016631226582407952 24
+1712647860252 0.00016167188726285434 25
+1712647863163 0.00015680647467311557 26
+1712647866855 0.00015173378141776568 27
+1712647869769 0.00014647231720437686 28
+1712647874087 0.0001410412805452757 29
+1712647877006 0.00013546048870425356 30
+1712647879921 0.00012975030538552032 31
+1712647882836 0.0001239315664287558 32
+1712647887139 0.0001180255037813906 33
+1712647890053 0.0001120536680255323 34
+1712647892967 0.00010603784974222861 35
+1712647895882 0.0001 36
+1712647900187 9.396215025777139e-05 37
+1712647902945 8.79463319744677e-05 38
+1712647905861 8.197449621860943e-05 39
+1712647909552 7.606843357124426e-05 40
+1712647913855 7.024969461447972e-05 41
+1712647916777 6.453951129574644e-05 42
+1712647919694 5.8958719454724346e-05 43
+1712647922609 5.3527682795623146e-05 44
+1712647926911 4.826621858223431e-05 45
+1712647929833 4.3193525326884435e-05 46
+1712647932754 3.832811273714569e-05 47
+1712647935680 3.36877341759205e-05 48
+1712647939993 2.9289321881345254e-05 49
+1712647942758 2.514892518288988e-05 50
+1712647945681 2.1281651939094992e-05 51
+1712647948606 1.7701613410634365e-05 52
+1712647953856 1.442187276985526e-05 53
+1712647956779 1.1454397434679021e-05 54
+1712647959701 8.810015400790994e-06 55
+1712647962622 6.498375731458528e-06 56
+1712647966930 4.527913349145441e-06 57
+1712647969855 2.905818257394799e-06 58
+1712647972779 1.6380093052856483e-06 59
+1712647975702 7.291125901946027e-07 60
+1712647980013 1.824445776682504e-07 61
+1712647982936 0.0 62
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/loss b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/loss
new file mode 100644
index 0000000000000000000000000000000000000000..dc235c2bc6e146990750afd93cdd9e06c9311240
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/loss
@@ -0,0 +1,62 @@
+1712647780322 6.6367 1
+1712647784574 7.0616 2
+1712647787465 4.686 3
+1712647790359 2.3866 4
+1712647794625 0.6844 5
+1712647797520 0.914 6
+1712647800416 0.63 7
+1712647803312 0.948 8
+1712647807578 1.0285 9
+1712647810474 1.3756 10
+1712647813372 0.5178 11
+1712647816270 0.6822 12
+1712647820386 1.3762 13
+1712647824221 0.5248 14
+1712647827138 0.8094 15
+1712647830042 0.5222 16
+1712647834333 0.8027 17
+1712647837242 1.7772 18
+1712647840147 0.7737 19
+1712647843070 0.534 20
+1712647847363 0.3201 21
+1712647850290 2.2541 22
+1712647853203 0.8177 23
+1712647856116 0.4624 24
+1712647860252 0.3714 25
+1712647863163 0.6562 26
+1712647866855 1.5141 27
+1712647869769 0.7753 28
+1712647874087 0.2649 29
+1712647877006 0.171 30
+1712647879921 0.9172 31
+1712647882836 0.3324 32
+1712647887139 0.4932 33
+1712647890053 0.1284 34
+1712647892967 0.1547 35
+1712647895882 0.1973 36
+1712647900187 0.4884 37
+1712647902945 0.5526 38
+1712647905861 0.2116 39
+1712647909552 0.0843 40
+1712647913855 0.1158 41
+1712647916777 0.2755 42
+1712647919694 0.0186 43
+1712647922609 0.3836 44
+1712647926911 0.0141 45
+1712647929833 0.6196 46
+1712647932754 0.0948 47
+1712647935680 0.0449 48
+1712647939993 0.3026 49
+1712647942758 0.0152 50
+1712647945681 0.0629 51
+1712647948606 0.2246 52
+1712647953856 0.0093 53
+1712647956779 0.0148 54
+1712647959701 0.0392 55
+1712647962622 0.229 56
+1712647966930 0.1722 57
+1712647969855 0.0295 58
+1712647972779 0.03 59
+1712647975702 0.3073 60
+1712647980013 0.032 61
+1712647982936 0.1144 62
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/total_flos b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/total_flos
new file mode 100644
index 0000000000000000000000000000000000000000..8d2fbbae034ee18b6a3a59f5cb591adcf3a59c57
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/total_flos
@@ -0,0 +1 @@
+1712647983084 5437004879757312.0 62
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_loss b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_loss
new file mode 100644
index 0000000000000000000000000000000000000000..e2a63c3c09e7fa944bdf92085bd9ee06f239180b
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_loss
@@ -0,0 +1 @@
+1712647983084 0.7901421915739775 62
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_runtime b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_runtime
new file mode 100644
index 0000000000000000000000000000000000000000..26e1cedad25e987991767e7433a00d5cc262339b
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_runtime
@@ -0,0 +1 @@
+1712647983084 206.4235 62
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_samples_per_second b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_samples_per_second
new file mode 100644
index 0000000000000000000000000000000000000000..a778a2ad05b9b53b0322670acdb01e9f3f946fb6
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_samples_per_second
@@ -0,0 +1 @@
+1712647983084 2.403 62
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_steps_per_second b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_steps_per_second
new file mode 100644
index 0000000000000000000000000000000000000000..1a9f2bb98bcad50d595796ff7acabd42c744bccd
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/metrics/train_steps_per_second
@@ -0,0 +1 @@
+1712647983084 0.3 62
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/_name_or_path b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/_name_or_path
new file mode 100644
index 0000000000000000000000000000000000000000..33107fdfe860165b41c9d861e55966a6f48293db
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/_name_or_path
@@ -0,0 +1 @@
+mistralai/Mistral-7B-v0.1
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/accelerator_config b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/accelerator_config
new file mode 100644
index 0000000000000000000000000000000000000000..a2cbdd730f27b5f9f8452fbf3e14cef98c90535f
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/accelerator_config
@@ -0,0 +1 @@
+{'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True}
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adafactor b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adafactor
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adafactor
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta1 b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta1
new file mode 100644
index 0000000000000000000000000000000000000000..9a7d84f2a96bb56f53bfc3a42ac10d06459e55c3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta1
@@ -0,0 +1 @@
+0.9
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta2 b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta2
new file mode 100644
index 0000000000000000000000000000000000000000..79cbfdf0652c46b13ed8946e54aa94ff7bdd44ab
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_beta2
@@ -0,0 +1 @@
+0.999
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_epsilon b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_epsilon
new file mode 100644
index 0000000000000000000000000000000000000000..851199be9c9a0b8c721d7f305f5af1759637102d
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/adam_epsilon
@@ -0,0 +1 @@
+1e-08
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/add_cross_attention b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/add_cross_attention
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/add_cross_attention
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/architectures b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/architectures
new file mode 100644
index 0000000000000000000000000000000000000000..c27f306c1b84f17dbf9aa36d723a7f328b56229f
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/architectures
@@ -0,0 +1 @@
+['MistralForCausalLM']
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/attention_dropout b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/attention_dropout
new file mode 100644
index 0000000000000000000000000000000000000000..171538eb0b00f4eddffa17929796de55b838f34b
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/attention_dropout
@@ -0,0 +1 @@
+0.0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/auto_find_batch_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/auto_find_batch_size
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/auto_find_batch_size
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bad_words_ids b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bad_words_ids
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bad_words_ids
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/begin_suppress_tokens b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/begin_suppress_tokens
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/begin_suppress_tokens
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_dataset b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_dataset
new file mode 100644
index 0000000000000000000000000000000000000000..5372119fb56f731e6e0979f74a8912782bc37aba
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_dataset
@@ -0,0 +1 @@
+pharaouk/dharma-1/dharma_1_mini.json
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_source_max_len b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_source_max_len
new file mode 100644
index 0000000000000000000000000000000000000000..f3e53ee118f90809468f69873ccb9d675089cd74
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_source_max_len
@@ -0,0 +1 @@
+2048
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_split b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_split
new file mode 100644
index 0000000000000000000000000000000000000000..08f17520cfb44b4cb37639a20e3a4e65e1b1dfa3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bench_split
@@ -0,0 +1 @@
+eval
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16 b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16_full_eval b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16_full_eval
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bf16_full_eval
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bos_token_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bos_token_id
new file mode 100644
index 0000000000000000000000000000000000000000..56a6051ca2b02b04ef92d5150c9ef600403cb1de
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/bos_token_id
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/chunk_size_feed_forward b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/chunk_size_feed_forward
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/chunk_size_feed_forward
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_constant_lr_ratio b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_constant_lr_ratio
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_constant_lr_ratio
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_min_lr_ratio b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_min_lr_ratio
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cosine_min_lr_ratio
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cross_attention_hidden_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cross_attention_hidden_size
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/cross_attention_hidden_size
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/data_seed b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/data_seed
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/data_seed
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_drop_last b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_drop_last
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_drop_last
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_num_workers b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_num_workers
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_num_workers
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_persistent_workers b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_persistent_workers
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_persistent_workers
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_pin_memory b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_pin_memory
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_pin_memory
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_prefetch_factor b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_prefetch_factor
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dataloader_prefetch_factor
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_backend b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_backend
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_backend
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_broadcast_buffers b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_broadcast_buffers
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_broadcast_buffers
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_bucket_cap_mb b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_bucket_cap_mb
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_bucket_cap_mb
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_find_unused_parameters b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_find_unused_parameters
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_find_unused_parameters
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_timeout b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_timeout
new file mode 100644
index 0000000000000000000000000000000000000000..2974009fd3610ead1e61c1cd20f510a934dd6f91
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ddp_timeout
@@ -0,0 +1 @@
+1800
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/debug b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/debug
new file mode 100644
index 0000000000000000000000000000000000000000..0637a088a01e8ddab3bf3fa98dbe804cbde1a0dc
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/debug
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/decoder_start_token_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/decoder_start_token_id
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/decoder_start_token_id
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/deepspeed b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/deepspeed
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/deepspeed
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/disable_tqdm b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/disable_tqdm
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/disable_tqdm
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dispatch_batches b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dispatch_batches
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/dispatch_batches
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/diversity_penalty b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/diversity_penalty
new file mode 100644
index 0000000000000000000000000000000000000000..171538eb0b00f4eddffa17929796de55b838f34b
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/diversity_penalty
@@ -0,0 +1 @@
+0.0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_bench_eval b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_bench_eval
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_bench_eval
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_causal_lm_eval b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_causal_lm_eval
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_causal_lm_eval
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_eval b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_eval
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_eval
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_predict b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_predict
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_predict
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_sample b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_sample
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_sample
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_train b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_train
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/do_train
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/early_stopping b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/early_stopping
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/early_stopping
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/encoder_no_repeat_ngram_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/encoder_no_repeat_ngram_size
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/encoder_no_repeat_ngram_size
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eos_token_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eos_token_id
new file mode 100644
index 0000000000000000000000000000000000000000..d8263ee9860594d2806b0dfd1bfd17528b0ba2a4
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eos_token_id
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_accumulation_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_accumulation_steps
new file mode 100644
index 0000000000000000000000000000000000000000..bf0d87ab1b2b0ec1a11a3973d2845b42413d9767
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_accumulation_steps
@@ -0,0 +1 @@
+4
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_delay b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_delay
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_delay
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_sample_packing b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_sample_packing
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_sample_packing
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_steps
new file mode 100644
index 0000000000000000000000000000000000000000..30e2fb4d6f4f74b013fc93cbbd90b37101df4148
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/eval_steps
@@ -0,0 +1 @@
+0.05
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/evaluation_strategy b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/evaluation_strategy
new file mode 100644
index 0000000000000000000000000000000000000000..17f15e19cf5e8064aff8d528657b70e9611eb59e
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/evaluation_strategy
@@ -0,0 +1 @@
+steps
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/exponential_decay_length_penalty b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/exponential_decay_length_penalty
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/exponential_decay_length_penalty
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/finetuning_task b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/finetuning_task
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/finetuning_task
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/forced_bos_token_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/forced_bos_token_id
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/forced_bos_token_id
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/forced_eos_token_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/forced_eos_token_id
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/forced_eos_token_id
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16 b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_backend b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_backend
new file mode 100644
index 0000000000000000000000000000000000000000..4d18c3e59ecf5c28b46b06ce26f2406b2d449870
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_backend
@@ -0,0 +1 @@
+auto
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_full_eval b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_full_eval
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_full_eval
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_opt_level b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_opt_level
new file mode 100644
index 0000000000000000000000000000000000000000..a9ada426ac8819467c6dc392dcbea40183a3e16e
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fp16_opt_level
@@ -0,0 +1 @@
+O1
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp
new file mode 100644
index 0000000000000000000000000000000000000000..0637a088a01e8ddab3bf3fa98dbe804cbde1a0dc
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_config b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_config
new file mode 100644
index 0000000000000000000000000000000000000000..9d33480169a14dfac929530aefc3cd1f5776a983
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_config
@@ -0,0 +1 @@
+{'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_min_num_params b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_min_num_params
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_min_num_params
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_transformer_layer_cls_to_wrap b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_transformer_layer_cls_to_wrap
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/fsdp_transformer_layer_cls_to_wrap
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/full_determinism b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/full_determinism
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/full_determinism
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_accumulation_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_accumulation_steps
new file mode 100644
index 0000000000000000000000000000000000000000..bf0d87ab1b2b0ec1a11a3973d2845b42413d9767
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_accumulation_steps
@@ -0,0 +1 @@
+4
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_checkpointing b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_checkpointing
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_checkpointing
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_checkpointing_kwargs b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_checkpointing_kwargs
new file mode 100644
index 0000000000000000000000000000000000000000..e111fdd6e27ffe7ae81c6da50ec9db10030c98f5
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/gradient_checkpointing_kwargs
@@ -0,0 +1 @@
+{'use_reentrant': True}
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/greater_is_better b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/greater_is_better
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/greater_is_better
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/group_by_length b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/group_by_length
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/group_by_length
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/half_precision_backend b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/half_precision_backend
new file mode 100644
index 0000000000000000000000000000000000000000..4d18c3e59ecf5c28b46b06ce26f2406b2d449870
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/half_precision_backend
@@ -0,0 +1 @@
+auto
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hidden_act b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hidden_act
new file mode 100644
index 0000000000000000000000000000000000000000..84972cd9564e61cac416981cb71bb1e176046f68
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hidden_act
@@ -0,0 +1 @@
+silu
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hidden_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hidden_size
new file mode 100644
index 0000000000000000000000000000000000000000..1b18a99c9b4c83c582cf696ed55b1c1d79776fa2
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hidden_size
@@ -0,0 +1 @@
+4096
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_always_push b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_always_push
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_always_push
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_model_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_model_id
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_model_id
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_private_repo b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_private_repo
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_private_repo
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_strategy b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_strategy
new file mode 100644
index 0000000000000000000000000000000000000000..8532b12ca8add8fe61b84623fab9d559a366ce3c
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_strategy
@@ -0,0 +1 @@
+every_save
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_token b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_token
new file mode 100644
index 0000000000000000000000000000000000000000..0a574a354979ef783f5f4fe08c3595f79596ff41
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/hub_token
@@ -0,0 +1 @@
+<HUB_TOKEN>
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/id2label b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/id2label
new file mode 100644
index 0000000000000000000000000000000000000000..74c276dcae370126a18f5657c0e1ed72e72325e9
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/id2label
@@ -0,0 +1 @@
+{0: 'LABEL_0', 1: 'LABEL_1'}
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ignore_data_skip b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ignore_data_skip
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ignore_data_skip
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_inputs_for_metrics b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_inputs_for_metrics
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_inputs_for_metrics
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_num_input_tokens_seen b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_num_input_tokens_seen
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_num_input_tokens_seen
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_tokens_per_second b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_tokens_per_second
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/include_tokens_per_second
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/initializer_range b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/initializer_range
new file mode 100644
index 0000000000000000000000000000000000000000..79dd775c1e90ab736c362ede2f2332678eccf47e
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/initializer_range
@@ -0,0 +1 @@
+0.02
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/intermediate_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/intermediate_size
new file mode 100644
index 0000000000000000000000000000000000000000..5a65be5c17b7f3ef8c6237c21e0efc9b8a59f1ae
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/intermediate_size
@@ -0,0 +1 @@
+14336
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/is_decoder b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/is_decoder
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/is_decoder
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/is_encoder_decoder b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/is_encoder_decoder
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/is_encoder_decoder
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/jit_mode_eval b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/jit_mode_eval
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/jit_mode_eval
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label2id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label2id
new file mode 100644
index 0000000000000000000000000000000000000000..0589857be5c3ad7b568bf7c79a4172a5aa887693
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label2id
@@ -0,0 +1 @@
+{'LABEL_0': 0, 'LABEL_1': 1}
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label_names b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label_names
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label_names
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label_smoothing_factor b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label_smoothing_factor
new file mode 100644
index 0000000000000000000000000000000000000000..171538eb0b00f4eddffa17929796de55b838f34b
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/label_smoothing_factor
@@ -0,0 +1 @@
+0.0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/learning_rate b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/learning_rate
new file mode 100644
index 0000000000000000000000000000000000000000..4c7a7cb48c8bda5e544d360d959f8ef5e7f5778f
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/learning_rate
@@ -0,0 +1 @@
+0.0002
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/length_column_name b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/length_column_name
new file mode 100644
index 0000000000000000000000000000000000000000..c2e7ec839dabf14d5d59f187c6b8fdb3460872aa
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/length_column_name
@@ -0,0 +1 @@
+length
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/length_penalty b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/length_penalty
new file mode 100644
index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/length_penalty
@@ -0,0 +1 @@
+1.0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/load_best_model_at_end b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/load_best_model_at_end
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/load_best_model_at_end
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/local_rank b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/local_rank
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/local_rank
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_level b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_level
new file mode 100644
index 0000000000000000000000000000000000000000..ecf328558d66d304c19bdd373f647085a3f0880d
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_level
@@ -0,0 +1 @@
+passive
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_level_replica b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_level_replica
new file mode 100644
index 0000000000000000000000000000000000000000..14b472df8d4481c6fea79c066ae4650980f02b7c
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_level_replica
@@ -0,0 +1 @@
+warning
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_on_each_node b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_on_each_node
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/log_on_each_node
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_dir b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_dir
new file mode 100644
index 0000000000000000000000000000000000000000..0b8b53e9a3065f67eedc90d6329f1f560efab3a1
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_dir
@@ -0,0 +1 @@
+./qlora-out/runs/Apr09_08-29-36_gpu06.pri.dmog.alces.network
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_first_step b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_first_step
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_first_step
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_nan_inf_filter b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_nan_inf_filter
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_nan_inf_filter
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_steps
new file mode 100644
index 0000000000000000000000000000000000000000..56a6051ca2b02b04ef92d5150c9ef600403cb1de
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_steps
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_strategy b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_strategy
new file mode 100644
index 0000000000000000000000000000000000000000..17f15e19cf5e8064aff8d528657b70e9611eb59e
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/logging_strategy
@@ -0,0 +1 @@
+steps
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/loraplus_lr_embedding b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/loraplus_lr_embedding
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/loraplus_lr_embedding
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/loraplus_lr_ratio b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/loraplus_lr_ratio
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/loraplus_lr_ratio
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_quadratic_warmup b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_quadratic_warmup
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_quadratic_warmup
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_scheduler_kwargs b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_scheduler_kwargs
new file mode 100644
index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_scheduler_kwargs
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_scheduler_type b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_scheduler_type
new file mode 100644
index 0000000000000000000000000000000000000000..84aa3999b5b7cae7f78b1f77e04d182643005a92
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/lr_scheduler_type
@@ -0,0 +1 @@
+cosine
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_bench_samples b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_bench_samples
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_bench_samples
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_grad_norm b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_grad_norm
new file mode 100644
index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_grad_norm
@@ -0,0 +1 @@
+1.0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_length b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_length
new file mode 100644
index 0000000000000000000000000000000000000000..2edeafb09db0093bae6ff060e2dcd2166f5c9387
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_length
@@ -0,0 +1 @@
+20
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_position_embeddings b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_position_embeddings
new file mode 100644
index 0000000000000000000000000000000000000000..2707c481ad6f123a2d2f15fe38c2bbcf3c32af4b
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_position_embeddings
@@ -0,0 +1 @@
+32768
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_seq_length b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_seq_length
new file mode 100644
index 0000000000000000000000000000000000000000..ae4d10b425edf2234036e6dd7b07f9bd53fc25e3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_seq_length
@@ -0,0 +1 @@
+256
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_steps
new file mode 100644
index 0000000000000000000000000000000000000000..b2412e34dff05e77952f3f930772631cac5a3be7
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/max_steps
@@ -0,0 +1 @@
+62
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/metric_for_best_model b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/metric_for_best_model
new file mode 100644
index 0000000000000000000000000000000000000000..c476ffb61d3613d976546da2231ec877269c04d6
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/metric_for_best_model
@@ -0,0 +1 @@
+loss
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/min_length b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/min_length
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/min_length
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/model_type b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/model_type
new file mode 100644
index 0000000000000000000000000000000000000000..757dcc3dde6402fef065090ce481dc3cdf34bea1
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/model_type
@@ -0,0 +1 @@
+mistral
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/mp_parameters b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/mp_parameters
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/multipack_real_batches b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/multipack_real_batches
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/multipack_real_batches
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/neftune_noise_alpha b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/neftune_noise_alpha
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/neftune_noise_alpha
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/no_cuda b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/no_cuda
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/no_cuda
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/no_repeat_ngram_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/no_repeat_ngram_size
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/no_repeat_ngram_size
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_attention_heads b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_attention_heads
new file mode 100644
index 0000000000000000000000000000000000000000..1758dddccea2b3b02d21228a0d06a45a35c0d861
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_attention_heads
@@ -0,0 +1 @@
+32
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_beam_groups b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_beam_groups
new file mode 100644
index 0000000000000000000000000000000000000000..56a6051ca2b02b04ef92d5150c9ef600403cb1de
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_beam_groups
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_beams b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_beams
new file mode 100644
index 0000000000000000000000000000000000000000..56a6051ca2b02b04ef92d5150c9ef600403cb1de
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_beams
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_hidden_layers b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_hidden_layers
new file mode 100644
index 0000000000000000000000000000000000000000..1758dddccea2b3b02d21228a0d06a45a35c0d861
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_hidden_layers
@@ -0,0 +1 @@
+32
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_key_value_heads b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_key_value_heads
new file mode 100644
index 0000000000000000000000000000000000000000..301160a93062df23030a69f4b5e4d9bf71866ee9
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_key_value_heads
@@ -0,0 +1 @@
+8
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_return_sequences b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_return_sequences
new file mode 100644
index 0000000000000000000000000000000000000000..56a6051ca2b02b04ef92d5150c9ef600403cb1de
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_return_sequences
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_train_epochs b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_train_epochs
new file mode 100644
index 0000000000000000000000000000000000000000..7813681f5b41c028345ca62a2be376bae70b7f61
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/num_train_epochs
@@ -0,0 +1 @@
+5
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/optim b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/optim
new file mode 100644
index 0000000000000000000000000000000000000000..8b5daf75e0cece259f6a6b603b14f124b2cda697
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/optim
@@ -0,0 +1 @@
+adamw_bnb_8bit
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/optim_args b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/optim_args
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/optim_args
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_attentions b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_attentions
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_attentions
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_dir b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_dir
new file mode 100644
index 0000000000000000000000000000000000000000..99ea28c0280583d6cba05d7aaf0cc98dbfb10fa1
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_dir
@@ -0,0 +1 @@
+./qlora-out
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_hidden_states b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_hidden_states
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_hidden_states
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_scores b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_scores
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/output_scores
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/overwrite_output_dir b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/overwrite_output_dir
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/overwrite_output_dir
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pad_token_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pad_token_id
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pad_token_id
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/past_index b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/past_index
new file mode 100644
index 0000000000000000000000000000000000000000..d7d17fcbef95ca19081c4cc5e97cbc592cc7081f
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/past_index
@@ -0,0 +1 @@
+-1
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_device_eval_batch_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_device_eval_batch_size
new file mode 100644
index 0000000000000000000000000000000000000000..d8263ee9860594d2806b0dfd1bfd17528b0ba2a4
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_device_eval_batch_size
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_device_train_batch_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_device_train_batch_size
new file mode 100644
index 0000000000000000000000000000000000000000..d8263ee9860594d2806b0dfd1bfd17528b0ba2a4
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_device_train_batch_size
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_gpu_eval_batch_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_gpu_eval_batch_size
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_gpu_eval_batch_size
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_gpu_train_batch_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_gpu_train_batch_size
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/per_gpu_train_batch_size
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/prediction_loss_only b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/prediction_loss_only
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/prediction_loss_only
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/prefix b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/prefix
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/prefix
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pretraining b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pretraining
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pretraining
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/problem_type b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/problem_type
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/problem_type
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pruned_heads b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pruned_heads
new file mode 100644
index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/pruned_heads
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_model_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_model_id
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_model_id
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_organization b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_organization
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_organization
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_token b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_token
new file mode 100644
index 0000000000000000000000000000000000000000..36e61093756f7c43b24cd50fc63164c08bcf50f1
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/push_to_hub_token
@@ -0,0 +1 @@
+<PUSH_TO_HUB_TOKEN>
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/quantization_config b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/quantization_config
new file mode 100644
index 0000000000000000000000000000000000000000..adf6c34c60d0846bc5fa1f297f34a50e0a26f8b1
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/quantization_config
@@ -0,0 +1 @@
+{'quant_method': <QuantizationMethod.BITS_AND_BYTES: 'bitsandbytes'>, '_load_in_8bit': False, '_load_in_4bit': True, 'llm_int8_threshold': 6.0, 'llm_int8_skip_modules': None, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_compute_dtype': 'bfloat16', 'load_in_4bit': True, 'load_in_8bit': False}
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ray_scope b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ray_scope
new file mode 100644
index 0000000000000000000000000000000000000000..1c1206e8bf4337e96dad9a6d139628852077558d
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/ray_scope
@@ -0,0 +1 @@
+last
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_anneal_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_anneal_steps
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_anneal_steps
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_prune_ratio b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_prune_ratio
new file mode 100644
index 0000000000000000000000000000000000000000..9a7d84f2a96bb56f53bfc3a42ac10d06459e55c3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_prune_ratio
@@ -0,0 +1 @@
+0.9
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_steps
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_steps
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_warmup_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_warmup_steps
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/relora_warmup_steps
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/remove_invalid_values b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/remove_invalid_values
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/remove_invalid_values
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/remove_unused_columns b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/remove_unused_columns
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/remove_unused_columns
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/repetition_penalty b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/repetition_penalty
new file mode 100644
index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/repetition_penalty
@@ -0,0 +1 @@
+1.0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/report_to b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/report_to
new file mode 100644
index 0000000000000000000000000000000000000000..b4c01973f975b264ce9a4952bbeaa1f1b8bdb018
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/report_to
@@ -0,0 +1 @@
+['mlflow', 'tensorboard']
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/resume_from_checkpoint b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/resume_from_checkpoint
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/resume_from_checkpoint
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/return_dict b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/return_dict
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/return_dict
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/return_dict_in_generate b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/return_dict_in_generate
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/return_dict_in_generate
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/rms_norm_eps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/rms_norm_eps
new file mode 100644
index 0000000000000000000000000000000000000000..5868ff147459cee04c24f2de58e75969024870b8
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/rms_norm_eps
@@ -0,0 +1 @@
+1e-05
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/rope_theta b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/rope_theta
new file mode 100644
index 0000000000000000000000000000000000000000..5e3692287a7d36338465cfdf2af01373b923e614
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/rope_theta
@@ -0,0 +1 @@
+10000.0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/run_name b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/run_name
new file mode 100644
index 0000000000000000000000000000000000000000..99ea28c0280583d6cba05d7aaf0cc98dbfb10fa1
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/run_name
@@ -0,0 +1 @@
+./qlora-out
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing_efficiency b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing_efficiency
new file mode 100644
index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing_efficiency
@@ -0,0 +1 @@
+1.0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing_seq_len_multiplier b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing_seq_len_multiplier
new file mode 100644
index 0000000000000000000000000000000000000000..d8263ee9860594d2806b0dfd1bfd17528b0ba2a4
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sample_packing_seq_len_multiplier
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_on_each_node b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_on_each_node
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_on_each_node
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_only_model b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_only_model
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_only_model
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_safetensors b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_safetensors
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_steps
new file mode 100644
index 0000000000000000000000000000000000000000..2f4536184bcac31936bd15a5f9cf931dd526c022
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_steps
@@ -0,0 +1 @@
+0.2
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_strategy b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_strategy
new file mode 100644
index 0000000000000000000000000000000000000000..17f15e19cf5e8064aff8d528657b70e9611eb59e
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_strategy
@@ -0,0 +1 @@
+steps
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_total_limit b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_total_limit
new file mode 100644
index 0000000000000000000000000000000000000000..bf0d87ab1b2b0ec1a11a3973d2845b42413d9767
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/save_total_limit
@@ -0,0 +1 @@
+4
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/seed b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/seed
new file mode 100644
index 0000000000000000000000000000000000000000..f70d7bba4ae1f07682e0358bd7a2068094fc023b
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/seed
@@ -0,0 +1 @@
+42
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sep_token_id b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sep_token_id
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sep_token_id
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/skip_memory_metrics b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/skip_memory_metrics
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/skip_memory_metrics
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sliding_window b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sliding_window
new file mode 100644
index 0000000000000000000000000000000000000000..1b18a99c9b4c83c582cf696ed55b1c1d79776fa2
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/sliding_window
@@ -0,0 +1 @@
+4096
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/split_batches b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/split_batches
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/split_batches
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/suppress_tokens b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/suppress_tokens
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/suppress_tokens
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/task_specific_params b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/task_specific_params
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/task_specific_params
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/temperature b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/temperature
new file mode 100644
index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/temperature
@@ -0,0 +1 @@
+1.0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tf32 b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tf32
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tf32
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tf_legacy_loss b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tf_legacy_loss
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tf_legacy_loss
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tie_encoder_decoder b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tie_encoder_decoder
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tie_encoder_decoder
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tie_word_embeddings b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tie_word_embeddings
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tie_word_embeddings
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tokenizer_class b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tokenizer_class
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tokenizer_class
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/top_k b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/top_k
new file mode 100644
index 0000000000000000000000000000000000000000..c5b431b6cba29540b4b284840ff229bce0460886
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/top_k
@@ -0,0 +1 @@
+50
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/top_p b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/top_p
new file mode 100644
index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/top_p
@@ -0,0 +1 @@
+1.0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile_backend b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile_backend
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile_backend
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile_mode b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile_mode
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_compile_mode
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_dtype b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_dtype
new file mode 100644
index 0000000000000000000000000000000000000000..8481ec0098496c454d11e66437510c620f01aa78
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torch_dtype
@@ -0,0 +1 @@
+bfloat16
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torchdynamo b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torchdynamo
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torchdynamo
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torchscript b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torchscript
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/torchscript
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tpu_metrics_debug b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tpu_metrics_debug
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tpu_metrics_debug
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tpu_num_cores b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tpu_num_cores
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/tpu_num_cores
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/transformers_version b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/transformers_version
new file mode 100644
index 0000000000000000000000000000000000000000..9ba2e2253c54c56d823046083e9f5b13cd908bce
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/transformers_version
@@ -0,0 +1 @@
+4.39.0.dev0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/typical_p b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/typical_p
new file mode 100644
index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/typical_p
@@ -0,0 +1 @@
+1.0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_bfloat16 b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_bfloat16
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_bfloat16
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_cache b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_cache
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_cache
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_cpu b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_cpu
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_cpu
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_ipex b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_ipex
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_ipex
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_legacy_prediction_loop b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_legacy_prediction_loop
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_legacy_prediction_loop
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_mps_device b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_mps_device
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/use_mps_device
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/vocab_size b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/vocab_size
new file mode 100644
index 0000000000000000000000000000000000000000..be79d9de6ef46aa65d12681dc5186fd34ea022dc
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/vocab_size
@@ -0,0 +1 @@
+32000
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/warmup_ratio b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/warmup_ratio
new file mode 100644
index 0000000000000000000000000000000000000000..171538eb0b00f4eddffa17929796de55b838f34b
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/warmup_ratio
@@ -0,0 +1 @@
+0.0
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/warmup_steps b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/warmup_steps
new file mode 100644
index 0000000000000000000000000000000000000000..9a037142aa3c1b4c490e1a38251620f113465330
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/warmup_steps
@@ -0,0 +1 @@
+10
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/weight_decay b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/weight_decay
new file mode 100644
index 0000000000000000000000000000000000000000..eb5a1db868251c6a5c775b49efde91a5ec3205df
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/params/weight_decay
@@ -0,0 +1 @@
+0.001
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.runName b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.runName
new file mode 100644
index 0000000000000000000000000000000000000000..99ea28c0280583d6cba05d7aaf0cc98dbfb10fa1
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.runName
@@ -0,0 +1 @@
+./qlora-out
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.git.commit b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.git.commit
new file mode 100644
index 0000000000000000000000000000000000000000..4fc1b7e7153b36dcfda203e96431b2788ace8884
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.git.commit
@@ -0,0 +1 @@
+8984bf17226f3abc4080d0e3decc28ff1d70178b
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.name b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.name
new file mode 100644
index 0000000000000000000000000000000000000000..b7588b4c16a4783516407529e743d1a6a58ae99c
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.name
@@ -0,0 +1 @@
+/users/dhd2000/axolotl/src/axolotl/cli/train.py
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.type b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.type
new file mode 100644
index 0000000000000000000000000000000000000000..0c2c1fe9dc63b7040bb81006635e50fd528f056f
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.source.type
@@ -0,0 +1 @@
+LOCAL
\ No newline at end of file
diff --git a/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.user b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.user
new file mode 100644
index 0000000000000000000000000000000000000000..7d966af2758f0b52651d138ee5ec43fb59151a4d
--- /dev/null
+++ b/mlruns/0/7e75ece8e18e485db64e4e2d9196e738/tags/mlflow.user
@@ -0,0 +1 @@
+dhd2000
\ No newline at end of file
diff --git a/mlruns/0/meta.yaml b/mlruns/0/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8bf7f4eba3a8df361eaca45f4a023a4c1dedc754
--- /dev/null
+++ b/mlruns/0/meta.yaml
@@ -0,0 +1,6 @@
+artifact_location: file:///mnt/scratch/users/dhd2000/ft14/mlruns/0
+creation_time: 1712647776665
+experiment_id: '0'
+last_update_time: 1712647776665
+lifecycle_stage: active
+name: Default
diff --git a/qlora-out/README.md b/qlora-out/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ebea70108b15703a03e6b25ee9a9f435857a67be
--- /dev/null
+++ b/qlora-out/README.md
@@ -0,0 +1,166 @@
+---
+license: apache-2.0
+library_name: peft
+tags:
+- generated_from_trainer
+base_model: mistralai/Mistral-7B-v0.1
+model-index:
+- name: qlora-out
+  results: []
+---
+
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+
+[<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
+<details><summary>See axolotl config</summary>
+
+axolotl version: `0.4.0`
+```yaml
+base_model: mistralai/Mistral-7B-v0.1
+model_type: MistralForCausalLM
+tokenizer_type: LlamaTokenizer
+
+load_in_8bit: false
+load_in_4bit: true
+strict: false
+
+datasets:
+  - path: caffeinatedcherrychic/cidds-agg-balanced
+    type: alpaca
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.1
+output_dir: ./qlora-out
+
+adapter: qlora
+lora_model_dir:
+
+sequence_len: 256
+sample_packing: false
+pad_to_sequence_len: true
+
+lora_r: 32
+lora_alpha: 64
+lora_dropout: 0.05
+lora_target_linear: true
+lora_fan_in_fan_out:
+lora_target_modules:
+  - gate_proj
+  - down_proj
+  - up_proj
+  - q_proj
+  - v_proj
+  - k_proj
+  - o_proj
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 4
+micro_batch_size: 2
+num_epochs: 5
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: true
+fp16: false
+tf32: false
+
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: true
+
+loss_watchdog_threshold: 5.0
+loss_watchdog_patience: 3
+
+max_steps: 500
+warmup_steps: 10
+evals_per_epoch: 4
+eval_table_size:
+eval_max_new_tokens: 1
+saves_per_epoch: 1
+debug:
+deepspeed:
+weight_decay: 0.001
+fsdp:
+fsdp_config:
+special_tokens:
+
+
+```
+
+</details><br>
+
+# qlora-out
+
+This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.1465
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 0.0002
+- train_batch_size: 2
+- eval_batch_size: 2
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 8
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 10
+- training_steps: 62
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 6.6367        | 0.08  | 1    | 7.3009          |
+| 2.3866        | 0.32  | 4    | 0.7138          |
+| 0.948         | 0.64  | 8    | 1.0446          |
+| 0.6822        | 0.96  | 12   | 1.3960          |
+| 0.5222        | 1.28  | 16   | 0.9023          |
+| 0.534         | 1.6   | 20   | 0.4847          |
+| 0.4624        | 1.92  | 24   | 0.5740          |
+| 0.7753        | 2.24  | 28   | 0.3772          |
+| 0.3324        | 2.56  | 32   | 0.2937          |
+| 0.1973        | 2.88  | 36   | 0.5675          |
+| 0.0843        | 3.2   | 40   | 0.2360          |
+| 0.3836        | 3.52  | 44   | 0.1397          |
+| 0.0449        | 3.84  | 48   | 0.2801          |
+| 0.2246        | 4.16  | 52   | 0.1946          |
+| 0.229         | 4.48  | 56   | 0.1618          |
+| 0.3073        | 4.8   | 60   | 0.1465          |
+
+
+### Framework versions
+
+- PEFT 0.10.1.dev0
+- Transformers 4.39.0.dev0
+- Pytorch 2.1.2
+- Datasets 2.18.0
+- Tokenizers 0.15.0
\ No newline at end of file
diff --git a/qlora-out/adapter_config.json b/qlora-out/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..78cef8595a53cdf83ff2d2b039d6740c09ac7281
--- /dev/null
+++ b/qlora-out/adapter_config.json
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "up_proj",
+    "k_proj",
+    "gate_proj",
+    "o_proj",
+    "down_proj",
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/qlora-out/adapter_model.bin b/qlora-out/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1f2392f76445b62e1e189b4ce3580a6e1cf85d81
--- /dev/null
+++ b/qlora-out/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef894f6daf736ab4a35fe0fba96204d34d3a179661233fc32771e92bcb515b0d
+size 335706186
diff --git a/qlora-out/checkpoint-13/README.md b/qlora-out/checkpoint-13/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..dcac0d91142fbcc17d01002ba7be40a55deb5c28
--- /dev/null
+++ b/qlora-out/checkpoint-13/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.1.dev0
\ No newline at end of file
diff --git a/qlora-out/checkpoint-13/adapter_config.json b/qlora-out/checkpoint-13/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..78cef8595a53cdf83ff2d2b039d6740c09ac7281
--- /dev/null
+++ b/qlora-out/checkpoint-13/adapter_config.json
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "up_proj",
+    "k_proj",
+    "gate_proj",
+    "o_proj",
+    "down_proj",
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/qlora-out/checkpoint-13/adapter_model.safetensors b/qlora-out/checkpoint-13/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b17e6811e3c4a3d38c6d918307672461b4b20c6c
--- /dev/null
+++ b/qlora-out/checkpoint-13/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72069b2abc2e8e408822bca99f6492f6272dff7f199d0afff420f28fdcde57ab
+size 335604696
diff --git a/qlora-out/checkpoint-13/optimizer.pt b/qlora-out/checkpoint-13/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1a69b0f63a51467febdd838d8915b95f3b9ef3dc
--- /dev/null
+++ b/qlora-out/checkpoint-13/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aff099a7ecc6bc7c04d5f8fd80d2443dd9f492cb12877c91fe4ea29066d9dd08
+size 168624724
diff --git a/qlora-out/checkpoint-13/rng_state.pth b/qlora-out/checkpoint-13/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..06b070741c8d2998636045e4a082ef320b192eef
--- /dev/null
+++ b/qlora-out/checkpoint-13/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74fd0abf3b25d5f521218bb97508206369e6984af4f556dd58b22d5dfbbb6425
+size 14244
diff --git a/qlora-out/checkpoint-13/scheduler.pt b/qlora-out/checkpoint-13/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f0d874288e76b9ca8cadf690dd2ac36327360d2b
--- /dev/null
+++ b/qlora-out/checkpoint-13/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d3b6aff690f8457dc46d75813d9f660109e8ec63e2dc8cbf92e4d726c3a8a8c
+size 1064
diff --git a/qlora-out/checkpoint-13/trainer_state.json b/qlora-out/checkpoint-13/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c1194d1e012f34b5a299f9b1b6742b666ad61ef9
--- /dev/null
+++ b/qlora-out/checkpoint-13/trainer_state.json
@@ -0,0 +1,144 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.04,
+  "eval_steps": 4,
+  "global_step": 13,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 102.28898620605469,
+      "learning_rate": 2e-05,
+      "loss": 6.6367,
+      "step": 1
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 7.300913333892822,
+      "eval_runtime": 1.3523,
+      "eval_samples_per_second": 8.873,
+      "eval_steps_per_second": 4.437,
+      "step": 1
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 103.4541015625,
+      "learning_rate": 4e-05,
+      "loss": 7.0616,
+      "step": 2
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 67.47515869140625,
+      "learning_rate": 6e-05,
+      "loss": 4.686,
+      "step": 3
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 72.36919403076172,
+      "learning_rate": 8e-05,
+      "loss": 2.3866,
+      "step": 4
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.7137572169303894,
+      "eval_runtime": 1.3532,
+      "eval_samples_per_second": 8.868,
+      "eval_steps_per_second": 4.434,
+      "step": 4
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 16.83085060119629,
+      "learning_rate": 0.0001,
+      "loss": 0.6844,
+      "step": 5
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 25.897714614868164,
+      "learning_rate": 0.00012,
+      "loss": 0.914,
+      "step": 6
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 18.89151382446289,
+      "learning_rate": 0.00014,
+      "loss": 0.63,
+      "step": 7
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 27.15555763244629,
+      "learning_rate": 0.00016,
+      "loss": 0.948,
+      "step": 8
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 1.0445994138717651,
+      "eval_runtime": 1.356,
+      "eval_samples_per_second": 8.85,
+      "eval_steps_per_second": 4.425,
+      "step": 8
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 20.812381744384766,
+      "learning_rate": 0.00018,
+      "loss": 1.0285,
+      "step": 9
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 56.3886604309082,
+      "learning_rate": 0.0002,
+      "loss": 1.3756,
+      "step": 10
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 6.24803352355957,
+      "learning_rate": 0.00019981755542233177,
+      "loss": 0.5178,
+      "step": 11
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 8.379430770874023,
+      "learning_rate": 0.0001992708874098054,
+      "loss": 0.6822,
+      "step": 12
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 1.3959709405899048,
+      "eval_runtime": 1.3583,
+      "eval_samples_per_second": 8.835,
+      "eval_steps_per_second": 4.417,
+      "step": 12
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 20.744348526000977,
+      "learning_rate": 0.00019836199069471437,
+      "loss": 1.3762,
+      "step": 13
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 62,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 13,
+  "total_flos": 1138234761412608.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/qlora-out/checkpoint-13/training_args.bin b/qlora-out/checkpoint-13/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3934f8a185d5789fd8a9250dff7398cf3eb121d1
--- /dev/null
+++ b/qlora-out/checkpoint-13/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66fede3f83b4ad6ce095e0aa09047d95bd4acc13170780f9e890d9d17d1bdace
+size 5624
diff --git a/qlora-out/checkpoint-26/README.md b/qlora-out/checkpoint-26/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..dcac0d91142fbcc17d01002ba7be40a55deb5c28
--- /dev/null
+++ b/qlora-out/checkpoint-26/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.1.dev0
\ No newline at end of file
diff --git a/qlora-out/checkpoint-26/adapter_config.json b/qlora-out/checkpoint-26/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..78cef8595a53cdf83ff2d2b039d6740c09ac7281
--- /dev/null
+++ b/qlora-out/checkpoint-26/adapter_config.json
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "up_proj",
+    "k_proj",
+    "gate_proj",
+    "o_proj",
+    "down_proj",
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/qlora-out/checkpoint-26/adapter_model.safetensors b/qlora-out/checkpoint-26/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9ab45672da31dcc0e038b019c87497f1f36920c7
--- /dev/null
+++ b/qlora-out/checkpoint-26/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:387499c7736d8b7c5cab21843d9b986ad31e4777afa1c953e254a6b821622ab8
+size 335604696
diff --git a/qlora-out/checkpoint-26/optimizer.pt b/qlora-out/checkpoint-26/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bac5a0d811c0aa5ff83105457ef69a16b347d02d
--- /dev/null
+++ b/qlora-out/checkpoint-26/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c26885c89f597923fecf6d91cf382dfac6eeea66972dd286bb6316360fd0bb69
+size 168624724
diff --git a/qlora-out/checkpoint-26/rng_state.pth b/qlora-out/checkpoint-26/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7ee0ba33d923fb061a56c5fc191e36bf2407d83f
--- /dev/null
+++ b/qlora-out/checkpoint-26/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69726c1b60735ec075cbe9ef238868d0b5845ade6b93bfd60e810fcee5f233a5
+size 14244
diff --git a/qlora-out/checkpoint-26/scheduler.pt b/qlora-out/checkpoint-26/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b71c24dad62dfe21c7661f7b95351662ad638d28
--- /dev/null
+++ b/qlora-out/checkpoint-26/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c22f4d3e17b1ff1ac5db395ab84ba067bc34a07791275897d3efe0cf1944d439
+size 1064
diff --git a/qlora-out/checkpoint-26/trainer_state.json b/qlora-out/checkpoint-26/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f678315072628755e901826a950d986954b8def7
--- /dev/null
+++ b/qlora-out/checkpoint-26/trainer_state.json
@@ -0,0 +1,259 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.08,
+  "eval_steps": 4,
+  "global_step": 26,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 102.28898620605469,
+      "learning_rate": 2e-05,
+      "loss": 6.6367,
+      "step": 1
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 7.300913333892822,
+      "eval_runtime": 1.3523,
+      "eval_samples_per_second": 8.873,
+      "eval_steps_per_second": 4.437,
+      "step": 1
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 103.4541015625,
+      "learning_rate": 4e-05,
+      "loss": 7.0616,
+      "step": 2
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 67.47515869140625,
+      "learning_rate": 6e-05,
+      "loss": 4.686,
+      "step": 3
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 72.36919403076172,
+      "learning_rate": 8e-05,
+      "loss": 2.3866,
+      "step": 4
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.7137572169303894,
+      "eval_runtime": 1.3532,
+      "eval_samples_per_second": 8.868,
+      "eval_steps_per_second": 4.434,
+      "step": 4
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 16.83085060119629,
+      "learning_rate": 0.0001,
+      "loss": 0.6844,
+      "step": 5
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 25.897714614868164,
+      "learning_rate": 0.00012,
+      "loss": 0.914,
+      "step": 6
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 18.89151382446289,
+      "learning_rate": 0.00014,
+      "loss": 0.63,
+      "step": 7
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 27.15555763244629,
+      "learning_rate": 0.00016,
+      "loss": 0.948,
+      "step": 8
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 1.0445994138717651,
+      "eval_runtime": 1.356,
+      "eval_samples_per_second": 8.85,
+      "eval_steps_per_second": 4.425,
+      "step": 8
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 20.812381744384766,
+      "learning_rate": 0.00018,
+      "loss": 1.0285,
+      "step": 9
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 56.3886604309082,
+      "learning_rate": 0.0002,
+      "loss": 1.3756,
+      "step": 10
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 6.24803352355957,
+      "learning_rate": 0.00019981755542233177,
+      "loss": 0.5178,
+      "step": 11
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 8.379430770874023,
+      "learning_rate": 0.0001992708874098054,
+      "loss": 0.6822,
+      "step": 12
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 1.3959709405899048,
+      "eval_runtime": 1.3583,
+      "eval_samples_per_second": 8.835,
+      "eval_steps_per_second": 4.417,
+      "step": 12
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 20.744348526000977,
+      "learning_rate": 0.00019836199069471437,
+      "loss": 1.3762,
+      "step": 13
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 4.800480842590332,
+      "learning_rate": 0.0001970941817426052,
+      "loss": 0.5248,
+      "step": 14
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 11.284302711486816,
+      "learning_rate": 0.00019547208665085457,
+      "loss": 0.8094,
+      "step": 15
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 5.787976264953613,
+      "learning_rate": 0.0001935016242685415,
+      "loss": 0.5222,
+      "step": 16
+    },
+    {
+      "epoch": 1.28,
+      "eval_loss": 0.9023411870002747,
+      "eval_runtime": 1.3623,
+      "eval_samples_per_second": 8.808,
+      "eval_steps_per_second": 4.404,
+      "step": 16
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 21.48629379272461,
+      "learning_rate": 0.00019118998459920902,
+      "loss": 0.8027,
+      "step": 17
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 38.0982666015625,
+      "learning_rate": 0.000188545602565321,
+      "loss": 1.7772,
+      "step": 18
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 10.824837684631348,
+      "learning_rate": 0.00018557812723014476,
+      "loss": 0.7737,
+      "step": 19
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 9.1353120803833,
+      "learning_rate": 0.00018229838658936564,
+      "loss": 0.534,
+      "step": 20
+    },
+    {
+      "epoch": 1.6,
+      "eval_loss": 0.4847445785999298,
+      "eval_runtime": 1.3637,
+      "eval_samples_per_second": 8.799,
+      "eval_steps_per_second": 4.4,
+      "step": 20
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 3.8411033153533936,
+      "learning_rate": 0.00017871834806090501,
+      "loss": 0.3201,
+      "step": 21
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 23.888507843017578,
+      "learning_rate": 0.00017485107481711012,
+      "loss": 2.2541,
+      "step": 22
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 8.5956392288208,
+      "learning_rate": 0.00017071067811865476,
+      "loss": 0.8177,
+      "step": 23
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 3.825141191482544,
+      "learning_rate": 0.00016631226582407952,
+      "loss": 0.4624,
+      "step": 24
+    },
+    {
+      "epoch": 1.92,
+      "eval_loss": 0.5740255117416382,
+      "eval_runtime": 1.3655,
+      "eval_samples_per_second": 8.788,
+      "eval_steps_per_second": 4.394,
+      "step": 24
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 3.558993101119995,
+      "learning_rate": 0.00016167188726285434,
+      "loss": 0.3714,
+      "step": 25
+    },
+    {
+      "epoch": 2.08,
+      "grad_norm": 11.759211540222168,
+      "learning_rate": 0.00015680647467311557,
+      "loss": 0.6562,
+      "step": 26
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 62,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 13,
+  "total_flos": 2276469522825216.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/qlora-out/checkpoint-26/training_args.bin b/qlora-out/checkpoint-26/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3934f8a185d5789fd8a9250dff7398cf3eb121d1
--- /dev/null
+++ b/qlora-out/checkpoint-26/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66fede3f83b4ad6ce095e0aa09047d95bd4acc13170780f9e890d9d17d1bdace
+size 5624
diff --git a/qlora-out/checkpoint-39/README.md b/qlora-out/checkpoint-39/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..dcac0d91142fbcc17d01002ba7be40a55deb5c28
--- /dev/null
+++ b/qlora-out/checkpoint-39/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.1.dev0
\ No newline at end of file
diff --git a/qlora-out/checkpoint-39/adapter_config.json b/qlora-out/checkpoint-39/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..78cef8595a53cdf83ff2d2b039d6740c09ac7281
--- /dev/null
+++ b/qlora-out/checkpoint-39/adapter_config.json
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "up_proj",
+    "k_proj",
+    "gate_proj",
+    "o_proj",
+    "down_proj",
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/qlora-out/checkpoint-39/adapter_model.safetensors b/qlora-out/checkpoint-39/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..55ee4532d97b81f281b67e51ca9f85074606ed3b
--- /dev/null
+++ b/qlora-out/checkpoint-39/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12b8939dee1c9d7c76fb429805ca8dd1be67417b78ad3ae2622ce37f2a7294d6
+size 335604696
diff --git a/qlora-out/checkpoint-39/optimizer.pt b/qlora-out/checkpoint-39/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..69c87fb68c98328af299c4b05221da1b5858daf7
--- /dev/null
+++ b/qlora-out/checkpoint-39/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62c9a9efa8eced911795343502191b7b9044f8b5aa46a6f27343859276faacbc
+size 168624724
diff --git a/qlora-out/checkpoint-39/rng_state.pth b/qlora-out/checkpoint-39/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5de01e34c268e2e9de291103c166f1d1e9371ae8
--- /dev/null
+++ b/qlora-out/checkpoint-39/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b83b87057617d19867b72bb4f1d7769198abfb127e1bef7a626c1e07b9dee3f2
+size 14244
diff --git a/qlora-out/checkpoint-39/scheduler.pt b/qlora-out/checkpoint-39/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..235fe106cb564e3df82a4f47fcfa5ebb0a72efd2
--- /dev/null
+++ b/qlora-out/checkpoint-39/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a8d987b7fe563f350e72415c21199e03eb1c8b092374967d449229a0b0fa9b1
+size 1064
diff --git a/qlora-out/checkpoint-39/trainer_state.json b/qlora-out/checkpoint-39/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..cb5e20ec1701c09095f855610376f99c2c833bbc
--- /dev/null
+++ b/qlora-out/checkpoint-39/trainer_state.json
@@ -0,0 +1,374 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.12,
+  "eval_steps": 4,
+  "global_step": 39,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 102.28898620605469,
+      "learning_rate": 2e-05,
+      "loss": 6.6367,
+      "step": 1
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 7.300913333892822,
+      "eval_runtime": 1.3523,
+      "eval_samples_per_second": 8.873,
+      "eval_steps_per_second": 4.437,
+      "step": 1
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 103.4541015625,
+      "learning_rate": 4e-05,
+      "loss": 7.0616,
+      "step": 2
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 67.47515869140625,
+      "learning_rate": 6e-05,
+      "loss": 4.686,
+      "step": 3
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 72.36919403076172,
+      "learning_rate": 8e-05,
+      "loss": 2.3866,
+      "step": 4
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.7137572169303894,
+      "eval_runtime": 1.3532,
+      "eval_samples_per_second": 8.868,
+      "eval_steps_per_second": 4.434,
+      "step": 4
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 16.83085060119629,
+      "learning_rate": 0.0001,
+      "loss": 0.6844,
+      "step": 5
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 25.897714614868164,
+      "learning_rate": 0.00012,
+      "loss": 0.914,
+      "step": 6
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 18.89151382446289,
+      "learning_rate": 0.00014,
+      "loss": 0.63,
+      "step": 7
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 27.15555763244629,
+      "learning_rate": 0.00016,
+      "loss": 0.948,
+      "step": 8
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 1.0445994138717651,
+      "eval_runtime": 1.356,
+      "eval_samples_per_second": 8.85,
+      "eval_steps_per_second": 4.425,
+      "step": 8
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 20.812381744384766,
+      "learning_rate": 0.00018,
+      "loss": 1.0285,
+      "step": 9
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 56.3886604309082,
+      "learning_rate": 0.0002,
+      "loss": 1.3756,
+      "step": 10
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 6.24803352355957,
+      "learning_rate": 0.00019981755542233177,
+      "loss": 0.5178,
+      "step": 11
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 8.379430770874023,
+      "learning_rate": 0.0001992708874098054,
+      "loss": 0.6822,
+      "step": 12
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 1.3959709405899048,
+      "eval_runtime": 1.3583,
+      "eval_samples_per_second": 8.835,
+      "eval_steps_per_second": 4.417,
+      "step": 12
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 20.744348526000977,
+      "learning_rate": 0.00019836199069471437,
+      "loss": 1.3762,
+      "step": 13
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 4.800480842590332,
+      "learning_rate": 0.0001970941817426052,
+      "loss": 0.5248,
+      "step": 14
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 11.284302711486816,
+      "learning_rate": 0.00019547208665085457,
+      "loss": 0.8094,
+      "step": 15
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 5.787976264953613,
+      "learning_rate": 0.0001935016242685415,
+      "loss": 0.5222,
+      "step": 16
+    },
+    {
+      "epoch": 1.28,
+      "eval_loss": 0.9023411870002747,
+      "eval_runtime": 1.3623,
+      "eval_samples_per_second": 8.808,
+      "eval_steps_per_second": 4.404,
+      "step": 16
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 21.48629379272461,
+      "learning_rate": 0.00019118998459920902,
+      "loss": 0.8027,
+      "step": 17
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 38.0982666015625,
+      "learning_rate": 0.000188545602565321,
+      "loss": 1.7772,
+      "step": 18
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 10.824837684631348,
+      "learning_rate": 0.00018557812723014476,
+      "loss": 0.7737,
+      "step": 19
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 9.1353120803833,
+      "learning_rate": 0.00018229838658936564,
+      "loss": 0.534,
+      "step": 20
+    },
+    {
+      "epoch": 1.6,
+      "eval_loss": 0.4847445785999298,
+      "eval_runtime": 1.3637,
+      "eval_samples_per_second": 8.799,
+      "eval_steps_per_second": 4.4,
+      "step": 20
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 3.8411033153533936,
+      "learning_rate": 0.00017871834806090501,
+      "loss": 0.3201,
+      "step": 21
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 23.888507843017578,
+      "learning_rate": 0.00017485107481711012,
+      "loss": 2.2541,
+      "step": 22
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 8.5956392288208,
+      "learning_rate": 0.00017071067811865476,
+      "loss": 0.8177,
+      "step": 23
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 3.825141191482544,
+      "learning_rate": 0.00016631226582407952,
+      "loss": 0.4624,
+      "step": 24
+    },
+    {
+      "epoch": 1.92,
+      "eval_loss": 0.5740255117416382,
+      "eval_runtime": 1.3655,
+      "eval_samples_per_second": 8.788,
+      "eval_steps_per_second": 4.394,
+      "step": 24
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 3.558993101119995,
+      "learning_rate": 0.00016167188726285434,
+      "loss": 0.3714,
+      "step": 25
+    },
+    {
+      "epoch": 2.08,
+      "grad_norm": 11.759211540222168,
+      "learning_rate": 0.00015680647467311557,
+      "loss": 0.6562,
+      "step": 26
+    },
+    {
+      "epoch": 2.16,
+      "grad_norm": 96.2179183959961,
+      "learning_rate": 0.00015173378141776568,
+      "loss": 1.5141,
+      "step": 27
+    },
+    {
+      "epoch": 2.24,
+      "grad_norm": 31.022045135498047,
+      "learning_rate": 0.00014647231720437686,
+      "loss": 0.7753,
+      "step": 28
+    },
+    {
+      "epoch": 2.24,
+      "eval_loss": 0.3771994113922119,
+      "eval_runtime": 1.3676,
+      "eval_samples_per_second": 8.775,
+      "eval_steps_per_second": 4.387,
+      "step": 28
+    },
+    {
+      "epoch": 2.32,
+      "grad_norm": 3.5004501342773438,
+      "learning_rate": 0.0001410412805452757,
+      "loss": 0.2649,
+      "step": 29
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 5.16464376449585,
+      "learning_rate": 0.00013546048870425356,
+      "loss": 0.171,
+      "step": 30
+    },
+    {
+      "epoch": 2.48,
+      "grad_norm": 25.634010314941406,
+      "learning_rate": 0.00012975030538552032,
+      "loss": 0.9172,
+      "step": 31
+    },
+    {
+      "epoch": 2.56,
+      "grad_norm": 7.102908134460449,
+      "learning_rate": 0.0001239315664287558,
+      "loss": 0.3324,
+      "step": 32
+    },
+    {
+      "epoch": 2.56,
+      "eval_loss": 0.29374203085899353,
+      "eval_runtime": 1.3678,
+      "eval_samples_per_second": 8.773,
+      "eval_steps_per_second": 4.387,
+      "step": 32
+    },
+    {
+      "epoch": 2.64,
+      "grad_norm": 6.236325263977051,
+      "learning_rate": 0.0001180255037813906,
+      "loss": 0.4932,
+      "step": 33
+    },
+    {
+      "epoch": 2.72,
+      "grad_norm": 4.445058345794678,
+      "learning_rate": 0.0001120536680255323,
+      "loss": 0.1284,
+      "step": 34
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 6.94170618057251,
+      "learning_rate": 0.00010603784974222861,
+      "loss": 0.1547,
+      "step": 35
+    },
+    {
+      "epoch": 2.88,
+      "grad_norm": 5.656033039093018,
+      "learning_rate": 0.0001,
+      "loss": 0.1973,
+      "step": 36
+    },
+    {
+      "epoch": 2.88,
+      "eval_loss": 0.5674905180931091,
+      "eval_runtime": 1.3681,
+      "eval_samples_per_second": 8.771,
+      "eval_steps_per_second": 4.386,
+      "step": 36
+    },
+    {
+      "epoch": 2.96,
+      "grad_norm": 18.19667625427246,
+      "learning_rate": 9.396215025777139e-05,
+      "loss": 0.4884,
+      "step": 37
+    },
+    {
+      "epoch": 3.04,
+      "grad_norm": 17.964893341064453,
+      "learning_rate": 8.79463319744677e-05,
+      "loss": 0.5526,
+      "step": 38
+    },
+    {
+      "epoch": 3.12,
+      "grad_norm": 5.015590190887451,
+      "learning_rate": 8.197449621860943e-05,
+      "loss": 0.2116,
+      "step": 39
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 62,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 13,
+  "total_flos": 3414704284237824.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/qlora-out/checkpoint-39/training_args.bin b/qlora-out/checkpoint-39/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3934f8a185d5789fd8a9250dff7398cf3eb121d1
--- /dev/null
+++ b/qlora-out/checkpoint-39/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66fede3f83b4ad6ce095e0aa09047d95bd4acc13170780f9e890d9d17d1bdace
+size 5624
diff --git a/qlora-out/checkpoint-52/README.md b/qlora-out/checkpoint-52/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..dcac0d91142fbcc17d01002ba7be40a55deb5c28
--- /dev/null
+++ b/qlora-out/checkpoint-52/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: mistralai/Mistral-7B-v0.1
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.1.dev0
\ No newline at end of file
diff --git a/qlora-out/checkpoint-52/adapter_config.json b/qlora-out/checkpoint-52/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..78cef8595a53cdf83ff2d2b039d6740c09ac7281
--- /dev/null
+++ b/qlora-out/checkpoint-52/adapter_config.json
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "up_proj",
+    "k_proj",
+    "gate_proj",
+    "o_proj",
+    "down_proj",
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/qlora-out/checkpoint-52/adapter_model.safetensors b/qlora-out/checkpoint-52/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0f3e3e9824bd4618085512b96aac10c77eb8c5ea
--- /dev/null
+++ b/qlora-out/checkpoint-52/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61f543a16c2bbb11166292af99cbab42fa039c72766ce2da396aa279512c9d67
+size 335604696
diff --git a/qlora-out/checkpoint-52/optimizer.pt b/qlora-out/checkpoint-52/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc12ac5ed03f5f623348860fc82033908b7d6cdd
--- /dev/null
+++ b/qlora-out/checkpoint-52/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc05bf731a50997e7af44d91b701be1a9474180b446eef7cccd0a9bb6f49593f
+size 168624724
diff --git a/qlora-out/checkpoint-52/rng_state.pth b/qlora-out/checkpoint-52/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9dc5f77bdc7f8a0a4a583cc7b6ea5bb09e3f0320
--- /dev/null
+++ b/qlora-out/checkpoint-52/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d215519440d74cb3c2d938d0a6d0dcc602aa66ebc4017b44adae1cc4c34379e9
+size 14244
diff --git a/qlora-out/checkpoint-52/scheduler.pt b/qlora-out/checkpoint-52/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c05e4f67aa0cfdfc7b570266139876956d57a50b
--- /dev/null
+++ b/qlora-out/checkpoint-52/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:210095055e4e9fa9a08e2ee8a6ef338aebf6d1d63c758470bd2537cf069290da
+size 1064
diff --git a/qlora-out/checkpoint-52/trainer_state.json b/qlora-out/checkpoint-52/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..808479166153679b1687744fdeb4ab2771cc8f2f
--- /dev/null
+++ b/qlora-out/checkpoint-52/trainer_state.json
@@ -0,0 +1,497 @@
+{
+  "best_metric": 0.19463467597961426,
+  "best_model_checkpoint": "./qlora-out/checkpoint-52",
+  "epoch": 4.16,
+  "eval_steps": 4,
+  "global_step": 52,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 102.28898620605469,
+      "learning_rate": 2e-05,
+      "loss": 6.6367,
+      "step": 1
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 7.300913333892822,
+      "eval_runtime": 1.3523,
+      "eval_samples_per_second": 8.873,
+      "eval_steps_per_second": 4.437,
+      "step": 1
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 103.4541015625,
+      "learning_rate": 4e-05,
+      "loss": 7.0616,
+      "step": 2
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 67.47515869140625,
+      "learning_rate": 6e-05,
+      "loss": 4.686,
+      "step": 3
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 72.36919403076172,
+      "learning_rate": 8e-05,
+      "loss": 2.3866,
+      "step": 4
+    },
+    {
+      "epoch": 0.32,
+      "eval_loss": 0.7137572169303894,
+      "eval_runtime": 1.3532,
+      "eval_samples_per_second": 8.868,
+      "eval_steps_per_second": 4.434,
+      "step": 4
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 16.83085060119629,
+      "learning_rate": 0.0001,
+      "loss": 0.6844,
+      "step": 5
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 25.897714614868164,
+      "learning_rate": 0.00012,
+      "loss": 0.914,
+      "step": 6
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 18.89151382446289,
+      "learning_rate": 0.00014,
+      "loss": 0.63,
+      "step": 7
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 27.15555763244629,
+      "learning_rate": 0.00016,
+      "loss": 0.948,
+      "step": 8
+    },
+    {
+      "epoch": 0.64,
+      "eval_loss": 1.0445994138717651,
+      "eval_runtime": 1.356,
+      "eval_samples_per_second": 8.85,
+      "eval_steps_per_second": 4.425,
+      "step": 8
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 20.812381744384766,
+      "learning_rate": 0.00018,
+      "loss": 1.0285,
+      "step": 9
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 56.3886604309082,
+      "learning_rate": 0.0002,
+      "loss": 1.3756,
+      "step": 10
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 6.24803352355957,
+      "learning_rate": 0.00019981755542233177,
+      "loss": 0.5178,
+      "step": 11
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 8.379430770874023,
+      "learning_rate": 0.0001992708874098054,
+      "loss": 0.6822,
+      "step": 12
+    },
+    {
+      "epoch": 0.96,
+      "eval_loss": 1.3959709405899048,
+      "eval_runtime": 1.3583,
+      "eval_samples_per_second": 8.835,
+      "eval_steps_per_second": 4.417,
+      "step": 12
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 20.744348526000977,
+      "learning_rate": 0.00019836199069471437,
+      "loss": 1.3762,
+      "step": 13
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 4.800480842590332,
+      "learning_rate": 0.0001970941817426052,
+      "loss": 0.5248,
+      "step": 14
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 11.284302711486816,
+      "learning_rate": 0.00019547208665085457,
+      "loss": 0.8094,
+      "step": 15
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 5.787976264953613,
+      "learning_rate": 0.0001935016242685415,
+      "loss": 0.5222,
+      "step": 16
+    },
+    {
+      "epoch": 1.28,
+      "eval_loss": 0.9023411870002747,
+      "eval_runtime": 1.3623,
+      "eval_samples_per_second": 8.808,
+      "eval_steps_per_second": 4.404,
+      "step": 16
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 21.48629379272461,
+      "learning_rate": 0.00019118998459920902,
+      "loss": 0.8027,
+      "step": 17
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 38.0982666015625,
+      "learning_rate": 0.000188545602565321,
+      "loss": 1.7772,
+      "step": 18
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 10.824837684631348,
+      "learning_rate": 0.00018557812723014476,
+      "loss": 0.7737,
+      "step": 19
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 9.1353120803833,
+      "learning_rate": 0.00018229838658936564,
+      "loss": 0.534,
+      "step": 20
+    },
+    {
+      "epoch": 1.6,
+      "eval_loss": 0.4847445785999298,
+      "eval_runtime": 1.3637,
+      "eval_samples_per_second": 8.799,
+      "eval_steps_per_second": 4.4,
+      "step": 20
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 3.8411033153533936,
+      "learning_rate": 0.00017871834806090501,
+      "loss": 0.3201,
+      "step": 21
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 23.888507843017578,
+      "learning_rate": 0.00017485107481711012,
+      "loss": 2.2541,
+      "step": 22
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 8.5956392288208,
+      "learning_rate": 0.00017071067811865476,
+      "loss": 0.8177,
+      "step": 23
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 3.825141191482544,
+      "learning_rate": 0.00016631226582407952,
+      "loss": 0.4624,
+      "step": 24
+    },
+    {
+      "epoch": 1.92,
+      "eval_loss": 0.5740255117416382,
+      "eval_runtime": 1.3655,
+      "eval_samples_per_second": 8.788,
+      "eval_steps_per_second": 4.394,
+      "step": 24
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 3.558993101119995,
+      "learning_rate": 0.00016167188726285434,
+      "loss": 0.3714,
+      "step": 25
+    },
+    {
+      "epoch": 2.08,
+      "grad_norm": 11.759211540222168,
+      "learning_rate": 0.00015680647467311557,
+      "loss": 0.6562,
+      "step": 26
+    },
+    {
+      "epoch": 2.16,
+      "grad_norm": 96.2179183959961,
+      "learning_rate": 0.00015173378141776568,
+      "loss": 1.5141,
+      "step": 27
+    },
+    {
+      "epoch": 2.24,
+      "grad_norm": 31.022045135498047,
+      "learning_rate": 0.00014647231720437686,
+      "loss": 0.7753,
+      "step": 28
+    },
+    {
+      "epoch": 2.24,
+      "eval_loss": 0.3771994113922119,
+      "eval_runtime": 1.3676,
+      "eval_samples_per_second": 8.775,
+      "eval_steps_per_second": 4.387,
+      "step": 28
+    },
+    {
+      "epoch": 2.32,
+      "grad_norm": 3.5004501342773438,
+      "learning_rate": 0.0001410412805452757,
+      "loss": 0.2649,
+      "step": 29
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 5.16464376449585,
+      "learning_rate": 0.00013546048870425356,
+      "loss": 0.171,
+      "step": 30
+    },
+    {
+      "epoch": 2.48,
+      "grad_norm": 25.634010314941406,
+      "learning_rate": 0.00012975030538552032,
+      "loss": 0.9172,
+      "step": 31
+    },
+    {
+      "epoch": 2.56,
+      "grad_norm": 7.102908134460449,
+      "learning_rate": 0.0001239315664287558,
+      "loss": 0.3324,
+      "step": 32
+    },
+    {
+      "epoch": 2.56,
+      "eval_loss": 0.29374203085899353,
+      "eval_runtime": 1.3678,
+      "eval_samples_per_second": 8.773,
+      "eval_steps_per_second": 4.387,
+      "step": 32
+    },
+    {
+      "epoch": 2.64,
+      "grad_norm": 6.236325263977051,
+      "learning_rate": 0.0001180255037813906,
+      "loss": 0.4932,
+      "step": 33
+    },
+    {
+      "epoch": 2.72,
+      "grad_norm": 4.445058345794678,
+      "learning_rate": 0.0001120536680255323,
+      "loss": 0.1284,
+      "step": 34
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 6.94170618057251,
+      "learning_rate": 0.00010603784974222861,
+      "loss": 0.1547,
+      "step": 35
+    },
+    {
+      "epoch": 2.88,
+      "grad_norm": 5.656033039093018,
+      "learning_rate": 0.0001,
+      "loss": 0.1973,
+      "step": 36
+    },
+    {
+      "epoch": 2.88,
+      "eval_loss": 0.5674905180931091,
+      "eval_runtime": 1.3681,
+      "eval_samples_per_second": 8.771,
+      "eval_steps_per_second": 4.386,
+      "step": 36
+    },
+    {
+      "epoch": 2.96,
+      "grad_norm": 18.19667625427246,
+      "learning_rate": 9.396215025777139e-05,
+      "loss": 0.4884,
+      "step": 37
+    },
+    {
+      "epoch": 3.04,
+      "grad_norm": 17.964893341064453,
+      "learning_rate": 8.79463319744677e-05,
+      "loss": 0.5526,
+      "step": 38
+    },
+    {
+      "epoch": 3.12,
+      "grad_norm": 5.015590190887451,
+      "learning_rate": 8.197449621860943e-05,
+      "loss": 0.2116,
+      "step": 39
+    },
+    {
+      "epoch": 3.2,
+      "grad_norm": 5.6883225440979,
+      "learning_rate": 7.606843357124426e-05,
+      "loss": 0.0843,
+      "step": 40
+    },
+    {
+      "epoch": 3.2,
+      "eval_loss": 0.2360386848449707,
+      "eval_runtime": 1.3667,
+      "eval_samples_per_second": 8.78,
+      "eval_steps_per_second": 4.39,
+      "step": 40
+    },
+    {
+      "epoch": 3.28,
+      "grad_norm": 6.636446475982666,
+      "learning_rate": 7.024969461447972e-05,
+      "loss": 0.1158,
+      "step": 41
+    },
+    {
+      "epoch": 3.36,
+      "grad_norm": 4.405576229095459,
+      "learning_rate": 6.453951129574644e-05,
+      "loss": 0.2755,
+      "step": 42
+    },
+    {
+      "epoch": 3.44,
+      "grad_norm": 1.6179524660110474,
+      "learning_rate": 5.8958719454724346e-05,
+      "loss": 0.0186,
+      "step": 43
+    },
+    {
+      "epoch": 3.52,
+      "grad_norm": 8.783114433288574,
+      "learning_rate": 5.3527682795623146e-05,
+      "loss": 0.3836,
+      "step": 44
+    },
+    {
+      "epoch": 3.52,
+      "eval_loss": 0.13969357311725616,
+      "eval_runtime": 1.3687,
+      "eval_samples_per_second": 8.767,
+      "eval_steps_per_second": 4.384,
+      "step": 44
+    },
+    {
+      "epoch": 3.6,
+      "grad_norm": 0.8835445046424866,
+      "learning_rate": 4.826621858223431e-05,
+      "loss": 0.0141,
+      "step": 45
+    },
+    {
+      "epoch": 3.68,
+      "grad_norm": 12.678099632263184,
+      "learning_rate": 4.3193525326884435e-05,
+      "loss": 0.6196,
+      "step": 46
+    },
+    {
+      "epoch": 3.76,
+      "grad_norm": 5.320870876312256,
+      "learning_rate": 3.832811273714569e-05,
+      "loss": 0.0948,
+      "step": 47
+    },
+    {
+      "epoch": 3.84,
+      "grad_norm": 2.7501108646392822,
+      "learning_rate": 3.36877341759205e-05,
+      "loss": 0.0449,
+      "step": 48
+    },
+    {
+      "epoch": 3.84,
+      "eval_loss": 0.2801015079021454,
+      "eval_runtime": 1.3706,
+      "eval_samples_per_second": 8.755,
+      "eval_steps_per_second": 4.378,
+      "step": 48
+    },
+    {
+      "epoch": 3.92,
+      "grad_norm": 4.41072940826416,
+      "learning_rate": 2.9289321881345254e-05,
+      "loss": 0.3026,
+      "step": 49
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 1.2105910778045654,
+      "learning_rate": 2.514892518288988e-05,
+      "loss": 0.0152,
+      "step": 50
+    },
+    {
+      "epoch": 4.08,
+      "grad_norm": 4.502895355224609,
+      "learning_rate": 2.1281651939094992e-05,
+      "loss": 0.0629,
+      "step": 51
+    },
+    {
+      "epoch": 4.16,
+      "grad_norm": 6.058006286621094,
+      "learning_rate": 1.7701613410634365e-05,
+      "loss": 0.2246,
+      "step": 52
+    },
+    {
+      "epoch": 4.16,
+      "eval_loss": 0.19463467597961426,
+      "eval_runtime": 1.3725,
+      "eval_samples_per_second": 8.743,
+      "eval_steps_per_second": 4.372,
+      "step": 52
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 62,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 13,
+  "total_flos": 4552939045650432.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/qlora-out/checkpoint-52/training_args.bin b/qlora-out/checkpoint-52/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3934f8a185d5789fd8a9250dff7398cf3eb121d1
--- /dev/null
+++ b/qlora-out/checkpoint-52/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66fede3f83b4ad6ce095e0aa09047d95bd4acc13170780f9e890d9d17d1bdace
+size 5624
diff --git a/qlora-out/config.json b/qlora-out/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c1e39741fb665ec4711bd92a0c67178c983710d9
--- /dev/null
+++ b/qlora-out/config.json
@@ -0,0 +1,40 @@
+{
+  "_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "architectures": [
+    "MistralForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 32768,
+  "model_type": "mistral",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "bfloat16",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": true,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 10000.0,
+  "sliding_window": 4096,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.39.0.dev0",
+  "use_cache": false,
+  "vocab_size": 32000
+}
diff --git a/qlora-out/runs/Apr09_08-29-36_gpu06.pri.dmog.alces.network/events.out.tfevents.1712647777.gpu06.pri.dmog.alces.network.30736.0 b/qlora-out/runs/Apr09_08-29-36_gpu06.pri.dmog.alces.network/events.out.tfevents.1712647777.gpu06.pri.dmog.alces.network.30736.0
new file mode 100644
index 0000000000000000000000000000000000000000..6ba64c8069ced15f613f5a512ae2fad7aae6ded9
--- /dev/null
+++ b/qlora-out/runs/Apr09_08-29-36_gpu06.pri.dmog.alces.network/events.out.tfevents.1712647777.gpu06.pri.dmog.alces.network.30736.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b916e565a77dcb7d5bd53aba6f367407f84d56fd38e46a20f33d8b05d82f6ec7
+size 23212
diff --git a/qlora-out/special_tokens_map.json b/qlora-out/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1
--- /dev/null
+++ b/qlora-out/special_tokens_map.json
@@ -0,0 +1,24 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
diff --git a/qlora-out/tokenizer.model b/qlora-out/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6
--- /dev/null
+++ b/qlora-out/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443
diff --git a/qlora-out/tokenizer_config.json b/qlora-out/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..44ceae3369b580af560afc8670fe5db6f3296960
--- /dev/null
+++ b/qlora-out/tokenizer_config.json
@@ -0,0 +1,44 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "</s>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false,
+  "use_fast": true
+}