Model save

Files changed (9) hide show

README.md CHANGED Viewed

@@ -1,16 +1,11 @@
 ---
 license: apache-2.0
-base_model: mistralai/Mistral-7B-Instruct-v0.2
 tags:
-- alignment-handbook
 - trl
 - sft
 - generated_from_trainer
-- trl
-- sft
-- generated_from_trainer
-datasets:
-- preference-data
 model-index:
 - name: preference_p0.1_seed42_level2_raremixbatch16
   results: []
@@ -21,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
 # preference_p0.1_seed42_level2_raremixbatch16
-This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the preference-data dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.2881
 ## Model description
@@ -59,12 +54,13 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 0.3161        | 1.0   | 2414 | 0.2881          |
 ### Framework versions
-- Transformers 4.43.4
 - Pytorch 2.3.1+cu121
 - Datasets 2.19.1
-- Tokenizers 0.19.1

 ---
 license: apache-2.0
+library_name: peft
 tags:
 - trl
 - sft
 - generated_from_trainer
+base_model: mistralai/Mistral-7B-Instruct-v0.2
 model-index:
 - name: preference_p0.1_seed42_level2_raremixbatch16
   results: []
 # preference_p0.1_seed42_level2_raremixbatch16
+This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.8599
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 0.8409        | 1.0   | 6181 | 0.8599          |
 ### Framework versions
+- PEFT 0.11.1
+- Transformers 4.44.2
 - Pytorch 2.3.1+cu121
 - Datasets 2.19.1
+- Tokenizers 0.19.1

adapter_config.json ADDED Viewed

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "w2",
+    "w1",
+    "v_proj",
+    "q_proj",
+    "w3",
+    "lm_head",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:63b28d7e6592845b2e369912a7b0ab05112233cb2adcff154784d02a2c0d9318
+size 319015728

all_results.json CHANGED Viewed

@@ -1,14 +1,9 @@
 {
     "epoch": 1.0,
-    "eval_loss": 0.28806865215301514,
-    "eval_runtime": 1.3149,
-    "eval_samples": 10,
-    "eval_samples_per_second": 2.282,
-    "eval_steps_per_second": 0.761,
-    "total_flos": 252721244405760.0,
-    "train_loss": 0.5156875643339054,
-    "train_runtime": 24515.4963,
     "train_samples": 98881,
-    "train_samples_per_second": 1.575,
-    "train_steps_per_second": 0.098
 }

 {
     "epoch": 1.0,
+    "total_flos": 2571089024581632.0,
+    "train_loss": 0.8638364601301117,
+    "train_runtime": 28571.7374,
     "train_samples": 98881,
+    "train_samples_per_second": 3.461,
+    "train_steps_per_second": 0.216
 }

runs/Sep14_01-24-34_COE-CS-sv004/events.out.tfevents.1726277286.COE-CS-sv004.1678647.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:d7476b0540fc0ba01e51c18044cd975822ac8088765b3366bc8120a38bca7c6a
+size 266794

tokenizer_config.json CHANGED Viewed

@@ -30,11 +30,11 @@
   },
   "additional_special_tokens": [],
   "bos_token": "<s>",
-  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "legacy": false,
-  "model_max_length": 2048,
   "pad_token": "</s>",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,

   },
   "additional_special_tokens": [],
   "bos_token": "<s>",
+  "chat_template": "{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content'] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n        {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n    {%- endif %}\n    {%- if message['role'] == 'user' %}\n        {%- if loop.first and system_message is defined %}\n            {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n        {%- else %}\n            {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n        {%- endif %}\n    {%- elif message['role'] == 'assistant' %}\n        {{- ' ' + message['content'] + eos_token}}\n    {%- else %}\n        {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n    {%- endif %}\n{%- endfor %}\n",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
   "pad_token": "</s>",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 1.0,
-    "total_flos": 252721244405760.0,
-    "train_loss": 0.5156875643339054,
-    "train_runtime": 24515.4963,
     "train_samples": 98881,
-    "train_samples_per_second": 1.575,
-    "train_steps_per_second": 0.098
 }

 {
     "epoch": 1.0,
+    "total_flos": 2571089024581632.0,
+    "train_loss": 0.8638364601301117,
+    "train_runtime": 28571.7374,
     "train_samples": 98881,
+    "train_samples_per_second": 3.461,
+    "train_steps_per_second": 0.216
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b187636bdae2f5fd2921d35e55c44fba16fb41f45d96bba8ece972c877b93c3a
-size 6584

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d2cfab61ea7d4ddc928c8af1ae1777847f06be2085febf03b4016a7ff48a13a
+size 7032