End of training

Files changed:
- README.md (+11 -11)
- adapter_config.json (+3 -3)
- adapter_model.bin (+1 -1)
- adapter_model.safetensors (+1 -1)
- tokenizer_config.json (+1 -1)
- training_args.bin (+2 -2)
README.md
CHANGED
@@ -21,7 +21,7 @@ axolotl version: `0.4.1`
 adapter: lora
 base_model: echarlaix/tiny-random-PhiForCausalLM
 bf16: auto
-chat_template:
+chat_template: phi_3
 dataset_prepared_path: null
 datasets:
 - data_files:
@@ -67,7 +67,7 @@ lora_r: 32
 lora_target_linear: true
 lr_scheduler: cosine
 #max_steps: 100
-micro_batch_size:
+micro_batch_size: 4
 mlflow_experiment_name: /tmp/1c3359627c73674a_train_data.json
 model_type: AutoModelForCausalLM
 num_epochs: 4
@@ -105,7 +105,7 @@ xformers_attention: null
 
 This model is a fine-tuned version of [echarlaix/tiny-random-PhiForCausalLM](https://huggingface.co/echarlaix/tiny-random-PhiForCausalLM) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 6.
+- Loss: 6.8923
 
 ## Model description
 
@@ -125,11 +125,11 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0001
-- train_batch_size:
-- eval_batch_size:
+- train_batch_size: 4
+- eval_batch_size: 4
 - seed: 42
 - gradient_accumulation_steps: 8
-- total_train_batch_size:
+- total_train_batch_size: 32
 - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
@@ -139,11 +139,11 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 6.
-| 6.
-|
-|
-| 6.
+| 6.9389 | 0.0093 | 1 | 6.9391 |
+| 6.9136 | 0.9977 | 107 | 6.9141 |
+| 7.0022 | 1.9953 | 214 | 6.8957 |
+| 8.2919 | 2.9930 | 321 | 6.8927 |
+| 6.8279 | 3.9907 | 428 | 6.8923 |
 
 
 ### Framework versions
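The total_train_batch_size added above is the product of the micro batch size and the gradient accumulation steps; a minimal sketch of that arithmetic, assuming the single-GPU case (world size is not shown in this diff):

```python
# Effective batch size behind the README values above (assumes 1 GPU;
# with data parallelism the product is further multiplied by world size).
micro_batch_size = 4             # micro_batch_size / train_batch_size above
gradient_accumulation_steps = 8  # from the README
total_train_batch_size = micro_batch_size * gradient_accumulation_steps
assert total_train_batch_size == 32  # matches the value added in this commit
```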
adapter_config.json
CHANGED
@@ -20,11 +20,11 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "k_proj",
-    "fc2",
     "dense",
     "fc1",
+    "k_proj",
+    "fc2",
+    "v_proj",
     "q_proj"
   ],
   "task_type": "CAUSAL_LM",
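The target_modules edit is a pure reordering: PEFT matches module names against the list as a set, so the same layers are wrapped either way. A minimal sketch of an equivalent config (r=32 comes from the README's `lora_r: 32`; other fields are left at PEFT defaults):

```python
from peft import LoraConfig

# Sketch of the config this adapter_config.json corresponds to;
# the order of target_modules has no effect on which layers get adapters.
config = LoraConfig(
    r=32,
    target_modules=["dense", "fc1", "k_proj", "fc2", "v_proj", "q_proj"],
    task_type="CAUSAL_LM",
)
```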
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:edb08838f1e9c70770086e93316cd01e45e3e658c6f6204cc1130b459a822faf
 size 243839
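Only the Git LFS pointer changes here: the oid is the SHA-256 of the new adapter weights, and the size is the object's byte count. A hedged sketch of verifying a downloaded copy against the pointer (the local path is an assumption):

```python
import hashlib

# "adapter_model.bin" is assumed to be the locally downloaded LFS object.
with open("adapter_model.bin", "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()

expected = "edb08838f1e9c70770086e93316cd01e45e3e658c6f6204cc1130b459a822faf"
print(digest == expected)  # True if the download matches this commit
```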
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:649ba4d162d4c3e7a8f2608073f50253439052dfea2d6be9c79901c528069118
 size 237402
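The .bin and .safetensors files carry the same adapter weights in two serializations; recent PEFT versions prefer the safetensors file when both are present. A minimal loading sketch (the adapter repo id is a hypothetical placeholder):

```python
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("echarlaix/tiny-random-PhiForCausalLM")
# "<user>/<this-adapter-repo>" is a placeholder for this repository's id.
model = PeftModel.from_pretrained(base, "<user>/<this-adapter-repo>")
```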
tokenizer_config.json
CHANGED
@@ -29,7 +29,7 @@
     }
   },
   "bos_token": "<|endoftext|>",
-  "chat_template": "{
+  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|system|>' + '\n' + message['content'] + '<|end|>' + '\n'}}{% elif (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif message['role'] == 'assistant' %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "model_max_length": 2048,
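With the phi_3 template now stored in tokenizer_config.json, the tokenizer can render conversations directly. A small usage sketch (the repo id is again a hypothetical placeholder):

```python
from transformers import AutoTokenizer

# "<user>/<this-adapter-repo>" is a placeholder for this repository's id.
tok = AutoTokenizer.from_pretrained("<user>/<this-adapter-repo>")
messages = [
    {"role": "user", "content": "Hello!"},
    {"role": "assistant", "content": "Hi there."},
]
text = tok.apply_chat_template(messages, tokenize=False)
# Per the template above, this yields:
# <|endoftext|><|user|>\nHello!<|end|>\n<|assistant|>\nHi there.<|end|>\n
```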
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:56b892c2ae2ec651f2ec9e29f264480645916cc4c0ab4f1102d1f60e45fe3419
+size 6648