error577 committed on
Commit 2d90cac · verified · Parent: c677adf

End of training

README.md CHANGED
@@ -21,7 +21,7 @@ axolotl version: `0.4.1`
  adapter: lora
  base_model: echarlaix/tiny-random-PhiForCausalLM
  bf16: auto
- chat_template: llama3
+ chat_template: phi_3
  dataset_prepared_path: null
  datasets:
  - data_files:
@@ -67,7 +67,7 @@ lora_r: 32
  lora_target_linear: true
  lr_scheduler: cosine
  #max_steps: 100
- micro_batch_size: 1
+ micro_batch_size: 4
  mlflow_experiment_name: /tmp/1c3359627c73674a_train_data.json
  model_type: AutoModelForCausalLM
  num_epochs: 4
@@ -105,7 +105,7 @@ xformers_attention: null

  This model is a fine-tuned version of [echarlaix/tiny-random-PhiForCausalLM](https://huggingface.co/echarlaix/tiny-random-PhiForCausalLM) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 6.8706
+ - Loss: 6.8923

  ## Model description

@@ -125,11 +125,11 @@ More information needed

  The following hyperparameters were used during training:
  - learning_rate: 0.0001
- - train_batch_size: 1
- - eval_batch_size: 1
+ - train_batch_size: 4
+ - eval_batch_size: 4
  - seed: 42
  - gradient_accumulation_steps: 8
- - total_train_batch_size: 8
+ - total_train_batch_size: 32
  - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
  - lr_scheduler_type: cosine
  - lr_scheduler_warmup_steps: 10
@@ -139,11 +139,11 @@ The following hyperparameters were used during training:

  | Training Loss | Epoch  | Step | Validation Loss |
  |:-------------:|:------:|:----:|:---------------:|
- | 6.9378        | 0.0023 | 1    | 6.9445          |
- | 6.8903        | 0.9983 | 428  | 6.8857          |
- | 5.2312        | 1.9965 | 856  | 6.8746          |
- | 6.8703        | 2.9948 | 1284 | 6.8712          |
- | 6.909         | 3.9930 | 1712 | 6.8706          |
+ | 6.9389        | 0.0093 | 1    | 6.9391          |
+ | 6.9136        | 0.9977 | 107  | 6.9141          |
+ | 7.0022        | 1.9953 | 214  | 6.8957          |
+ | 8.2919        | 2.9930 | 321  | 6.8927          |
+ | 6.8279        | 3.9907 | 428  | 6.8923          |


  ### Framework versions
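
For context on the batch-size lines above: axolotl reports total_train_batch_size as micro_batch_size × gradient_accumulation_steps × number of devices. A minimal sketch of that arithmetic, assuming the single-device run implied by the reported value of 32:

```python
# Minimal sanity check (not part of the commit): reproduce the
# total_train_batch_size reported in the updated README.
micro_batch_size = 4               # raised from 1 in this commit
gradient_accumulation_steps = 8    # unchanged
num_devices = 1                    # assumption: single-GPU run, consistent with 4 * 8 = 32

total_train_batch_size = micro_batch_size * gradient_accumulation_steps * num_devices
print(total_train_batch_size)      # 32 (previously 1 * 8 * 1 = 8)
```

The 4× larger effective batch also matches the loss table: one epoch now takes 107 optimizer steps instead of 428.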
adapter_config.json CHANGED
@@ -20,11 +20,11 @@
    "rank_pattern": {},
    "revision": null,
    "target_modules": [
-     "v_proj",
-     "k_proj",
-     "fc2",
      "dense",
      "fc1",
+     "k_proj",
+     "fc2",
+     "v_proj",
      "q_proj"
    ],
    "task_type": "CAUSAL_LM",
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0590b7dff722f70a9965855e7ed492aeb91db3abd12e5a305e6a83c9c8661488
+ oid sha256:edb08838f1e9c70770086e93316cd01e45e3e658c6f6204cc1130b459a822faf
  size 243839
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:76aeed67146f0423f4f7c765fd81aa14205d90af9a2f024f5952b85db3fe114c
+ oid sha256:649ba4d162d4c3e7a8f2608073f50253439052dfea2d6be9c79901c528069118
  size 237402
tokenizer_config.json CHANGED
@@ -29,7 +29,7 @@
    }
  },
  "bos_token": "<|endoftext|>",
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+ "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|system|>' + '\n' + message['content'] + '<|end|>' + '\n'}}{% elif (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif message['role'] == 'assistant' %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|endoftext|>",
  "model_max_length": 2048,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:406837ff17e56dcab10c2f1be3959921ce3f7d0e2ff609fc8ea2de4ba3a6ccfe
- size 6776
+ oid sha256:56b892c2ae2ec651f2ec9e29f264480645916cc4c0ab4f1102d1f60e45fe3419
+ size 6648