error577 committed on
Commit
e078e38
·
verified ·
1 Parent(s): b5d2d5c

End of training

Browse files
Files changed (3) hide show
  1. README.md +51 -24
  2. adapter_model.bin +1 -1
  3. adapter_model.safetensors +1 -1
README.md CHANGED
@@ -18,10 +18,10 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  axolotl version: `0.4.1`
20
  ```yaml
21
- adapter: qlora
22
- auto_resume_from_checkpoints: true
23
  base_model: fxmarty/tiny-random-GemmaForCausalLM
24
- bf16: auto
25
  chat_template: llama3
26
  dataset_prepared_path: null
27
  datasets:
@@ -40,23 +40,23 @@ datasets:
40
  system_prompt: ''
41
  debug: null
42
  deepspeed: null
43
- early_stopping_patience: 4
44
  eval_max_new_tokens: 128
45
- eval_steps: 100
46
  eval_table_size: null
47
  flash_attention: true
48
- fp16: false
49
  fsdp: null
50
  fsdp_config: null
51
  gradient_accumulation_steps: 4
52
- gradient_checkpointing: true
53
  group_by_length: false
54
  hub_model_id: error577/8a76346a-e5e1-4372-8a33-4ae45d89359b
55
  hub_repo: null
56
  hub_strategy: checkpoint
57
  hub_token: null
58
  learning_rate: 0.0002
59
- load_in_4bit: true
60
  load_in_8bit: false
61
  local_rank: null
62
  logging_steps: 1
@@ -72,14 +72,14 @@ max_steps: null
72
  micro_batch_size: 2
73
  mlflow_experiment_name: /tmp/95621c23f229fe74_train_data.json
74
  model_type: AutoModelForCausalLM
75
- num_epochs: 3
76
- optimizer: adamw_torch_4bit
77
  output_dir: miner_id_24
78
  pad_to_sequence_len: true
79
  resume_from_checkpoint: null
80
  s2_attention: null
81
  sample_packing: false
82
- save_steps: 100
83
  sequence_len: 512
84
  strict: false
85
  tf32: false
@@ -93,7 +93,7 @@ wandb_name: e75973b3-c17e-44e4-b527-21c602afd6c4
93
  wandb_project: Gradients-On-Demand
94
  wandb_run: your_name
95
  wandb_runid: e75973b3-c17e-44e4-b527-21c602afd6c4
96
- warmup_steps: 30
97
  weight_decay: 0.0
98
  xformers_attention: null
99
 
@@ -105,7 +105,7 @@ xformers_attention: null
105
 
106
  This model is a fine-tuned version of [fxmarty/tiny-random-GemmaForCausalLM](https://huggingface.co/fxmarty/tiny-random-GemmaForCausalLM) on the None dataset.
107
  It achieves the following results on the evaluation set:
108
- - Loss: nan
109
 
110
  ## Model description
111
 
@@ -130,21 +130,48 @@ The following hyperparameters were used during training:
130
  - seed: 42
131
  - gradient_accumulation_steps: 4
132
  - total_train_batch_size: 8
133
- - optimizer: Use OptimizerNames.ADAMW_TORCH_4BIT with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
134
  - lr_scheduler_type: cosine
135
- - lr_scheduler_warmup_steps: 30
136
- - num_epochs: 3
 
137
 
138
  ### Training results
139
 
140
- | Training Loss | Epoch | Step | Validation Loss |
141
- |:-------------:|:------:|:----:|:---------------:|
142
- | 0.0 | 0.0002 | 1 | nan |
143
- | 0.0 | 0.0164 | 100 | nan |
144
- | 0.0 | 0.0327 | 200 | nan |
145
- | 0.0 | 0.0491 | 300 | nan |
146
- | 0.0 | 0.0655 | 400 | nan |
147
- | 0.0 | 0.0819 | 500 | nan |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
 
150
  ### Framework versions
 
18
 
19
  axolotl version: `0.4.1`
20
  ```yaml
21
+ adapter: lora
22
+ auto_resume_from_checkpoints: false
23
  base_model: fxmarty/tiny-random-GemmaForCausalLM
24
+ bf16: false
25
  chat_template: llama3
26
  dataset_prepared_path: null
27
  datasets:
 
40
  system_prompt: ''
41
  debug: null
42
  deepspeed: null
43
+ early_stopping_patience: 3
44
  eval_max_new_tokens: 128
45
+ eval_steps: 1000
46
  eval_table_size: null
47
  flash_attention: true
48
+ fp16: true
49
  fsdp: null
50
  fsdp_config: null
51
  gradient_accumulation_steps: 4
52
+ gradient_checkpointing: false
53
  group_by_length: false
54
  hub_model_id: error577/8a76346a-e5e1-4372-8a33-4ae45d89359b
55
  hub_repo: null
56
  hub_strategy: checkpoint
57
  hub_token: null
58
  learning_rate: 0.0002
59
+ load_in_4bit: false
60
  load_in_8bit: false
61
  local_rank: null
62
  logging_steps: 1
 
72
  micro_batch_size: 2
73
  mlflow_experiment_name: /tmp/95621c23f229fe74_train_data.json
74
  model_type: AutoModelForCausalLM
75
+ num_epochs: 10
76
+ optimizer: adamw_torch
77
  output_dir: miner_id_24
78
  pad_to_sequence_len: true
79
  resume_from_checkpoint: null
80
  s2_attention: null
81
  sample_packing: false
82
+ save_steps: 1000
83
  sequence_len: 512
84
  strict: false
85
  tf32: false
 
93
  wandb_project: Gradients-On-Demand
94
  wandb_run: your_name
95
  wandb_runid: e75973b3-c17e-44e4-b527-21c602afd6c4
96
+ warmup_steps: 300
97
  weight_decay: 0.0
98
  xformers_attention: null
99
 
 
105
 
106
  This model is a fine-tuned version of [fxmarty/tiny-random-GemmaForCausalLM](https://huggingface.co/fxmarty/tiny-random-GemmaForCausalLM) on the None dataset.
107
  It achieves the following results on the evaluation set:
108
+ - Loss: 12.1613
109
 
110
  ## Model description
111
 
 
130
  - seed: 42
131
  - gradient_accumulation_steps: 4
132
  - total_train_batch_size: 8
133
+ - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
134
  - lr_scheduler_type: cosine
135
+ - lr_scheduler_warmup_steps: 300
136
+ - num_epochs: 10
137
+ - mixed_precision_training: Native AMP
138
 
139
  ### Training results
140
 
141
+ | Training Loss | Epoch | Step | Validation Loss |
142
+ |:-------------:|:------:|:-----:|:---------------:|
143
+ | 12.4513 | 0.0002 | 1 | 12.4418 |
144
+ | 12.2591 | 0.1637 | 1000 | 12.2520 |
145
+ | 12.2403 | 0.3275 | 2000 | 12.2224 |
146
+ | 12.2069 | 0.4912 | 3000 | 12.2032 |
147
+ | 12.1813 | 0.6550 | 4000 | 12.1945 |
148
+ | 12.2163 | 0.8187 | 5000 | 12.1882 |
149
+ | 12.1597 | 0.9825 | 6000 | 12.1822 |
150
+ | 12.2022 | 1.1462 | 7000 | 12.1761 |
151
+ | 12.2427 | 1.3100 | 8000 | 12.1720 |
152
+ | 12.1622 | 1.4737 | 9000 | 12.1691 |
153
+ | 12.2151 | 1.6375 | 10000 | 12.1676 |
154
+ | 12.18 | 1.8012 | 11000 | 12.1669 |
155
+ | 12.1537 | 1.9650 | 12000 | 12.1656 |
156
+ | 12.1634 | 2.1287 | 13000 | 12.1650 |
157
+ | 12.2148 | 2.2925 | 14000 | 12.1649 |
158
+ | 12.1868 | 2.4562 | 15000 | 12.1646 |
159
+ | 12.1903 | 2.6199 | 16000 | 12.1642 |
160
+ | 12.1781 | 2.7837 | 17000 | 12.1643 |
161
+ | 12.1894 | 2.9474 | 18000 | 12.1638 |
162
+ | 12.2065 | 3.1112 | 19000 | 12.1633 |
163
+ | 12.1887 | 3.2749 | 20000 | 12.1635 |
164
+ | 12.1549 | 3.4387 | 21000 | 12.1626 |
165
+ | 12.1719 | 3.6024 | 22000 | 12.1624 |
166
+ | 12.2151 | 3.7662 | 23000 | 12.1626 |
167
+ | 12.157 | 3.9299 | 24000 | 12.1629 |
168
+ | 12.1682 | 4.0937 | 25000 | 12.1619 |
169
+ | 12.1968 | 4.2574 | 26000 | 12.1619 |
170
+ | 12.1651 | 4.4212 | 27000 | 12.1617 |
171
+ | 12.168 | 4.5849 | 28000 | 12.1612 |
172
+ | 12.1713 | 4.7486 | 29000 | 12.1617 |
173
+ | 12.1767 | 4.9124 | 30000 | 12.1614 |
174
+ | 12.2027 | 5.0761 | 31000 | 12.1613 |
175
 
176
 
177
  ### Framework versions
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d30d35f390d6930b48fb55590f138015c36014271463949d7691f3871bc705b
3
  size 76696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7845f6e98f4180b9c1bdf45b14bbd54e8d1ed1f886df7b416d91d1676a26ac4d
3
  size 76696
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e31ab3cf726907124f36b62202b35bd1d2de4320ef735f47c1d3589bc486329
3
  size 72936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a0040a69c05456dd3555fc688eedb7c3608bc71c0a16aebc4a707d9eafcb450
3
  size 72936