End of training
Browse files
- README.md +51 -24
- adapter_model.bin +1 -1
- adapter_model.safetensors +1 -1
README.md
CHANGED
|
@@ -18,10 +18,10 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 18 |
|
| 19 |
axolotl version: `0.4.1`
|
| 20 |
```yaml
|
| 21 |
-
adapter:
|
| 22 |
-
auto_resume_from_checkpoints:
|
| 23 |
base_model: fxmarty/tiny-random-GemmaForCausalLM
|
| 24 |
-
bf16:
|
| 25 |
chat_template: llama3
|
| 26 |
dataset_prepared_path: null
|
| 27 |
datasets:
|
|
@@ -40,23 +40,23 @@ datasets:
|
|
| 40 |
system_prompt: ''
|
| 41 |
debug: null
|
| 42 |
deepspeed: null
|
| 43 |
-
early_stopping_patience:
|
| 44 |
eval_max_new_tokens: 128
|
| 45 |
-
eval_steps:
|
| 46 |
eval_table_size: null
|
| 47 |
flash_attention: true
|
| 48 |
-
fp16:
|
| 49 |
fsdp: null
|
| 50 |
fsdp_config: null
|
| 51 |
gradient_accumulation_steps: 4
|
| 52 |
-
gradient_checkpointing:
|
| 53 |
group_by_length: false
|
| 54 |
hub_model_id: error577/8a76346a-e5e1-4372-8a33-4ae45d89359b
|
| 55 |
hub_repo: null
|
| 56 |
hub_strategy: checkpoint
|
| 57 |
hub_token: null
|
| 58 |
learning_rate: 0.0002
|
| 59 |
-
load_in_4bit:
|
| 60 |
load_in_8bit: false
|
| 61 |
local_rank: null
|
| 62 |
logging_steps: 1
|
|
@@ -72,14 +72,14 @@ max_steps: null
|
|
| 72 |
micro_batch_size: 2
|
| 73 |
mlflow_experiment_name: /tmp/95621c23f229fe74_train_data.json
|
| 74 |
model_type: AutoModelForCausalLM
|
| 75 |
-
num_epochs:
|
| 76 |
-
optimizer:
|
| 77 |
output_dir: miner_id_24
|
| 78 |
pad_to_sequence_len: true
|
| 79 |
resume_from_checkpoint: null
|
| 80 |
s2_attention: null
|
| 81 |
sample_packing: false
|
| 82 |
-
save_steps:
|
| 83 |
sequence_len: 512
|
| 84 |
strict: false
|
| 85 |
tf32: false
|
|
@@ -93,7 +93,7 @@ wandb_name: e75973b3-c17e-44e4-b527-21c602afd6c4
|
|
| 93 |
wandb_project: Gradients-On-Demand
|
| 94 |
wandb_run: your_name
|
| 95 |
wandb_runid: e75973b3-c17e-44e4-b527-21c602afd6c4
|
| 96 |
-
warmup_steps:
|
| 97 |
weight_decay: 0.0
|
| 98 |
xformers_attention: null
|
| 99 |
|
|
@@ -105,7 +105,7 @@ xformers_attention: null
|
|
| 105 |
|
| 106 |
This model is a fine-tuned version of [fxmarty/tiny-random-GemmaForCausalLM](https://huggingface.co/fxmarty/tiny-random-GemmaForCausalLM) on an unspecified dataset (the auto-generated card recorded the dataset name as `None`).
|
| 107 |
It achieves the following results on the evaluation set:
|
| 108 |
-
- Loss:
|
| 109 |
|
| 110 |
## Model description
|
| 111 |
|
|
@@ -130,21 +130,48 @@ The following hyperparameters were used during training:
|
|
| 130 |
- seed: 42
|
| 131 |
- gradient_accumulation_steps: 4
|
| 132 |
- total_train_batch_size: 8
|
| 133 |
-
- optimizer: Use OptimizerNames.
|
| 134 |
- lr_scheduler_type: cosine
|
| 135 |
-
- lr_scheduler_warmup_steps:
|
| 136 |
-
- num_epochs:
|
|
|
|
| 137 |
|
| 138 |
### Training results
|
| 139 |
|
| 140 |
-
| Training Loss | Epoch | Step | Validation Loss |
|
| 141 |
-
|:-------------:|:------:|:----:|:---------------:|
|
| 142 |
-
|
|
| 143 |
-
|
|
| 144 |
-
|
|
| 145 |
-
|
|
| 146 |
-
|
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
|
| 150 |
### Framework versions
|
|
|
|
| 18 |
|
| 19 |
axolotl version: `0.4.1`
|
| 20 |
```yaml
|
| 21 |
+
adapter: lora
|
| 22 |
+
auto_resume_from_checkpoints: false
|
| 23 |
base_model: fxmarty/tiny-random-GemmaForCausalLM
|
| 24 |
+
bf16: false
|
| 25 |
chat_template: llama3
|
| 26 |
dataset_prepared_path: null
|
| 27 |
datasets:
|
|
|
|
| 40 |
system_prompt: ''
|
| 41 |
debug: null
|
| 42 |
deepspeed: null
|
| 43 |
+
early_stopping_patience: 3
|
| 44 |
eval_max_new_tokens: 128
|
| 45 |
+
eval_steps: 1000
|
| 46 |
eval_table_size: null
|
| 47 |
flash_attention: true
|
| 48 |
+
fp16: true
|
| 49 |
fsdp: null
|
| 50 |
fsdp_config: null
|
| 51 |
gradient_accumulation_steps: 4
|
| 52 |
+
gradient_checkpointing: false
|
| 53 |
group_by_length: false
|
| 54 |
hub_model_id: error577/8a76346a-e5e1-4372-8a33-4ae45d89359b
|
| 55 |
hub_repo: null
|
| 56 |
hub_strategy: checkpoint
|
| 57 |
hub_token: null
|
| 58 |
learning_rate: 0.0002
|
| 59 |
+
load_in_4bit: false
|
| 60 |
load_in_8bit: false
|
| 61 |
local_rank: null
|
| 62 |
logging_steps: 1
|
|
|
|
| 72 |
micro_batch_size: 2
|
| 73 |
mlflow_experiment_name: /tmp/95621c23f229fe74_train_data.json
|
| 74 |
model_type: AutoModelForCausalLM
|
| 75 |
+
num_epochs: 10
|
| 76 |
+
optimizer: adamw_torch
|
| 77 |
output_dir: miner_id_24
|
| 78 |
pad_to_sequence_len: true
|
| 79 |
resume_from_checkpoint: null
|
| 80 |
s2_attention: null
|
| 81 |
sample_packing: false
|
| 82 |
+
save_steps: 1000
|
| 83 |
sequence_len: 512
|
| 84 |
strict: false
|
| 85 |
tf32: false
|
|
|
|
| 93 |
wandb_project: Gradients-On-Demand
|
| 94 |
wandb_run: your_name
|
| 95 |
wandb_runid: e75973b3-c17e-44e4-b527-21c602afd6c4
|
| 96 |
+
warmup_steps: 300
|
| 97 |
weight_decay: 0.0
|
| 98 |
xformers_attention: null
|
| 99 |
|
|
|
|
| 105 |
|
| 106 |
This model is a fine-tuned version of [fxmarty/tiny-random-GemmaForCausalLM](https://huggingface.co/fxmarty/tiny-random-GemmaForCausalLM) on an unspecified dataset (the auto-generated card recorded the dataset name as `None`).
|
| 107 |
It achieves the following results on the evaluation set:
|
| 108 |
+
- Loss: 12.1613
|
| 109 |
|
| 110 |
## Model description
|
| 111 |
|
|
|
|
| 130 |
- seed: 42
|
| 131 |
- gradient_accumulation_steps: 4
|
| 132 |
- total_train_batch_size: 8
|
| 133 |
+
- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
| 134 |
- lr_scheduler_type: cosine
|
| 135 |
+
- lr_scheduler_warmup_steps: 300
|
| 136 |
+
- num_epochs: 10
|
| 137 |
+
- mixed_precision_training: Native AMP
|
| 138 |
|
| 139 |
### Training results
|
| 140 |
|
| 141 |
+
| Training Loss | Epoch | Step | Validation Loss |
|
| 142 |
+
|:-------------:|:------:|:-----:|:---------------:|
|
| 143 |
+
| 12.4513 | 0.0002 | 1 | 12.4418 |
|
| 144 |
+
| 12.2591 | 0.1637 | 1000 | 12.2520 |
|
| 145 |
+
| 12.2403 | 0.3275 | 2000 | 12.2224 |
|
| 146 |
+
| 12.2069 | 0.4912 | 3000 | 12.2032 |
|
| 147 |
+
| 12.1813 | 0.6550 | 4000 | 12.1945 |
|
| 148 |
+
| 12.2163 | 0.8187 | 5000 | 12.1882 |
|
| 149 |
+
| 12.1597 | 0.9825 | 6000 | 12.1822 |
|
| 150 |
+
| 12.2022 | 1.1462 | 7000 | 12.1761 |
|
| 151 |
+
| 12.2427 | 1.3100 | 8000 | 12.1720 |
|
| 152 |
+
| 12.1622 | 1.4737 | 9000 | 12.1691 |
|
| 153 |
+
| 12.2151 | 1.6375 | 10000 | 12.1676 |
|
| 154 |
+
| 12.18 | 1.8012 | 11000 | 12.1669 |
|
| 155 |
+
| 12.1537 | 1.9650 | 12000 | 12.1656 |
|
| 156 |
+
| 12.1634 | 2.1287 | 13000 | 12.1650 |
|
| 157 |
+
| 12.2148 | 2.2925 | 14000 | 12.1649 |
|
| 158 |
+
| 12.1868 | 2.4562 | 15000 | 12.1646 |
|
| 159 |
+
| 12.1903 | 2.6199 | 16000 | 12.1642 |
|
| 160 |
+
| 12.1781 | 2.7837 | 17000 | 12.1643 |
|
| 161 |
+
| 12.1894 | 2.9474 | 18000 | 12.1638 |
|
| 162 |
+
| 12.2065 | 3.1112 | 19000 | 12.1633 |
|
| 163 |
+
| 12.1887 | 3.2749 | 20000 | 12.1635 |
|
| 164 |
+
| 12.1549 | 3.4387 | 21000 | 12.1626 |
|
| 165 |
+
| 12.1719 | 3.6024 | 22000 | 12.1624 |
|
| 166 |
+
| 12.2151 | 3.7662 | 23000 | 12.1626 |
|
| 167 |
+
| 12.157 | 3.9299 | 24000 | 12.1629 |
|
| 168 |
+
| 12.1682 | 4.0937 | 25000 | 12.1619 |
|
| 169 |
+
| 12.1968 | 4.2574 | 26000 | 12.1619 |
|
| 170 |
+
| 12.1651 | 4.4212 | 27000 | 12.1617 |
|
| 171 |
+
| 12.168 | 4.5849 | 28000 | 12.1612 |
|
| 172 |
+
| 12.1713 | 4.7486 | 29000 | 12.1617 |
|
| 173 |
+
| 12.1767 | 4.9124 | 30000 | 12.1614 |
|
| 174 |
+
| 12.2027 | 5.0761 | 31000 | 12.1613 |
|
| 175 |
|
| 176 |
|
| 177 |
### Framework versions
|
adapter_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 76696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7845f6e98f4180b9c1bdf45b14bbd54e8d1ed1f886df7b416d91d1676a26ac4d
|
| 3 |
size 76696
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 72936
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a0040a69c05456dd3555fc688eedb7c3608bc71c0a16aebc4a707d9eafcb450
|
| 3 |
size 72936
|