Update training_config.yml
Changed file: training_config.yml (+6 −6)
@@ -29,7 +29,7 @@ temperature: 0.6
 top_k: 231
 dataset:
   _component_: ds.EvenBatcher
-  buffer_size:
+  buffer_size: 648
   dataset:
     _component_: ds.RoundRobinDataset
     datasets:
@@ -49,17 +49,17 @@ shuffle: true
 batch_size: 8
 optimizer:
   _component_: torch.optim.AdamW
-  weight_decay: 1.
-  lr: 0.
+  weight_decay: 1.1
+  lr: 0.1
 lr_scheduler:
   _component_: torchtune.modules.get_cosine_schedule_with_warmup
-  num_warmup_steps:
+  num_warmup_steps: 1
 loss:
   _component_: torch.nn.CrossEntropyLoss
-
+
 epochs: 10
 max_steps_per_epoch: null
-gradient_accumulation_steps:
+gradient_accumulation_steps: 512
 compile: false
 output_dir: /tmp/lora_finetune_output
 metric_logger: