Upload configs.yaml with huggingface_hub
Browse files- configs.yaml +2 -2
configs.yaml
CHANGED
|
@@ -9,7 +9,7 @@ eval_strategy: 'no'
|
|
| 9 |
finetuning_type: full
|
| 10 |
formatting: sharegpt
|
| 11 |
global_batch_size: 96
|
| 12 |
-gradient_accumulation_steps:
|
| 13 |
hub_model_id: mlfoundations-dev/llama3-1_8b_4o_annotated_aops
|
| 14 |
include_hp: dcft/train/hp_settings/reasoning.yaml
|
| 15 |
learning_rate: 1.0e-05
|
|
@@ -21,7 +21,7 @@ model_name_or_path: Qwen/Qwen2.5-7B-Instruct
|
|
| 21 |
num_train_epochs: 3.0
|
| 22 |
output_dir: /tmp/dcft_checkpoints/llama3-1_8b_4o_annotated_aops
|
| 23 |
overwrite_cache: true
|
| 24 |
-per_device_train_batch_size:
|
| 25 |
plot_loss: true
|
| 26 |
preprocessing_num_workers: 16
|
| 27 |
push_to_db: true
|
|
|
|
| 9 |
finetuning_type: full
|
| 10 |
formatting: sharegpt
|
| 11 |
global_batch_size: 96
|
| 12 |
+gradient_accumulation_steps: 3
|
| 13 |
hub_model_id: mlfoundations-dev/llama3-1_8b_4o_annotated_aops
|
| 14 |
include_hp: dcft/train/hp_settings/reasoning.yaml
|
| 15 |
learning_rate: 1.0e-05
|
|
|
|
| 21 |
num_train_epochs: 3.0
|
| 22 |
output_dir: /tmp/dcft_checkpoints/llama3-1_8b_4o_annotated_aops
|
| 23 |
overwrite_cache: true
|
| 24 |
+per_device_train_batch_size: 1
|
| 25 |
plot_loss: true
|
| 26 |
preprocessing_num_workers: 16
|
| 27 |
push_to_db: true
|