Upload training_config.yml with huggingface_hub
Browse files
- training_config.yml (+7 -7)
training_config.yml
CHANGED
|
@@ -24,10 +24,10 @@ checkpointer:
|
|
| 24 |
output_dir: output_checkpoints/experiment_1
|
| 25 |
model_type: LLAMA3
|
| 26 |
resume_from_checkpoint: false
|
| 27 |
-
interim_checkpoint_steps:
|
| 28 |
interim_gen_steps: null
|
| 29 |
-
max_new_tokens:
|
| 30 |
-
temperature: 0.
|
| 31 |
top_k: 300
|
| 32 |
dataset:
|
| 33 |
_component_: ds.EvenBatcher
|
|
@@ -58,10 +58,10 @@ optimizer:
|
|
| 58 |
lr: 0.0001
|
| 59 |
lr_scheduler:
|
| 60 |
_component_: torchtune.modules.get_cosine_schedule_with_warmup
|
| 61 |
-
num_warmup_steps:
|
| 62 |
loss:
|
| 63 |
_component_: torch.nn.CrossEntropyLoss
|
| 64 |
-
epochs:
|
| 65 |
max_steps_per_epoch: null
|
| 66 |
gradient_accumulation_steps: 16
|
| 67 |
compile: true
|
|
@@ -92,8 +92,8 @@ inference:
|
|
| 92 |
{video}
|
| 93 |
|
| 94 |
Caption the previous video.'
|
| 95 |
-
max_new_tokens:
|
| 96 |
-
temperature: 0.
|
| 97 |
top_k: 300
|
| 98 |
quantizer: null
|
| 99 |
gradient-accumulation-steps: 32
|
|
|
|
| 24 |
output_dir: output_checkpoints/experiment_1
|
| 25 |
model_type: LLAMA3
|
| 26 |
resume_from_checkpoint: false
|
| 27 |
+
interim_checkpoint_steps: 750
|
| 28 |
interim_gen_steps: null
|
| 29 |
+
max_new_tokens: 300
|
| 30 |
+
temperature: 0.74
|
| 31 |
top_k: 300
|
| 32 |
dataset:
|
| 33 |
_component_: ds.EvenBatcher
|
|
|
|
| 58 |
lr: 0.0001
|
| 59 |
lr_scheduler:
|
| 60 |
_component_: torchtune.modules.get_cosine_schedule_with_warmup
|
| 61 |
+
num_warmup_steps: 2500
|
| 62 |
loss:
|
| 63 |
_component_: torch.nn.CrossEntropyLoss
|
| 64 |
+
epochs: 4
|
| 65 |
max_steps_per_epoch: null
|
| 66 |
gradient_accumulation_steps: 16
|
| 67 |
compile: true
|
|
|
|
| 92 |
{video}
|
| 93 |
|
| 94 |
Caption the previous video.'
|
| 95 |
+
max_new_tokens: 500
|
| 96 |
+
temperature: 0.75
|
| 97 |
top_k: 300
|
| 98 |
quantizer: null
|
| 99 |
gradient-accumulation-steps: 32
|