jame5
/

bfg

@@ -24,10 +24,10 @@ checkpointer:
   output_dir: output_checkpoints/experiment_1
   model_type: LLAMA3
 resume_from_checkpoint: false
-interim_checkpoint_steps: 1000
 interim_gen_steps: null
-max_new_tokens: 200
-temperature: 0.7
 top_k: 300
 dataset:
   _component_: ds.EvenBatcher
@@ -58,10 +58,10 @@ optimizer:
   lr: 0.0001
 lr_scheduler:
   _component_: torchtune.modules.get_cosine_schedule_with_warmup
-  num_warmup_steps: 2000
 loss:
   _component_: torch.nn.CrossEntropyLoss
-epochs: 3
 max_steps_per_epoch: null
 gradient_accumulation_steps: 16
 compile: true
@@ -92,8 +92,8 @@ inference:
     {video}
     Caption the previous video.'
-  max_new_tokens: 300
-  temperature: 0.7
   top_k: 300
   quantizer: null
 gradient-accumulation-steps: 32

   output_dir: output_checkpoints/experiment_1
   model_type: LLAMA3
 resume_from_checkpoint: false
+interim_checkpoint_steps: 750
 interim_gen_steps: null
+max_new_tokens: 300
+temperature: 0.74
 top_k: 300
 dataset:
   _component_: ds.EvenBatcher
   lr: 0.0001
 lr_scheduler:
   _component_: torchtune.modules.get_cosine_schedule_with_warmup
+  num_warmup_steps: 2500
 loss:
   _component_: torch.nn.CrossEntropyLoss
+epochs: 4
 max_steps_per_epoch: null
 gradient_accumulation_steps: 16
 compile: true
     {video}
     Caption the previous video.'
+  max_new_tokens: 500
+  temperature: 0.75
   top_k: 300
   quantizer: null
 gradient-accumulation-steps: 32