dimasik87 committed on
Commit
fb64727
·
verified ·
1 Parent(s): 94c848f

Upload training_config.yml with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_config.yml +7 -7
training_config.yml CHANGED
@@ -3,7 +3,7 @@ model:
3
  lora_attn_modules:
4
  - q_proj
5
  - v_proj
6
- apply_lora_to_mlp: false
7
  apply_lora_to_output: false
8
  lora_rank: 32
9
  lora_alpha: 64
@@ -16,7 +16,7 @@ checkpointer:
16
  _component_: torchtune.utils.FullModelMetaCheckpointer
17
  checkpoint_dir: checkpoints/Meta-Llama-3-8B-Instruct/original
18
  checkpoint_files:
19
- - consolidated.00.pth
20
  adapter_checkpoint: null
21
  recipe_checkpoint: null
22
  output_dir: output_checkpoints/experiment_1
@@ -46,19 +46,19 @@ dataset:
46
  train_on_input: false
47
  seed: null
48
  shuffle: true
49
- batch_size: 16
50
  optimizer:
51
  _component_: torch.optim.AdamW
52
- weight_decay: 1.0e-05
53
  lr: 5.0e-05
54
  lr_scheduler:
55
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
56
- num_warmup_steps: 2000
57
  loss:
58
  _component_: torch.nn.CrossEntropyLoss
59
  epochs: 7
60
- max_steps_per_epoch: 2000
61
- gradient_accumulation_steps: 16
62
  compile: false
63
  output_dir: /tmp/lora_finetune_output
64
  metric_logger:
 
3
  lora_attn_modules:
4
  - q_proj
5
  - v_proj
6
+ apply_lora_to_mlp: true
7
  apply_lora_to_output: false
8
  lora_rank: 32
9
  lora_alpha: 64
 
16
  _component_: torchtune.utils.FullModelMetaCheckpointer
17
  checkpoint_dir: checkpoints/Meta-Llama-3-8B-Instruct/original
18
  checkpoint_files:
19
+ - meta_model_5.pt
20
  adapter_checkpoint: null
21
  recipe_checkpoint: null
22
  output_dir: output_checkpoints/experiment_1
 
46
  train_on_input: false
47
  seed: null
48
  shuffle: true
49
+ batch_size: 32
50
  optimizer:
51
  _component_: torch.optim.AdamW
52
+ weight_decay: 0.0
53
  lr: 5.0e-05
54
  lr_scheduler:
55
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
56
+ num_warmup_steps: 1000
57
  loss:
58
  _component_: torch.nn.CrossEntropyLoss
59
  epochs: 7
60
+ max_steps_per_epoch: 1000
61
+ gradient_accumulation_steps: 8
62
  compile: false
63
  output_dir: /tmp/lora_finetune_output
64
  metric_logger: