dimasik87 committed on
Commit
5780dac
·
verified ·
1 Parent(s): c5b10ae

Upload training_config.yml with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_config.yml +4 -4
training_config.yml CHANGED
@@ -24,12 +24,12 @@ checkpointer:
24
  resume_from_checkpoint: false
25
  interim_checkpoint_steps: 5000
26
  interim_gen_steps: null
27
- max_new_tokens: 200
28
  temperature: 0.8
29
  top_k: 200
30
  dataset:
31
  _component_: ds.EvenBatcher
32
- buffer_size: 72
33
  dataset:
34
  _component_: ds.RoundRobinDataset
35
  datasets:
@@ -50,13 +50,13 @@ batch_size: 16
50
  optimizer:
51
  _component_: torch.optim.AdamW
52
  weight_decay: 0.001
53
- lr: 0.0003
54
  lr_scheduler:
55
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
56
  num_warmup_steps: 150
57
  loss:
58
  _component_: torch.nn.CrossEntropyLoss
59
- epochs: 6
60
  max_steps_per_epoch: null
61
  gradient_accumulation_steps: 16
62
  compile: false
 
24
  resume_from_checkpoint: false
25
  interim_checkpoint_steps: 5000
26
  interim_gen_steps: null
27
+ max_new_tokens: 210
28
  temperature: 0.8
29
  top_k: 200
30
  dataset:
31
  _component_: ds.EvenBatcher
32
+ buffer_size: 90
33
  dataset:
34
  _component_: ds.RoundRobinDataset
35
  datasets:
 
50
  optimizer:
51
  _component_: torch.optim.AdamW
52
  weight_decay: 0.001
53
+ lr: 0.0002
54
  lr_scheduler:
55
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
56
  num_warmup_steps: 150
57
  loss:
58
  _component_: torch.nn.CrossEntropyLoss
59
+ epochs: 7
60
  max_steps_per_epoch: null
61
  gradient_accumulation_steps: 16
62
  compile: false