simmo committed on
Commit
6db9dfc
·
verified ·
1 Parent(s): ccf2938

First model training round

Browse files
checkpoints/data_4_train_params.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ batch_size_training: '4'
2
+ batching_strategy: packing
3
+ checkpoint_type: StateDictType.FULL_STATE_DICT
4
+ context_length: '4096'
5
+ dataset: fim_dataset
6
+ dist_checkpoint_folder: fine-tuned
7
+ dist_checkpoint_root_folder: /home/model_checkpoints
8
+ enable_fsdp: 'True'
9
+ flop_counter: 'True'
10
+ flop_counter_start: '3'
11
+ freeze_layers: 'False'
12
+ from_peft_checkpoint: ''
13
+ fsdp_activation_checkpointing: 'True'
14
+ fsdp_cpu_offload: 'False'
15
+ gamma: '0.85'
16
+ gradient_accumulation_steps: '1'
17
+ gradient_clipping: 'False'
18
+ gradient_clipping_threshold: '1.0'
19
+ hsdp: 'False'
20
+ low_cpu_fsdp: 'False'
21
+ lr: '0.0001'
22
+ max_eval_step: '0'
23
+ max_train_step: '0'
24
+ mixed_precision: 'True'
25
+ model_name: meta-llama/Llama-3.2-1B-Instruct
26
+ num_epochs: '1'
27
+ num_freeze_layers: '1'
28
+ num_workers_dataloader: '1'
29
+ one_gpu: 'False'
30
+ optimizer: AdamW
31
+ output_dir: PATH/to/save/PEFT/model
32
+ peft_method: lora
33
+ profiler_dir: PATH/to/save/profiler/results
34
+ pure_bf16: 'True'
35
+ quantization: None
36
+ replica_group_size: '0'
37
+ run_validation: 'True'
38
+ save_metrics: 'False'
39
+ save_model: 'True'
40
+ save_optimizer: 'False'
41
+ seed: '42'
42
+ sharding_group_size: '0'
43
+ sharding_strategy: ShardingStrategy.NO_SHARD
44
+ tokenizer_name: simmo/llama3.2-pyfim-3b
45
+ use_fast_kernels: 'True'
46
+ use_fp16: 'False'
47
+ use_peft: 'False'
48
+ use_profiler: 'False'
49
+ use_wandb: 'True'
50
+ val_batch_size: '1'
51
+ weight_decay: '0.0'