JuncheolK committed
Commit 7a091dc · verified · 1 parent: e011bf8

Upload run_fsdp.text with huggingface_hub

Files changed (1): run_fsdp.text +58 -0
run_fsdp.text ADDED
@@ -0,0 +1,58 @@
+ PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True accelerate launch --config_file "configs/fsdp_config.yaml" \
+ --main_process_ip "172.17.0.2" \
+ --main_process_port "29080" \
+ --machine_rank $BACKENDAI_CLUSTER_LOCAL_RANK \
+ --num_processes 4 \
+ --num_machines 1 \
+ --monitor_interval 10 \
+ --num_cpu_threads_per_process 8 \
+ train.py \
+ --seed 100 \
+ --model_name_or_path "meta-llama/Meta-Llama-3.1-8B-Instruct" \
+ --output_dir "results/mncai/Meta-Llama-3.1-8B-Instruct-v0.3-32batch_0.05warmup_onlyarc_5e5" \
+ --dataset_name "mncai/ai2_arc" \
+ --chat_template_format "llama3.1" \
+ --add_special_tokens False \
+ --append_concat_token False \
+ --splits "train,test" \
+ --max_seq_len 8192 \
+ --num_train_epochs 2 \
+ --logging_steps 5 \
+ --log_level "info" \
+ --logging_strategy "steps" \
+ --eval_strategy "steps" \
+ --eval_steps 10 \
+ --save_total_limit 2 \
+ --bf16 True \
+ --packing False \
+ --learning_rate 5e-5 \
+ --lr_scheduler_type "cosine" \
+ --weight_decay 1e-5 \
+ --warmup_ratio 0.05 \
+ --max_grad_norm 1.0 \
+ --per_device_train_batch_size 32 \
+ --per_device_eval_batch_size 32 \
+ --gradient_accumulation_steps 1 \
+ --gradient_checkpointing True \
+ --use_reentrant False \
+ --dataset_text_field "content" \
+ --use_flash_attn True \
+ --optim paged_adamw_32bit \
+ --report_to "wandb" \
+ # --hub_model_id "mncai/Meta-Llama-3.1-8B-Instruct-v0.2" \
+ # --push_to_hub True \
+ # --hub_private_repo True \
+ # --hub_strategy "end" \
+ # --hub_token $HF_WRITE_TOKEN \
+
+ # --save_strategy "steps" \
+ # --save_steps 200 \
+ # --use_peft_lora True \
+ # --lora_r 8 \
+ # --lora_alpha 16 \
+ # --lora_dropout 0.1 \
+ # --lora_target_modules "all-linear" \
+ # --use_4bit_quantization False
+
+ # Dataset
+ # ,mncai/hellaswag,mncai/mbpp,mncai/openbookqa,mncai/winogrande,mncai/trivia_qa,mncai/KMMLU"
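
Note: the command above assumes an Accelerate FSDP config at configs/fsdp_config.yaml, which is not part of this commit. The sketch below shows what such a file might look like for this single-node, 4-GPU, bf16 full-shard run; the key names follow Accelerate's FSDP plugin, but the values are illustrative assumptions, not taken from this repo (generate a real one with `accelerate config`).

# Hypothetical configs/fsdp_config.yaml (assumed, not from this commit)
compute_environment: LOCAL_MACHINE
distributed_type: FSDP
fsdp_config:
  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP  # wrap each decoder block
  fsdp_sharding_strategy: FULL_SHARD             # shard params, grads, optimizer state
  fsdp_backward_prefetch: BACKWARD_PRE
  fsdp_cpu_ram_efficient_loading: true
  fsdp_offload_params: false
  fsdp_state_dict_type: SHARDED_STATE_DICT
  fsdp_sync_module_states: true
  fsdp_use_orig_params: true
mixed_precision: bf16   # matches --bf16 True
machine_rank: 0         # overridden per node by --machine_rank
num_machines: 1         # matches --num_machines 1
num_processes: 4        # matches --num_processes 4
main_training_function: main
rdzv_backend: static
same_network: true

With --num_processes 4, --per_device_train_batch_size 32, and --gradient_accumulation_steps 1, the effective global batch size is 32 × 4 = 128; the "32batch" in --output_dir refers to the per-device value. To train on more datasets, the commented list at the bottom can be appended to --dataset_name as a comma-separated string (e.g. "mncai/ai2_arc,mncai/hellaswag,...").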