<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Training Commands</title>
  <style>
    body {
      margin: 0;
      padding: 8px;
      font-family: monospace;
    }

    /* Box showing the active command. `pre-wrap` preserves the
       newlines embedded in the command strings; `break-all` keeps
       long unbroken flags from overflowing the box. */
    #command {
      display: inline-block;
      padding: 8px;
      border: 1px solid #d1d5da;
      border-radius: 3px;
      background-color: #f6f8fa;
      white-space: pre-wrap;
      word-break: break-all;
    }

    button {
      margin-left: 8px;
      cursor: pointer;
    }
  </style>
</head>
<body>
  <!-- Default view: identical to commands[0] in the script below.
       The <YOUR PORT> placeholder must be written with character
       references here — a raw "<" starts a tag and the browser would
       swallow the placeholder from the rendered text. -->
  <div id="command">torchrun --nproc_per_node=8 --master_port=&lt;YOUR PORT&gt; train.py \
--model_name_or_path "facebook/opt-6.7b" \
--data_path medalpaca_small.json \
--bf16 True \
--output_dir models \
--num_train_epochs 3 \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--gradient_accumulation_steps 8 \
--evaluation_strategy "no" \
--save_strategy "steps" \
--save_steps 2000 \
--save_total_limit 1 \
--learning_rate 2e-5 \
--weight_decay 0. \
--warmup_ratio 0.03 \
--lr_scheduler_type "cosine" \
--logging_steps 1 \
--fsdp "full_shard auto_wrap" \
--fsdp_transformer_layer_cls_to_wrap 'OPTDecoderLayer' \
--tf32 True</div>
  <div>
    <!-- Each button's argument is its index in the `commands` array:
         0 = OPT 6.7B, 1 = OPT 13B, 2 = Alpaca/LLaMA 7B. -->
    <button type="button" onclick="changeCommand(0)">OPT 6.7B</button>
    <button type="button" onclick="changeCommand(1)">OPT 13B</button>
    <button type="button" onclick="changeCommand(2)">Alpaca 7B</button>
  </div>
<script>
  /**
   * Build one torchrun fine-tuning command string.
   * The three published commands differ only in the model path, the
   * per-device batch size, the gradient-accumulation steps, and the
   * transformer layer class wrapped by FSDP, so they are generated
   * from a single template instead of being maintained as three
   * near-identical 21-line literals.
   *
   * @param {string} model      value for --model_name_or_path, including
   *                            any surrounding quotes to emit verbatim
   * @param {number} batchSize  per-device train AND eval batch size
   * @param {number} gradAccum  --gradient_accumulation_steps value
   * @param {string} layerClass class name for
   *                            --fsdp_transformer_layer_cls_to_wrap
   * @returns {string} multi-line shell command with "\"-continuations
   */
  function buildCommand(model, batchSize, gradAccum, layerClass) {
    return `torchrun --nproc_per_node=8 --master_port=<YOUR PORT> train.py \\
--model_name_or_path ${model} \\
--data_path medalpaca_small.json \\
--bf16 True \\
--output_dir models \\
--num_train_epochs 3 \\
--per_device_train_batch_size ${batchSize} \\
--per_device_eval_batch_size ${batchSize} \\
--gradient_accumulation_steps ${gradAccum} \\
--evaluation_strategy "no" \\
--save_strategy "steps" \\
--save_steps 2000 \\
--save_total_limit 1 \\
--learning_rate 2e-5 \\
--weight_decay 0. \\
--warmup_ratio 0.03 \\
--lr_scheduler_type "cosine" \\
--logging_steps 1 \\
--fsdp "full_shard auto_wrap" \\
--fsdp_transformer_layer_cls_to_wrap '${layerClass}' \\
--tf32 True`;
  }

  // Order must match the buttons above: 0 = OPT 6.7B, 1 = OPT 13B,
  // 2 = Alpaca/LLaMA 7B. The 13B model halves the batch size and
  // doubles gradient accumulation to keep the effective batch equal.
  const commands = [
    buildCommand('"facebook/opt-6.7b"', 4, 8, "OPTDecoderLayer"),
    buildCommand('"facebook/opt-13b"', 2, 16, "OPTDecoderLayer"),
    buildCommand("<PATH_TO_LLAMA_WEIGHTS>", 4, 8, "LlamaDecoderLayer"),
  ];

  /**
   * Show the command at `appIndex` in the #command box.
   * Called from the buttons' inline onclick handlers, so it must stay
   * a global function declaration.
   * @param {number} appIndex index into `commands`
   */
  function changeCommand(appIndex) {
    // textContent inserts the string verbatim (no layout pass, no
    // HTML parsing — "<YOUR PORT>" displays literally); the embedded
    // newlines render because #command uses `white-space: pre-wrap`.
    document.getElementById("command").textContent = commands[appIndex];
  }
</script>
</body>

</html>