<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Training Commands</title>
  <style>
    body {
      margin: 0;
      padding: 8px;
      font-family: monospace;
    }

    /* Box showing the active command. `pre-wrap` preserves the
       newlines embedded in the command strings; `break-all` keeps
       long unbroken flags from overflowing the box. */
    #command {
      display: inline-block;
      padding: 8px;
      border: 1px solid #d1d5da;
      border-radius: 3px;
      background-color: #f6f8fa;
      white-space: pre-wrap;
      word-break: break-all;
    }

    button {
      margin-left: 8px;
      cursor: pointer;
    }
  </style>
</head>
<body>
  <!-- Default view: identical to commands[0] in the script below.
       The <YOUR PORT> placeholder must be written with character
       references here — a raw "<" starts a tag and the browser would
       swallow the placeholder from the rendered text. -->
  <div id="command">torchrun --nproc_per_node=8 --master_port=&lt;YOUR PORT&gt; train.py \
--model_name_or_path "facebook/opt-6.7b" \
--data_path medalpaca_small.json \
--bf16 True \
--output_dir models \
--num_train_epochs 3 \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--gradient_accumulation_steps 8 \
--evaluation_strategy "no" \
--save_strategy "steps" \
--save_steps 2000 \
--save_total_limit 1 \
--learning_rate 2e-5 \
--weight_decay 0. \
--warmup_ratio 0.03 \
--lr_scheduler_type "cosine" \
--logging_steps 1 \
--fsdp "full_shard auto_wrap" \
--fsdp_transformer_layer_cls_to_wrap 'OPTDecoderLayer' \
--tf32 True</div>
  <div>
    <!-- Each button's argument is its index in the `commands` array:
         0 = OPT 6.7B, 1 = OPT 13B, 2 = Alpaca/LLaMA 7B. -->
    <button type="button" onclick="changeCommand(0)">OPT 6.7B</button>
    <button type="button" onclick="changeCommand(1)">OPT 13B</button>
    <button type="button" onclick="changeCommand(2)">Alpaca 7B</button>
  </div>
<script>
  /**
   * Build one torchrun fine-tuning command string.
   * The three published commands differ only in the model path, the
   * per-device batch size, the gradient-accumulation steps, and the
   * transformer layer class wrapped by FSDP, so they are generated
   * from a single template instead of being maintained as three
   * near-identical 21-line literals.
   *
   * @param {string} model      value for --model_name_or_path, including
   *                            any surrounding quotes to emit verbatim
   * @param {number} batchSize  per-device train AND eval batch size
   * @param {number} gradAccum  --gradient_accumulation_steps value
   * @param {string} layerClass class name for
   *                            --fsdp_transformer_layer_cls_to_wrap
   * @returns {string} multi-line shell command with "\"-continuations
   */
  function buildCommand(model, batchSize, gradAccum, layerClass) {
    return `torchrun --nproc_per_node=8 --master_port=<YOUR PORT> train.py \\
--model_name_or_path ${model} \\
--data_path medalpaca_small.json \\
--bf16 True \\
--output_dir models \\
--num_train_epochs 3 \\
--per_device_train_batch_size ${batchSize} \\
--per_device_eval_batch_size ${batchSize} \\
--gradient_accumulation_steps ${gradAccum} \\
--evaluation_strategy "no" \\
--save_strategy "steps" \\
--save_steps 2000 \\
--save_total_limit 1 \\
--learning_rate 2e-5 \\
--weight_decay 0. \\
--warmup_ratio 0.03 \\
--lr_scheduler_type "cosine" \\
--logging_steps 1 \\
--fsdp "full_shard auto_wrap" \\
--fsdp_transformer_layer_cls_to_wrap '${layerClass}' \\
--tf32 True`;
  }

  // Order must match the buttons above: 0 = OPT 6.7B, 1 = OPT 13B,
  // 2 = Alpaca/LLaMA 7B. The 13B model halves the batch size and
  // doubles gradient accumulation to keep the effective batch equal.
  const commands = [
    buildCommand('"facebook/opt-6.7b"', 4, 8, "OPTDecoderLayer"),
    buildCommand('"facebook/opt-13b"', 2, 16, "OPTDecoderLayer"),
    buildCommand("<PATH_TO_LLAMA_WEIGHTS>", 4, 8, "LlamaDecoderLayer"),
  ];

  /**
   * Show the command at `appIndex` in the #command box.
   * Called from the buttons' inline onclick handlers, so it must stay
   * a global function declaration.
   * @param {number} appIndex index into `commands`
   */
  function changeCommand(appIndex) {
    // textContent inserts the string verbatim (no layout pass, no
    // HTML parsing — "<YOUR PORT>" displays literally); the embedded
    // newlines render because #command uses `white-space: pre-wrap`.
    document.getElementById("command").textContent = commands[appIndex];
  }
</script>
</body>

</html>