FasterDFlash
/

Hanrui

Model card Files Files and versions

Hanrui / syxin /launch_train.sh

Lekr0's picture

Add files using upload-large-folder tool

7c50656 verified about 1 month ago

history blame contribute delete

1.15 kB

	#!/bin/bash
	#
	# Starts scripts/train_dflash_lora_inject.py under torchrun with 8 processes
	# per node, using the Qwen3-8B checkpoint and the Nemotron-CodeAlpaca dataset
	# at the fixed /workspace paths below. Takes no arguments; every setting is
	# hard-coded in this file.
	#
	# Exit status is that of torchrun (or non-zero earlier if the cd fails,
	# because of `set -e`).
	set -euo pipefail

	# The training script path and the output directory are relative, so they
	# resolve against this checkout.
	cd /workspace/hanrui/syxin/Specforge

	# Cache locations, all kept under /workspace/hanrui/cache.
	export HF_HOME=/workspace/hanrui/cache/hf_home
	export HF_DATASETS_CACHE=/workspace/hanrui/cache/hf_datasets
	export TORCHINDUCTOR_CACHE_DIR=/workspace/hanrui/cache/compiled_kernels

	# The same allocator setting is exported under both variable names.
	# NOTE(review): presumably so that whichever name the installed PyTorch
	# reads is covered -- confirm against the PyTorch version in use.
	export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
	export PYTORCH_ALLOC_CONF=expandable_segments:True

	export SPECFORGE_DATA_NUM_PROC=16

	# Options consumed by torchrun itself.
	launcher_opts=(
	  --nproc_per_node=8
	)

	train_script=scripts/train_dflash_lora_inject.py

	# Options forwarded to the training script, in the order it receives them.
	train_opts=(
	  # Target model and training data.
	  --target-model-path /workspace/models/Qwen3-8B
	  --target-model-backend hf
	  --train-data-path /workspace/hanrui/datasets/Nemotron-CodeAlpaca-qwen3-8b-800K
	  # NOTE(review): the directory name says "32gpu" while this launch starts
	  # 8 processes per node and passes no node-count flag -- confirm the name
	  # is intentional.
	  --output-dir outputs/qwen3-8b-sft-32gpu-v2

	  # Attention / sequence settings.
	  --block-size 16
	  --attention-backend additive
	  --attn-implementation sdpa
	  --max-length 2048

	  # Optimisation schedule.
	  --batch-size 4
	  --accumulation-steps 8
	  --num-epochs 3
	  --learning-rate 5e-5
	  --loss-decay-gamma 7
	  --gradient-checkpointing

	  # Templating, logging, checkpointing, caching.
	  --chat-template qwen
	  --log-interval 50
	  --save-interval 500
	  --cache-dir /workspace/hanrui/cache

	  # LoRA flags.
	  --lora-rank 32
	  --lora-alpha 64
	  --lora-dropout 0.1

	  --trust-remote-code
	  --dataloader-num-workers 0
	)

	torchrun "${launcher_opts[@]}" "$train_script" "${train_opts[@]}"