#!/bin/bash
# DMind-3-nano / run_training.sh
# (upload metadata: yuzhe — "Upload 13 files" — commit 6f09d40 verified)
#
# FunctionGemma SFT LoRA quickstart.
# Usage: edit MODEL_PATH below, then run ./run_training.sh
#
# NOTE(review): the shebang must be the very first line of the file; the
# hosting-page residue that preceded it has been demoted to comments above.

# Fail fast: abort on any command error, on unset variables, and on
# failures anywhere in a pipeline — otherwise a failed training run would
# still fall through to the "Training finished!" banner below.
set -euo pipefail

# Environment
export CUDA_VISIBLE_DEVICES=0        # e.g. "0,1,2,3" for multi-GPU
export TOKENIZERS_PARALLELISM=false

# Model path (update to your local model location)
MODEL_PATH="/path/to/your/functiongemma-270m-it"
# Dataset path
DATASET_PATH="./data/training_data.json"
# Output directory
OUTPUT_DIR="./runs"
# Run name, made unique with a timestamp so reruns don't collide
RUN_NAME="functiongemma-lora-$(date +%Y%m%d_%H%M%S)"

# Warn (to stderr) if MODEL_PATH was never edited from the placeholder.
if [[ "$MODEL_PATH" == "/path/to/your/"* ]]; then
  echo "WARNING: MODEL_PATH is still the placeholder — edit run_training.sh" >&2
fi

echo "========================================"
echo "FunctionGemma SFT LoRA training"
echo "========================================"
echo "Model: $MODEL_PATH"
echo "Dataset: $DATASET_PATH"
echo "Output: $OUTPUT_DIR/$RUN_NAME"
echo "========================================"
# Option 1: Standard LoRA (recommended for most GPUs)
# Guard the invocation explicitly so a training failure aborts the script
# with a clear message instead of falling through to the success banner
# (the script may be run without strict mode, so `set -e` alone is not
# relied upon here).
if ! python -m src.train \
    --model_path "$MODEL_PATH" \
    --dataset_path "$DATASET_PATH" \
    --output_dir "$OUTPUT_DIR" \
    --run_name "$RUN_NAME" \
    --lora_r 16 \
    --lora_alpha 32 \
    --lora_dropout 0.05 \
    --num_train_epochs 3 \
    --per_device_train_batch_size 4 \
    --gradient_accumulation_steps 4 \
    --learning_rate 5e-5 \
    --warmup_ratio 0.1 \
    --max_seq_length 2048 \
    --bf16 \
    --logging_steps 10 \
    --save_steps 100 \
    --eval_steps 100 \
    --gradient_checkpointing; then
  echo "ERROR: training failed — see logs above" >&2
  exit 1
fi
# Option 2: QLoRA (for smaller GPUs, uncomment to use)
# python -m src.train \
# --model_path "$MODEL_PATH" \
# --dataset_path "$DATASET_PATH" \
# --output_dir "$OUTPUT_DIR" \
# --run_name "$RUN_NAME-qlora" \
# --lora_r 16 \
# --lora_alpha 32 \
# --lora_dropout 0.05 \
# --num_train_epochs 3 \
# --per_device_train_batch_size 8 \
# --gradient_accumulation_steps 2 \
# --learning_rate 2e-4 \
# --warmup_ratio 0.1 \
# --max_seq_length 2048 \
# --use_4bit \
# --logging_steps 10 \
# --save_steps 100 \
# --eval_steps 100 \
# --gradient_checkpointing
# Completion banner: report where the trained model was written.
printf '%s\n' \
  "========================================" \
  "Training finished!" \
  "Model saved to: $OUTPUT_DIR/$RUN_NAME" \
  "========================================"