#!/bin/bash
# FunctionGemma SFT LoRA quickstart

# Environment
export CUDA_VISIBLE_DEVICES=0        # e.g. "0,1,2,3" for multi-GPU
export TOKENIZERS_PARALLELISM=false

# Model path (update to your local model location)
MODEL_PATH="/path/to/your/functiongemma-270m-it"

# Dataset path
DATASET_PATH="./data/training_data.json"

# Output directory
OUTPUT_DIR="./runs"

# Run name
RUN_NAME="functiongemma-lora-$(date +%Y%m%d_%H%M%S)"

echo "========================================"
echo "FunctionGemma SFT LoRA training"
echo "========================================"
echo "Model:   $MODEL_PATH"
echo "Dataset: $DATASET_PATH"
echo "Output:  $OUTPUT_DIR/$RUN_NAME"
echo "========================================"

# Option 1: Standard LoRA (recommended for most GPUs)
python -m src.train \
    --model_path "$MODEL_PATH" \
    --dataset_path "$DATASET_PATH" \
    --output_dir "$OUTPUT_DIR" \
    --run_name "$RUN_NAME" \
    --lora_r 16 \
    --lora_alpha 32 \
    --lora_dropout 0.05 \
    --num_train_epochs 3 \
    --per_device_train_batch_size 4 \
    --gradient_accumulation_steps 4 \
    --learning_rate 5e-5 \
    --warmup_ratio 0.1 \
    --max_seq_length 2048 \
    --bf16 \
    --logging_steps 10 \
    --save_steps 100 \
    --eval_steps 100 \
    --gradient_checkpointing

# Option 2: QLoRA (for smaller GPUs, uncomment to use)
# python -m src.train \
#     --model_path "$MODEL_PATH" \
#     --dataset_path "$DATASET_PATH" \
#     --output_dir "$OUTPUT_DIR" \
#     --run_name "$RUN_NAME-qlora" \
#     --lora_r 16 \
#     --lora_alpha 32 \
#     --lora_dropout 0.05 \
#     --num_train_epochs 3 \
#     --per_device_train_batch_size 8 \
#     --gradient_accumulation_steps 2 \
#     --learning_rate 2e-4 \
#     --warmup_ratio 0.1 \
#     --max_seq_length 2048 \
#     --use_4bit \
#     --logging_steps 10 \
#     --save_steps 100 \
#     --eval_steps 100 \
#     --gradient_checkpointing

echo "========================================"
echo "Training finished!"
echo "Model saved to: $OUTPUT_DIR/$RUN_NAME"
echo "========================================"
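
# Optional: quick smoke test of the trained adapter (uncomment to use).
# A minimal sketch, assuming src.train saves a PEFT LoRA adapter directly to
# $OUTPUT_DIR/$RUN_NAME (it may instead land in a checkpoint-* subdirectory).
# It loads the base model, attaches the adapter with peft, and generates a
# short completion. The prompt is a placeholder, not the model's
# function-calling chat format.
# python - <<PY
# import torch
# from transformers import AutoModelForCausalLM, AutoTokenizer
# from peft import PeftModel
#
# base_path = "$MODEL_PATH"                 # base model used for training
# adapter_path = "$OUTPUT_DIR/$RUN_NAME"    # assumed adapter location (see note above)
#
# tok = AutoTokenizer.from_pretrained(base_path)
# base = AutoModelForCausalLM.from_pretrained(base_path, torch_dtype=torch.bfloat16)
# model = PeftModel.from_pretrained(base, adapter_path)   # attach the LoRA weights
# model.eval()
#
# inputs = tok("Hello, what can you do?", return_tensors="pt")
# with torch.no_grad():
#     out = model.generate(**inputs, max_new_tokens=64)
# print(tok.decode(out[0], skip_special_tokens=True))
# PY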