#!/bin/bash
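# Launch supervised fine-tuning (SFT) of FunctionGemma with LoRA adapters.

# Fail fast on errors, unset variables, and broken pipelines.
set -euo pipefail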

# Pin the run to a single GPU and silence the tokenizers fork warning.
export CUDA_VISIBLE_DEVICES=0
export TOKENIZERS_PARALLELISM=false

# Paths and run identification (edit MODEL_PATH and DATASET_PATH for your setup).
MODEL_PATH="/path/to/your/functiongemma-270m-it"      # base model checkpoint
DATASET_PATH="./data/training_data.json"              # SFT training data
OUTPUT_DIR="./runs"                                   # root directory for all runs
RUN_NAME="functiongemma-lora-$(date +%Y%m%d_%H%M%S)"  # timestamped run name

echo "========================================"
echo "FunctionGemma SFT LoRA training"
echo "========================================"
echo "Model:   $MODEL_PATH"
echo "Dataset: $DATASET_PATH"
echo "Output:  $OUTPUT_DIR/$RUN_NAME"
echo "========================================"
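
# Sanity checks (an optional guard, not part of the trainer itself): abort
# early if the placeholder paths above haven't been filled in yet.
if [[ ! -d "$MODEL_PATH" ]]; then
    echo "Error: model path not found: $MODEL_PATH" >&2
    exit 1
fi
if [[ ! -f "$DATASET_PATH" ]]; then
    echo "Error: dataset file not found: $DATASET_PATH" >&2
    exit 1
fi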

# Launch training. LoRA hyperparameters come first, followed by standard
# HF-Trainer-style optimization and logging flags. Effective batch size is
# per_device_train_batch_size * gradient_accumulation_steps = 16.
python -m src.train \
    --model_path "$MODEL_PATH" \
    --dataset_path "$DATASET_PATH" \
    --output_dir "$OUTPUT_DIR" \
    --run_name "$RUN_NAME" \
    --lora_r 16 \
    --lora_alpha 32 \
    --lora_dropout 0.05 \
    --num_train_epochs 3 \
    --per_device_train_batch_size 4 \
    --gradient_accumulation_steps 4 \
    --learning_rate 5e-5 \
    --warmup_ratio 0.1 \
    --max_seq_length 2048 \
    --bf16 \
    --logging_steps 10 \
    --save_steps 100 \
    --eval_steps 100 \
    --gradient_checkpointing

echo "========================================"
echo "Training finished!"
echo "Model saved to: $OUTPUT_DIR/$RUN_NAME"
echo "========================================"
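
# Optional smoke test (commented out): a minimal sketch that loads the saved
# LoRA adapter on top of the base model with PEFT. It assumes src.train writes
# the adapter directly to $OUTPUT_DIR/$RUN_NAME; adjust the adapter path if
# your trainer saves per-checkpoint subdirectories instead.
#
# python - "$MODEL_PATH" "$OUTPUT_DIR/$RUN_NAME" <<'EOF'
# import sys
# from transformers import AutoModelForCausalLM
# from peft import PeftModel
#
# base_path, adapter_path = sys.argv[1], sys.argv[2]
# model = AutoModelForCausalLM.from_pretrained(base_path)  # load base weights
# model = PeftModel.from_pretrained(model, adapter_path)   # attach LoRA adapter
# print("LoRA adapter loaded successfully")
# EOF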