File size: 752 Bytes
b08fe17 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
#!/bin/bash
# Free up GPU memory, then launch a training run.
#
# Usage: <this script> [CONFIG]
#   CONFIG  path passed to `run_train.py --cfg`
#           (default: train_config/ace_plus_fft_lora.yaml). If you have a
#           memory-optimized config (for example
#           train_config/ace_plus_fft_lora_low_mem.yaml), pass it here.
#
# Environment (a value already exported by the caller is kept):
#   PYTORCH_CUDA_ALLOC_CONF          default: expandable_segments:True
#   PYTORCH_NO_CUDA_MEMORY_CACHING   default: 1
#
# WARNING: this signals every process whose name matches "python" that the
# current user is allowed to signal, not only training jobs.
#
# Exit status: that of the final `python run_train.py` invocation.

# Treat unset variables as errors. `-e` is deliberately not used: the cleanup
# steps below are best-effort and must not prevent training from starting.
set -u

cfg=${1:-train_config/ace_plus_fft_lora.yaml}

# Stop existing Python processes. Ask politely first (SIGTERM) so they can
# release resources, and only force-kill whatever is still alive afterwards.
echo "Stopping any running Python processes..."
if pkill -TERM python; then
  # Give the processes up to ~5 seconds to exit on their own.
  for _ in 1 2 3 4 5; do
    pgrep python >/dev/null || break
    sleep 1
  done
  # A non-zero status here just means nothing was left to kill.
  pkill -KILL python
fi

# Reset the GPU. This is best-effort: per the nvidia-smi documentation the
# reset needs root and a GPU with no running applications, so report a
# failure but carry on.
echo "Clearing GPU cache..."
if ! nvidia-smi --gpu-reset; then
  echo "Warning: GPU reset failed; continuing without it." >&2
fi

# Wait a moment for cleanup to settle.
sleep 5

# Show the GPU memory status the training run will start from.
echo "Current GPU memory status:"
nvidia-smi

# Memory-related PyTorch allocator settings (caller-provided values win).
export PYTORCH_CUDA_ALLOC_CONF="${PYTORCH_CUDA_ALLOC_CONF:-expandable_segments:True}"
# NOTE(review): the PyTorch docs describe this variable as disabling the CUDA
# caching allocator (a debugging aid); that presumably makes the allocator
# config above ineffective and slows training. Confirm it is intended.
export PYTORCH_NO_CUDA_MEMORY_CACHING="${PYTORCH_NO_CUDA_MEMORY_CACHING:-1}"

# Run training with the selected config.
echo "Starting training..."
python run_train.py --cfg "$cfg"
|