Unmodel-Fill-Training-Code / ACE_plus /Unmodel_training.sh
vijayakumaran92's picture
Add files using upload-large-folder tool
b08fe17 verified
#!/bin/bash
# Free GPU memory held by stale Python processes, then launch training.
#
# Usage:
#   bash <this script> [CONFIG]
#
# Arguments:
#   CONFIG  Value passed to `run_train.py --cfg`.
#           Default: train_config/ace_plus_fft_lora.yaml
#           If you have a memory-optimized config, pass it instead, e.g.
#           train_config/ace_plus_fft_lora_low_mem.yaml
#
# Must be run from the directory that contains run_train.py.
#
# WARNING: this signals EVERY process whose name matches "python" that the
# calling user is allowed to signal (as root: all of them), not only earlier
# training runs.
#
# Exit status: that of the training command.

# The cleanup steps are best-effort, so `-e` is deliberately not enabled:
# a failed kill or GPU reset must not prevent training from starting.
set -u -o pipefail

readonly DEFAULT_CFG="train_config/ace_plus_fft_lora.yaml"
# Seconds to wait after SIGTERM before escalating to SIGKILL.
readonly TERM_GRACE_SECONDS=10

# Print a warning to stderr.
warn() {
  printf "warning: %s\n" "$*" >&2
}

# Stop processes whose name matches "python": polite SIGTERM first so they
# can release resources, SIGKILL only for whatever survives the grace period.
stop_python_processes() {
  local waited=0

  echo "Stopping any running Python processes..."

  # pkill exits non-zero when nothing matched (or nothing could be
  # signalled); in that case there is nothing to wait for.
  pkill -TERM python || return 0

  while pgrep python >/dev/null && (( waited < TERM_GRACE_SECONDS )); do
    sleep 1
    waited=$(( waited + 1 ))
  done

  if pgrep python >/dev/null; then
    # Survivors may belong to other users, in which case this fails; that
    # is acceptable for a best-effort cleanup.
    pkill -KILL python || true
  fi
}

# Attempt a GPU reset, pause for cleanup, then print the memory status.
reset_gpu() {
  echo "Clearing GPU cache..."

  if ! command -v nvidia-smi >/dev/null 2>&1; then
    warn "nvidia-smi not found; skipping GPU reset and status report"
    return 0
  fi

  # NOTE(review): per the nvidia-smi documentation a reset needs elevated
  # privileges and a GPU with no active clients, so failure here is common
  # and is treated as non-fatal.
  nvidia-smi --gpu-reset \
    || warn "GPU reset failed (may need root and an idle GPU); continuing"

  # Wait a moment for cleanup.
  sleep 5

  echo "Current GPU memory status:"
  nvidia-smi || warn "could not query GPU status"
}

main() {
  local cfg=${1:-$DEFAULT_CFG}

  stop_python_processes
  reset_gpu

  # Memory optimization environment variables.
  export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
  # NOTE(review): PyTorch documents this switch as a debugging aid that
  # disables the caching allocator; it is expected to slow training and
  # presumably makes the allocator setting above moot. Kept to preserve the
  # existing behaviour; confirm it is really wanted.
  export PYTORCH_NO_CUDA_MEMORY_CACHING=1

  echo "Starting training..."
  # Last command: its exit status becomes the exit status of the script.
  python run_train.py --cfg "$cfg"
}

main "$@"