File size: 752 Bytes
b08fe17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/bin/bash
#
# Free GPU memory held by stale Python processes, then launch training.
#
# Usage: run from the directory that contains run_train.py:
#   <this-script> [CONFIG]
#
#   CONFIG  Config file handed to run_train.py via --cfg.
#           Default: train_config/ace_plus_fft_lora.yaml
#           Pass a different file (for example a memory-optimized config,
#           if you have one) to train with that instead.
#
# Environment:
#   PROCESS_PATTERN  Process-name pattern given to pgrep/pkill (default: python).
#   GRACE_SECONDS    Seconds to wait after SIGTERM before SIGKILL (default: 10).
#
# WARNING: every process whose name matches PROCESS_PATTERN and that the
# calling user is allowed to signal is terminated, not only training jobs.
#
# `set -e` is deliberately not used: pkill exits 1 when nothing matches and
# the GPU reset is best-effort, so those steps are checked explicitly instead.
set -u -o pipefail

readonly DEFAULT_CFG="train_config/ace_plus_fft_lora.yaml"
readonly TRAIN_SCRIPT="run_train.py"
readonly PROCESS_PATTERN="${PROCESS_PATTERN:-python}"
readonly GRACE_SECONDS="${GRACE_SECONDS:-10}"

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print a non-fatal warning to stderr.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

#######################################
# Stop all processes matching a name pattern: SIGTERM first, SIGKILL only for
# the ones still alive once the grace period has elapsed.
# Arguments: $1 - pgrep/pkill process-name pattern
#            $2 - grace period in whole seconds
# Returns:   0 always (finding nothing to stop is not an error)
#######################################
stop_processes() {
  local pattern=$1
  local grace=$2
  local waited=0

  # pkill exits non-zero when no process matched; nothing left to do then.
  pkill -TERM "$pattern" || return 0

  while ((waited < grace)) && pgrep "$pattern" >/dev/null; do
    sleep 1
    waited=$((waited + 1))
  done

  if pgrep "$pattern" >/dev/null; then
    warn "processes matching '$pattern' ignored SIGTERM; sending SIGKILL"
    pkill -KILL "$pattern" || true # they may have exited in the meantime
  fi
  return 0
}

#######################################
# Best-effort GPU reset followed by a status report.
# `nvidia-smi --gpu-reset` resets the device itself (it is not a cache flush);
# it normally needs root and an idle GPU, so failure is reported, not fatal.
#######################################
reset_gpu() {
  if ! command -v nvidia-smi >/dev/null; then
    warn "nvidia-smi not found; skipping GPU reset and status report"
    return 0
  fi

  echo "Clearing GPU cache..."
  nvidia-smi --gpu-reset \
    || warn "GPU reset failed (needs root and an idle GPU); continuing"

  # Wait a moment for cleanup
  sleep 5

  echo "Current GPU memory status:"
  nvidia-smi || warn "could not query GPU status"
  return 0
}

main() {
  local cfg=${1:-$DEFAULT_CFG}

  # Validate everything before touching any running process.
  [[ "$GRACE_SECONDS" =~ ^[0-9]+$ ]] \
    || die "GRACE_SECONDS must be a non-negative integer, got '$GRACE_SECONDS'"
  [[ -f "$TRAIN_SCRIPT" ]] \
    || die "$TRAIN_SCRIPT not found in $PWD; run this script from the project root"
  [[ -f "$cfg" ]] || die "config file not found: $cfg"

  echo "Stopping any running Python processes..."
  stop_processes "$PROCESS_PATTERN" "$GRACE_SECONDS"

  reset_gpu

  # Memory-related settings for PyTorch's CUDA allocator.
  # NOTE(review): PYTORCH_NO_CUDA_MEMORY_CACHING=1 is documented by PyTorch as
  # a debugging switch that disables the caching allocator; it presumably
  # makes the expandable_segments setting moot and slows training - confirm
  # that both are really wanted together.
  export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
  export PYTORCH_NO_CUDA_MEMORY_CACHING=1

  echo "Starting training..."
  # Last command: its exit status becomes the script's exit status.
  python "$TRAIN_SCRIPT" --cfg "$cfg"
}

main "$@"