Spaces:
No application file
No application file
File size: 1,921 Bytes
4f2b2f4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
#!/bin/bash
#
# Launcher for Adaptive Block Forcing training.
# This part prints host and Python/PyTorch diagnostics before the run starts.

# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

#######################################
# Print the version reported by `python --version`, or a placeholder when
# `python` cannot be run.
# Outputs: one line on stdout.
# Returns: 0 in both cases.
#######################################
python_version() {
  python --version 2>/dev/null || echo 'Python not found'
}

#######################################
# Print host, date, user, working directory and Python version.
# Each value is expanded inside the echo argument, so a failing helper
# command leaves that field empty instead of aborting the script.
# Outputs: the "System Information" section on stdout.
#######################################
print_system_info() {
  echo "System Information:"
  echo " Hostname: $(hostname)"
  echo " Date: $(date)"
  echo " User: $(whoami)"
  echo " Working Directory: $(pwd)"
  echo " Python Version: $(python_version)"
  echo ""
}

#######################################
# Print the active environment name, the Python interpreter in use and the
# CUDA details reported by PyTorch.
# Globals:  CONDA_DEFAULT_ENV (read; may be unset)
# Outputs:  the "Environment Information" section on stdout.
# Returns:  0, including when Python or PyTorch is unavailable.
#######################################
print_environment_info() {
  local torch_probe torch_report
  local cuda_available cuda_version gpu_count

  echo "Environment Information:"
  # Default to empty so the expansion is safe under `set -u` when no
  # environment has been activated.
  echo " Active Environment: ${CONDA_DEFAULT_ENV:-}"
  # `command -v` is a shell builtin, so this works even where `which` is absent.
  echo " Python Path: $(command -v python || echo 'Python not found')"
  echo " Python Version: $(python_version)"

  # Query everything in one interpreter so torch is imported only once.
  # The three values are emitted on a single '|'-separated line.
  torch_probe='import torch; print("|".join(str(v) for v in (torch.cuda.is_available(), torch.version.cuda, torch.cuda.device_count())))'
  if torch_report=$(python -c "${torch_probe}" 2>/dev/null); then
    # Parse only the last line, in case anything else was written to stdout
    # before the probe result.
    IFS='|' read -r cuda_available cuda_version gpu_count \
      <<<"${torch_report##*$'\n'}"
    echo " CUDA Available: ${cuda_available}"
    echo " CUDA Version: ${cuda_version}"
    echo " GPU Count: ${gpu_count}"
  else
    echo " CUDA Available: PyTorch not available"
  fi
  echo ""
}

# Print script start
echo "=========================================="
echo "Starting Adaptive Block Forcing Training"
echo "=========================================="

# Print system information
print_system_info

# Activate micromamba environment
# NOTE(review): activation hooks may expand unset variables; if these lines are
# re-enabled they may need `set +u` before and `set -u` after - confirm.
# echo "Activating micromamba environment 'abf'..."
# eval "$(micromamba shell hook --shell bash)"
# micromamba activate abf

# Print environment information
print_environment_info
#######################################
# Configure the environment, print the effective settings and start training
# through `accelerate launch`.
#
# Each setting keeps the value that used to be hard-coded as its default and
# can be overridden by the caller through an ABF_* environment variable:
#   ABF_CUDA_VISIBLE_DEVICES  exported as CUDA_VISIBLE_DEVICES  (default: 0)
#   ABF_DEBUGPY               exported as DEBUGPY               (default: 0)
#   ABF_CONFIG                passed to train.py --config
#                             (default: config/llada.yaml)
#   ABF_ACC_CONFIG            passed to accelerate --config_file
#                             (default: config/acc_config)
#   ABF_NUM_PROCESSES         passed to accelerate --num_processes (default: 1)
#   ABF_MAIN_PROCESS_PORT     passed to accelerate --main_process_port
#                             (default: 29577)
#
# The same variables feed both the printed summary and the command line, so
# the summary always matches what is actually launched.
#
# Globals:  CUDA_VISIBLE_DEVICES, DEBUGPY (written and exported)
# Outputs:  configuration summary and progress on stdout; errors on stderr.
# Returns:  0 when training succeeds; 1 when a required file is missing;
#           otherwise the exit status of `accelerate launch`.
#######################################
run_training() {
  local train_config="${ABF_CONFIG:-config/llada.yaml}"
  local acc_config="${ABF_ACC_CONFIG:-config/acc_config}"
  local num_processes="${ABF_NUM_PROCESSES:-1}"
  local main_port="${ABF_MAIN_PROCESS_PORT:-29577}"
  local train_script="train.py"
  local required status

  # Set environment variables
  export CUDA_VISIBLE_DEVICES="${ABF_CUDA_VISIBLE_DEVICES:-0}"
  # NOTE(review): DEBUGPY is presumably read by train.py - confirm.
  export DEBUGPY="${ABF_DEBUGPY:-0}"
  # export CUDA_LAUNCH_BLOCKING=1

  echo "Starting training with the following configuration:"
  echo " CUDA_VISIBLE_DEVICES: ${CUDA_VISIBLE_DEVICES}"
  echo " DEBUGPY: ${DEBUGPY}"
  echo " Config File: ${train_config}"
  echo " Accelerate Config: ${acc_config}"
  echo " Number of Processes: ${num_processes}"
  echo " Main Process Port: ${main_port}"
  echo ""

  # The default paths are relative to the working directory, so fail fast with
  # a clear message when the script is started from the wrong place.
  for required in "${acc_config}" "${train_config}" "${train_script}"; do
    if [[ ! -e "${required}" ]]; then
      echo "Error: required file '${required}' not found (working directory: $(pwd))" >&2
      return 1
    fi
  done

  # Launch training
  echo "Launching training..."
  status=0
  accelerate launch \
    --config_file "${acc_config}" \
    --num_processes "${num_processes}" \
    --main_process_port "${main_port}" \
    "${train_script}" --config "${train_config}" || status=$?
  if (( status != 0 )); then
    echo "Error: training failed with exit status ${status}" >&2
    return "${status}"
  fi

  echo ""
  echo "=========================================="
  echo "Training completed!"
  echo "=========================================="
}

# Keep this as the last command: its status becomes the script's exit status.
run_training
|