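#!/usr/bin/env bash
# Launcher script for Adaptive Block Forcing training.
# NOTE: web-page residue ("Spaces:" / "No application file") that preceded the
# script in this copy was not part of the script and has been dropped.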
# Launch a single-process Adaptive Block Forcing training run via `accelerate`.
#
# Prints host and Python/PyTorch diagnostics, exports CUDA_VISIBLE_DEVICES=0
# and DEBUGPY=0, then starts train.py with the launch settings defined below.
# On a failed launch the script reports the failure on stderr and exits with
# accelerate's exit status; the success banner is only printed on success.

# Exit on any error and on use of an unset variable.
set -eu

# Launch settings, defined once so the printed summary and the actual
# command line cannot drift apart.
TRAIN_CONFIG="config/llada.yaml"
ACCELERATE_CONFIG="config/acc_config"
NUM_PROCESSES=1
MAIN_PROCESS_PORT=29577

# Print script start
echo "=========================================="
echo "Starting Adaptive Block Forcing Training"
echo "=========================================="

# Print system information
echo "System Information:"
echo " Hostname: $(hostname)"
echo " Date: $(date)"
echo " User: $(whoami)"
echo " Working Directory: $(pwd)"
echo " Python Version: $(python --version 2>/dev/null || echo 'Python not found')"
echo ""

# Activate micromamba environment (currently disabled; the script uses
# whatever environment is already active in the calling shell).
# echo "Activating micromamba environment 'abf'..."
# eval "$(micromamba shell hook --shell bash)"
# micromamba activate abf

# Print environment information
echo "Environment Information:"
# CONDA_DEFAULT_ENV may be unset; default to empty so `set -u` does not abort.
echo " Active Environment: ${CONDA_DEFAULT_ENV:-}"
# `command -v` is the portable replacement for `which`.
echo " Python Path: $(command -v python || echo 'Python not found')"
echo " Python Version: $(python --version 2>/dev/null || echo 'Python not found')"
echo " CUDA Available: $(python -c 'import torch; print(torch.cuda.is_available())' 2>/dev/null || echo 'PyTorch not available')"
# Only query further torch details when torch is importable.
# These probes run before CUDA_VISIBLE_DEVICES is exported below, so the GPU
# count reflects the environment the script was started in.
if python -c 'import torch' 2>/dev/null; then
  echo " CUDA Version: $(python -c 'import torch; print(torch.version.cuda)')"
  echo " GPU Count: $(python -c 'import torch; print(torch.cuda.device_count())')"
fi
echo ""

# Set environment variables
export CUDA_VISIBLE_DEVICES=0
export DEBUGPY=0
# export CUDA_LAUNCH_BLOCKING=1

echo "Starting training with the following configuration:"
echo " CUDA_VISIBLE_DEVICES: $CUDA_VISIBLE_DEVICES"
echo " DEBUGPY: $DEBUGPY"
echo " Config File: $TRAIN_CONFIG"
echo " Accelerate Config: $ACCELERATE_CONFIG"
echo " Number of Processes: $NUM_PROCESSES"
echo " Main Process Port: $MAIN_PROCESS_PORT"
echo ""

# Launch training
echo "Launching training..."
# Capture the exit status instead of letting `set -e` abort silently, so a
# failed run is reported explicitly and keeps accelerate's exit code.
status=0
accelerate launch \
  --config_file "$ACCELERATE_CONFIG" \
  --num_processes "$NUM_PROCESSES" \
  --main_process_port "$MAIN_PROCESS_PORT" \
  train.py --config "$TRAIN_CONFIG" || status=$?
if [ "$status" -ne 0 ]; then
  echo "Training failed with exit status $status" >&2
  exit "$status"
fi

echo ""
echo "=========================================="
echo "Training completed!"
echo "=========================================="