#!/usr/bin/env bash
#
# Install flash-attn into the currently active conda environment as a detached
# background job, after recording a snapshot of the build environment.
#
# Steps:
#   1. Resolve the repository root (the parent of this script's directory),
#      cd into it and source scripts/lib/nohup_runner.sh.
#   2. Validate FLASH_ATTN_VERSION and check that `python` is on PATH and that
#      CONDA_PREFIX is non-empty.
#   3. Create logs/install/<UTC timestamp>_flash_attn/ and write env.txt there
#      (host, conda env, python, nvcc, GPU and torch details), then print it.
#   4. Abort if torch cannot be imported.
#   5. Hand the pip build to launch_detached and print where to find its output.
#
# Environment:
#   FLASH_ATTN_VERSION  flash-attn version to install (default: 2.7.4).
#   CONDA_PREFIX        must be non-empty, i.e. a conda env must be active.
#
# NOTE(review): launch_detached comes from nohup_runner.sh, which is not
# visible here. The log/PID paths printed at the end (logs/train.log,
# logs/train.err, run.pid under the install dir) are taken from this script's
# own messages — confirm they match what the helper actually writes.

set -euo pipefail

# Locate the repo root relative to this file so the script works from any cwd.
ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
cd "${ROOT}"
# shellcheck source=scripts/lib/nohup_runner.sh
source "${ROOT}/scripts/lib/nohup_runner.sh"

FLASH_ATTN_VERSION="${FLASH_ATTN_VERSION:-2.7.4}"

# The version is interpolated into the text of the detached `bash -c` script
# below, so reject anything that is not a plain version token: a stray quote or
# `$(...)` would otherwise corrupt (or inject into) the background job.
version_pattern='^[0-9A-Za-z][0-9A-Za-z.+!_-]*$'
if [[ ! "${FLASH_ATTN_VERSION}" =~ ${version_pattern} ]]; then
  echo "ERROR: invalid FLASH_ATTN_VERSION: '${FLASH_ATTN_VERSION}'" >&2
  exit 1
fi

# ---- Preflight: an activated conda env with python on PATH -----------------
if ! command -v python >/dev/null 2>&1; then
  echo "ERROR: python not found in PATH. Activate the conda env first:" >&2
  echo " conda activate causalgrok" >&2
  exit 1
fi
if [[ -z "${CONDA_PREFIX:-}" ]]; then
  echo "ERROR: CONDA_PREFIX is empty — no conda env appears to be active." >&2
  echo " conda activate causalgrok" >&2
  exit 1
fi

# ---- Per-run output directory, keyed by a UTC timestamp --------------------
STAMP="$(date -u +%Y%m%d-%H%M%S)"
INSTALL_DIR="logs/install/${STAMP}_flash_attn"
mkdir -p "${INSTALL_DIR}"

# ---- Environment snapshot ---------------------------------------------------
# Everything in this group is best-effort diagnostics: a missing or failing
# tool is recorded in the snapshot rather than aborting the install.
{
  echo "# captured: $(date -u +%FT%TZ)"
  echo "# host: $(hostname)"
  echo "# CONDA_PREFIX: ${CONDA_PREFIX}"
  echo "# CONDA_DEFAULT_ENV: ${CONDA_DEFAULT_ENV:-}"
  echo "# python: $(python --version 2>&1)"
  echo "# which python: $(command -v python)"
  if command -v nvcc >/dev/null 2>&1; then
    echo "# nvcc: $(nvcc --version | tail -n 1)"
  else
    echo "# nvcc: NOT FOUND (CUDA toolkit may be missing)"
  fi
  if command -v nvidia-smi >/dev/null 2>&1; then
    echo "# nvidia-smi:"
    # Under `set -e` + pipefail a failing query would kill the whole script
    # mid-snapshot; note the failure instead and carry on.
    nvidia-smi --query-gpu=name,driver_version,compute_cap --format=csv,noheader \
      | sed 's/^/# /' \
      || echo "# (nvidia-smi query failed)"
  fi
  python -c "import torch, sys; print(f'# torch: {torch.__version__}'); print(f'# torch.cuda: {torch.version.cuda}'); print(f'# CUDA avail: {torch.cuda.is_available()}')" 2>/dev/null \
    || echo "# torch: NOT INSTALLED — install torch first"
} > "${INSTALL_DIR}/env.txt"

# Echo the snapshot so the operator sees it without opening the file.
cat "${INSTALL_DIR}/env.txt"
echo

# ---- torch must be importable before flash-attn is built --------------------
if ! python -c "import torch" >/dev/null 2>&1; then
  echo "ERROR: torch is not installed in this env. Install it first:" >&2
  echo " pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118" >&2
  exit 1
fi

# ---- Detached build ----------------------------------------------------------
# The script text is double-quoted so ${FLASH_ATTN_VERSION} expands here, in
# the parent shell (it was validated above). pip is invoked as `python -m pip`
# so the install lands in the same interpreter the preflight checks verified.
echo "Starting flash-attn ${FLASH_ATTN_VERSION} install (detached)..."
launch_detached "${INSTALL_DIR}" \
  bash -c "
set -euo pipefail
echo '== installing build deps =='
python -m pip install --upgrade pip wheel setuptools packaging ninja
echo
echo '== installing flash-attn ${FLASH_ATTN_VERSION} =='
python -m pip install --no-build-isolation 'flash-attn==${FLASH_ATTN_VERSION}'
echo
echo '== sanity check =='
python -c 'import flash_attn; print(\"flash-attn version:\", flash_attn.__version__)'
echo 'DONE'
"

# ---- Tell the operator where to look -----------------------------------------
echo
echo "Outputs:"
echo " env snapshot : ${INSTALL_DIR}/env.txt"
echo " build log : ${INSTALL_DIR}/logs/train.log"
echo " build err : ${INSTALL_DIR}/logs/train.err"
echo " PID : ${INSTALL_DIR}/run.pid"
echo
echo "Verify completion with:"
echo " grep -E 'DONE|ERROR' ${INSTALL_DIR}/logs/train.log"