#!/usr/bin/env bash
#
# Launcher that locates a torch-capable Python interpreter (preferring the
# Gaudi conda environment named below) and runs task1/train_mcs_models.py.

# These two commands run before strict mode is enabled, so a missing `module`
# command or a failed activation is reported on stderr but does not abort.
module load mamba/latest
source activate gaudi-pytorch-diffusion-1.22.0.740

set -euo pipefail

# Parent of the directory that contains this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Name of the conda environment this script wants to run in.
TARGET_GAUDI_ENV="gaudi-pytorch-diffusion-1.22.0.740"

# Candidate install prefixes for that environment, in priority order.
# HOME- and CONDA_PREFIX-relative candidates are added only when the variable
# is set: under `set -u` an unguarded ${CONDA_PREFIX} aborts the script when
# no conda environment is active (e.g. the activation above failed).
POSSIBLE_GAUDI_PATHS=()
if [[ -n "${HOME:-}" ]]; then
  POSSIBLE_GAUDI_PATHS+=(
    "${HOME}/mamba/envs/${TARGET_GAUDI_ENV}"
    "${HOME}/.conda/envs/${TARGET_GAUDI_ENV}"
  )
fi
POSSIBLE_GAUDI_PATHS+=("/packages/envs/${TARGET_GAUDI_ENV}")
if [[ -n "${CONDA_PREFIX:-}" ]]; then
  # Sibling of the currently active environment.
  POSSIBLE_GAUDI_PATHS+=("${CONDA_PREFIX}/../${TARGET_GAUDI_ENV}")
fi

# First candidate that exists as a directory; stays empty when none does.
TARGET_GAUDI_PREFIX=""
for path in "${POSSIBLE_GAUDI_PATHS[@]}"; do
  if [[ -d "${path}" ]]; then
    TARGET_GAUDI_PREFIX="${path}"
    break
  fi
done

# Activate script inside the selected prefix (not checked for existence here).
TARGET_GAUDI_ACTIVATE="${TARGET_GAUDI_PREFIX}/bin/activate"

#######################################
# Best-effort activation of the target Gaudi conda environment in the
# current shell.
# Globals:
#   LWM_AUTO_HABANA        (read) any value other than "1" disables
#                          activation; defaults to "1"
#   CONDA_DEFAULT_ENV      (read) used to detect an already-active target env
#   TARGET_GAUDI_ENV       (read) name of the environment to activate
#   TARGET_GAUDI_ACTIVATE  (read) path of the environment's activate script
# Outputs:
#   [DEBUG] progress lines on stdout.
# Returns:
#   0 in every case; a failed activation is reported, never fatal.
#######################################
ensure_gaudi_env() {
  if [[ "${LWM_AUTO_HABANA:-1}" != "1" ]]; then
    echo "[DEBUG] Auto Gaudi activation disabled (LWM_AUTO_HABANA=0)"
    return
  fi

  if [[ "${CONDA_DEFAULT_ENV:-}" == "${TARGET_GAUDI_ENV}" ]]; then
    echo "[DEBUG] Already in Gaudi environment: ${CONDA_DEFAULT_ENV}"
    return
  fi

  if [[ ! -f "${TARGET_GAUDI_ACTIVATE}" ]]; then
    echo "[DEBUG] Gaudi environment not found at ${TARGET_GAUDI_ACTIVATE}"
    return
  fi

  echo "[DEBUG] Attempting to activate Gaudi environment..."

  # `module` and activate scripts are third-party shell code that may expand
  # unset variables. With nounset on, one such expansion terminates the whole
  # (non-interactive) script, so relax it here and restore it afterwards.
  local restore_nounset="0"
  if [[ $- == *u* ]]; then
    restore_nounset="1"
    set +u
  fi

  if command -v module >/dev/null 2>&1; then
    echo "[DEBUG] Loading mamba module..."
    # Must run in the current shell: as a pipeline stage it would execute in
    # a subshell and every environment change it makes would be discarded.
    module load mamba/latest 2>&1 || true
  else
    echo "[DEBUG] module command not available, skipping module load"
  fi

  local activated="0"

  # The guard above already established that the activate script exists.
  echo "[DEBUG] Trying direct activation: source ${TARGET_GAUDI_ACTIVATE}"
  if source "${TARGET_GAUDI_ACTIVATE}" 2>&1; then
    activated="1"
    echo "[DEBUG] Successfully activated via direct path"
  fi

  # Fall back to activation by environment name.
  if [[ "${activated}" != "1" ]]; then
    echo "[DEBUG] Trying conda activate: source activate ${TARGET_GAUDI_ENV}"
    if source activate "${TARGET_GAUDI_ENV}" 2>&1; then
      activated="1"
      echo "[DEBUG] Successfully activated via conda"
    else
      echo "[DEBUG] Failed to activate Gaudi environment"
    fi
  fi

  if [[ "${restore_nounset}" == "1" ]]; then
    set -u
  fi

  if [[ "${activated}" == "1" ]]; then
    echo "[DEBUG] Gaudi environment activated successfully"
  fi
}

# Attempt activation now, before the interpreter lookup that follows reads
# CONDA_DEFAULT_ENV / CONDA_PREFIX.
ensure_gaudi_env

#######################################
# Accept a Python interpreter only if it is runnable and can import torch.
# Globals:
#   PYTHON_CMD (written) set to a one-element array holding the interpreter
#              when it is accepted; left untouched otherwise
# Arguments:
#   $1 - interpreter to probe: a path (contains '/') that must be
#        executable, or a bare command name that must resolve via
#        `command -v`. Missing or empty is rejected.
# Returns:
#   0 if the interpreter was accepted, 1 otherwise.
#######################################
try_python() {
  # Default to empty so a call without arguments returns 1 instead of
  # aborting the script with "unbound variable" under `set -u`.
  local executable="${1:-}"
  if [[ -z "${executable}" ]]; then
    return 1
  fi

  if [[ "${executable}" == */* ]]; then
    # Explicit path: the file itself must be executable.
    if [[ ! -x "${executable}" ]]; then
      return 1
    fi
  elif ! command -v "${executable}" >/dev/null 2>&1; then
    # Bare name that does not resolve to a command.
    return 1
  fi

  # The torch import is the actual acceptance test; its output is discarded.
  if "${executable}" -c "import torch" >/dev/null 2>&1; then
    PYTHON_CMD=("${executable}")
    return 0
  fi

  return 1
}

# Interpreter command selected by the steps below; empty until one is chosen.
PYTHON_CMD=()

# Step 1: the target Gaudi environment looks active, so use its interpreter
# directly (no torch probe). The prefix comparison and the interpreter path
# are only meaningful when a prefix was actually found: with an empty
# TARGET_GAUDI_PREFIX and no active conda env, "" == "" would match and
# "${TARGET_GAUDI_PREFIX}/bin/python" would degrade to /bin/python.
if [[ "${CONDA_DEFAULT_ENV:-}" == "${TARGET_GAUDI_ENV}" ]] \
  || [[ -n "${TARGET_GAUDI_PREFIX}" && "${CONDA_PREFIX:-}" == "${TARGET_GAUDI_PREFIX}" ]]; then
  echo "[DEBUG] Gaudi environment active: ${CONDA_DEFAULT_ENV:-unknown}"
  if [[ -n "${TARGET_GAUDI_PREFIX}" && -x "${TARGET_GAUDI_PREFIX}/bin/python" ]]; then
    PYTHON_CMD=("${TARGET_GAUDI_PREFIX}/bin/python")
    echo "[DEBUG] Forcing use of Gaudi Python: ${TARGET_GAUDI_PREFIX}/bin/python"
  fi
fi

# Every later step runs only while nothing has been selected, and only
# accepts an interpreter that passes try_python's torch import probe.

# Step 2: interpreter of the currently active conda prefix.
if [[ ${#PYTHON_CMD[@]} -eq 0 && -n "${CONDA_PREFIX:-}" ]]; then
  try_python "${CONDA_PREFIX}/bin/python" || true
fi

# Step 3: interpreter of the currently active virtualenv.
if [[ ${#PYTHON_CMD[@]} -eq 0 && -n "${VIRTUAL_ENV:-}" ]]; then
  try_python "${VIRTUAL_ENV}/bin/python" || true
fi

# Step 4: interpreter inside the discovered Gaudi prefix, even if inactive.
if [[ ${#PYTHON_CMD[@]} -eq 0 && -n "${TARGET_GAUDI_PREFIX}" \
  && -x "${TARGET_GAUDI_PREFIX}/bin/python" ]]; then
  try_python "${TARGET_GAUDI_PREFIX}/bin/python" || true
fi

# Step 5: a non-base conda env is named but its prefix was not usable above;
# look for an env of that name under the usual env roots.
if [[ ${#PYTHON_CMD[@]} -eq 0 && -n "${CONDA_DEFAULT_ENV:-}" && "${CONDA_DEFAULT_ENV}" != "base" ]]; then
  if [[ -n "${HOME:-}" ]]; then
    try_python "${HOME}/mamba/envs/${CONDA_DEFAULT_ENV}/bin/python" || true
    if [[ ${#PYTHON_CMD[@]} -eq 0 ]]; then
      try_python "${HOME}/.conda/envs/${CONDA_DEFAULT_ENV}/bin/python" || true
    fi
  fi

  if [[ ${#PYTHON_CMD[@]} -eq 0 && -n "${MAMBA_ROOT_PREFIX:-}" ]]; then
    try_python "${MAMBA_ROOT_PREFIX}/envs/${CONDA_DEFAULT_ENV}/bin/python" || true
  fi

  if [[ ${#PYTHON_CMD[@]} -eq 0 ]]; then
    # Derive an install root from whichever `python` is on PATH
    # (<root>/bin/python -> <root>) and look under <root>/envs.
    base_python="$(command -v python 2>/dev/null || true)"
    if [[ -n "${base_python}" ]]; then
      base_root="$(dirname "$(dirname "${base_python}")")"
      try_python "${base_root}/envs/${CONDA_DEFAULT_ENV}/bin/python" || true
    fi
  fi
fi

# Step 6: whatever python3, then python, resolves to on PATH.
if [[ ${#PYTHON_CMD[@]} -eq 0 ]]; then
  try_python "$(command -v python3 2>/dev/null || true)" || true
fi

if [[ ${#PYTHON_CMD[@]} -eq 0 ]]; then
  try_python "$(command -v python 2>/dev/null || true)" || true
fi

# Step 7: last resort, an interpreter named by the PYTHON variable.
if [[ ${#PYTHON_CMD[@]} -eq 0 && -n "${PYTHON:-}" ]]; then
  echo "[WARN] Falling back to PYTHON=${PYTHON}"
  try_python "${PYTHON}" || true
fi

if [[ ${#PYTHON_CMD[@]} -eq 0 ]]; then
  echo "[ERROR] Could not find a python interpreter with torch installed." >&2
  echo "[ERROR] Activate the Habana environment (module load mamba/latest; source activate gaudi-pytorch-diffusion-1.22.0.740)." >&2
  exit 1
fi

# Exported so the Python processes started later inherit it.
export PYTHONNOUSERSITE=1

# Resolve the interpreter for logging; fall back to the raw value when
# `command -v` cannot resolve it.
python_path="$(command -v "${PYTHON_CMD[0]}" 2>/dev/null || true)"
if [[ -z "${python_path}" ]]; then
  python_path="${PYTHON_CMD[0]}"
fi

echo "[DEBUG] Using Python: ${python_path}"
python_version="$("${PYTHON_CMD[@]}" --version)"
echo "[DEBUG] Python version: ${python_version}"
echo "[DEBUG] PYTHONNOUSERSITE=${PYTHONNOUSERSITE}"

#######################################
# Report whether the --models option is present among the given arguments.
# Arguments:
#   the command-line arguments to inspect
# Returns:
#   0 if an argument is exactly "--models" or starts with "--models=",
#   1 otherwise (including when no arguments are given).
#######################################
has_models_flag() {
  local arg
  for arg in "$@"; do
    case "${arg}" in
      --models | --models=*) return 0 ;;
    esac
  done
  return 1
}

# Arguments are inspected one by one: a substring test on the joined "$*"
# would also fire on options such as --models-dir or on a value that merely
# contains the text "--models".
if has_models_flag "$@"; then
  # The caller chose the models: run the trainer once, forwarding everything.
  "${PYTHON_CMD[@]}" "${ROOT_DIR}/task1/train_mcs_models.py" \
    --cities city_10_austin \
    --comm-types LTE \
    "$@"
else
  # No model selection given: run the trainer once per built-in model name.
  for model in lwm resnet18 efficientnet_b0 mobilenet_v3_small simple_cnn; do
    echo ""
    echo "=========================================="
    echo "Training model: ${model}"
    echo "=========================================="
    "${PYTHON_CMD[@]}" "${ROOT_DIR}/task1/train_mcs_models.py" \
      --cities city_10_austin \
      --comm-types LTE \
      --models "${model}" \
      "$@"
  done
fi