scottzhao committed on
Commit
873520c
·
verified ·
1 Parent(s): eb5880a

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ docker-aienv/wheels/flash_attn-2.7.4.post1-cp310-cp310-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
37
+ docker-aienv/wheels/triton-3.3.0+git95326d9f-cp310-cp310-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
38
+ docker-aienv/wheels/vllm-0.8.4.dev0+g296c657.d20250410.cu128-cp310-cp310-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
39
+ docker-aienv/wheels/xformers-0.0.30+4fa0149.d20250410-cp310-cp310-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
docker-aienv/build.sh ADDED
@@ -0,0 +1 @@
 
 
#!/usr/bin/env bash
# Build the AI environment image for linux/amd64.
set -euo pipefail

docker build --platform linux/amd64 --tag zhaomh1998/pytorch:cu128 .
docker-aienv/dockerfile ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM nvcr.io/nvidia/cuda-dl-base:25.03-cuda12.8-devel-ubuntu24.04

# `source` (used below to activate conda) is a bashism; Docker's default
# RUN shell is /bin/sh, which is dash on Ubuntu. Run everything under bash.
SHELL ["/bin/bash", "-c"]

WORKDIR /workspace

# Refresh the apt package index; later install layers rely on this cache.
RUN apt-get update

# cv2 (OpenCV) runtime dependencies
RUN apt-get install -y ffmpeg libsm6 libxext6

# Utilities
RUN apt-get install -y screen

# Oh-my-zsh with a few quality-of-life plugins
RUN sh -c "$(wget -O- https://github.com/deluan/zsh-in-docker/releases/download/v1.2.1/zsh-in-docker.sh)" -- \
    -t robbyrussell \
    -p git \
    -p https://github.com/zsh-users/zsh-autosuggestions \
    -p https://github.com/zsh-users/zsh-syntax-highlighting \
    -p history \
    -p fzf
RUN git clone --depth 1 https://github.com/junegunn/fzf.git --branch v0.61.0 --single-branch ~/.fzf
# --all answers the installer's interactive prompts so the build does not
# block on stdin.
RUN ~/.fzf/install --all
RUN chsh -s /bin/zsh

# Python environment setup
# Miniconda (Python 3.10)
RUN mkdir -p /root/miniconda3 && \
    wget https://repo.anaconda.com/miniconda/Miniconda3-py310_25.1.1-2-Linux-x86_64.sh && \
    bash Miniconda3-py310_25.1.1-2-Linux-x86_64.sh -b -u -p /root/miniconda3 && \
    rm Miniconda3-py310_25.1.1-2-Linux-x86_64.sh && \
    source /root/miniconda3/bin/activate && \
    conda init --all && \
    conda install -c conda-forge libstdcxx-ng=14 -y

# HF Stuff
RUN source /root/miniconda3/bin/activate && \
    pip install hf_transfer

# Pre-built wheels produced by the docker-aiwhl builder image.
RUN mkdir -p /root/wheels

COPY wheels/flash_attn-2.7.4.post1-cp310-cp310-linux_x86_64.whl /root/wheels
COPY wheels/triton-3.3.0+git95326d9f-cp310-cp310-linux_x86_64.whl /root/wheels
COPY wheels/vllm-0.8.4.dev0+g296c657.d20250410.cu128-cp310-cp310-linux_x86_64.whl /root/wheels
COPY wheels/xformers-0.0.30+4fa0149.d20250410-cp310-cp310-linux_x86_64.whl /root/wheels

# PyTorch nightly (cu128) first, then the local wheels built against it.
RUN /root/miniconda3/bin/pip install --force-reinstall torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/cu128
RUN /root/miniconda3/bin/pip install /root/wheels/xformers-0.0.30+4fa0149.d20250410-cp310-cp310-linux_x86_64.whl
RUN /root/miniconda3/bin/pip install /root/wheels/flash_attn-2.7.4.post1-cp310-cp310-linux_x86_64.whl
RUN /root/miniconda3/bin/pip install /root/wheels/vllm-0.8.4.dev0+g296c657.d20250410.cu128-cp310-cp310-linux_x86_64.whl
RUN /root/miniconda3/bin/pip install /root/wheels/triton-3.3.0+git95326d9f-cp310-cp310-linux_x86_64.whl

# Clean up wheel sources and the pip cache to shrink the image.
RUN rm -rf /root/wheels
RUN /root/miniconda3/bin/pip cache purge

# SSH server on port 1022, key-only access for the author's key.
RUN apt-get install -y openssh-server && \
    mkdir -p /root/.ssh && \
    chmod 700 /root/.ssh && \
    sed -i 's/#Port 22/Port 1022/' /etc/ssh/sshd_config

RUN echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILLRIFSAs+bubZQULKHVe7HG7oXhP2E7eqg2+Qn5sKsY zhaomh1998@outlook.com" >> /root/.ssh/authorized_keys
RUN chmod 600 /root/.ssh/authorized_keys

COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
docker-aienv/entrypoint.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
#!/bin/bash
# Container entrypoint: start sshd, then either exec the given command or
# keep the container alive.

# Best-effort: report but do not kill the container if sshd fails to start.
service ssh start || echo "warning: could not start ssh service" >&2

# The original relied on `exec "$@"` silently falling through when no
# arguments were given; make the two paths explicit instead.
if [ "$#" -gt 0 ]; then
    # A command was supplied (docker CMD / run args): replace this shell.
    exec "$@"
fi

# Keep container running
tail -f /dev/null
docker-aienv/wheels/flash_attn-2.7.4.post1-cp310-cp310-linux_x86_64.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d843da16f4785ed5e5d0c6e7309a81413c8a903090cad41759bea0f232197d3
3
+ size 403364957
docker-aienv/wheels/triton-3.3.0+git95326d9f-cp310-cp310-linux_x86_64.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6990fe4af3d68628792a8ba049f3d1a4590008a84345a383ca05f96df1dccdc
3
+ size 244871549
docker-aienv/wheels/vllm-0.8.4.dev0+g296c657.d20250410.cu128-cp310-cp310-linux_x86_64.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ca77614d05fec383482ede760f345d197dfbe46e77050c5d7cbd7b5841f5df
3
+ size 277543574
docker-aienv/wheels/xformers-0.0.30+4fa0149.d20250410-cp310-cp310-linux_x86_64.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b674e8c6547fc66a2dff8468f82e5a66544aa7b77b2e5f21e6d961cbc2393a3e
3
+ size 603907
docker-aiwhl/build.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Build the wheel-builder image (optionally), then extract the built wheels
# into ../docker-aienv/wheels/ via a throwaway container.
# With `set -e`, a failed build now aborts before the container is created.
set -euo pipefail

# -r: don't let backslashes in the answer be interpreted.
read -r -p "Build? (y/n): " confirm
if [ "$confirm" == "y" ]; then
  docker build --platform linux/amd64 --tag zhaomh1998/aiwhl:250410 .
fi

docker create --name aiwhl zhaomh1998/aiwhl:250410
docker cp aiwhl:/root/wheels ../docker-aienv/wheels/
docker rm aiwhl
docker-aiwhl/dockerfile ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM nvcr.io/nvidia/cuda-dl-base:25.03-cuda12.8-devel-ubuntu24.04

WORKDIR /workspace

# Toolchain and build dependencies for compiling the wheels below.
RUN apt-get update && apt-get install -y \
    build-essential \
    git \
    curl \
    wget \
    cmake \
    ninja-build \
    gcc-14 g++-14 \
    unixodbc

# Each install stage is its own layer so a failed step can be retried
# without redoing the earlier (expensive) builds.
COPY install_0_pre.sh /install_0_pre.sh
RUN chmod +x /install_0_pre.sh && /install_0_pre.sh

COPY install_1_xformers.sh /install_1_xformers.sh
RUN chmod +x /install_1_xformers.sh && /install_1_xformers.sh

COPY install_2_flash_attn.sh /install_2_flash_attn.sh
RUN chmod +x /install_2_flash_attn.sh && /install_2_flash_attn.sh

COPY install_3_vllm.sh /install_3_vllm.sh
RUN chmod +x /install_3_vllm.sh && /install_3_vllm.sh

COPY install_4_triton.sh /install_4_triton.sh
RUN chmod +x /install_4_triton.sh && /install_4_triton.sh

COPY install_verify.sh /install_verify.sh
RUN chmod +x /install_verify.sh && /install_verify.sh

# Idle so the wheels can be copied out with `docker cp`.
CMD ["tail", "-f", "/dev/null"]
docker-aiwhl/install_0_pre.sh ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Stage 0 of the wheel-builder image: default compilers, Miniconda
# (Python 3.10), base Python packages, and PyTorch nightly (cu128).
set -euo pipefail

# Colors for better readability
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# conda's activate script references PS1; give it a value under `set -u`.
PS1=${PS1:-}

# Report the previous command's status; exit when called with "critical".
# NOTE(review): under `set -e` a failing plain command aborts the script
# before this runs, so the error branch is only reachable for commands whose
# failure is otherwise tolerated.
check_status() {
  if [ $? -eq 0 ]; then
    echo -e "${GREEN}✓ Success${NC}"
  else
    echo -e "${RED}✗ Error occurred${NC}"
    if [ "${1:-}" == "critical" ]; then
      echo -e "${RED}Critical error. Installation cannot continue.${NC}"
      exit 1
    fi
  fi
}

# Only allow root/sudo to install system dependencies
if [[ $EUID -ne 0 ]]; then
  echo -e "${RED}This script must be run as root (sudo).${NC}"
  exit 1
fi

# Configure GCC 14 and G++ 14 as defaults
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 14
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 14
check_status

echo -e "\n[STEP] ${YELLOW}Installing Python3.10${NC}"
bash -c "
  mkdir -p /root/miniconda3 && \
  wget https://repo.anaconda.com/miniconda/Miniconda3-py310_25.1.1-2-Linux-x86_64.sh && \
  bash Miniconda3-py310_25.1.1-2-Linux-x86_64.sh -b -u -p /root/miniconda3 && \
  rm Miniconda3-py310_25.1.1-2-Linux-x86_64.sh && \
  source /root/miniconda3/bin/activate && \
  conda init --all
"
VENV_DIR="/root/miniconda3/"
BUILD_DIR="/root/build"
WHEEL_DIR="/root/wheels"
mkdir -p "$BUILD_DIR"
mkdir -p "$WHEEL_DIR"
check_status

echo -e "\n[STEP] ${YELLOW}Installing Python packages${NC}"
# Inner `set -e`: previously a failed pip install was masked by the exit
# status of the following conda command.
bash -c "
  set -e
  source $VENV_DIR/bin/activate
  pip install --upgrade pip setuptools wheel ninja cmake wheel pybind11 ipywidgets ipykernel chardet openpyxl wandb unsloth scikit-learn matplotlib
  conda install -c conda-forge libstdcxx-ng=14 -y
" # libstdcxx-ng=14 is to resolve https://github.com/triton-inference-server/server/issues/5933
check_status

echo -e "\n[STEP] ${YELLOW}Installing PyTorch...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  pip install --force-reinstall torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/cu128
"
check_status
docker-aiwhl/install_1_xformers.sh ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Stage 1: build and install xformers from source, then stash its wheel
# in $WHEEL_DIR for the runtime image.
set -euo pipefail

# Colors for better readability
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# conda's activate script references PS1; give it a value under `set -u`.
PS1=${PS1:-}

VENV_DIR="/root/miniconda3/"
BUILD_DIR="/root/build"
WHEEL_DIR="/root/wheels"

# Report the previous command's status; exit when called with "critical".
check_status() {
  if [ $? -eq 0 ]; then
    echo -e "${GREEN}✓ Success${NC}"
  else
    echo -e "${RED}✗ Error occurred${NC}"
    if [ "${1:-}" == "critical" ]; then
      echo -e "${RED}Critical error. Installation cannot continue.${NC}"
      exit 1
    fi
  fi
}

# Only allow root/sudo to install system dependencies
if [[ $EUID -ne 0 ]]; then
  echo -e "${RED}This script must be run as root (sudo).${NC}"
  exit 1
fi

echo -e "\n[STEP] ${YELLOW}Installing Xformers...${NC}"
# The double-quoted script interpolates $VENV_DIR/$BUILD_DIR/$WHEEL_DIR and
# the color variables *before* the inner bash runs.
bash -c "
  # Abort on first failure: the wheel-build steps at the bottom are not
  # individually error-checked.
  set -e
  source $VENV_DIR/bin/activate

  # Remove old installation if it exists
  if [ -d '$BUILD_DIR/xformers' ]; then
    echo 'Removing old Xformers installation...'
    rm -rf '$BUILD_DIR/xformers'
  fi

  # Clone the Xformers repo from an alternate PR
  echo 'Cloning Xformers from an alternate PR...'
  git clone https://github.com/maludwig/xformers.git '$BUILD_DIR/xformers' || { echo '${RED}Error cloning Xformers${NC}'; exit 1; }

  cd '$BUILD_DIR/xformers'

  # Update necessary submodules
  git submodule update --init --recursive || { echo '${RED}Error updating Xformers submodules${NC}'; exit 1; }

  # Install required dependencies
  echo 'Installing Xformers dependencies...'
  pip install -r requirements.txt || { echo '${RED}Error installing Xformers dependencies${NC}'; exit 1; }

  # Compile and install Xformers
  echo 'Building and installing Xformers...'
  pip install -v . || { echo '${RED}Error installing Xformers from source${NC}'; exit 1; }

  # Build a reusable wheel; setuptools pinned to a version known to work
  # with setup.py bdist_wheel here.
  pip install --upgrade --force-reinstall setuptools==78.1.0
  python setup.py bdist_wheel
  mv dist/*.whl '$WHEEL_DIR'
"
check_status

# Verify the installations
echo -e "\n${BLUE}==============================================${NC}"
echo -e "${BLUE} Verifying installations ${NC}"
echo -e "${BLUE}==============================================${NC}"

# Verify PyTorch within the virtual environment
echo -e "\n[STEP] ${YELLOW}Verifying PyTorch installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import torch; print(\"PyTorch version:\", torch.__version__); print(\"CUDA available:\", torch.cuda.is_available()); print(\"CUDA version:\", torch.version.cuda if torch.cuda.is_available() else \"N/A\")'
"
check_status

# Verify Xformers within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Xformers installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import xformers; print(\"Xformers is installed\")'
"
check_status
+
docker-aiwhl/install_2_flash_attn.sh ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Stage 2: build and install Flash Attention 2 from source, then stash its
# wheel in $WHEEL_DIR for the runtime image.
set -euo pipefail

# Colors for better readability
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# conda's activate script references PS1; give it a value under `set -u`.
PS1=${PS1:-}

VENV_DIR="/root/miniconda3/"
BUILD_DIR="/root/build"
WHEEL_DIR="/root/wheels"

# Report the previous command's status; exit when called with "critical".
check_status() {
  if [ $? -eq 0 ]; then
    echo -e "${GREEN}✓ Success${NC}"
  else
    echo -e "${RED}✗ Error occurred${NC}"
    if [ "${1:-}" == "critical" ]; then
      echo -e "${RED}Critical error. Installation cannot continue.${NC}"
      exit 1
    fi
  fi
}

# Only allow root/sudo to install system dependencies
if [[ $EUID -ne 0 ]]; then
  echo -e "${RED}This script must be run as root (sudo).${NC}"
  exit 1
fi

echo -e "\n[STEP] ${YELLOW}Installing Flash Attention 2 ...${NC}"
# Outer variables interpolate before the inner bash runs; \$-escaped ones
# are expanded by the inner shell.
bash -c "
  # Abort on first failure: the wheel-build steps below are not
  # individually error-checked.
  set -e
  source $VENV_DIR/bin/activate
  git clone --depth 1 --branch v2.7.4.post1 https://github.com/Dao-AILab/flash-attention.git '$BUILD_DIR/flash-attention' || { echo '${RED}Error cloning Flash Attention${NC}'; exit 1; }

  cd '$BUILD_DIR/flash-attention'

  # Install Flash Attention 2 from source; MAX_JOBS caps parallel compile
  # jobs to bound build memory use.
  echo 'Building and installing Flash Attention 2...'
  MAX_JOBS=4
  MAX_JOBS=\${MAX_JOBS} python setup.py install || { echo '${RED}Error installing Flash Attention 2 from source${NC}'; exit 1; }

  # Build a reusable wheel; setuptools pinned to a version known to work
  # with setup.py bdist_wheel here.
  pip install --upgrade --force-reinstall setuptools==78.1.0
  python setup.py bdist_wheel
  mv dist/*.whl '$WHEEL_DIR'
"
check_status

# Verify the installations
echo -e "\n${BLUE}==============================================${NC}"
echo -e "${BLUE} Verifying installations ${NC}"
echo -e "${BLUE}==============================================${NC}"

# Verify PyTorch within the virtual environment
echo -e "\n[STEP] ${YELLOW}Verifying PyTorch installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import torch; print(\"PyTorch version:\", torch.__version__); print(\"CUDA available:\", torch.cuda.is_available()); print(\"CUDA version:\", torch.version.cuda if torch.cuda.is_available() else \"N/A\")'
"
check_status

# Verify Xformers within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Xformers installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import xformers; print(\"Xformers is installed\")'
"
check_status

# Verify Flash Attention 2 within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Flash Attention 2 installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import flash_attn; print(\"Flash Attention 2 is installed\")'
"
check_status
docker-aiwhl/install_3_vllm.sh ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Stage 3: build and install vLLM from source against the preinstalled
# PyTorch nightly, then stash its wheel in $WHEEL_DIR.
set -euo pipefail

# Colors for better readability
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# conda's activate script references PS1; give it a value under `set -u`.
PS1=${PS1:-}

VENV_DIR="/root/miniconda3/"
BUILD_DIR="/root/build"
WHEEL_DIR="/root/wheels"

# Report the previous command's status; exit when called with "critical".
check_status() {
  if [ $? -eq 0 ]; then
    echo -e "${GREEN}✓ Success${NC}"
  else
    echo -e "${RED}✗ Error occurred${NC}"
    if [ "${1:-}" == "critical" ]; then
      echo -e "${RED}Critical error. Installation cannot continue.${NC}"
      exit 1
    fi
  fi
}

# Only allow root/sudo to install system dependencies
if [[ $EUID -ne 0 ]]; then
  echo -e "${RED}This script must be run as root (sudo).${NC}"
  exit 1
fi

echo -e "\n[STEP] ${YELLOW}Installing vllm...${NC}"
# Outer variables interpolate before the inner bash runs; \$-escaped ones
# are expanded by the inner shell.
bash -c "
  # Abort on first failure: several steps below are not individually checked.
  set -e
  export VLLM_INSTALL_PUNICA_KERNELS=1
  export TORCH_CUDA_ARCH_LIST='12.0'
  export CUDA_HOME=/usr/local/cuda
  export PATH=\$CUDA_HOME/bin:\$PATH
  export LD_LIBRARY_PATH=\$CUDA_HOME/lib64:\${LD_LIBRARY_PATH:-}
  source $VENV_DIR/bin/activate

  # Remove old vllm installation if it exists
  if [ -d '$BUILD_DIR/vllm' ]; then
    echo 'Removing old vllm installation...'
    rm -rf '$BUILD_DIR/vllm'
  fi

  # NOTE(review): tag v0.8.3 is checked out although the produced wheel is
  # named 0.8.4.dev0 — confirm the intended ref.
  echo 'Cloning vllm'
  git clone --depth 1 --branch v0.8.3 https://github.com/vllm-project/vllm.git '$BUILD_DIR/vllm' || { echo '${RED}Error cloning vllm${NC}'; exit 1; }
  cd '$BUILD_DIR/vllm'

  # Strip vLLM's pinned torch requirements so the existing nightly is reused.
  echo 'Deleting pip PyTorch dependencies ...'
  python3.10 use_existing_torch.py || { echo '${RED}Error in use_existing_torch.py${NC}'; exit 1; }

  '$VENV_DIR/bin/pip' install -r requirements/build.txt || { echo '${RED}Error installing vllm build dependencies${NC}'; exit 1; }
  '$VENV_DIR/bin/pip' install -r requirements/common.txt || { echo '${RED}Error installing vllm common dependencies${NC}'; exit 1; }

  echo 'Installing vllm...'
  # MAX_JOBS caps parallel compile jobs to bound build memory use.
  MAX_JOBS=2

  echo \"Using MAX_JOBS=\${MAX_JOBS}\"

  # Use MAX_JOBS for installing vllm
  MAX_JOBS=\${MAX_JOBS} \"$VENV_DIR/bin/pip\" install -e . --no-build-isolation || { echo \"\${RED}Error installing vllm\${NC}\"; exit 1; }

  # Build a reusable wheel; setuptools pinned to a version known to work
  # with setup.py bdist_wheel here.
  pip install --upgrade --force-reinstall setuptools==78.1.0
  python setup.py bdist_wheel
  mv dist/*.whl '$WHEEL_DIR'
"
check_status

# Verify the installations
echo -e "\n${BLUE}==============================================${NC}"
echo -e "${BLUE} Verifying installations ${NC}"
echo -e "${BLUE}==============================================${NC}"

# Verify PyTorch within the virtual environment
echo -e "\n[STEP] ${YELLOW}Verifying PyTorch installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import torch; print(\"PyTorch version:\", torch.__version__); print(\"CUDA available:\", torch.cuda.is_available()); print(\"CUDA version:\", torch.version.cuda if torch.cuda.is_available() else \"N/A\")'
"
check_status

# Verify Xformers within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Xformers installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import xformers; print(\"Xformers is installed\")'
"
check_status

# Verify Flash Attention 2 within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Flash Attention 2 installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import flash_attn; print(\"Flash Attention 2 is installed\")'
"
check_status

# Verify vllm within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying vLLM installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import vllm; print(\"vLLM is installed\")'
"
check_status
docker-aiwhl/install_4_triton.sh ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Stage 4: replace the bundled pytorch-triton with a Triton build from a
# patched branch, then stash its wheel in $WHEEL_DIR.
set -euo pipefail

# Colors for better readability
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# conda's activate script references PS1; give it a value under `set -u`.
PS1=${PS1:-}

VENV_DIR="/root/miniconda3/"
BUILD_DIR="/root/build"
WHEEL_DIR="/root/wheels"

# Report the previous command's status; exit when called with "critical".
check_status() {
  if [ $? -eq 0 ]; then
    echo -e "${GREEN}✓ Success${NC}"
  else
    echo -e "${RED}✗ Error occurred${NC}"
    if [ "${1:-}" == "critical" ]; then
      echo -e "${RED}Critical error. Installation cannot continue.${NC}"
      exit 1
    fi
  fi
}

# Only allow root/sudo to install system dependencies
if [[ $EUID -ne 0 ]]; then
  echo -e "${RED}This script must be run as root (sudo).${NC}"
  exit 1
fi

echo -e "\n[STEP] ${YELLOW}Uninstalling PyTorch-triton...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  pip uninstall pytorch-triton -y
"
check_status

echo -e "\n[STEP] ${YELLOW}Installing Triton from the 'patch-1' branch...${NC}"
# Outer variables interpolate before the inner bash runs; \$-escaped ones
# are expanded by the inner shell.
bash -c "
  # Abort on first failure: several steps below are not individually checked.
  set -e
  source $VENV_DIR/bin/activate

  apt-get install -y zlib1g-dev

  # Remove any old Triton installation if it exists
  if [ -d '$BUILD_DIR/triton' ]; then
    echo 'Removing old Triton installation...'
    rm -rf '$BUILD_DIR/triton'
  fi

  # Clone the Triton repository from the patch-1 branch
  echo 'Cloning Triton from your GitHub on patch-1 branch...'
  git clone --branch patch-1 https://github.com/oteroantoniogom/triton.git '$BUILD_DIR/triton' || { echo '${RED}Error cloning Triton${NC}'; exit 1; }

  cd '$BUILD_DIR/triton'

  # Update any necessary submodules
  git submodule update --init --recursive || { echo '${RED}Error updating Triton submodules${NC}'; exit 1; }

  # Install needed dependencies
  echo 'Installing Triton dependencies...'
  pip install ninja cmake wheel pybind11 ipywidgets ipykernel chardet openpyxl wandb || { echo '${RED}Error installing Triton dependencies${NC}'; exit 1; }

  # Install Triton from source; MAX_JOBS caps parallel compile jobs to
  # bound build memory use.
  echo 'Building and installing Triton...'
  MAX_JOBS=3

  MAX_JOBS=\${MAX_JOBS} pip install -e python -v || { echo '${RED}Error installing Triton from source${NC}'; exit 1; }

  # Resolve version GLIBCXX_3.4.30 not found
  conda install -c conda-forge libstdcxx-ng=14 -y

  # Build a reusable wheel; setuptools pinned to a version known to work
  # with setup.py bdist_wheel here.
  pip install --upgrade --force-reinstall setuptools==78.1.0
  cd python
  python setup.py bdist_wheel
  mv dist/*.whl '$WHEEL_DIR'
"
check_status

echo -e "\n[STEP] ${YELLOW}Reinstalling PyTorch...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  pip install --force-reinstall torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/cu128
"
check_status

# Reinstalling torch drags pytorch-triton back in; remove it again so the
# source-built Triton stays active.
echo -e "\n[STEP] ${YELLOW}Uninstalling PyTorch-triton again...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  pip uninstall pytorch-triton -y
"
check_status

# Verify the installations
echo -e "\n${BLUE}==============================================${NC}"
echo -e "${BLUE} Verifying installations ${NC}"
echo -e "${BLUE}==============================================${NC}"

# Verify PyTorch within the virtual environment
echo -e "\n[STEP] ${YELLOW}Verifying PyTorch installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import torch; print(\"PyTorch version:\", torch.__version__); print(\"CUDA available:\", torch.cuda.is_available()); print(\"CUDA version:\", torch.version.cuda if torch.cuda.is_available() else \"N/A\")'
"
check_status

# Verify Xformers within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Xformers installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import xformers; print(\"Xformers is installed\")'
"
check_status

# Verify Flash Attention 2 within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Flash Attention 2 installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import flash_attn; print(\"Flash Attention 2 is installed\")'
"
check_status

# Verify vllm within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying vLLM installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import vllm; print(\"vLLM is installed\")'
"
check_status

# Verify Triton within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Triton installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import triton; print(\"Triton is installed\")'
"
check_status
docker-aiwhl/install_verify.sh ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Final stage: confirm that every package built in the previous stages
# imports cleanly inside the Miniconda environment.
set -euo pipefail

# Colors for better readability
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# conda's activate script references PS1; give it a value under `set -u`.
PS1=${PS1:-}

VENV_DIR="/root/miniconda3/"
BUILD_DIR="/root/build"
WHEEL_DIR="/root/wheels"

# Report the previous command's status; exit when called with "critical".
check_status() {
  if [ $? -eq 0 ]; then
    echo -e "${GREEN}✓ Success${NC}"
  else
    echo -e "${RED}✗ Error occurred${NC}"
    if [ "${1:-}" == "critical" ]; then
      echo -e "${RED}Critical error. Installation cannot continue.${NC}"
      exit 1
    fi
  fi
}

# Only allow root/sudo to install system dependencies
if [[ $EUID -ne 0 ]]; then
  echo -e "${RED}This script must be run as root (sudo).${NC}"
  exit 1
fi

# Verify the installations
echo -e "\n${BLUE}==============================================${NC}"
echo -e "${BLUE} Verifying installations ${NC}"
echo -e "${BLUE}==============================================${NC}"

# PyTorch gets a richer probe (version string plus CUDA availability).
echo -e "\n[STEP] ${YELLOW}Verifying PyTorch installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import torch; print(\"PyTorch version:\", torch.__version__); print(\"CUDA available:\", torch.cuda.is_available()); print(\"CUDA version:\", torch.version.cuda if torch.cuda.is_available() else \"N/A\")'
"
check_status

# The remaining packages only need a successful import; drive the identical
# check for each from a module|label table.
for spec in 'xformers|Xformers' 'flash_attn|Flash Attention 2' 'vllm|vLLM' 'triton|Triton'; do
  mod=${spec%%|*}
  label=${spec#*|}
  echo -e "\n[VERIFY] ${YELLOW}Verifying ${label} installation...${NC}"
  bash -c "
    source $VENV_DIR/bin/activate && \
    python3.10 -c 'import ${mod}; print(\"${label} is installed\")'
  "
  check_status
done