scottzhao committed on
Commit
873520c
·
verified ·
1 Parent(s): eb5880a

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ docker-aienv/wheels/flash_attn-2.7.4.post1-cp310-cp310-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
37
+ docker-aienv/wheels/triton-3.3.0+git95326d9f-cp310-cp310-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
38
+ docker-aienv/wheels/vllm-0.8.4.dev0+g296c657.d20250410.cu128-cp310-cp310-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
39
+ docker-aienv/wheels/xformers-0.0.30+4fa0149.d20250410-cp310-cp310-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
docker-aienv/build.sh ADDED
@@ -0,0 +1 @@
 
 
#!/usr/bin/env bash
# Build the AI environment image for linux/amd64.
set -euo pipefail

docker build --platform linux/amd64 --tag zhaomh1998/pytorch:cu128 .
docker-aienv/dockerfile ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM nvcr.io/nvidia/cuda-dl-base:25.03-cuda12.8-devel-ubuntu24.04

# `source` (used below to activate conda) is a bashism; Docker's default
# RUN shell is /bin/sh, which is dash on Ubuntu. Run everything under bash.
SHELL ["/bin/bash", "-c"]

WORKDIR /workspace

# Refresh the apt package index; later install layers rely on this cache.
RUN apt-get update

# cv2 (OpenCV) runtime dependencies
RUN apt-get install -y ffmpeg libsm6 libxext6

# Utilities
RUN apt-get install -y screen

# Oh-my-zsh with a few quality-of-life plugins
RUN sh -c "$(wget -O- https://github.com/deluan/zsh-in-docker/releases/download/v1.2.1/zsh-in-docker.sh)" -- \
    -t robbyrussell \
    -p git \
    -p https://github.com/zsh-users/zsh-autosuggestions \
    -p https://github.com/zsh-users/zsh-syntax-highlighting \
    -p history \
    -p fzf
RUN git clone --depth 1 https://github.com/junegunn/fzf.git --branch v0.61.0 --single-branch ~/.fzf
# --all answers the installer's interactive prompts so the build does not
# block on stdin.
RUN ~/.fzf/install --all
RUN chsh -s /bin/zsh

# Python environment setup
# Miniconda (Python 3.10)
RUN mkdir -p /root/miniconda3 && \
    wget https://repo.anaconda.com/miniconda/Miniconda3-py310_25.1.1-2-Linux-x86_64.sh && \
    bash Miniconda3-py310_25.1.1-2-Linux-x86_64.sh -b -u -p /root/miniconda3 && \
    rm Miniconda3-py310_25.1.1-2-Linux-x86_64.sh && \
    source /root/miniconda3/bin/activate && \
    conda init --all && \
    conda install -c conda-forge libstdcxx-ng=14 -y

# HF Stuff
RUN source /root/miniconda3/bin/activate && \
    pip install hf_transfer

# Pre-built wheels produced by the docker-aiwhl builder image.
RUN mkdir -p /root/wheels

COPY wheels/flash_attn-2.7.4.post1-cp310-cp310-linux_x86_64.whl /root/wheels
COPY wheels/triton-3.3.0+git95326d9f-cp310-cp310-linux_x86_64.whl /root/wheels
COPY wheels/vllm-0.8.4.dev0+g296c657.d20250410.cu128-cp310-cp310-linux_x86_64.whl /root/wheels
COPY wheels/xformers-0.0.30+4fa0149.d20250410-cp310-cp310-linux_x86_64.whl /root/wheels

# PyTorch nightly (cu128) first, then the local wheels built against it.
RUN /root/miniconda3/bin/pip install --force-reinstall torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/cu128
RUN /root/miniconda3/bin/pip install /root/wheels/xformers-0.0.30+4fa0149.d20250410-cp310-cp310-linux_x86_64.whl
RUN /root/miniconda3/bin/pip install /root/wheels/flash_attn-2.7.4.post1-cp310-cp310-linux_x86_64.whl
RUN /root/miniconda3/bin/pip install /root/wheels/vllm-0.8.4.dev0+g296c657.d20250410.cu128-cp310-cp310-linux_x86_64.whl
RUN /root/miniconda3/bin/pip install /root/wheels/triton-3.3.0+git95326d9f-cp310-cp310-linux_x86_64.whl

# Clean up wheel sources and the pip cache to shrink the image.
RUN rm -rf /root/wheels
RUN /root/miniconda3/bin/pip cache purge

# SSH server on port 1022, key-only access for the author's key.
RUN apt-get install -y openssh-server && \
    mkdir -p /root/.ssh && \
    chmod 700 /root/.ssh && \
    sed -i 's/#Port 22/Port 1022/' /etc/ssh/sshd_config

RUN echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILLRIFSAs+bubZQULKHVe7HG7oXhP2E7eqg2+Qn5sKsY zhaomh1998@outlook.com" >> /root/.ssh/authorized_keys
RUN chmod 600 /root/.ssh/authorized_keys

COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
docker-aienv/entrypoint.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
#!/bin/bash
# Container entrypoint: start sshd, then either exec the given command or
# keep the container alive.

# Best-effort: report but do not kill the container if sshd fails to start.
service ssh start || echo "warning: could not start ssh service" >&2

# The original relied on `exec "$@"` silently falling through when no
# arguments were given; make the two paths explicit instead.
if [ "$#" -gt 0 ]; then
    # A command was supplied (docker CMD / run args): replace this shell.
    exec "$@"
fi

# Keep container running
tail -f /dev/null
docker-aienv/wheels/flash_attn-2.7.4.post1-cp310-cp310-linux_x86_64.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d843da16f4785ed5e5d0c6e7309a81413c8a903090cad41759bea0f232197d3
3
+ size 403364957
docker-aienv/wheels/triton-3.3.0+git95326d9f-cp310-cp310-linux_x86_64.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6990fe4af3d68628792a8ba049f3d1a4590008a84345a383ca05f96df1dccdc
3
+ size 244871549
docker-aienv/wheels/vllm-0.8.4.dev0+g296c657.d20250410.cu128-cp310-cp310-linux_x86_64.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ca77614d05fec383482ede760f345d197dfbe46e77050c5d7cbd7b5841f5df
3
+ size 277543574
docker-aienv/wheels/xformers-0.0.30+4fa0149.d20250410-cp310-cp310-linux_x86_64.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b674e8c6547fc66a2dff8468f82e5a66544aa7b77b2e5f21e6d961cbc2393a3e
3
+ size 603907
docker-aiwhl/build.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Build the wheel-builder image (optionally), then extract the built wheels
# into ../docker-aienv/wheels/ via a throwaway container.
# With `set -e`, a failed build now aborts before the container is created.
set -euo pipefail

# -r: don't let backslashes in the answer be interpreted.
read -r -p "Build? (y/n): " confirm
if [ "$confirm" == "y" ]; then
  docker build --platform linux/amd64 --tag zhaomh1998/aiwhl:250410 .
fi

docker create --name aiwhl zhaomh1998/aiwhl:250410
docker cp aiwhl:/root/wheels ../docker-aienv/wheels/
docker rm aiwhl
docker-aiwhl/dockerfile ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM nvcr.io/nvidia/cuda-dl-base:25.03-cuda12.8-devel-ubuntu24.04

WORKDIR /workspace

# Toolchain and build dependencies for compiling the wheels below.
RUN apt-get update && apt-get install -y \
    build-essential \
    git \
    curl \
    wget \
    cmake \
    ninja-build \
    gcc-14 g++-14 \
    unixodbc

# Each install stage is its own layer so a failed step can be retried
# without redoing the earlier (expensive) builds.
COPY install_0_pre.sh /install_0_pre.sh
RUN chmod +x /install_0_pre.sh && /install_0_pre.sh

COPY install_1_xformers.sh /install_1_xformers.sh
RUN chmod +x /install_1_xformers.sh && /install_1_xformers.sh

COPY install_2_flash_attn.sh /install_2_flash_attn.sh
RUN chmod +x /install_2_flash_attn.sh && /install_2_flash_attn.sh

COPY install_3_vllm.sh /install_3_vllm.sh
RUN chmod +x /install_3_vllm.sh && /install_3_vllm.sh

COPY install_4_triton.sh /install_4_triton.sh
RUN chmod +x /install_4_triton.sh && /install_4_triton.sh

COPY install_verify.sh /install_verify.sh
RUN chmod +x /install_verify.sh && /install_verify.sh

# Idle so the wheels can be copied out with `docker cp`.
CMD ["tail", "-f", "/dev/null"]
docker-aiwhl/install_0_pre.sh ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Stage 0 of the wheel-builder image: default compilers, Miniconda
# (Python 3.10), base Python packages, and PyTorch nightly (cu128).
set -euo pipefail

# Colors for better readability
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# conda's activate script references PS1; give it a value under `set -u`.
PS1=${PS1:-}

# Report the previous command's status; exit when called with "critical".
# NOTE(review): under `set -e` a failing plain command aborts the script
# before this runs, so the error branch is only reachable for commands whose
# failure is otherwise tolerated.
check_status() {
  if [ $? -eq 0 ]; then
    echo -e "${GREEN}✓ Success${NC}"
  else
    echo -e "${RED}✗ Error occurred${NC}"
    if [ "${1:-}" == "critical" ]; then
      echo -e "${RED}Critical error. Installation cannot continue.${NC}"
      exit 1
    fi
  fi
}

# Only allow root/sudo to install system dependencies
if [[ $EUID -ne 0 ]]; then
  echo -e "${RED}This script must be run as root (sudo).${NC}"
  exit 1
fi

# Configure GCC 14 and G++ 14 as defaults
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 14
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 14
check_status

echo -e "\n[STEP] ${YELLOW}Installing Python3.10${NC}"
bash -c "
  mkdir -p /root/miniconda3 && \
  wget https://repo.anaconda.com/miniconda/Miniconda3-py310_25.1.1-2-Linux-x86_64.sh && \
  bash Miniconda3-py310_25.1.1-2-Linux-x86_64.sh -b -u -p /root/miniconda3 && \
  rm Miniconda3-py310_25.1.1-2-Linux-x86_64.sh && \
  source /root/miniconda3/bin/activate && \
  conda init --all
"
VENV_DIR="/root/miniconda3/"
BUILD_DIR="/root/build"
WHEEL_DIR="/root/wheels"
mkdir -p "$BUILD_DIR"
mkdir -p "$WHEEL_DIR"
check_status

echo -e "\n[STEP] ${YELLOW}Installing Python packages${NC}"
# Inner `set -e`: previously a failed pip install was masked by the exit
# status of the following conda command.
bash -c "
  set -e
  source $VENV_DIR/bin/activate
  pip install --upgrade pip setuptools wheel ninja cmake wheel pybind11 ipywidgets ipykernel chardet openpyxl wandb unsloth scikit-learn matplotlib
  conda install -c conda-forge libstdcxx-ng=14 -y
" # libstdcxx-ng=14 is to resolve https://github.com/triton-inference-server/server/issues/5933
check_status

echo -e "\n[STEP] ${YELLOW}Installing PyTorch...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  pip install --force-reinstall torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/cu128
"
check_status
docker-aiwhl/install_1_xformers.sh ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Stage 1: build and install xformers from source, then stash its wheel
# in $WHEEL_DIR for the runtime image.
set -euo pipefail

# Colors for better readability
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# conda's activate script references PS1; give it a value under `set -u`.
PS1=${PS1:-}

VENV_DIR="/root/miniconda3/"
BUILD_DIR="/root/build"
WHEEL_DIR="/root/wheels"

# Report the previous command's status; exit when called with "critical".
check_status() {
  if [ $? -eq 0 ]; then
    echo -e "${GREEN}✓ Success${NC}"
  else
    echo -e "${RED}✗ Error occurred${NC}"
    if [ "${1:-}" == "critical" ]; then
      echo -e "${RED}Critical error. Installation cannot continue.${NC}"
      exit 1
    fi
  fi
}

# Only allow root/sudo to install system dependencies
if [[ $EUID -ne 0 ]]; then
  echo -e "${RED}This script must be run as root (sudo).${NC}"
  exit 1
fi

echo -e "\n[STEP] ${YELLOW}Installing Xformers...${NC}"
# The double-quoted script interpolates $VENV_DIR/$BUILD_DIR/$WHEEL_DIR and
# the color variables *before* the inner bash runs.
bash -c "
  # Abort on first failure: the wheel-build steps at the bottom are not
  # individually error-checked.
  set -e
  source $VENV_DIR/bin/activate

  # Remove old installation if it exists
  if [ -d '$BUILD_DIR/xformers' ]; then
    echo 'Removing old Xformers installation...'
    rm -rf '$BUILD_DIR/xformers'
  fi

  # Clone the Xformers repo from an alternate PR
  echo 'Cloning Xformers from an alternate PR...'
  git clone https://github.com/maludwig/xformers.git '$BUILD_DIR/xformers' || { echo '${RED}Error cloning Xformers${NC}'; exit 1; }

  cd '$BUILD_DIR/xformers'

  # Update necessary submodules
  git submodule update --init --recursive || { echo '${RED}Error updating Xformers submodules${NC}'; exit 1; }

  # Install required dependencies
  echo 'Installing Xformers dependencies...'
  pip install -r requirements.txt || { echo '${RED}Error installing Xformers dependencies${NC}'; exit 1; }

  # Compile and install Xformers
  echo 'Building and installing Xformers...'
  pip install -v . || { echo '${RED}Error installing Xformers from source${NC}'; exit 1; }

  # Build a reusable wheel; setuptools pinned to a version known to work
  # with setup.py bdist_wheel here.
  pip install --upgrade --force-reinstall setuptools==78.1.0
  python setup.py bdist_wheel
  mv dist/*.whl '$WHEEL_DIR'
"
check_status

# Verify the installations
echo -e "\n${BLUE}==============================================${NC}"
echo -e "${BLUE} Verifying installations ${NC}"
echo -e "${BLUE}==============================================${NC}"

# Verify PyTorch within the virtual environment
echo -e "\n[STEP] ${YELLOW}Verifying PyTorch installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import torch; print(\"PyTorch version:\", torch.__version__); print(\"CUDA available:\", torch.cuda.is_available()); print(\"CUDA version:\", torch.version.cuda if torch.cuda.is_available() else \"N/A\")'
"
check_status

# Verify Xformers within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Xformers installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import xformers; print(\"Xformers is installed\")'
"
check_status
+
docker-aiwhl/install_2_flash_attn.sh ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Stage 2: build and install Flash Attention 2 from source, then stash its
# wheel in $WHEEL_DIR for the runtime image.
set -euo pipefail

# Colors for better readability
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# conda's activate script references PS1; give it a value under `set -u`.
PS1=${PS1:-}

VENV_DIR="/root/miniconda3/"
BUILD_DIR="/root/build"
WHEEL_DIR="/root/wheels"

# Report the previous command's status; exit when called with "critical".
check_status() {
  if [ $? -eq 0 ]; then
    echo -e "${GREEN}✓ Success${NC}"
  else
    echo -e "${RED}✗ Error occurred${NC}"
    if [ "${1:-}" == "critical" ]; then
      echo -e "${RED}Critical error. Installation cannot continue.${NC}"
      exit 1
    fi
  fi
}

# Only allow root/sudo to install system dependencies
if [[ $EUID -ne 0 ]]; then
  echo -e "${RED}This script must be run as root (sudo).${NC}"
  exit 1
fi

echo -e "\n[STEP] ${YELLOW}Installing Flash Attention 2 ...${NC}"
# Outer variables interpolate before the inner bash runs; \$-escaped ones
# are expanded by the inner shell.
bash -c "
  # Abort on first failure: the wheel-build steps below are not
  # individually error-checked.
  set -e
  source $VENV_DIR/bin/activate
  git clone --depth 1 --branch v2.7.4.post1 https://github.com/Dao-AILab/flash-attention.git '$BUILD_DIR/flash-attention' || { echo '${RED}Error cloning Flash Attention${NC}'; exit 1; }

  cd '$BUILD_DIR/flash-attention'

  # Install Flash Attention 2 from source; MAX_JOBS caps parallel compile
  # jobs to bound build memory use.
  echo 'Building and installing Flash Attention 2...'
  MAX_JOBS=4
  MAX_JOBS=\${MAX_JOBS} python setup.py install || { echo '${RED}Error installing Flash Attention 2 from source${NC}'; exit 1; }

  # Build a reusable wheel; setuptools pinned to a version known to work
  # with setup.py bdist_wheel here.
  pip install --upgrade --force-reinstall setuptools==78.1.0
  python setup.py bdist_wheel
  mv dist/*.whl '$WHEEL_DIR'
"
check_status

# Verify the installations
echo -e "\n${BLUE}==============================================${NC}"
echo -e "${BLUE} Verifying installations ${NC}"
echo -e "${BLUE}==============================================${NC}"

# Verify PyTorch within the virtual environment
echo -e "\n[STEP] ${YELLOW}Verifying PyTorch installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import torch; print(\"PyTorch version:\", torch.__version__); print(\"CUDA available:\", torch.cuda.is_available()); print(\"CUDA version:\", torch.version.cuda if torch.cuda.is_available() else \"N/A\")'
"
check_status

# Verify Xformers within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Xformers installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import xformers; print(\"Xformers is installed\")'
"
check_status

# Verify Flash Attention 2 within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Flash Attention 2 installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import flash_attn; print(\"Flash Attention 2 is installed\")'
"
check_status
docker-aiwhl/install_3_vllm.sh ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Stage 3: build and install vLLM from source against the preinstalled
# PyTorch nightly, then stash its wheel in $WHEEL_DIR.
set -euo pipefail

# Colors for better readability
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# conda's activate script references PS1; give it a value under `set -u`.
PS1=${PS1:-}

VENV_DIR="/root/miniconda3/"
BUILD_DIR="/root/build"
WHEEL_DIR="/root/wheels"

# Report the previous command's status; exit when called with "critical".
check_status() {
  if [ $? -eq 0 ]; then
    echo -e "${GREEN}✓ Success${NC}"
  else
    echo -e "${RED}✗ Error occurred${NC}"
    if [ "${1:-}" == "critical" ]; then
      echo -e "${RED}Critical error. Installation cannot continue.${NC}"
      exit 1
    fi
  fi
}

# Only allow root/sudo to install system dependencies
if [[ $EUID -ne 0 ]]; then
  echo -e "${RED}This script must be run as root (sudo).${NC}"
  exit 1
fi

echo -e "\n[STEP] ${YELLOW}Installing vllm...${NC}"
# Outer variables interpolate before the inner bash runs; \$-escaped ones
# are expanded by the inner shell.
bash -c "
  # Abort on first failure: several steps below are not individually checked.
  set -e
  export VLLM_INSTALL_PUNICA_KERNELS=1
  export TORCH_CUDA_ARCH_LIST='12.0'
  export CUDA_HOME=/usr/local/cuda
  export PATH=\$CUDA_HOME/bin:\$PATH
  export LD_LIBRARY_PATH=\$CUDA_HOME/lib64:\${LD_LIBRARY_PATH:-}
  source $VENV_DIR/bin/activate

  # Remove old vllm installation if it exists
  if [ -d '$BUILD_DIR/vllm' ]; then
    echo 'Removing old vllm installation...'
    rm -rf '$BUILD_DIR/vllm'
  fi

  # NOTE(review): tag v0.8.3 is checked out although the produced wheel is
  # named 0.8.4.dev0 — confirm the intended ref.
  echo 'Cloning vllm'
  git clone --depth 1 --branch v0.8.3 https://github.com/vllm-project/vllm.git '$BUILD_DIR/vllm' || { echo '${RED}Error cloning vllm${NC}'; exit 1; }
  cd '$BUILD_DIR/vllm'

  # Strip vLLM's pinned torch requirements so the existing nightly is reused.
  echo 'Deleting pip PyTorch dependencies ...'
  python3.10 use_existing_torch.py || { echo '${RED}Error in use_existing_torch.py${NC}'; exit 1; }

  '$VENV_DIR/bin/pip' install -r requirements/build.txt || { echo '${RED}Error installing vllm build dependencies${NC}'; exit 1; }
  '$VENV_DIR/bin/pip' install -r requirements/common.txt || { echo '${RED}Error installing vllm common dependencies${NC}'; exit 1; }

  echo 'Installing vllm...'
  # MAX_JOBS caps parallel compile jobs to bound build memory use.
  MAX_JOBS=2

  echo \"Using MAX_JOBS=\${MAX_JOBS}\"

  # Use MAX_JOBS for installing vllm
  MAX_JOBS=\${MAX_JOBS} \"$VENV_DIR/bin/pip\" install -e . --no-build-isolation || { echo \"\${RED}Error installing vllm\${NC}\"; exit 1; }

  # Build a reusable wheel; setuptools pinned to a version known to work
  # with setup.py bdist_wheel here.
  pip install --upgrade --force-reinstall setuptools==78.1.0
  python setup.py bdist_wheel
  mv dist/*.whl '$WHEEL_DIR'
"
check_status

# Verify the installations
echo -e "\n${BLUE}==============================================${NC}"
echo -e "${BLUE} Verifying installations ${NC}"
echo -e "${BLUE}==============================================${NC}"

# Verify PyTorch within the virtual environment
echo -e "\n[STEP] ${YELLOW}Verifying PyTorch installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import torch; print(\"PyTorch version:\", torch.__version__); print(\"CUDA available:\", torch.cuda.is_available()); print(\"CUDA version:\", torch.version.cuda if torch.cuda.is_available() else \"N/A\")'
"
check_status

# Verify Xformers within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Xformers installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import xformers; print(\"Xformers is installed\")'
"
check_status

# Verify Flash Attention 2 within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Flash Attention 2 installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import flash_attn; print(\"Flash Attention 2 is installed\")'
"
check_status

# Verify vllm within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying vLLM installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import vllm; print(\"vLLM is installed\")'
"
check_status
docker-aiwhl/install_4_triton.sh ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Stage 4: replace the bundled pytorch-triton with a Triton build from a
# patched branch, then stash its wheel in $WHEEL_DIR.
set -euo pipefail

# Colors for better readability
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# conda's activate script references PS1; give it a value under `set -u`.
PS1=${PS1:-}

VENV_DIR="/root/miniconda3/"
BUILD_DIR="/root/build"
WHEEL_DIR="/root/wheels"

# Report the previous command's status; exit when called with "critical".
check_status() {
  if [ $? -eq 0 ]; then
    echo -e "${GREEN}✓ Success${NC}"
  else
    echo -e "${RED}✗ Error occurred${NC}"
    if [ "${1:-}" == "critical" ]; then
      echo -e "${RED}Critical error. Installation cannot continue.${NC}"
      exit 1
    fi
  fi
}

# Only allow root/sudo to install system dependencies
if [[ $EUID -ne 0 ]]; then
  echo -e "${RED}This script must be run as root (sudo).${NC}"
  exit 1
fi

echo -e "\n[STEP] ${YELLOW}Uninstalling PyTorch-triton...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  pip uninstall pytorch-triton -y
"
check_status

echo -e "\n[STEP] ${YELLOW}Installing Triton from the 'patch-1' branch...${NC}"
# Outer variables interpolate before the inner bash runs; \$-escaped ones
# are expanded by the inner shell.
bash -c "
  # Abort on first failure: several steps below are not individually checked.
  set -e
  source $VENV_DIR/bin/activate

  apt-get install -y zlib1g-dev

  # Remove any old Triton installation if it exists
  if [ -d '$BUILD_DIR/triton' ]; then
    echo 'Removing old Triton installation...'
    rm -rf '$BUILD_DIR/triton'
  fi

  # Clone the Triton repository from the patch-1 branch
  echo 'Cloning Triton from your GitHub on patch-1 branch...'
  git clone --branch patch-1 https://github.com/oteroantoniogom/triton.git '$BUILD_DIR/triton' || { echo '${RED}Error cloning Triton${NC}'; exit 1; }

  cd '$BUILD_DIR/triton'

  # Update any necessary submodules
  git submodule update --init --recursive || { echo '${RED}Error updating Triton submodules${NC}'; exit 1; }

  # Install needed dependencies
  echo 'Installing Triton dependencies...'
  pip install ninja cmake wheel pybind11 ipywidgets ipykernel chardet openpyxl wandb || { echo '${RED}Error installing Triton dependencies${NC}'; exit 1; }

  # Install Triton from source; MAX_JOBS caps parallel compile jobs to
  # bound build memory use.
  echo 'Building and installing Triton...'
  MAX_JOBS=3

  MAX_JOBS=\${MAX_JOBS} pip install -e python -v || { echo '${RED}Error installing Triton from source${NC}'; exit 1; }

  # Resolve version GLIBCXX_3.4.30 not found
  conda install -c conda-forge libstdcxx-ng=14 -y

  # Build a reusable wheel; setuptools pinned to a version known to work
  # with setup.py bdist_wheel here.
  pip install --upgrade --force-reinstall setuptools==78.1.0
  cd python
  python setup.py bdist_wheel
  mv dist/*.whl '$WHEEL_DIR'
"
check_status

echo -e "\n[STEP] ${YELLOW}Reinstalling PyTorch...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  pip install --force-reinstall torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/cu128
"
check_status

# Reinstalling torch drags pytorch-triton back in; remove it again so the
# source-built Triton stays active.
echo -e "\n[STEP] ${YELLOW}Uninstalling PyTorch-triton again...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  pip uninstall pytorch-triton -y
"
check_status

# Verify the installations
echo -e "\n${BLUE}==============================================${NC}"
echo -e "${BLUE} Verifying installations ${NC}"
echo -e "${BLUE}==============================================${NC}"

# Verify PyTorch within the virtual environment
echo -e "\n[STEP] ${YELLOW}Verifying PyTorch installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import torch; print(\"PyTorch version:\", torch.__version__); print(\"CUDA available:\", torch.cuda.is_available()); print(\"CUDA version:\", torch.version.cuda if torch.cuda.is_available() else \"N/A\")'
"
check_status

# Verify Xformers within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Xformers installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import xformers; print(\"Xformers is installed\")'
"
check_status

# Verify Flash Attention 2 within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Flash Attention 2 installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import flash_attn; print(\"Flash Attention 2 is installed\")'
"
check_status

# Verify vllm within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying vLLM installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import vllm; print(\"vLLM is installed\")'
"
check_status

# Verify Triton within the virtual environment
echo -e "\n[VERIFY] ${YELLOW}Verifying Triton installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import triton; print(\"Triton is installed\")'
"
check_status
docker-aiwhl/install_verify.sh ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Final stage: confirm that every package built in the previous stages
# imports cleanly inside the Miniconda environment.
set -euo pipefail

# Colors for better readability
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# conda's activate script references PS1; give it a value under `set -u`.
PS1=${PS1:-}

VENV_DIR="/root/miniconda3/"
BUILD_DIR="/root/build"
WHEEL_DIR="/root/wheels"

# Report the previous command's status; exit when called with "critical".
check_status() {
  if [ $? -eq 0 ]; then
    echo -e "${GREEN}✓ Success${NC}"
  else
    echo -e "${RED}✗ Error occurred${NC}"
    if [ "${1:-}" == "critical" ]; then
      echo -e "${RED}Critical error. Installation cannot continue.${NC}"
      exit 1
    fi
  fi
}

# Only allow root/sudo to install system dependencies
if [[ $EUID -ne 0 ]]; then
  echo -e "${RED}This script must be run as root (sudo).${NC}"
  exit 1
fi

# Verify the installations
echo -e "\n${BLUE}==============================================${NC}"
echo -e "${BLUE} Verifying installations ${NC}"
echo -e "${BLUE}==============================================${NC}"

# PyTorch gets a richer probe (version string plus CUDA availability).
echo -e "\n[STEP] ${YELLOW}Verifying PyTorch installation...${NC}"
bash -c "
  source $VENV_DIR/bin/activate && \
  python3.10 -c 'import torch; print(\"PyTorch version:\", torch.__version__); print(\"CUDA available:\", torch.cuda.is_available()); print(\"CUDA version:\", torch.version.cuda if torch.cuda.is_available() else \"N/A\")'
"
check_status

# The remaining packages only need a successful import; drive the identical
# check for each from a module|label table.
for spec in 'xformers|Xformers' 'flash_attn|Flash Attention 2' 'vllm|vLLM' 'triton|Triton'; do
  mod=${spec%%|*}
  label=${spec#*|}
  echo -e "\n[VERIFY] ${YELLOW}Verifying ${label} installation...${NC}"
  bash -c "
    source $VENV_DIR/bin/activate && \
    python3.10 -c 'import ${mod}; print(\"${label} is installed\")'
  "
  check_status
done