# syntax=docker/dockerfile:1
# vllm-llama2 Dockerfile (source: Hugging Face Space "vllm-llama2", rev 932a3f7)
FROM nvidia/cuda:12.5.1-cudnn-devel-ubuntu20.04

# Build-time only: silence interactive apt prompts without leaking the
# setting into the runtime environment (use ARG, not ENV, for this).
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=Asia/Kolkata
# Remove any third-party apt sources to avoid issues with expiring keys.
# update + install share one layer (stale-cache safety); recommends are
# skipped and apt lists are removed in the same layer to keep the image lean.
# Package list is sorted alphabetically for readable diffs.
RUN rm -f /etc/apt/sources.list.d/*.list && \
    apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    bzip2 \
    ca-certificates \
    curl \
    git \
    git-lfs \
    htop \
    libsndfile-dev \
    libx11-6 \
    nano \
    procps \
    software-properties-common \
    sudo \
    unzip \
    vim \
    wget \
    zip \
    && rm -rf /var/lib/apt/lists/*
# nvtop (GPU process monitor) is only available for Ubuntu 20.04 via this PPA.
# Blanket `apt-get upgrade` removed (hadolint DL3005 — bump the base image tag
# instead); update + install + list cleanup kept in one layer.
RUN add-apt-repository -y ppa:flexiondotorg/nvtop && \
    apt-get update && \
    apt-get install -y --no-install-recommends nvtop && \
    rm -rf /var/lib/apt/lists/*
# Node.js for configurable-http-proxy.
# Download the NodeSource installer to a file instead of `curl | bash` — under
# the default /bin/sh a failed download would be silently ignored (DL4006);
# `-f` makes curl fail on HTTP errors. apt lists and the npm cache are removed
# in the same layer so they never persist in the image.
# NOTE(review): Node 21 is end-of-life — consider an LTS line (20.x/22.x).
RUN curl -fsSL -o /tmp/nodesource_setup.sh https://deb.nodesource.com/setup_21.x && \
    bash /tmp/nodesource_setup.sh && \
    rm /tmp/nodesource_setup.sh && \
    apt-get install -y --no-install-recommends nodejs && \
    rm -rf /var/lib/apt/lists/* && \
    npm install -g configurable-http-proxy && \
    npm cache clean --force
# Create a working directory
WORKDIR /app

# Create non-root user that owns /app.
# NOTE(review): passwordless sudo effectively makes this user root — drop the
# sudoers entry if nothing in the container genuinely needs it.
RUN adduser --disabled-password --gecos '' --shell /bin/bash user \
    && chown -R user:user /app
RUN echo "user ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-user

USER user
ENV HOME=/home/user

# Pre-create cache/config dirs. "user" already owns $HOME, so the previous
# world-writable `chmod -R 777` was unnecessary and is dropped (least privilege).
RUN mkdir -p $HOME/.cache $HOME/.config
# Install Miniconda (pinned installer version for reproducible builds).
ENV CONDA_AUTO_UPDATE_CONDA=false \
    PATH=$HOME/miniconda/bin:$PATH
# repo.continuum.io is the deprecated legacy host — repo.anaconda.com is the
# canonical mirror for the same installer. `-f` makes curl fail on HTTP errors
# so we never chmod+execute an HTML error page; installer is removed and conda
# caches cleaned in the same layer.
RUN curl -fsSLo ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py39_4.10.3-Linux-x86_64.sh \
    && chmod +x ~/miniconda.sh \
    && ~/miniconda.sh -b -p ~/miniconda \
    && rm ~/miniconda.sh \
    && conda clean -ya
WORKDIR $HOME/app
#######################################
# Root section
#######################################
# Temporarily switch back to root for package installs and system paths;
# the image drops to "user" again below before any runtime instructions.
USER root
# Optional extra packages
# System python3 toolchain (distinct from the Miniconda python on PATH —
# apt hooks and system tooling use /usr/bin/python3).
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3-dev python3-pip python3-venv \
    && rm -rf /var/lib/apt/lists/*
# /data: writable scratch/model directory for the non-root runtime user.
RUN mkdir /data && chown user:user /data
#######################################
# End root section
#######################################
USER user
# Install Python requirements for vLLM (into the Miniconda env on PATH).
# NOTE(review): vllm is unpinned — pin an exact version (vllm==X.Y.Z) for
# reproducible builds.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir vllm
# Pre-fetch model weights at build time so container startup is fast (optional).
# The api_server entrypoint has no `--download-only` flag (the original line
# would fail the build); use huggingface_hub's snapshot_download instead — it
# is installed as a vllm dependency and populates the same HF cache the
# server reads at runtime.
RUN python3 -c "from huggingface_hub import snapshot_download; snapshot_download('unsloth/llama-2-7b-bnb-4bit')"
# Copy source code last (most frequently changing layer) — owned by the
# runtime user, matching the user:user ownership used elsewhere in the file.
COPY --chown=user:user . $HOME/app
# NOTE(review): start_server.sh is made executable but never invoked (CMD
# launches the server directly) — either use it as the CMD or remove this.
RUN chmod +x start_server.sh
# Set env for vLLM (unbuffered logs; host/port for wrapper scripts).
ENV PYTHONUNBUFFERED=1 \
    VLLM_HOST=0.0.0.0 \
    VLLM_PORT=7860
# Documentation only — publish with `docker run -p 7860:7860`.
EXPOSE 7860
# Exec-form CMD: the server runs as PID 1 and receives SIGTERM on `docker stop`.
CMD ["python3", "-m", "vllm.entrypoints.api_server", "--model", "unsloth/llama-2-7b-bnb-4bit", "--host", "0.0.0.0", "--port", "7860"]