File size: 3,195 Bytes

bab5bbc
 
b024d42
 
bab5bbc
b024d42
bab5bbc
b024d42
bab5bbc
 
b024d42
 
 
 
 
 
 
 
 
 
 
 
bab5bbc
 
b024d42
bab5bbc
 
 
 
 
 
 
b024d42
bab5bbc
b024d42
bab5bbc
b024d42
bab5bbc
 
 
 
 
 
 
b024d42
 
 
bab5bbc
 
 
b024d42
bab5bbc
b024d42
bab5bbc
b024d42
 
 
 
 
bab5bbc
b024d42
 
 
bab5bbc
 
 
 
 
 
 
 
b024d42
bab5bbc
b024d42
bab5bbc
b024d42
 
 
bab5bbc
 
 
 
 
 
 
 
 
 
b024d42
bab5bbc
b024d42
bab5bbc
 
 
 
 
 
 
 
 
b024d42
bab5bbc
fb15ab0
6beae45
 
0648639
6beae45

# Dockerfile of qwenllm/qwen-omni:2.5-cu121

# CUDA version used to build the default base-image tag.
ARG CUDA_VERSION=12.1.0
# Full base-image reference; pass --build-arg from=<image> to build on a different image.
ARG from=nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu22.04

# Base stage: OS packages and build tooling shared by every later stage.
FROM ${from} AS base

# Keep apt non-interactive during the build only (ARG rather than ENV, so the
# setting is not baked into the runtime environment).
ARG DEBIAN_FRONTEND=noninteractive

# Install OS-level dependencies. `apt-get` is used instead of `apt` because the
# `apt` front end has no stable CLI for scripted use. The package lists are
# removed in the same layer so they do not end up in the image.
RUN <<EOF
set -e
apt-get update -y
apt-get upgrade -y
apt-get install -y --no-install-recommends \
    ccache \
    ffmpeg \
    git \
    git-lfs \
    libsndfile1 \
    python3 \
    python3-dev \
    python3-pip \
    software-properties-common \
    vim \
    wget
rm -rf /var/lib/apt/lists/*
EOF

# CMake release to install from the Kitware GitHub releases page.
# Override with --build-arg CMAKE_VERSION=<x.y.z>; the default matches the
# previously hard-coded version.
ARG CMAKE_VERSION=3.26.1

# Download the self-extracting installer, unpack it under /opt/cmake-<version>,
# delete the installer in the same layer, and link the binaries into /usr/local/bin.
# NOTE(review): the installer is not checksum-verified — consider pinning its SHA-256.
RUN wget "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-Linux-x86_64.sh" \
        -q -O /tmp/cmake-install.sh \
    && chmod u+x /tmp/cmake-install.sh \
    && mkdir "/opt/cmake-${CMAKE_VERSION}" \
    && /tmp/cmake-install.sh --skip-license --prefix="/opt/cmake-${CMAKE_VERSION}" \
    && rm /tmp/cmake-install.sh \
    && ln -s "/opt/cmake-${CMAKE_VERSION}"/bin/* /usr/local/bin

# Make the plain `python` command resolve to the system python3 interpreter.
RUN ln --symbolic /usr/bin/python3 /usr/bin/python

# Run Git LFS's install step so its git configuration is set up in the image.
RUN git lfs install

# Development stage: the base tooling plus the shared working directory.
FROM base AS dev

# WORKDIR creates the directory if it does not exist, so no separate
# `RUN mkdir -p` layer (or intermediate `WORKDIR /`) is needed.
WORKDIR /data/shared/Qwen/

# Requirements stage: Python packages installed with pip. Each step uses a
# BuildKit cache mount so pip's download cache is reused across builds.
FROM dev AS bundle_req

RUN --mount=type=cache,target=/root/.cache/pip \
    pip3 install networkx==3.1

RUN --mount=type=cache,target=/root/.cache/pip \
    pip3 install \
        torch==2.6.0 \
        torchvision==0.21.0 \
        torchaudio==2.6.0 \
        xformers==0.0.29.post2

# transformers is installed from a pinned commit of the upstream repository.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip3 install git+https://github.com/huggingface/transformers@3a1ead0aabed473eafe527915eea8c197d424356 \
    && pip3 install \
        accelerate \
        qwen-omni-utils \
        modelscope_studio

# Stage that adds the optional source builds on top of the requirements stage.
FROM bundle_req AS bundle_vllm

# Set to anything other than "true" to skip the FlashAttention source build.
ARG BUNDLE_FLASH_ATTENTION=true

# Environment for the source builds in this stage. CCACHE_DIR matches the
# ccache cache-mount target used by the RUN steps of this stage.
ENV MAX_JOBS=8 \
    NVCC_THREADS=1 \
    TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX" \
    VLLM_FA_CMAKE_GPU_ARCHES="80-real;90-real" \
    CCACHE_DIR=/root/.cache/ccache

# Optionally build and install FlashAttention from a fresh clone of the upstream
# repository (only when BUNDLE_FLASH_ATTENTION is "true"). The checkout is
# removed in the same step so the sources do not remain in the layer.
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip <<EOF
set -e
if [ "$BUNDLE_FLASH_ATTENTION" = "true" ]; then
    mkdir -p /data/shared/code
    pip install ninja
    cd /data/shared/code
    git clone https://github.com/Dao-AILab/flash-attention.git
    cd flash-attention
    python setup.py install
    cd /data/shared/Qwen
    rm -rf /data/shared/code/flash-attention
fi
EOF

# Set to anything other than "true" to skip the vLLM source build.
ARG BUNDLE_VLLM=true

# Optionally build and install vLLM from the qwen2_omni_public_v1 branch of the
# fyabc/vllm fork. use_existing_torch.py runs before the install — presumably so
# the build reuses the torch version installed earlier; confirm against that repo.
# The checkout is removed in the same step.
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip <<EOF
set -e
if [ "$BUNDLE_VLLM" = "true" ]; then
    mkdir -p /data/shared/code
    cd /data/shared/code
    git clone -b qwen2_omni_public_v1 https://github.com/fyabc/vllm.git
    cd vllm
    python3 use_existing_torch.py
    pip3 install setuptools_scm
    pip3 install -r requirements/cuda.txt
    pip3 install . --no-build-isolation
    cd /data/shared/Qwen
    rm -rf /data/shared/code/vllm
fi
EOF

# Additional Python packages (Gradio plus audio/video libraries).
# NOTE(review): both `ffmpeg` and `ffmpeg-python` are installed here; they appear
# to ship the same top-level `ffmpeg` module — confirm both are really required.
#
# No separate pip-cache cleanup layer follows: every pip invocation above runs
# under a cache mount, so the cache is never written into an image layer, and an
# `rm` in a later layer could not shrink the image anyway.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip3 install \
        gradio==5.21.0 \
        gradio_client==1.7.2 \
        librosa==0.11.0 \
        ffmpeg==1.4 \
        ffmpeg-python==0.2.0 \
        soundfile==0.13.1 \
        av

# Add the server script to the current working directory.
COPY server.py .

# Document the listening port; it must match the port used in server.py.
EXPOSE 8000

# Launch the server by default.
CMD ["python3", "server.py"]