jaothan's picture
Upload 356 files
c33a7ce verified
# Build arguments declared ahead of the first FROM. They are usable in FROM
# lines directly; a stage that needs one inside its body re-declares it.

# Images: the bootc base of the final stage, and the toolkit image that both
# build stages start from.
ARG BASEIMAGE="quay.io/centos-bootc/centos-bootc:stream9"
ARG DRIVER_TOOLKIT_IMAGE="quay.io/ai-lab/intel-builder:latest"

# Habana driver release and the RPM repository the driver source is fetched from.
ARG DRIVER_VERSION="1.17.1-40"
ARG HABANA_REPO="https://vault.habana.ai/artifactory/rhel/9/9.4"

# Declared without a default; no instruction visible in this file reads it.
ARG REPOS_REPO
# Stage "builder": unpack the habanalabs source RPM and compile the kernel
# modules against the kernel-core package installed in the toolkit image.
FROM ${DRIVER_TOOLKIT_IMAGE} AS builder
ARG DRIVER_VERSION
ARG HABANA_REPO
# SHAs taken from original Makefile at HL packages
ARG DRIVER_GIT_SHA=78932ae
ARG NIC_GIT_SHA=31d590f
WORKDIR /home/builder
# `set -o pipefail` makes a failing rpm2cpio (e.g. the RPM cannot be fetched)
# abort the build instead of being hidden behind cpio's exit status. The shell
# must already be bash-compatible because the chain relies on pushd.
# The RPM is unpacked relative to WORKDIR, so its payload ends up under
# /home/builder/{etc,lib,usr}; the modules are then built in three steps:
# Makefile.nic, the main Makefile, and the hbl InfiniBand driver directory.
RUN set -o pipefail \
    && . /etc/os-release \
    && export KERNEL_VERSION="$(rpm -q --qf '%{VERSION}-%{RELEASE}' kernel-core)" \
    && export TARGET_ARCH="$(rpm -q --qf '%{ARCH}' kernel-core)" \
    && export MAKEFLAGS="-j$(nproc)" \
    && rpm2cpio "${HABANA_REPO}/habanalabs-${DRIVER_VERSION}.el9.noarch.rpm" | cpio -idmv \
    && pushd "usr/src/habanalabs-${DRIVER_VERSION}" \
    && MAKE_IB=1 make -f Makefile.nic KVERSION="${KERNEL_VERSION}.${TARGET_ARCH}" GIT_SHA="${DRIVER_GIT_SHA}" NIC_KMD_GIT_SHA="${NIC_GIT_SHA}" \
    && make -f Makefile KVERSION="${KERNEL_VERSION}.${TARGET_ARCH}" \
    && pushd drivers/infiniband/hw/hbl \
    && make KVERSION="${KERNEL_VERSION}.${TARGET_ARCH}"
# Stage "libbuilder": toolchain and Habana userspace packages needed to build
# the communication libraries that the final image copies out of this stage.
FROM ${DRIVER_TOOLKIT_IMAGE} AS libbuilder
ARG ARTIFACTORY_URL="vault.habana.ai"
ARG DRIVER_VERSION
ARG HABANA_REPO
USER root
COPY --chmod=755 scripts/os_dependencies.sh /tmp/
# os_dependencies.sh runs with the BUILDERS_TOKEN secret mounted.
# NOTE(review): presumably it configures the Habana package repositories - confirm.
# /etc/selinux is moved aside for the duration of the dnf transaction and put
# back afterwards.
# NOTE(review): presumably to keep package scriptlets away from the SELinux
# policy during the container build - confirm.
RUN --mount=type=secret,id=extra-secrets-intel-bootc/BUILDERS_TOKEN /tmp/os_dependencies.sh \
    && mv /etc/selinux /etc/selinux.tmp \
    && dnf install -y --nodocs --allowerasing --best \
        gcc-c++ \
        git \
        habanalabs-graph-${DRIVER_VERSION}.el9 \
        make \
        rdma-core-devel \
        unzip \
    && dnf clean all \
    && rm -rf /var/cache/yum \
    && mv /etc/selinux.tmp /etc/selinux
# Build environment for the library stage.
# Each variable that depends on another one lives in its own ENV instruction,
# because substitutions inside a single ENV see the values from before it.
ENV LIBFABRIC_VERSION="1.20.0"
ENV LIBFABRIC_ROOT="/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}"
# Append the inherited LD_LIBRARY_PATH only when it is non-empty. A bare
# trailing ":" would add an empty element, which the dynamic loader treats as
# the current working directory.
ENV LD_LIBRARY_PATH="${LIBFABRIC_ROOT}/lib:/usr/lib/habanalabs${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
ENV PATH="${LIBFABRIC_ROOT}/bin:${PATH}"
ENV RDMAV_FORK_SAFE=1
ENV PIP_NO_CACHE_DIR=on
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src
ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib
# Download the libfabric release tarball, build it with verbs/PSM3 and
# SynapseAI support, install it under LIBFABRIC_ROOT, and remove the sources in
# the same layer.
# --fail makes curl exit non-zero on an HTTP error, instead of saving the error
# page and failing later with a confusing tar message.
RUN curl -fL -o "/tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2" \
        "https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2" \
    && cd /tmp/ \
    && tar --no-same-owner -xf "/tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2" \
    && cd "/tmp/libfabric-${LIBFABRIC_VERSION}" \
    && ./configure --prefix="${LIBFABRIC_ROOT}" --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr \
    && make -j"$(nproc)" \
    && make install \
    && cd / \
    && rm -rf "/tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2" "/tmp/libfabric-${LIBFABRIC_VERSION}"
# HCCL OFI wrapper: build libhccl_ofi_wrapper.so from the upstream sources and
# place it next to the other Habana libraries.
# --fail makes curl exit non-zero on an HTTP error, instead of handing an error
# page to unzip.
# NOTE(review): this fetches the tip of the `main` branch, so rebuilds are not
# reproducible; consider pinning a tag or commit archive.
RUN curl -fL -o /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip \
    && unzip /tmp/main.zip -d /tmp \
    && cd /tmp/hccl_ofi_wrapper-main \
    && make \
    && cp -f libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so \
    && cd / \
    && rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main
# Final stage: the bootc image that receives the kernel modules, firmware and
# libraries produced by the earlier stages.
FROM ${BASEIMAGE}
# Re-declared without a default, so the value comes from the global
# DRIVER_VERSION ARG (or --build-arg). A stage-local default would replace the
# global one, and could drift from the version the builder stage compiled,
# which would break the versioned COPY --from=builder paths below.
ARG DRIVER_VERSION
ARG ARTIFACTORY_URL="vault.habana.ai"
USER root
# Kernel modules compiled in the builder stage. They are staged under
# /tmp/extra; a later RUN moves that directory into the kernel's module tree.
COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/accel/habanalabs/habanalabs.ko /tmp/extra/habanalabs.ko
COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/infiniband/hw/hbl/habanalabs_ib.ko /tmp/extra/habanalabs_ib.ko
COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/net/ethernet/intel/hbl_cn/habanalabs_cn.ko /tmp/extra/habanalabs_cn.ko
COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/net/ethernet/intel/hbl_en/habanalabs_en.ko /tmp/extra/habanalabs_en.ko
# Other trees from the builder's work directory (where the habanalabs RPM was
# unpacked): /etc content, Gaudi firmware and /usr/sbin content.
COPY --from=builder /home/builder/etc/ /etc/
COPY --from=builder /home/builder/lib/firmware/habanalabs/gaudi/ /lib/firmware/habanalabs/gaudi/
COPY --from=builder /home/builder/usr/sbin /usr/sbin/
# Libraries built in the libbuilder stage. The libfabric path hard-codes the
# version, so it must match LIBFABRIC_VERSION in that stage.
COPY --from=libbuilder /usr/lib/habanalabs/libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so
COPY --from=libbuilder /opt/habanalabs/libfabric-1.20.0 /opt/habanalabs/libfabric-1.20.0
# Helper script executed by the next RUN instruction.
COPY --chmod=755 scripts/os_dependencies.sh /tmp/
# Install Python 3.11 plus the build tooling, then run os_dependencies.sh with
# the BUILDERS_TOKEN secret mounted. /etc/selinux is parked under a temporary
# name for the whole step and restored at the end.
RUN --mount=type=secret,id=extra-secrets-intel-bootc/BUILDERS_TOKEN \
    mv /etc/selinux /etc/selinux.tmp \
    && dnf install -y \
        gcc-c++ \
        git \
        make \
        python3.11 \
        python3.11-devel \
        python3.11-pip \
        unzip \
    && dnf clean all \
    && rm -rf /var/cache/yum \
    && /tmp/os_dependencies.sh \
    && mv /etc/selinux.tmp /etc/selinux
# Build ninja from source and install the resulting binary.
# The bootstrap produces ./ninja inside the checkout, so the binary is
# installed first and the checkout is then removed by absolute path from
# outside it. A relative `rm -rf ninja` run from inside the checkout would
# delete the freshly built binary and leave the source tree in the image.
# NOTE(review): /usr/bin is chosen as the install location - adjust if the
# image expects ninja elsewhere.
RUN git clone https://github.com/ninja-build/ninja.git /tmp/ninja-src \
    && cd /tmp/ninja-src \
    && ./configure.py --bootstrap \
    && install -m 0755 ninja /usr/bin/ninja \
    && cd / \
    && rm -rf /tmp/ninja-src
# Install the Habana userspace stack and register the kernel modules.
# Steps, in order:
#   1. read the kernel version/arch of the installed kernel-core package;
#   2. with /etc/selinux moved aside, update the system (kernel and
#      microcode_ctl excluded) and install the media libraries and the
#      habanalabs packages matching DRIVER_VERSION;
#   3. drop the Habana repo definitions and python3.11-devel, clean dnf caches;
#   4. move the staged modules (/tmp/extra) into the kernel's module directory,
#      add a softdep so habanalabs_ib is loaded after habanalabs, run depmod;
#   5. empty /tmp.
# The kernel* exclude is quoted so the shell can never expand it as a glob, and
# /tmp is addressed by absolute path so the cleanup does not depend on the
# current working directory.
RUN . /etc/os-release \
    && export OS_VERSION_MAJOR=$(echo ${VERSION} | cut -d'.' -f 1) \
    && export KERNEL_VERSION=$(rpm -q --qf '%{VERSION}-%{RELEASE}' kernel-core) \
    && export TARGET_ARCH=$(rpm -q --qf '%{ARCH}' kernel-core) \
    && mv /etc/selinux /etc/selinux.tmp \
    && dnf -y update --exclude='kernel*' --exclude=microcode_ctl \
    && dnf install -y --nodocs --allowerasing --best \
        libogg-devel \
        libid3tag \
        opusfile-devel \
        sox-devel \
        libnl3-devel \
        habanalabs-rdma-core-${DRIVER_VERSION}.el9 \
        habanalabs-thunk-${DRIVER_VERSION}.el9 \
        habanalabs-firmware-${DRIVER_VERSION}.el9 \
        habanalabs-firmware-tools-${DRIVER_VERSION}.el9 \
        habanalabs-graph-${DRIVER_VERSION}.el9 \
        habanalabs-qual-${DRIVER_VERSION}.el9 \
        habanalabs-firmware-odm-${DRIVER_VERSION}.el9 \
    && rm -f /etc/yum.repos.d/habanalabs.repo && rm -f /etc/yum.repos.d/habana.repo \
    && dnf remove -y --noautoremove \
        python3.11-devel \
    && dnf clean all && rm -rf /var/cache/yum \
    && mv /etc/selinux.tmp /etc/selinux \
    && mv /tmp/extra /usr/lib/modules/${KERNEL_VERSION}.${TARGET_ARCH} \
    && echo "softdep habanalabs post: habanalabs_ib" > /etc/modprobe.d/habanalabs_ib_dep.conf \
    && depmod -a ${KERNEL_VERSION}.${TARGET_ARCH} \
    && rm -rf /tmp/*
# Pinned Python packaging tools and the Habana media loader.
# --no-cache-dir keeps pip's download cache out of the image layer.
# NOTE(review): the habana_media_loader version looks like DRIVER_VERSION with
# "-" replaced by "." (1.17.1-40 -> 1.17.1.40); confirm, and bump both together.
RUN python3.11 -m pip install --no-cache-dir \
        pip==23.3.1 \
        setuptools==67.3.3 \
        wheel==0.38.4 \
        habana_media_loader=="1.17.1.40"
# Optional, space-separated list of additional RPMs to install. The ARG must be
# declared for --build-arg EXTRA_RPM_PACKAGES=... to reach the RUN below;
# without a declaration the variable always expands to nothing.
ARG EXTRA_RPM_PACKAGES=""
# Install the extra packages plus skopeo, cloud-init and rsync (with
# /etc/selinux moved aside during the transaction), then link
# cloud-init.target into default.target.wants so that it is pulled in at boot.
RUN mv /etc/selinux /etc/selinux.tmp \
    && dnf install -y ${EXTRA_RPM_PACKAGES} \
        skopeo \
        cloud-init \
        rsync \
    && dnf clean all \
    && mv /etc/selinux.tmp /etc/selinux \
    && ln -s ../cloud-init.target /usr/lib/systemd/system/default.target.wants
ARG SSHPUBKEY
# The --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" option inserts your
# public key into the image, allowing root access via ssh.
# When the argument is empty, nothing is changed. Otherwise sshd is told to
# also read keys from /usr/ssh/%u.keys and the key is stored as root's key file.
# The key is written with a quoted printf so that it is stored verbatim: an
# unquoted echo would word-split it, expand globs and collapse whitespace.
RUN if [ -n "${SSHPUBKEY}" ]; then \
        set -eu; mkdir -p /usr/ssh && \
        echo 'AuthorizedKeysFile /usr/ssh/%u.keys .ssh/authorized_keys .ssh/authorized_keys2' >> /etc/ssh/sshd_config.d/30-auth-system.conf && \
        printf '%s\n' "${SSHPUBKEY}" > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys; \
    fi
# Register /usr/lib/containers/storage as an additional image store.
# The entry is appended after the line matching "additionalimage" in
# storage.conf, and only when the path is not already mentioned there, so a
# base image that already configures it is left untouched.
# Remove this step once the base images ship the setting by default.
RUN if ! grep -q /usr/lib/containers/storage /etc/containers/storage.conf; then \
        sed -i -e '/additionalimage.*/a "/usr/lib/containers/storage",' \
            /etc/containers/storage.conf; \
    fi
# Install the ilab wrapper script as an executable. The mode is set at copy
# time, which avoids a separate chmod layer that would store the file twice.
COPY --chmod=755 duplicated/ilab-wrapper/ilab /usr/bin/ilab
# Image the ilab wrapper launches, and the id of the build secret that holds
# registry credentials for pulling it.
ARG INSTRUCTLAB_IMAGE="quay.io/ai-lab/intel-instructlab:latest"
ARG INSTRUCTLAB_IMAGE_PULL_SECRET="extra-secrets-intel-bootc"
# Fill in the placeholders of every /usr/bin/ilab* script: the training device
# becomes "hpu" and the image name becomes INSTRUCTLAB_IMAGE.
RUN for wrapper in /usr/bin/ilab*; do \
        sed -i 's/__REPLACE_TRAIN_DEVICE__/hpu/' $wrapper; \
        sed -i "s%__REPLACE_IMAGE_NAME__%${INSTRUCTLAB_IMAGE}%" $wrapper; \
    done
# Declare /var/lib/containers as a volume.
# Added for running as an OCI container, to prevent overlay-on-overlay issues.
VOLUME /var/lib/containers
# Prepull the instructlab image into /usr/lib/containers/storage.
# Source selection, in order:
#   1. an OCI layout supplied at /run/.input/instructlab-intel: pull it and tag
#      the resulting image ID as INSTRUCTLAB_IMAGE;
#   2. a registry pull authenticated with the mounted .dockerconfigjson secret;
#   3. an anonymous registry pull.
# podman is invoked directly in every branch: the stage already runs as root,
# so sudo is not needed. The image ID is captured only where it is used.
RUN --mount=type=secret,id=${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson \
    if [ -f "/run/.input/instructlab-intel/oci-layout" ]; then \
        IID=$(podman --root /usr/lib/containers/storage pull oci:/run/.input/instructlab-intel) && \
        podman --root /usr/lib/containers/storage image tag "${IID}" "${INSTRUCTLAB_IMAGE}"; \
    elif [ -f "/run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson" ]; then \
        podman --root /usr/lib/containers/storage pull --authfile "/run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson" "${INSTRUCTLAB_IMAGE}"; \
    else \
        podman --root /usr/lib/containers/storage pull "${INSTRUCTLAB_IMAGE}"; \
    fi
# Reset podman's storage without prompting (--force); stderr is discarded.
# No --root is passed here, unlike the prepull step, which used
# --root /usr/lib/containers/storage.
# NOTE(review): presumably this clears state podman left in its default
# location during the build without touching the prepulled image - confirm.
RUN podman system reset --force 2>/dev/null