File size: 8,422 Bytes
c33a7ce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
# Build-time parameters declared ahead of the first FROM. They can be used in
# FROM lines directly; a stage has to redeclare one (plain `ARG NAME`) before
# its instructions can read it.

# Images: the toolkit image backs both build stages, the base image backs the
# final stage.
ARG BASEIMAGE="quay.io/centos-bootc/centos-bootc:stream9"
ARG DRIVER_TOOLKIT_IMAGE="quay.io/ai-lab/intel-builder:latest"

# Habana package version and the repository the driver source RPM is fetched from.
ARG DRIVER_VERSION="1.17.1-40"
ARG HABANA_REPO="https://vault.habana.ai/artifactory/rhel/9/9.4"

# Declared without a default; no instruction visible in this file references it.
ARG REPOS_REPO
# Stage "builder": unpack the habanalabs source RPM and compile the kernel
# modules against the kernel-core package installed in the toolkit image.
FROM ${DRIVER_TOOLKIT_IMAGE} AS builder
ARG DRIVER_VERSION
ARG HABANA_REPO
# SHAs taken from original Makefile at HL packages
ARG DRIVER_GIT_SHA=78932ae
ARG NIC_GIT_SHA=31d590f
WORKDIR /home/builder
# `set -o pipefail` makes a failing rpm2cpio (e.g. the RPM URL is unreachable)
# abort the build instead of being masked by the exit status of cpio. The
# step already needs a bash-compatible shell because it uses pushd.
# KVERSION is <version>-<release>.<arch> of the installed kernel-core package.
RUN set -o pipefail \
    && . /etc/os-release \
    && export KERNEL_VERSION=$(rpm -q --qf '%{VERSION}-%{RELEASE}' kernel-core) \
    && export TARGET_ARCH=$(rpm -q --qf '%{ARCH}' kernel-core) \
    && export MAKEFLAGS="-j$(nproc)" \
    && rpm2cpio ${HABANA_REPO}/habanalabs-${DRIVER_VERSION}.el9.noarch.rpm | cpio -idmv \
    && pushd usr/src/habanalabs-${DRIVER_VERSION} \
    && MAKE_IB=1 make -f Makefile.nic KVERSION=${KERNEL_VERSION}.${TARGET_ARCH} GIT_SHA=${DRIVER_GIT_SHA} NIC_KMD_GIT_SHA=${NIC_GIT_SHA} \
    && make -f Makefile KVERSION=${KERNEL_VERSION}.${TARGET_ARCH} \
    && pushd drivers/infiniband/hw/hbl \
    && make KVERSION=${KERNEL_VERSION}.${TARGET_ARCH}
# Build libraries
# Stage "libbuilder": toolkit image plus the compiler toolchain and the Habana
# graph / rdma-core development packages used to build the libraries.
FROM ${DRIVER_TOOLKIT_IMAGE} AS libbuilder
ARG DRIVER_VERSION
# HABANA_REPO and ARTIFACTORY_URL are not referenced by the instructions of
# this stage directly; presumably os_dependencies.sh reads them from the
# build environment - TODO confirm against the script.
ARG HABANA_REPO
ARG ARTIFACTORY_URL="vault.habana.ai"
USER root
COPY --chmod=755 scripts/os_dependencies.sh /tmp/
# os_dependencies.sh runs first with the BUILDERS_TOKEN secret mounted.
# /etc/selinux is moved aside for the duration of the dnf transaction and put
# back afterwards (NOTE(review): presumably so package scriptlets do not touch
# the SELinux configuration during the build - confirm).
# dnf keeps its cache under /var/cache/dnf, so that path is removed together
# with the legacy /var/cache/yum location.
RUN --mount=type=secret,id=extra-secrets-intel-bootc/BUILDERS_TOKEN \
    /tmp/os_dependencies.sh \
    && mv /etc/selinux /etc/selinux.tmp \
    && dnf install -y --nodocs --allowerasing --best \
        gcc-c++ \
        git \
        habanalabs-graph-${DRIVER_VERSION}.el9 \
        make \
        rdma-core-devel \
        unzip \
    && dnf clean all \
    && rm -rf /var/cache/dnf /var/cache/yum \
    && mv /etc/selinux.tmp /etc/selinux
# libfabric install location. Each value below builds on an earlier one, so
# they stay in separate ENV instructions: substitution inside an ENV only sees
# values set by previous instructions.
ENV LIBFABRIC_VERSION="1.20.0"
ENV LIBFABRIC_ROOT="/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}"
ENV PATH=${LIBFABRIC_ROOT}/bin:${PATH}
ENV LD_LIBRARY_PATH=${LIBFABRIC_ROOT}/lib:/usr/lib/habanalabs:${LD_LIBRARY_PATH}

# rdma-core source tree and its build output directory.
ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src
ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib

# Independent switches, grouped in one instruction.
ENV RDMAV_FORK_SAFE=1 \
    PIP_NO_CACHE_DIR=on \
    PIP_DISABLE_PIP_VERSION_CHECK=1
# Download, build and install libfabric ${LIBFABRIC_VERSION} into
# ${LIBFABRIC_ROOT}, then remove the tarball and the source tree in the same
# layer.
# curl -f turns an HTTP error (e.g. 404 for a bad version) into a build
# failure right away instead of saving the error page as the tarball.
RUN curl -fL -o /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \
        https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \
    && cd /tmp/ \
    && tar --no-same-owner -xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \
    && cd /tmp/libfabric-${LIBFABRIC_VERSION} \
    && ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr \
    && make -j"$(nproc)" \
    && make install \
    && cd / \
    && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION}
# Hccl wrapper: build libhccl_ofi_wrapper.so from the HabanaAI/hccl_ofi_wrapper
# sources and place it in /usr/lib/habanalabs.
# curl -f makes an HTTP error fail the step immediately rather than handing an
# error page to unzip.
# NOTE(review): the archive tracks the moving `main` branch, so rebuilds are
# not reproducible; consider pinning a tag or commit archive.
RUN curl -fL -o /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip \
    && unzip /tmp/main.zip -d /tmp \
    && cd /tmp/hccl_ofi_wrapper-main \
    && make \
    && cp -f libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so \
    && cd / \
    && rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main
# Final stage: the bootc base image plus the artifacts produced by the
# "builder" and "libbuilder" stages.
FROM ${BASEIMAGE}
# Inherit the default from the ARG declared before the first FROM rather than
# repeating the literal here, so the kernel modules built in "builder" and the
# packages installed in this stage cannot drift to different versions.
ARG DRIVER_VERSION
ARG ARTIFACTORY_URL="vault.habana.ai"
USER root
# Kernel modules, staged in /tmp/extra; a later RUN moves that directory under
# /usr/lib/modules/<kernel version>.
COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/accel/habanalabs/habanalabs.ko /tmp/extra/habanalabs.ko
COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/infiniband/hw/hbl/habanalabs_ib.ko /tmp/extra/habanalabs_ib.ko
COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/net/ethernet/intel/hbl_cn/habanalabs_cn.ko /tmp/extra/habanalabs_cn.ko
COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/net/ethernet/intel/hbl_en/habanalabs_en.ko /tmp/extra/habanalabs_en.ko
# Remaining payload extracted from the habanalabs RPM: configuration, Gaudi
# firmware and sbin tools.
COPY --from=builder /home/builder/etc/ /etc/
COPY --from=builder /home/builder/lib/firmware/habanalabs/gaudi/ /lib/firmware/habanalabs/gaudi/
COPY --from=builder /home/builder/usr/sbin /usr/sbin/
# Libraries built in the "libbuilder" stage.
# NOTE(review): the libfabric directory name must match LIBFABRIC_VERSION
# (1.20.0) set in the libbuilder stage; keep both in sync when bumping.
COPY --from=libbuilder /usr/lib/habanalabs/libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so
COPY --from=libbuilder /opt/habanalabs/libfabric-1.20.0 /opt/habanalabs/libfabric-1.20.0
COPY --chmod=755 scripts/os_dependencies.sh /tmp/
# Install Python 3.11 and the build toolchain, then run os_dependencies.sh
# with the BUILDERS_TOKEN secret mounted.
# /etc/selinux is moved out of the way for the whole step and restored at the
# end. The script runs after the dnf cache cleanup, in the original order.
RUN --mount=type=secret,id=extra-secrets-intel-bootc/BUILDERS_TOKEN \
    mv /etc/selinux /etc/selinux.tmp && \
    dnf install -y \
        gcc-c++ \
        git \
        make \
        python3.11 \
        python3.11-devel \
        python3.11-pip \
        unzip && \
    dnf clean all && \
    rm -rf /var/cache/yum && \
    /tmp/os_dependencies.sh && \
    mv /etc/selinux.tmp /etc/selinux
# Build ninja from source and install the resulting binary.
# `configure.py --bootstrap` leaves the executable at ./ninja inside the
# checkout. The previous `rm -rf ninja`, executed from inside the checkout,
# deleted that freshly built binary and left the source tree behind in the
# image, so ninja was never actually available. The binary is now installed
# into /usr/bin before the whole checkout is removed.
# NOTE(review): the clone tracks the default branch; pin a release tag with
# `--branch <tag>` if reproducible builds are required.
RUN git clone --depth 1 https://github.com/ninja-build/ninja.git /tmp/ninja \
    && cd /tmp/ninja \
    && ./configure.py --bootstrap \
    && install -m 0755 ninja /usr/bin/ninja \
    && cd / \
    && rm -rf /tmp/ninja
# Update the OS (kernel and microcode excluded), install the Habana userspace
# packages for ${DRIVER_VERSION}, and register the kernel modules that earlier
# COPY instructions staged in /tmp/extra.
#
# Steps, in order:
#   - read the kernel version/arch from the installed kernel-core package;
#   - move /etc/selinux aside for the dnf transactions, restore it afterwards;
#   - drop the Habana repo files and python3.11-devel once packages are in;
#   - move /tmp/extra under /usr/lib/modules/<kver>.<arch>, add a softdep so
#     habanalabs_ib is loaded after habanalabs, and rebuild module deps;
#   - clear /tmp.
#
# Fixes relative to the previous version:
#   - `rm -rf tmp/*` used a relative path and only cleaned /tmp because the
#     working directory happens to be /; it is now absolute.
#   - the `kernel*` exclude pattern is quoted so the shell can never expand it.
# NOTE(review): OS_VERSION_MAJOR is exported but not used by any command
# visible in this step.
RUN . /etc/os-release \
    && export OS_VERSION_MAJOR=$(echo ${VERSION} | cut -d'.' -f 1) \
    && export KERNEL_VERSION=$(rpm -q --qf '%{VERSION}-%{RELEASE}' kernel-core) \
    && export TARGET_ARCH=$(rpm -q --qf '%{ARCH}' kernel-core) \
    && mv /etc/selinux /etc/selinux.tmp \
    && dnf -y update --exclude='kernel*' --exclude=microcode_ctl \
    && dnf install -y --nodocs --allowerasing --best \
        libogg-devel \
        libid3tag \
        opusfile-devel \
        sox-devel \
        libnl3-devel \
        habanalabs-rdma-core-${DRIVER_VERSION}.el9 \
        habanalabs-thunk-${DRIVER_VERSION}.el9 \
        habanalabs-firmware-${DRIVER_VERSION}.el9 \
        habanalabs-firmware-tools-${DRIVER_VERSION}.el9 \
        habanalabs-graph-${DRIVER_VERSION}.el9 \
        habanalabs-qual-${DRIVER_VERSION}.el9 \
        habanalabs-firmware-odm-${DRIVER_VERSION}.el9 \
    && rm -f /etc/yum.repos.d/habanalabs.repo && rm -f /etc/yum.repos.d/habana.repo \
    && dnf remove -y --noautoremove \
        python3.11-devel \
    && dnf clean all && rm -rf /var/cache/dnf /var/cache/yum \
    && mv /etc/selinux.tmp /etc/selinux \
    && mv /tmp/extra /usr/lib/modules/${KERNEL_VERSION}.${TARGET_ARCH} \
    && echo "softdep habanalabs post: habanalabs_ib" > /etc/modprobe.d/habanalabs_ib_dep.conf \
    && depmod -a ${KERNEL_VERSION}.${TARGET_ARCH} \
    && rm -rf /tmp/*
# Pin the Python packaging tools and install the Habana media loader.
# --no-cache-dir keeps pip's download cache out of the image layer.
# The media loader version is derived from DRIVER_VERSION by replacing the
# dash with a dot (1.17.1-40 -> 1.17.1.40), so it follows the driver version
# instead of being a second hard-coded literal.
RUN python3.11 -m pip install --no-cache-dir \
        pip==23.3.1 \
        setuptools==67.3.3 \
        wheel==0.38.4 \
        habana_media_loader=="$(echo "${DRIVER_VERSION}" | sed 's/-/./g')"
# Optional extra RPMs, e.g. --build-arg EXTRA_RPM_PACKAGES="vim tmux".
# The ARG has to be declared in this stage: without the declaration the
# variable is not defined inside RUN and the build argument is silently
# ignored.
ARG EXTRA_RPM_PACKAGES=""
# Install skopeo, cloud-init and rsync (plus any extras) with /etc/selinux
# moved aside for the transaction, then make default.target want
# cloud-init.target.
# The wants directory is created if missing and the link is given an explicit
# name; otherwise, on a base image without that directory, `ln -s` would
# create a link called default.target.wants instead of a link inside it.
# ${EXTRA_RPM_PACKAGES} is intentionally unquoted so it splits into one
# argument per package.
RUN mv /etc/selinux /etc/selinux.tmp \
    && dnf install -y ${EXTRA_RPM_PACKAGES} \
        cloud-init \
        rsync \
        skopeo \
    && dnf clean all \
    && mv /etc/selinux.tmp /etc/selinux \
    && mkdir -p /usr/lib/systemd/system/default.target.wants \
    && ln -s ../cloud-init.target /usr/lib/systemd/system/default.target.wants/cloud-init.target
ARG SSHPUBKEY
# The --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" option inserts your
# public key into the image, allowing root access via ssh.
# When the argument is empty or unset this step does nothing.
# The key is written with printf and quoted, so its content reaches the file
# verbatim (no word splitting or glob expansion of the key comment).
RUN if [ -n "${SSHPUBKEY}" ]; then \
        set -eu; \
        mkdir -p /usr/ssh && \
        echo 'AuthorizedKeysFile /usr/ssh/%u.keys .ssh/authorized_keys .ssh/authorized_keys2' >> /etc/ssh/sshd_config.d/30-auth-system.conf && \
        printf '%s\n' "${SSHPUBKEY}" > /usr/ssh/root.keys && \
        chmod 0600 /usr/ssh/root.keys; \
    fi
# Register /usr/lib/containers/storage as an additional image store.
# Only needed until the base images ship this setting themselves; the grep
# guard skips the edit when storage.conf already mentions the path, so the
# entry is never duplicated.
RUN if ! grep -q /usr/lib/containers/storage /etc/containers/storage.conf; then \
        sed -i -e '/additionalimage.*/a "/usr/lib/containers/storage",' \
            /etc/containers/storage.conf; \
    fi
# Install the ilab wrapper script as an executable. --chmod sets the mode at
# copy time, matching the other COPY --chmod instructions in this file and
# avoiding a second layer that only changes permissions.
COPY --chmod=755 duplicated/ilab-wrapper/ilab /usr/bin/ilab
# Container image the ilab wrapper runs, and the id of the build secret that
# holds the pull credentials for it.
ARG INSTRUCTLAB_IMAGE="quay.io/ai-lab/intel-instructlab:latest"
ARG INSTRUCTLAB_IMAGE_PULL_SECRET="extra-secrets-intel-bootc"
# Fill in the placeholders of every /usr/bin/ilab* file: the training device
# becomes "hpu" and the image name becomes ${INSTRUCTLAB_IMAGE}.
RUN for wrapper in /usr/bin/ilab*; do \
        sed -i \
            -e 's/__REPLACE_TRAIN_DEVICE__/hpu/' \
            -e "s%__REPLACE_IMAGE_NAME__%${INSTRUCTLAB_IMAGE}%" \
            $wrapper; \
    done
# Added for running as an OCI Container to prevent Overlay on Overlay issues.
# Declares /var/lib/containers as a volume; the image prepull in this file
# writes to /usr/lib/containers/storage instead, so it is not affected.
VOLUME /var/lib/containers
# Prepull the instructlab image into /usr/lib/containers/storage.
# Three sources are tried in order:
#   1. an OCI layout provided at /run/.input/instructlab-intel, which is
#      pulled and then tagged as ${INSTRUCTLAB_IMAGE};
#   2. the registry, authenticated with the mounted .dockerconfigjson secret;
#   3. the registry, anonymously.
# The build already runs as root (USER root above), so podman is called
# directly in every branch; the earlier `sudo` in branches 2 and 3 was
# inconsistent with branch 1 and added a dependency on sudo being present.
# The image id is captured only where it is needed for tagging.
RUN --mount=type=secret,id=${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson \
    if [ -f "/run/.input/instructlab-intel/oci-layout" ]; then \
        IID=$(podman --root /usr/lib/containers/storage pull oci:/run/.input/instructlab-intel) && \
        podman --root /usr/lib/containers/storage image tag "${IID}" "${INSTRUCTLAB_IMAGE}"; \
    elif [ -f "/run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson" ]; then \
        podman --root /usr/lib/containers/storage pull --authfile "/run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson" "${INSTRUCTLAB_IMAGE}"; \
    else \
        podman --root /usr/lib/containers/storage pull "${INSTRUCTLAB_IMAGE}"; \
    fi
# Reset podman storage non-interactively (--force); stderr is discarded.
# No --root is passed, so this does not target the /usr/lib/containers/storage
# store used for the prepull. NOTE(review): presumably this clears state left
# in podman's default store during the build - confirm.
RUN podman system reset --force 2>/dev/null
|