File size: 8,422 Bytes
c33a7ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# Images used by the stages below; override with --build-arg as needed.
ARG DRIVER_TOOLKIT_IMAGE=quay.io/ai-lab/intel-builder:latest
ARG BASEIMAGE=quay.io/centos-bootc/centos-bootc:stream9

# Driver package version and the repository the driver RPM is read from.
# These are declared before the first FROM, so every stage that needs one
# re-declares it with its own ARG line.
ARG DRIVER_VERSION=1.17.1-40
ARG HABANA_REPO=https://vault.habana.ai/artifactory/rhel/9/9.4
# No default; NOTE(review): not referenced by any stage visible in this file.
ARG REPOS_REPO

# Stage "builder": unpacks the habanalabs driver RPM into /home/builder and
# builds the kernel modules against the kernel-core package installed in the
# driver-toolkit image.
FROM ${DRIVER_TOOLKIT_IMAGE} AS builder
ARG DRIVER_VERSION
ARG HABANA_REPO
# SHAs taken from original Makefile at HL packages
ARG DRIVER_GIT_SHA=78932ae
ARG NIC_GIT_SHA=31d590f

WORKDIR /home/builder

# KERNEL_VERSION / TARGET_ARCH are read from the installed kernel-core RPM and
# passed to every make invocation as KVERSION.
# `set -o pipefail` makes a failing rpm2cpio abort the build instead of being
# masked by the exit status of cpio. The step already depends on bash (pushd),
# so the option is available.
RUN set -o pipefail \
    && . /etc/os-release \
    && export KERNEL_VERSION=$(rpm -q --qf '%{VERSION}-%{RELEASE}' kernel-core) \
    && export TARGET_ARCH=$(rpm -q --qf '%{ARCH}' kernel-core) \
    && export MAKEFLAGS="-j$(nproc)" \
    && rpm2cpio ${HABANA_REPO}/habanalabs-${DRIVER_VERSION}.el9.noarch.rpm | cpio -idmv \
    && pushd usr/src/habanalabs-${DRIVER_VERSION} \
    && MAKE_IB=1 make -f Makefile.nic KVERSION=${KERNEL_VERSION}.${TARGET_ARCH} GIT_SHA=${DRIVER_GIT_SHA} NIC_KMD_GIT_SHA=${NIC_GIT_SHA} \
    && make -f Makefile KVERSION=${KERNEL_VERSION}.${TARGET_ARCH} \
    && pushd drivers/infiniband/hw/hbl \
    && make KVERSION=${KERNEL_VERSION}.${TARGET_ARCH}

# Build libraries
# Stage "libbuilder": installs the toolchain and Habana packages needed to
# build libfabric and the HCCL OFI wrapper.
FROM ${DRIVER_TOOLKIT_IMAGE} AS libbuilder
ARG DRIVER_VERSION
ARG HABANA_REPO
# ARG values are exported to RUN steps as environment variables.
# NOTE(review): presumably read by os_dependencies.sh - confirm.
ARG ARTIFACTORY_URL="vault.habana.ai"

USER root
COPY --chmod=755 scripts/os_dependencies.sh /tmp/
# os_dependencies.sh runs first with the BUILDERS_TOKEN secret mounted.
# /etc/selinux is moved aside while dnf runs and restored afterwards
# (NOTE(review): presumably to keep package scriptlets away from the SELinux
# configuration during the build - confirm).
# Both the dnf and the legacy yum cache directories are removed.
RUN --mount=type=secret,id=extra-secrets-intel-bootc/BUILDERS_TOKEN /tmp/os_dependencies.sh \
    && mv /etc/selinux /etc/selinux.tmp \
    && dnf install -y --nodocs --allowerasing --best \
       gcc-c++ \
       git \
       habanalabs-graph-${DRIVER_VERSION}.el9 \
       make \
       rdma-core-devel \
       unzip \
    && dnf clean all && rm -rf /var/cache/dnf /var/cache/yum \
    && mv /etc/selinux.tmp /etc/selinux
# Build environment for libfabric and the HCCL OFI wrapper.
# LIBFABRIC_ROOT is derived from LIBFABRIC_VERSION, so the two must stay in
# separate ENV instructions (a variable set in an ENV instruction is only
# visible to later instructions).
ENV LIBFABRIC_VERSION="1.20.0"
ENV LIBFABRIC_ROOT="/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}"
# Append the inherited LD_LIBRARY_PATH only when it is set and non-empty.
# An unconditional ":$LD_LIBRARY_PATH" leaves a trailing ':' when the variable
# is undefined, and the dynamic loader treats an empty entry as the current
# working directory.
ENV LD_LIBRARY_PATH=${LIBFABRIC_ROOT}/lib:/usr/lib/habanalabs${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
ENV PATH=${LIBFABRIC_ROOT}/bin:$PATH
ENV RDMAV_FORK_SAFE=1
ENV PIP_NO_CACHE_DIR=on
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src
ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib

# Build libfabric ${LIBFABRIC_VERSION} from the upstream release tarball and
# install it under ${LIBFABRIC_ROOT}. The tarball and the extracted source
# tree are removed in the same layer.
# curl --fail (-f) aborts on an HTTP error instead of saving the error page as
# the tarball and failing later in tar with a misleading message.
RUN curl -fL -o /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2 && \
    cd /tmp/ && tar --no-same-owner -xf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 && \
    cd /tmp/libfabric-${LIBFABRIC_VERSION} && \
    ./configure --prefix=$LIBFABRIC_ROOT --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr && \
    make -j$(nproc) && make install && cd / && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION}
# HCCL OFI wrapper: build libhccl_ofi_wrapper.so from the HabanaAI repository
# and place it in /usr/lib/habanalabs; the archive and sources are removed in
# the same layer.
# curl --fail (-f) aborts on an HTTP error instead of handing an error page to
# unzip.
# NOTE(review): this tracks the moving `main` branch, so the build is not
# reproducible; consider pinning a tag or commit archive.
RUN curl -fL -o /tmp/main.zip https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/heads/main.zip && \
    unzip /tmp/main.zip -d /tmp && \
    cd /tmp/hccl_ofi_wrapper-main && \
    make && cp -f libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so && \
    cd / && \
    rm -rf /tmp/main.zip /tmp/hccl_ofi_wrapper-main

# Final stage: the bootc image that receives the built artifacts.
FROM ${BASEIMAGE}
# Inherit DRIVER_VERSION from the global ARG instead of repeating its default
# here. A stage-local default shadows the global one, so the two could drift
# apart and break the COPY paths below.
ARG DRIVER_VERSION
ARG ARTIFACTORY_URL="vault.habana.ai"

USER root
# Kernel modules built in the builder stage, staged under /tmp/extra.
COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/accel/habanalabs/habanalabs.ko /tmp/extra/habanalabs.ko
COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/infiniband/hw/hbl/habanalabs_ib.ko /tmp/extra/habanalabs_ib.ko
COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/net/ethernet/intel/hbl_cn/habanalabs_cn.ko /tmp/extra/habanalabs_cn.ko
COPY --from=builder /home/builder/usr/src/habanalabs-${DRIVER_VERSION}/drivers/net/ethernet/intel/hbl_en/habanalabs_en.ko /tmp/extra/habanalabs_en.ko
# Files the driver RPM unpacked into the builder's working directory
# (/home/builder): configuration, Gaudi firmware and sbin tools.
COPY --from=builder /home/builder/etc/ /etc/
COPY --from=builder /home/builder/lib/firmware/habanalabs/gaudi/ /lib/firmware/habanalabs/gaudi/
COPY --from=builder /home/builder/usr/sbin /usr/sbin/
# Libraries built in the libbuilder stage. The libfabric path must match the
# LIBFABRIC_VERSION set in that stage.
COPY --from=libbuilder /usr/lib/habanalabs/libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so
COPY --from=libbuilder /opt/habanalabs/libfabric-1.20.0 /opt/habanalabs/libfabric-1.20.0
COPY --chmod=755 scripts/os_dependencies.sh /tmp/

# Install Python 3.11 and the build toolchain, then run os_dependencies.sh
# with the BUILDERS_TOKEN secret mounted. /etc/selinux is moved aside for the
# duration of the step and restored at the end.
RUN --mount=type=secret,id=extra-secrets-intel-bootc/BUILDERS_TOKEN \
    mv /etc/selinux /etc/selinux.tmp && \
    dnf install -y \
        gcc-c++ \
        git \
        make \
        python3.11 \
        python3.11-devel \
        python3.11-pip \
        unzip && \
    dnf clean all && \
    rm -rf /var/cache/yum && \
    /tmp/os_dependencies.sh && \
    mv /etc/selinux.tmp /etc/selinux

# Build ninja from source and install the resulting binary.
# `configure.py --bootstrap` leaves the `ninja` executable inside the
# checkout. The previous `rm -rf ninja` ran from inside that checkout, so it
# deleted the freshly built binary and left the source tree in the image.
# The binary goes to /usr/bin (NOTE(review): chosen so it sits under /usr in
# the bootc image - adjust if another location was intended). The checkout is
# removed in the same layer.
RUN git clone --depth 1 https://github.com/ninja-build/ninja.git /tmp/ninja-src \
    && cd /tmp/ninja-src \
    && ./configure.py --bootstrap \
    && install -m 0755 ninja /usr/bin/ninja \
    && cd / \
    && rm -rf /tmp/ninja-src

# Update the OS (kernel and microcode excluded), install the Habana user-space
# packages matching DRIVER_VERSION, and register the kernel modules staged in
# /tmp/extra:
#   - the Habana repo definitions are deleted after the install so they do not
#     remain in the image;
#   - python3.11-devel is removed again without auto-removing its dependencies;
#   - /tmp/extra is moved into the module tree of the installed kernel-core,
#     a softdep loads habanalabs_ib after habanalabs, and depmod rebuilds the
#     module index for that kernel.
# The kernel glob is quoted so the shell cannot expand it, and the final
# cleanup uses an absolute /tmp path so it does not depend on the working
# directory.
RUN . /etc/os-release \
    && export OS_VERSION_MAJOR=$(echo ${VERSION} | cut -d'.' -f 1) \
    && export KERNEL_VERSION=$(rpm -q --qf '%{VERSION}-%{RELEASE}' kernel-core) \
    && export TARGET_ARCH=$(rpm -q --qf '%{ARCH}' kernel-core) \
    && mv /etc/selinux /etc/selinux.tmp \
    && dnf -y update --exclude='kernel*' --exclude=microcode_ctl \
    && dnf install -y --nodocs --allowerasing --best \
    libogg-devel \
    libid3tag \
    opusfile-devel \
    sox-devel \
    libnl3-devel \
    habanalabs-rdma-core-${DRIVER_VERSION}.el9 \
    habanalabs-thunk-${DRIVER_VERSION}.el9 \
    habanalabs-firmware-${DRIVER_VERSION}.el9 \
    habanalabs-firmware-tools-${DRIVER_VERSION}.el9 \
    habanalabs-graph-${DRIVER_VERSION}.el9 \
    habanalabs-qual-${DRIVER_VERSION}.el9 \
    habanalabs-firmware-odm-${DRIVER_VERSION}.el9 \
    && rm -f /etc/yum.repos.d/habanalabs.repo && rm -f /etc/yum.repos.d/habana.repo \
    && dnf remove -y --noautoremove \
    python3.11-devel \
    && dnf clean all && rm -rf /var/cache/yum \
    && mv /etc/selinux.tmp /etc/selinux \
    && mv /tmp/extra /usr/lib/modules/${KERNEL_VERSION}.${TARGET_ARCH} \
    && echo "softdep habanalabs post: habanalabs_ib" > /etc/modprobe.d/habanalabs_ib_dep.conf \
    && depmod -a ${KERNEL_VERSION}.${TARGET_ARCH} \
    && rm -rf /tmp/*

# Pin the Python packaging tools and install habana_media_loader.
# The loader version is derived from DRIVER_VERSION ("1.17.1-40" becomes
# "1.17.1.40") so overriding the driver version keeps the two in step.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python3.11 -m pip install --no-cache-dir \
        pip==23.3.1 \
        setuptools==67.3.3 \
        wheel==0.38.4 \
        habana_media_loader=="$(echo "${DRIVER_VERSION}" | tr '-' '.')"

# Optional space-separated list of extra RPMs to install, supplied with
# --build-arg. It must be declared in this stage; without the ARG the variable
# below always expands to nothing.
ARG EXTRA_RPM_PACKAGES=''
# Install the extra RPMs plus skopeo, cloud-init and rsync, then hook
# cloud-init.target into default.target. The wants directory is created first
# so the symlink is always placed inside it, never created in its place.
RUN mv /etc/selinux /etc/selinux.tmp \
    && dnf install -y ${EXTRA_RPM_PACKAGES} \
    skopeo \
    cloud-init \
    rsync \
    && dnf clean all \
    && mv /etc/selinux.tmp /etc/selinux \
    && mkdir -p /usr/lib/systemd/system/default.target.wants \
    && ln -s ../cloud-init.target /usr/lib/systemd/system/default.target.wants


ARG SSHPUBKEY
# The --build-arg "SSHPUBKEY=$(cat ~/.ssh/id_rsa.pub)" option inserts your
# public key into the image, allowing root access via ssh.
# When the argument is empty this step does nothing. Otherwise sshd is pointed
# at /usr/ssh/%u.keys (in addition to the per-user files) and the key is
# written to /usr/ssh/root.keys with mode 0600.
# The key is written with a quoted printf so it is stored verbatim, with no
# word-splitting or glob expansion.
RUN if [ -n "${SSHPUBKEY}" ]; then \
    set -eu; mkdir -p /usr/ssh && \
        echo 'AuthorizedKeysFile /usr/ssh/%u.keys .ssh/authorized_keys .ssh/authorized_keys2' >> /etc/ssh/sshd_config.d/30-auth-system.conf && \
        printf '%s\n' "${SSHPUBKEY}" > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys; \
fi
# Register /usr/lib/containers/storage as an additional image store.
# The entry is appended after the line matching "additionalimage" in
# storage.conf, and only when the path is not already present, so a base image
# that ships the setting is left untouched.
# Remove once the base images have this set by default.
RUN if ! grep -q /usr/lib/containers/storage /etc/containers/storage.conf; then \
        sed -i -e '/additionalimage.*/a "/usr/lib/containers/storage",' /etc/containers/storage.conf; \
    fi
# Install the ilab wrapper as an executable. Setting the mode on COPY avoids
# the extra layer a separate `RUN chmod` creates and matches the other
# COPY --chmod instructions in this file.
COPY --chmod=755 duplicated/ilab-wrapper/ilab /usr/bin/ilab
# Image reference substituted into the ilab wrapper and pre-pulled later in
# this stage, and the id prefix of the build secret used for that pull.
ARG INSTRUCTLAB_IMAGE="quay.io/ai-lab/intel-instructlab:latest"
ARG INSTRUCTLAB_IMAGE_PULL_SECRET="extra-secrets-intel-bootc"
# Fill in the placeholders in every /usr/bin/ilab* file: the training device
# becomes "hpu" and the image name becomes ${INSTRUCTLAB_IMAGE}.
RUN for wrapper in /usr/bin/ilab*; do \
        sed -i \
            -e 's/__REPLACE_TRAIN_DEVICE__/hpu/' \
            -e "s%__REPLACE_IMAGE_NAME__%${INSTRUCTLAB_IMAGE}%" \
            $wrapper; \
    done

# Added for running as an OCI Container to prevent Overlay on Overlay issues.
VOLUME /var/lib/containers
# Prepull the instructlab image into the additional image store at
# /usr/lib/containers/storage. Three sources are tried in order:
#   1. an OCI layout at /run/.input/instructlab-intel: pulled from disk and
#      tagged as ${INSTRUCTLAB_IMAGE};
#   2. a .dockerconfigjson build secret under
#      /run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}: registry pull using it
#      as the auth file;
#   3. otherwise: registry pull without an auth file.
# NOTE(review): branches 2 and 3 run podman through sudo while branch 1 does
# not, and they capture IID without using it - confirm whether sudo is needed.
RUN --mount=type=secret,id=${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson \
    if [ -f "/run/.input/instructlab-intel/oci-layout" ]; then \
         IID=$(podman --root /usr/lib/containers/storage pull oci:/run/.input/instructlab-intel) && \
         podman --root /usr/lib/containers/storage image tag ${IID} ${INSTRUCTLAB_IMAGE}; \
    elif [ -f "/run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson" ]; then \
         IID=$(sudo podman --root /usr/lib/containers/storage pull --authfile /run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson ${INSTRUCTLAB_IMAGE}); \
    else \
         IID=$(sudo podman --root /usr/lib/containers/storage pull ${INSTRUCTLAB_IMAGE}); \
    fi
# Reset podman storage without prompting; stderr is discarded. No --root is
# given, so this targets podman's default store rather than
# /usr/lib/containers/storage.
# NOTE(review): presumably clears state left behind by the pulls - confirm.
RUN podman system reset --force 2>/dev/null