#!/bin/bash
#
# Build sgl-kernel wheels inside a Docker "deps" image.
#
# Usage:
#   <this script> <PYTHON_VERSION> <CUDA_VERSION> [ARCH]
#
# Must be run from the directory that contains `Dockerfile` and
# `rename_wheels.sh`: that directory is used as the Docker build context and
# is bind-mounted into the build container at /sgl-kernel.
#
# Steps:
#   1. Build the `deps` target of ./Dockerfile with buildx (using a local
#      layer cache under ~/.cache/sgl-kernel/buildx) and load the image.
#   2. Run the wheel build in a container from that image, with a host
#      ccache directory mounted at /ccache.
#
# Optional environment variables:
#   USE_CCACHE            "1" enables ccache in the build container (default 1)
#   BUILD_JOBS            CMAKE_BUILD_PARALLEL_LEVEL override (default: auto)
#   NVCC_THREADS          value for -DSGL_KERNEL_COMPILE_THREADS (default 32)
#   ENABLE_CMAKE_PROFILE  forwarded as a Docker build arg when non-empty
#   ENABLE_BUILD_PROFILE  forwarded as a Docker build arg when non-empty
set -ex

if [[ $# -lt 2 ]]; then
  echo "Usage: $0 <PYTHON_VERSION> <CUDA_VERSION> [ARCH]" >&2
  exit 1
fi

# Print the machine architecture of the host.
# `uname -i` is non-portable: it may print "unknown" or be rejected as an
# invalid option, so fall back to `uname -m` in those cases.
detect_arch() {
  local arch
  arch="$(uname -i 2>/dev/null || true)"
  if [[ -z "${arch}" || "${arch}" == "unknown" ]]; then
    arch="$(uname -m)"
  fi
  printf '%s\n' "${arch}"
}

PYTHON_VERSION="$1"
CUDA_VERSION="$2"
ARCH="${3:-$(detect_arch)}"

# Select the builder base image for the target architecture.
if [[ "${ARCH}" == "aarch64" ]]; then
  BASE_IMG="pytorch/manylinuxaarch64-builder"
else
  BASE_IMG="pytorch/manylinux2_28-builder"
fi

# Host-side caches that persist across invocations: one for buildx layers,
# one for ccache objects produced inside the build container.
CACHE_DIR="${HOME}/.cache/sgl-kernel"
BUILDX_CACHE_DIR="${CACHE_DIR}/buildx"
CCACHE_HOST_DIR="${CACHE_DIR}/ccache"
mkdir -p "${BUILDX_CACHE_DIR}" "${CCACHE_HOST_DIR}"

# Create the dedicated docker-container buildx builder on first use,
# otherwise just select the existing one.
BUILDER_NAME="sgl-kernel-builder"
if ! docker buildx inspect "${BUILDER_NAME}" >/dev/null 2>&1; then
  docker buildx create --name "${BUILDER_NAME}" --driver docker-container --use --bootstrap
else
  docker buildx use "${BUILDER_NAME}"
fi

# Python tag with the dots removed from the version, e.g. 3.10 -> cp310-cp310.
PY_TAG="cp${PYTHON_VERSION//.}-cp${PYTHON_VERSION//.}"

DIST_DIR="dist"
mkdir -p "${DIST_DIR}"

echo "----------------------------------------"
echo "Build configuration"
echo "PYTHON_VERSION: ${PYTHON_VERSION}"
echo "CUDA_VERSION: ${CUDA_VERSION}"
echo "ARCH: ${ARCH}"
echo "BASE_IMG: ${BASE_IMG}"
echo "PYTHON_TAG: ${PY_TAG}"
echo "Output: ${DIST_DIR}/"
echo "Buildx cache: ${BUILDX_CACHE_DIR}"
echo "ccache dir: ${CCACHE_HOST_DIR}"
echo "Builder: ${BUILDER_NAME}"
echo "BUILD_JOBS: ${BUILD_JOBS:-auto}"
echo "NVCC_THREADS: ${NVCC_THREADS:-32}"
echo "USE_CCACHE: ${USE_CCACHE:-1}"
echo "----------------------------------------"

# Forward optional knobs as Docker build args, but only those that are set
# to a non-empty value in the caller's environment.
BUILD_ARGS=()
for opt_name in ENABLE_CMAKE_PROFILE ENABLE_BUILD_PROFILE USE_CCACHE BUILD_JOBS NVCC_THREADS; do
  opt_value="${!opt_name:-}"
  if [[ -n "${opt_value}" ]]; then
    BUILD_ARGS+=(--build-arg "${opt_name}=${opt_value}")
  fi
done

DEPS_TAG="sgl-kernel-deps:cuda${CUDA_VERSION}-${PY_TAG}-${ARCH}"

# Build only the `deps` stage and load it into the local image store so that
# `docker run` below can use it. Cache options are quoted so that a cache
# path containing whitespace stays a single argument.
docker buildx build \
  --builder "${BUILDER_NAME}" \
  -f Dockerfile \
  --build-arg BASE_IMG="${BASE_IMG}" \
  --build-arg CUDA_VERSION="${CUDA_VERSION}" \
  --build-arg ARCH="${ARCH}" \
  --build-arg PYTHON_VERSION="${PYTHON_VERSION}" \
  --build-arg PYTHON_TAG="${PY_TAG}" \
  "${BUILD_ARGS[@]}" \
  --cache-from "type=local,src=${BUILDX_CACHE_DIR}" \
  --cache-to "type=local,dest=${BUILDX_CACHE_DIR},mode=max" \
  --target deps \
  --load \
  -t "${DEPS_TAG}" \
  --network=host \
  .

echo "Deps image ready: ${DEPS_TAG}"

# Effective values (with defaults applied) handed to the in-container build.
CCACHE_FLAG="${USE_CCACHE:-1}"
BUILD_JOBS_FLAG="${BUILD_JOBS:-0}"
NVCC_THREADS_FLAG="${NVCC_THREADS:-32}"

# The inner script is one fully single-quoted string. Host values are passed
# as positional parameters ($1..$3) instead of being spliced into the script
# text, so they are always treated as data and never parsed as shell code.
# ARCH reaches the inner script through the container environment (-e ARCH).
docker run --rm \
  --network=host \
  -v "$(pwd):/sgl-kernel" \
  -v "${CCACHE_HOST_DIR}:/ccache" \
  -w /sgl-kernel \
  -e ARCH="${ARCH}" \
  "${DEPS_TAG}" \
  bash -c '
    set -eux

    USE_CCACHE="$1"
    BUILD_JOBS="$2"
    NVCC_THREADS="$3"

    if [ "${USE_CCACHE}" = "1" ]; then
      export CCACHE_DIR=/ccache
      export CCACHE_BASEDIR=/sgl-kernel
      export CCACHE_MAXSIZE=10G
      export CCACHE_COMPILERCHECK=content
      export CCACHE_COMPRESS=true
      export CCACHE_SLOPPINESS=file_macro,time_macros,include_file_mtime,include_file_ctime
      export CMAKE_C_COMPILER_LAUNCHER=ccache
      export CMAKE_CXX_COMPILER_LAUNCHER=ccache
      export CMAKE_CUDA_COMPILER_LAUNCHER=ccache
      echo "=== ccache stats (before) ==="
      ccache -sV
    fi

    if [ "${ARCH:-}" = "aarch64" ]; then
      # Fixed, conservative settings on ARM; BUILD_JOBS is not consulted here.
      export CUDA_NVCC_FLAGS="-Xcudafe --threads=8"
      export MAKEFLAGS="-j8"
      export CMAKE_BUILD_PARALLEL_LEVEL=2
      export NINJAFLAGS="-j4"
      echo "ARM detected: Using extra conservative settings (2 parallel jobs)"
    elif [ "${BUILD_JOBS}" -gt 0 ] 2>/dev/null; then
      # Explicit positive integer override; a non-numeric value makes the
      # test fail quietly and falls through to the automatic setting.
      export CMAKE_BUILD_PARALLEL_LEVEL="${BUILD_JOBS}"
    else
      # Automatic: two thirds of the available CPUs, capped at 64 and never
      # below 1 (the integer division yields 0 on a single-CPU machine).
      auto_jobs=$(( $(nproc) * 2 / 3 ))
      if [ "${auto_jobs}" -gt 64 ]; then
        auto_jobs=64
      fi
      if [ "${auto_jobs}" -lt 1 ]; then
        auto_jobs=1
      fi
      export CMAKE_BUILD_PARALLEL_LEVEL="${auto_jobs}"
    fi

    export CMAKE_ARGS="${CMAKE_ARGS:-} -DSGL_KERNEL_COMPILE_THREADS=${NVCC_THREADS}"
    echo "Build parallelism: CMAKE_BUILD_PARALLEL_LEVEL=${CMAKE_BUILD_PARALLEL_LEVEL}, NVCC_THREADS=${NVCC_THREADS}"

    # PYTHON_ROOT_PATH is not set by this script; it is expected to come from
    # the image environment (set -u aborts the build if it is missing).
    "${PYTHON_ROOT_PATH}/bin/python" -m uv build --wheel -Cbuild-dir=build . --color=always --no-build-isolation
    ./rename_wheels.sh

    if [ "${USE_CCACHE}" = "1" ]; then
      echo "=== ccache stats (after) ==="
      ccache -s
    fi
  ' sgl-kernel-build "${CCACHE_FLAG}" "${BUILD_JOBS_FLAG}" "${NVCC_THREADS_FLAG}"

echo "Done. Wheels are in ${DIST_DIR}/"
# Listing is informational only: tolerate the case where no wheel matches.
ls -lh "${DIST_DIR}"/*.whl 2>/dev/null || true