#!/bin/bash
# Abort on the first failing command (-e) and trace each command (-x).
set -ex

# PYTHON_VERSION and CUDA_VERSION are mandatory; ARCH is optional.
# The usage text is a diagnostic, so it is written to stderr, keeping stdout
# clean for anything that captures this script's output.
if [ "$#" -lt 2 ]; then
  echo "Usage: $0 <PYTHON_VERSION> <CUDA_VERSION> [ARCH]" >&2
  exit 1
fi
PYTHON_VERSION="$1" # e.g. 3.10
CUDA_VERSION="$2"   # e.g. 12.9

# Optional third argument overrides the detected architecture.
# `uname -m` is used for detection: `uname -i` is non-portable and prints
# "unknown" on many Linux distributions, which would leak into the ARCH
# build-arg and the image tag.
ARCH="${3:-$(uname -m)}"

# Pick the builder base image matching the target architecture; anything that
# is not aarch64 uses the manylinux2_28 builder.
if [ "${ARCH}" = "aarch64" ]; then
  BASE_IMG="pytorch/manylinuxaarch64-builder"
else
  BASE_IMG="pytorch/manylinux2_28-builder"
fi
# Persistent build caches are kept under the user's home directory so that
# they are not lost when the workspace is cleaned or checked out again.
#   buildx/ - docker buildx layer cache
#   ccache/ - compiler cache shared with the build container
CACHE_DIR="${HOME}/.cache/sgl-kernel"
BUILDX_CACHE_DIR="${CACHE_DIR}/buildx"
CCACHE_HOST_DIR="${CACHE_DIR}/ccache"
mkdir -p "${CACHE_DIR}"/{buildx,ccache}
# Cache export needs a buildx builder backed by the docker-container driver.
# Reuse the named builder when it already exists; otherwise create it,
# select it and bootstrap it in one step.
BUILDER_NAME="sgl-kernel-builder"
if docker buildx inspect "${BUILDER_NAME}" &>/dev/null; then
  docker buildx use "${BUILDER_NAME}"
else
  docker buildx create \
    --name "${BUILDER_NAME}" \
    --driver docker-container \
    --use \
    --bootstrap
fi
# Python tag: "cp" + version without dots, repeated with a dash in between
# (e.g. 3.10 -> cp310-cp310).
PY_TAG="cp${PYTHON_VERSION//./}"
PY_TAG="${PY_TAG}-${PY_TAG}"

# Wheels are collected in this directory (relative to the current directory).
DIST_DIR="dist"
mkdir -p "${DIST_DIR}"
# Print a summary of the effective build configuration, one item per line.
# Unset tuning knobs are shown with the defaults written here.
printf '%s\n' \
  "----------------------------------------" \
  "Build configuration" \
  "PYTHON_VERSION: ${PYTHON_VERSION}" \
  "CUDA_VERSION: ${CUDA_VERSION}" \
  "ARCH: ${ARCH}" \
  "BASE_IMG: ${BASE_IMG}" \
  "PYTHON_TAG: ${PY_TAG}" \
  "Output: ${DIST_DIR}/" \
  "Buildx cache: ${BUILDX_CACHE_DIR}" \
  "ccache dir: ${CCACHE_HOST_DIR}" \
  "Builder: ${BUILDER_NAME}" \
  "BUILD_JOBS: ${BUILD_JOBS:-auto}" \
  "NVCC_THREADS: ${NVCC_THREADS:-32}" \
  "USE_CCACHE: ${USE_CCACHE:-1}" \
  "----------------------------------------"
# Optional build-args: each variable below is forwarded to the image build as
# `--build-arg NAME=value` only when it is set to a non-empty value.
BUILD_ARGS=()
for opt_name in \
  ENABLE_CMAKE_PROFILE \
  ENABLE_BUILD_PROFILE \
  USE_CCACHE \
  BUILD_JOBS \
  NVCC_THREADS; do
  # ${!opt_name} is indirect expansion: the value of the variable named by opt_name.
  if [ -n "${!opt_name:-}" ]; then
    BUILD_ARGS+=(--build-arg "${opt_name}=${!opt_name}")
  fi
done
unset opt_name
# ---- Step 1: Build deps image (layer cached, fast on repeat) ----
# Builds the `deps` target of ./Dockerfile with the named buildx builder and
# loads the result into the local image store under DEPS_TAG. Layers are
# imported from and exported to the local buildx cache directory.
DEPS_TAG="sgl-kernel-deps:cuda${CUDA_VERSION}-${PY_TAG}-${ARCH}"
# The cache arguments are quoted as a whole so that a cache path containing
# whitespace is passed to docker as a single word.
docker buildx build \
  --builder "${BUILDER_NAME}" \
  -f Dockerfile . \
  --build-arg BASE_IMG="${BASE_IMG}" \
  --build-arg CUDA_VERSION="${CUDA_VERSION}" \
  --build-arg ARCH="${ARCH}" \
  --build-arg PYTHON_VERSION="${PYTHON_VERSION}" \
  --build-arg PYTHON_TAG="${PY_TAG}" \
  "${BUILD_ARGS[@]}" \
  --cache-from "type=local,src=${BUILDX_CACHE_DIR}" \
  --cache-to "type=local,dest=${BUILDX_CACHE_DIR},mode=max" \
  --target deps \
  --load \
  -t "${DEPS_TAG}" \
  --network=host
echo "Deps image ready: ${DEPS_TAG}"
# ---- Step 2: Build wheel with host-mounted ccache ----
# The wheel is built in a throwaway container started from the deps image.
# The current directory is mounted at /sgl-kernel and the host ccache
# directory at /ccache, so the compiler cache persists on the host filesystem
# across builds.
CCACHE_FLAG="${USE_CCACHE:-1}"
BUILD_JOBS_FLAG="${BUILD_JOBS:-0}"
NVCC_THREADS_FLAG="${NVCC_THREADS:-32}"
# Host-side settings are handed to the in-container script as positional
# parameters ($1..$3) instead of being spliced into the script text, so a
# value containing spaces or shell metacharacters cannot change the script.
# ARCH is read inside the container from the environment set with -e.
docker run --rm \
  --network=host \
  -v "$(pwd):/sgl-kernel" \
  -v "${CCACHE_HOST_DIR}:/ccache" \
  -w /sgl-kernel \
  -e ARCH="${ARCH}" \
  "${DEPS_TAG}" \
  bash -c '
    set -eux
    USE_CCACHE="$1"
    BUILD_JOBS="$2"
    NVCC_THREADS="$3"

    # Route C, C++ and CUDA compilation through ccache when enabled.
    if [ "${USE_CCACHE}" = "1" ]; then
      export CCACHE_DIR=/ccache
      export CCACHE_BASEDIR=/sgl-kernel
      export CCACHE_MAXSIZE=10G
      export CCACHE_COMPILERCHECK=content
      export CCACHE_COMPRESS=true
      export CCACHE_SLOPPINESS=file_macro,time_macros,include_file_mtime,include_file_ctime
      export CMAKE_C_COMPILER_LAUNCHER=ccache
      export CMAKE_CXX_COMPILER_LAUNCHER=ccache
      export CMAKE_CUDA_COMPILER_LAUNCHER=ccache
      echo "=== ccache stats (before) ==="
      ccache -sV
    fi

    # Build parallelism:
    #   aarch64        -> fixed conservative settings (BUILD_JOBS is ignored)
    #   BUILD_JOBS > 0 -> use the requested job count
    #   otherwise      -> two thirds of the CPUs, clamped to the range 1..64
    if [ "${ARCH}" = "aarch64" ]; then
      export CUDA_NVCC_FLAGS="-Xcudafe --threads=8"
      export MAKEFLAGS="-j8"
      export CMAKE_BUILD_PARALLEL_LEVEL=2
      export NINJAFLAGS="-j4"
      echo "ARM detected: Using extra conservative settings (2 parallel jobs)"
    elif [ "${BUILD_JOBS}" -gt 0 ] 2>/dev/null; then
      # A non-numeric BUILD_JOBS makes the test fail quietly and falls
      # through to the automatic setting below.
      export CMAKE_BUILD_PARALLEL_LEVEL="${BUILD_JOBS}"
    else
      auto_jobs=$(( $(nproc) * 2 / 3 ))
      if [ "${auto_jobs}" -gt 64 ]; then
        auto_jobs=64
      fi
      # Integer division yields 0 on a single-CPU machine; keep at least 1 job.
      if [ "${auto_jobs}" -lt 1 ]; then
        auto_jobs=1
      fi
      export CMAKE_BUILD_PARALLEL_LEVEL="${auto_jobs}"
    fi

    export CMAKE_ARGS="${CMAKE_ARGS:-} -DSGL_KERNEL_COMPILE_THREADS=${NVCC_THREADS}"
    echo "Build parallelism: CMAKE_BUILD_PARALLEL_LEVEL=${CMAKE_BUILD_PARALLEL_LEVEL}, NVCC_THREADS=${NVCC_THREADS}"

    # PYTHON_ROOT_PATH is expected to be provided by the deps image
    # environment; with set -u the script aborts here if it is missing.
    "${PYTHON_ROOT_PATH}/bin/python" -m uv build --wheel -Cbuild-dir=build . --color=always --no-build-isolation
    ./rename_wheels.sh

    if [ "${USE_CCACHE}" = "1" ]; then
      echo "=== ccache stats (after) ==="
      ccache -s
    fi
  ' sgl-kernel-build "${CCACHE_FLAG}" "${BUILD_JOBS_FLAG}" "${NVCC_THREADS_FLAG}"
printf '%s\n' "Done. Wheels are in ${DIST_DIR}/"
# Best-effort listing of the produced wheels: a missing match or an ls error
# is silenced and never fails the script.
ls -lh "${DIST_DIR}"/*.whl 2>/dev/null || :
|