|
|
import logging |
|
|
import os |
|
|
from shutil import which, move |
|
|
import subprocess |
|
|
import sys |
|
|
from pathlib import Path |
|
|
|
|
|
from setuptools import Extension, find_packages, setup |
|
|
from setuptools.command.build_ext import build_ext |
|
|
|
|
|
# Module-level logger; CMakeBuild uses it to report MAX_JOBS / NVCC_THREADS overrides.
logger = logging.getLogger(name=__name__)
|
|
|
|
|
|
|
|
def is_sccache_available() -> bool:
    """Report whether an ``sccache`` executable can be located on ``PATH``."""
    sccache_path = which("sccache")
    return sccache_path is not None
|
|
|
|
|
|
|
|
def is_ccache_available() -> bool:
    """Report whether a ``ccache`` executable can be located on ``PATH``."""
    ccache_path = which("ccache")
    return ccache_path is not None
|
|
|
|
|
|
|
|
def is_ninja_available() -> bool:
    """Report whether a ``ninja`` executable can be located on ``PATH``."""
    ninja_path = which("ninja")
    return ninja_path is not None
|
|
|
|
|
|
|
|
class CMakeExtension(Extension):
    """Extension placeholder that records where its CMake project lives.

    The extension is registered with an empty source list and
    ``py_limited_api=True``; the only extra state it carries is
    ``sourcedir``, the absolute path of the directory given at construction.
    """

    def __init__(self, name: str, sourcedir: str = "") -> None:
        """Create the extension.

        Args:
            name: Dotted name of the extension module.
            sourcedir: Directory to resolve into ``self.sourcedir``. The
                default empty string resolves to the current working
                directory.
        """
        super().__init__(name, sources=[], py_limited_api=True)
        # Stored as a plain string (not a Path) holding the resolved location.
        resolved_root = Path(sourcedir).resolve()
        self.sourcedir = os.fspath(resolved_root)
|
|
|
|
|
|
|
|
class CMakeBuild(build_ext):
    """``build_ext`` command that configures and builds an extension with CMake.

    Environment variables read during the build:

    * ``DEBUG`` -- when the command's own ``debug`` option is ``None``, a
      non-zero integer selects the ``Debug`` configuration (default
      ``Release``).
    * ``CMAKE_GENERATOR`` -- when unset or equal to ``Ninja``, the ``ninja``
      Python package (if importable) is used as the build tool.
    * ``CMAKE_ARGS`` -- extra space-separated arguments appended to the
      configure command.
    * ``MAX_JOBS`` -- overrides the detected number of parallel build jobs.
    * ``NVCC_THREADS`` -- forwarded to CMake as ``-DNVCC_THREADS`` (when
      non-zero) and used to scale down the job count.
    """

    @staticmethod
    def _resolve_job_counts() -> "tuple[int, int]":
        """Return ``(num_jobs, nvcc_threads)`` derived from the environment.

        Raises:
            ValueError: If ``MAX_JOBS`` or ``NVCC_THREADS`` is set to something
                that is not an integer.
        """
        max_jobs_env = os.getenv("MAX_JOBS")
        if max_jobs_env is not None:
            num_jobs = int(max_jobs_env)
            logger.info("Using MAX_JOBS=%d as the number of jobs.", num_jobs)
        else:
            try:
                # os.sched_getaffinity() is not available on every platform.
                num_jobs = len(os.sched_getaffinity(0))
            except AttributeError:
                # os.cpu_count() may return None when the count cannot be
                # determined; fall back to a single job in that case.
                num_jobs = os.cpu_count() or 1

        nvcc_threads_env = os.getenv("NVCC_THREADS")
        if nvcc_threads_env is not None:
            nvcc_threads = int(nvcc_threads_env)
            logger.info(
                "Using NVCC_THREADS=%d as the number of nvcc threads.", nvcc_threads
            )
        else:
            nvcc_threads = 1

        # Scale the job count down by the nvcc thread count (presumably so that
        # jobs x nvcc threads stays within the CPU budget -- TODO confirm).
        # NVCC_THREADS=0 must not divide by zero: it is treated as "no scaling".
        num_jobs = max(1, num_jobs // (nvcc_threads or 1))
        return num_jobs, nvcc_threads

    def build_extension(self, ext: CMakeExtension) -> None:
        """Configure and build ``ext`` with CMake inside ``self.build_temp``.

        Args:
            ext: The extension to build; its ``sourcedir`` is passed to CMake
                as the source directory.

        Raises:
            subprocess.CalledProcessError: If the configure or build step
                exits with a non-zero status.
            ValueError: If ``DEBUG``, ``MAX_JOBS`` or ``NVCC_THREADS`` holds a
                non-integer value.
        """
        ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name)
        extdir = ext_fullpath.parent.resolve()

        debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug
        cfg = "Debug" if debug else "Release"

        cmake_generator = os.environ.get("CMAKE_GENERATOR", "")

        cmake_args = [
            f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}{os.sep}",
            f"-DPython_EXECUTABLE={sys.executable}",
            f"-DCMAKE_BUILD_TYPE={cfg}",
        ]
        build_args = []
        if "CMAKE_ARGS" in os.environ:
            cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item]

        if not cmake_generator or cmake_generator == "Ninja":
            try:
                import ninja
            except ImportError:
                # Best effort: without the ninja package CMake picks its own
                # default generator.
                pass
            else:
                ninja_executable_path = Path(ninja.BIN_DIR) / "ninja"
                cmake_args += [
                    "-GNinja",
                    f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}",
                ]

        # Prefer sccache over ccache; use neither when none is on PATH.
        if is_sccache_available():
            launcher = "sccache"
        elif is_ccache_available():
            launcher = "ccache"
        else:
            launcher = None
        if launcher is not None:
            cmake_args += [
                f"-DCMAKE_{language}_COMPILER_LAUNCHER={launcher}"
                for language in ("C", "CXX", "CUDA", "HIP")
            ]

        num_jobs, nvcc_threads = self._resolve_job_counts()

        build_args += [f"-j{num_jobs}"]
        if sys.platform == "win32":
            build_args += ["--config", cfg]

        if nvcc_threads:
            cmake_args += ["-DNVCC_THREADS={}".format(nvcc_threads)]

        build_temp = Path(self.build_temp) / ext.name
        build_temp.mkdir(parents=True, exist_ok=True)

        subprocess.run(
            ["cmake", ext.sourcedir, *cmake_args], cwd=build_temp, check=True
        )
        subprocess.run(
            ["cmake", "--build", ".", *build_args], cwd=build_temp, check=True
        )

        if sys.platform == "win32":
            # Multi-config generators place artifacts in a per-configuration
            # subdirectory; move them up next to the package. Single-config
            # generators such as Ninja never create that subdirectory, so its
            # absence is not an error.
            config_dir = extdir / cfg
            if config_dir.is_dir():
                for filename in os.listdir(config_dir):
                    move(config_dir / filename, extdir / filename)
|
|
|
|
|
|
|
|
|
|
|
# Package definition: Python sources are discovered under ``torch-ext`` and the
# native module is built by the CMakeBuild command.
setup(
    # Distribution metadata.
    name="layer_norm",
    version="0.1.0",
    python_requires=">=3.9",
    install_requires=["torch"],
    # Pure-Python package layout.
    package_dir={"": "torch-ext"},
    packages=find_packages(where="torch-ext", include=["layer_norm*"]),
    # Native extension, built through the custom build_ext command.
    cmdclass={"build_ext": CMakeBuild},
    ext_modules=[CMakeExtension("layer_norm._layer_norm_711aa42_dirty")],
    zip_safe=False,
)