medmekk committed on
Commit
3370f0d
·
verified ·
1 Parent(s): 1bc1c3d

Delete cmake

Browse files
Files changed (2) hide show
  1. cmake/hipify.py +0 -76
  2. cmake/utils.cmake +0 -550
cmake/hipify.py DELETED
@@ -1,76 +0,0 @@
1
- #!/usr/bin/env python3
2
- # SPDX-License-Identifier: Apache-2.0
3
-
4
- # From vLLM: https://github.com/vllm-project/vllm/blob/main/cmake/hipify.py
5
-
6
- #
7
- # A command line tool for running pytorch's hipify preprocessor on CUDA
8
- # source files.
9
- #
10
- # See https://github.com/ROCm/hipify_torch
11
- # and <torch install dir>/utils/hipify/hipify_python.py
12
- #
13
-
14
- import argparse
15
- import os
16
- import shutil
17
-
18
- from torch.utils.hipify.hipify_python import hipify
19
-
20
def _parse_args():
    """Parse the command line of the hipify wrapper.

    Returns:
        argparse.Namespace with ``project_dir``, ``output_dir`` and
        ``sources`` (a possibly empty list of paths).
    """
    parser = argparse.ArgumentParser()

    # Project directory where all the source + include files live.
    # Required: it is joined into a path below, so a missing value would
    # otherwise surface as an opaque TypeError instead of a usage error.
    parser.add_argument(
        "-p",
        "--project_dir",
        required=True,
        help="The project directory.",
    )

    # Directory where hipified files are written. Required for the same
    # reason as --project_dir (it is handed straight to shutil.copytree).
    parser.add_argument(
        "-o",
        "--output_dir",
        required=True,
        help="The output directory.",
    )

    # Source files to convert.
    parser.add_argument("sources",
                        help="Source files to hipify.",
                        nargs="*",
                        default=[])

    return parser.parse_args()


def _hipified_path(hipify_result, source_abs):
    """Return the hipified path recorded for ``source_abs``.

    Falls back to ``source_abs`` itself when the result has no entry for it
    or the entry's ``hipified_path`` is None.
    """
    if source_abs in hipify_result:
        hipified = hipify_result[source_abs].hipified_path
        if hipified is not None:
            return hipified
    return source_abs


def main():
    """Copy the project tree, hipify the requested sources, print the results.

    One output path is printed per input source, in input order, separated by
    newlines.
    """
    args = _parse_args()

    # Limit include scope to project_dir only.
    includes = [os.path.join(args.project_dir, '*')]

    # Absolute path of every source file; also used as the lookup key into
    # the hipify result below.
    sources_abs = [os.path.abspath(s) for s in args.sources]

    # Copy sources from project directory to output directory.
    # The directory might already exist to hold object files so we ignore that.
    shutil.copytree(args.project_dir, args.output_dir, dirs_exist_ok=True)

    hipify_result = hipify(project_directory=args.project_dir,
                           output_directory=args.output_dir,
                           header_include_dirs=[],
                           includes=includes,
                           extra_files=sources_abs,
                           show_detailed=True,
                           is_pytorch_extension=True,
                           hipify_extra_files_only=True)

    hipified_sources = [
        _hipified_path(hipify_result, s_abs) for s_abs in sources_abs
    ]

    # Print hipified source files.
    print("\n".join(hipified_sources))


if __name__ == '__main__':
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cmake/utils.cmake DELETED
@@ -1,550 +0,0 @@
1
- # Vendored from vLLM:
2
- #
3
- # https://github.com/vllm-project/vllm/blob/main/cmake/utils.cmake
4
- #
5
#
# find_python_from_executable(EXECUTABLE SUPPORTED_VERSIONS...)
#
# Locate the Python package that belongs to the interpreter `EXECUTABLE` and
# verify that its `<major>.<minor>` version is one of `SUPPORTED_VERSIONS`
# (any extra arguments are treated as further supported versions).
#
# Aborts configuration with a fatal error when no Python is found or when the
# version is not in the supported list.
#
# This is a macro, so `Python_EXECUTABLE`, the `Python_*` results of
# `find_package`, `EXECUTABLE`, `_VER` and `_SUPPORTED_VERSIONS_LIST` are all
# set in the caller's scope.
#
# NOTE(review): because macro arguments are substituted textually, the
# `${EXECUTABLE}` references after `file(REAL_PATH ...)` presumably still
# expand to the path the caller passed, not the resolved one -- confirm
# before relying on symlink resolution here.
#
macro(find_python_from_executable EXECUTABLE SUPPORTED_VERSIONS)
  set(_SUPPORTED_VERSIONS_LIST ${SUPPORTED_VERSIONS} ${ARGN})

  file(REAL_PATH ${EXECUTABLE} EXECUTABLE)
  set(Python_EXECUTABLE ${EXECUTABLE})
  find_package(
    Python
    COMPONENTS Interpreter Development.Module Development.SABIModule)
  if(NOT Python_FOUND)
    message(FATAL_ERROR "Unable to find python matching: ${EXECUTABLE}.")
  endif()

  set(_VER "${Python_VERSION_MAJOR}.${Python_VERSION_MINOR}")
  if(_VER IN_LIST _SUPPORTED_VERSIONS_LIST)
    message(STATUS "Found python matching: ${EXECUTABLE}.")
  else()
    message(FATAL_ERROR
      "Python version (${_VER}) is not one of the supported versions: "
      "${_SUPPORTED_VERSIONS_LIST}.")
  endif()
endmacro()
24
-
25
#
# run_python(OUT EXPR ERR_MSG)
#
# Execute `"${Python_EXECUTABLE}" -c "${EXPR}"`.
#
#   OUT     - name of the variable (in the caller's scope) that receives the
#             interpreter's standard output with trailing whitespace removed.
#   EXPR    - Python source passed to `-c`.
#   ERR_MSG - prefix of the fatal error raised when the process does not exit
#             with status 0; the captured standard error is appended to it.
#
function(run_python OUT EXPR ERR_MSG)
  execute_process(
    COMMAND "${Python_EXECUTABLE}" "-c" "${EXPR}"
    RESULT_VARIABLE _py_status
    OUTPUT_VARIABLE _py_stdout
    ERROR_VARIABLE _py_stderr
    OUTPUT_STRIP_TRAILING_WHITESPACE)

  if(_py_status EQUAL 0)
    # Left unquoted on purpose: output containing `;` is handed back as a
    # list, and empty output leaves `OUT` unset in the caller.
    set(${OUT} ${_py_stdout} PARENT_SCOPE)
  else()
    message(FATAL_ERROR "${ERR_MSG}: ${_py_stderr}")
  endif()
endfunction()
44
-
45
#
# append_cmake_prefix_path(PKG EXPR)
#
# Import the Python package `PKG`, print the value of the Python expression
# `EXPR`, and append whatever was printed to `CMAKE_PREFIX_PATH` (e.g. so the
# torch cmake configuration can be found by `find_package`).
#
# Implemented as a macro so that `CMAKE_PREFIX_PATH` is modified in the
# caller's scope; `_PREFIX_PATH` is left behind there as well.
#
macro(append_cmake_prefix_path PKG EXPR)
  run_python(
    _PREFIX_PATH
    "import ${PKG}; print(${EXPR})"
    "Failed to locate ${PKG} path")
  list(APPEND CMAKE_PREFIX_PATH ${_PREFIX_PATH})
endmacro()
52
-
53
#
# hipify_sources_target(OUT_SRCS NAME ORIG_SRCS)
#
# Add a target named `hipify${NAME}` that runs the hipify preprocessor
# (`cmake/hipify.py`) on the CUDA (`*.cu`) files in `ORIG_SRCS`.
#
#   OUT_SRCS  - name of the variable (in the caller's scope) that receives the
#               generated HIP source paths followed by the untouched non-CUDA
#               sources.
#   NAME      - suffix of the custom target name.
#   ORIG_SRCS - list of source files; duplicates are ignored.
#
function(hipify_sources_target OUT_SRCS NAME ORIG_SRCS)
  #
  # Split into CUDA and non-CUDA (i.e. C++) sources.
  #
  set(_unique_srcs ${ORIG_SRCS})
  list(REMOVE_DUPLICATES _unique_srcs)
  set(_cuda_srcs ${_unique_srcs})
  set(_cxx_srcs ${_unique_srcs})
  # The backslash is doubled so that the regex engine receives `\.` (a literal
  # dot). A single backslash is consumed by CMake's quoted-argument escape
  # handling, leaving `.cu$`, which would also match names such as `foocu`.
  list(FILTER _cuda_srcs INCLUDE REGEX "\\.cu$")
  list(FILTER _cxx_srcs EXCLUDE REGEX "\\.cu$")

  #
  # Generate ROCm/HIP source file names from CUDA file names.
  # Since HIP files are generated code, they will appear in the build area
  # `CMAKE_CURRENT_BINARY_DIR` directory rather than the original csrc dir.
  #
  set(_hip_srcs)
  foreach(_src ${_cuda_srcs})
    get_source_file_property(_include_dirs "${_src}" INCLUDE_DIRECTORIES)

    # `foo.cu` -> `foo.hip`, then every "cuda" in the path becomes "hip".
    string(REGEX REPLACE "\\.cu$" ".hip" _hip_src "${_src}")
    string(REPLACE "cuda" "hip" _hip_src "${_hip_src}")

    if(_include_dirs)
      # Copy over include directories from the original CUDA file.
      set_source_files_properties(
        ${_hip_src}
        PROPERTIES INCLUDE_DIRECTORIES "${_include_dirs}")
    endif()

    list(APPEND _hip_srcs "${CMAKE_CURRENT_BINARY_DIR}/${_hip_src}")
  endforeach()

  # VERBATIM makes argument escaping identical on every platform/generator.
  add_custom_target(
    hipify${NAME}
    COMMAND "${Python_EXECUTABLE}" ${CMAKE_SOURCE_DIR}/cmake/hipify.py -p ${CMAKE_SOURCE_DIR} -o ${CMAKE_CURRENT_BINARY_DIR} ${_cuda_srcs}
    DEPENDS ${CMAKE_SOURCE_DIR}/cmake/hipify.py ${_cuda_srcs}
    BYPRODUCTS ${_hip_srcs}
    COMMENT "Running hipify on ${NAME} extension source files."
    VERBATIM)

  # Swap out original extension sources with hipified sources.
  list(APPEND _hip_srcs ${_cxx_srcs})
  set(${OUT_SRCS} ${_hip_srcs} PARENT_SCOPE)
endfunction()
101
-
102
#
# get_torch_gpu_compiler_flags(OUT_GPU_FLAGS GPU_LANG)
#
# Query torch (through `run_python`) for its common GPU compiler flags and
# adjust them for this project.
#
#   OUT_GPU_FLAGS - name of the variable (in the caller's scope) that receives
#                   the resulting list of flags.
#   GPU_LANG      - "CUDA" or "HIP". For any other value the output variable
#                   is left empty.
#
function(get_torch_gpu_compiler_flags OUT_GPU_FLAGS GPU_LANG)
  # Start from a clean slate so that a `GPU_FLAGS` variable inherited from the
  # caller's scope can never be returned for an unrecognised language.
  set(GPU_FLAGS)

  # `GPU_LANG` is quoted so an empty value is compared as a string instead of
  # producing a malformed `if()` expression.
  if("${GPU_LANG}" STREQUAL "CUDA")
    #
    # Get common NVCC flags from torch.
    #
    run_python(GPU_FLAGS
      "from torch.utils.cpp_extension import COMMON_NVCC_FLAGS; print(';'.join(COMMON_NVCC_FLAGS))"
      "Failed to determine torch nvcc compiler flags")

    if(CUDA_VERSION VERSION_GREATER_EQUAL 11.8)
      # Enable FP8 and drop torch's defines that disable the half/bfloat16
      # operators and conversions.
      list(APPEND GPU_FLAGS "-DENABLE_FP8")
      list(REMOVE_ITEM GPU_FLAGS
        "-D__CUDA_NO_HALF_OPERATORS__"
        "-D__CUDA_NO_HALF_CONVERSIONS__"
        "-D__CUDA_NO_BFLOAT16_CONVERSIONS__"
        "-D__CUDA_NO_HALF2_OPERATORS__")
    endif()

  elseif("${GPU_LANG}" STREQUAL "HIP")
    #
    # Get common HIP/HIPCC flags from torch.
    #
    run_python(GPU_FLAGS
      "import torch.utils.cpp_extension as t; print(';'.join(t.COMMON_HIP_FLAGS + t.COMMON_HIPCC_FLAGS))"
      "Failed to determine torch hipcc compiler flags")

    list(APPEND GPU_FLAGS
      "-DUSE_ROCM"
      "-DENABLE_FP8"
      "-U__HIP_NO_HALF_CONVERSIONS__"
      "-U__HIP_NO_HALF_OPERATORS__"
      "-fno-gpu-rdc")

  endif()

  set(${OUT_GPU_FLAGS} ${GPU_FLAGS} PARENT_SCOPE)
endfunction()
141
-
142
#
# string_to_ver(OUT_VER IN_STR)
#
# Convert a `gencode`-style version number to a cmake version number by
# inserting a dot before the last digit of each digit run in `IN_STR`
# (e.g. "75" -> "7.5", "90a" -> "9.0a"). The result is stored in the variable
# named by `OUT_VER`. Being a macro, that variable is set in the caller's
# scope.
#
macro(string_to_ver OUT_VER IN_STR)
  string(REGEX REPLACE "([0-9]+)([0-9])" "\\1.\\2" ${OUT_VER} ${IN_STR})
endmacro()
146
-
147
#
# clear_cuda_arches(CUDA_ARCH_FLAGS)
#
# Move all `-gencode` flags out of `CMAKE_CUDA_FLAGS` and into the variable
# named by `CUDA_ARCH_FLAGS` (as a list, one `-gencode arch=...` per element).
#
# Implemented as a macro so that both `CMAKE_CUDA_FLAGS` and the output
# variable are modified in the caller's scope.
#
# Example:
#   CMAKE_CUDA_FLAGS="-Wall -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75"
#   clear_cuda_arches(CUDA_ARCH_FLAGS)
#   CUDA_ARCH_FLAGS="-gencode arch=compute_70,code=sm_70;-gencode arch=compute_75,code=sm_75"
#   CMAKE_CUDA_FLAGS="-Wall " (spaces that preceded a removed flag are kept)
#
macro(clear_cuda_arches CUDA_ARCH_FLAGS)
  # Extract all `-gencode` flags from `CMAKE_CUDA_FLAGS`.
  #
  # * `${CUDA_ARCH_FLAGS}` expands to the variable name the caller passed;
  #   macro parameters are substituted textually, so a bare `CUDA_ARCH_FLAGS`
  #   would always write to a variable of exactly that name.
  # * The input is quoted so that an empty `CMAKE_CUDA_FLAGS` is still passed
  #   as an (empty) argument; `string(REGEX ...)` rejects a missing input.
  string(REGEX MATCHALL "-gencode arch=[^ ]+" ${CUDA_ARCH_FLAGS}
    "${CMAKE_CUDA_FLAGS}")

  # Remove all `-gencode` flags from `CMAKE_CUDA_FLAGS` since they will be
  # modified and passed back via the `CUDA_ARCHITECTURES` property.
  string(REGEX REPLACE "-gencode arch=[^ ]+ *" "" CMAKE_CUDA_FLAGS
    "${CMAKE_CUDA_FLAGS}")
endmacro()
167
-
168
#
# extract_unique_cuda_archs_ascending(OUT_ARCHES CUDA_ARCH_FLAGS)
#
# Pull the compute capability out of every `-gencode` flag in
# `CUDA_ARCH_FLAGS` (the `<major><minor>[a]` part after `arch=compute_`),
# convert it to dotted form with `string_to_ver`, drop duplicates, sort in
# ascending natural order and store the list in the variable named by
# `OUT_ARCHES`. An `a` suffix, when present, is kept.
#
# Example:
#   CUDA_ARCH_FLAGS="-gencode arch=compute_75,code=sm_75;...;-gencode arch=compute_90a,code=sm_90a"
#   extract_unique_cuda_archs_ascending(OUT_ARCHES "${CUDA_ARCH_FLAGS}")
#   OUT_ARCHES="7.5;...;9.0a"
#
function(extract_unique_cuda_archs_ascending OUT_ARCHES CUDA_ARCH_FLAGS)
  set(_dotted_archs)

  foreach(_flag ${CUDA_ARCH_FLAGS})
    # `_code` first holds the whole match; on success it is narrowed down to
    # the captured `<major><minor>[a]` group.
    string(REGEX MATCH "arch=compute_([0-9]+a?)" _code ${_flag})
    if(_code)
      set(_code ${CMAKE_MATCH_1})
    endif()

    string_to_ver(_dotted ${_code})
    list(APPEND _dotted_archs ${_dotted})
  endforeach()

  list(REMOVE_DUPLICATES _dotted_archs)
  list(SORT _dotted_archs COMPARE NATURAL ORDER ASCENDING)
  set(${OUT_ARCHES} ${_dotted_archs} PARENT_SCOPE)
endfunction()
194
-
195
#
# set_gencode_flag_for_srcs(SRCS <files...> ARCH <arch> CODE <code>)
#
# Append `-gencode arch=<arch>,code=<code>` to the `COMPILE_OPTIONS` source
# property of every file in `SRCS`. The option is wrapped in a generator
# expression so it only applies when compiling for the CUDA language.
#
# Example:
#   set_gencode_flag_for_srcs(
#     SRCS "foo.cu"
#     ARCH "compute_75"
#     CODE "sm_75")
#   adds: "-gencode arch=compute_75,code=sm_75" to the compile options for
#         `foo.cu` (only for the CUDA language).
#
# This is a macro: the parsed `arg_*` variables, the keyword helper variables
# and `_FLAG` are all set in the caller's scope.
#
macro(set_gencode_flag_for_srcs)
  set(options)
  set(oneValueArgs ARCH CODE)
  set(multiValueArgs SRCS)
  cmake_parse_arguments(
    arg
    "${options}"
    "${oneValueArgs}"
    "${multiValueArgs}"
    ${ARGN})

  # Two-element list: the `-gencode` switch followed by its value.
  set(_FLAG "-gencode;arch=${arg_ARCH},code=${arg_CODE}")

  set_property(
    SOURCE ${arg_SRCS}
    APPEND
    PROPERTY COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:CUDA>:${_FLAG}>")

  message(DEBUG "Setting gencode flag for ${arg_SRCS}: ${_FLAG}")
endmacro()
222
-
223
#
# set_gencode_flags_for_srcs(SRCS <files...> CUDA_ARCHS <archs...>
#                            [BUILD_PTX_FOR_ARCH <arch>])
#
# For a list of source files, add the `-gencode` flags for every requested
# architecture to the files' compile options (CUDA language only), using
# `set_gencode_flag_for_srcs`.
#
# Arguments:
#  SRCS: list of source files
#  CUDA_ARCHS: list of CUDA architectures in the form `<major>.<minor>[letter]`,
#    optionally suffixed with `+PTX`. A plain entry produces SASS only
#    (`code=sm_XY`); a `+PTX` entry additionally produces PTX
#    (`code=compute_XY`).
#  BUILD_PTX_FOR_ARCH: when set to a true value, PTX is also built for
#    architecture `BUILD_PTX_FOR_ARCH` provided the highest entry of
#    CUDA_ARCHS (natural sort order) is greater than or equal to it.
#
# This is a macro: every variable it sets is visible in the caller's scope.
#
macro(set_gencode_flags_for_srcs)
  set(options)
  set(oneValueArgs BUILD_PTX_FOR_ARCH)
  set(multiValueArgs SRCS CUDA_ARCHS)
  cmake_parse_arguments(
    arg
    "${options}"
    "${oneValueArgs}"
    "${multiValueArgs}"
    ${ARGN})

  foreach(_ARCH ${arg_CUDA_ARCHS})
    string(FIND "${_ARCH}" "+PTX" _HAS_PTX)
    if(_HAS_PTX EQUAL -1)
      # Plain architecture: SASS only.
      string(REPLACE "." "" _STRIPPED_ARCH "${_ARCH}")
      set_gencode_flag_for_srcs(
        SRCS ${arg_SRCS}
        ARCH "compute_${_STRIPPED_ARCH}"
        CODE "sm_${_STRIPPED_ARCH}")
    else()
      # `+PTX` suffix: generate both the sm and the ptx code.
      string(REPLACE "+PTX" "" _BASE_ARCH "${_ARCH}")
      string(REPLACE "." "" _STRIPPED_ARCH "${_BASE_ARCH}")
      set_gencode_flag_for_srcs(
        SRCS ${arg_SRCS}
        ARCH "compute_${_STRIPPED_ARCH}"
        CODE "sm_${_STRIPPED_ARCH}")
      set_gencode_flag_for_srcs(
        SRCS ${arg_SRCS}
        ARCH "compute_${_STRIPPED_ARCH}"
        CODE "compute_${_STRIPPED_ARCH}")
    endif()
  endforeach()

  if(${arg_BUILD_PTX_FOR_ARCH})
    # After an ascending sort the last element is the highest architecture.
    list(SORT arg_CUDA_ARCHS COMPARE NATURAL ORDER ASCENDING)
    list(GET arg_CUDA_ARCHS -1 _HIGHEST_ARCH)
    if(_HIGHEST_ARCH VERSION_GREATER_EQUAL ${arg_BUILD_PTX_FOR_ARCH})
      string(REPLACE "." "" _PTX_ARCH "${arg_BUILD_PTX_FOR_ARCH}")
      set_gencode_flag_for_srcs(
        SRCS ${arg_SRCS}
        ARCH "compute_${_PTX_ARCH}"
        CODE "compute_${_PTX_ARCH}")
    endif()
  endif()
endmacro()
276
-
277
#
# cuda_archs_loose_intersection(OUT_CUDA_ARCHS SRC_CUDA_ARCHS TGT_CUDA_ARCHS)
#
# Compute the "loose intersection" of `SRC_CUDA_ARCHS` (architectures a kernel
# is written for, `<major>.<minor>[a]`, optionally suffixed with `+PTX`) with
# `TGT_CUDA_ARCHS` (architectures being built for) and store it in the
# variable named by `OUT_CUDA_ARCHS`.
#
# Steps, in order:
#  1. `+PTX` suffixes are stripped from the source entries; the stripped
#     bases are remembered.
#  2. Every source entry ending in `a` (e.g. `9.0a`) is removed from the
#     source list. If its base (`9.0`) is in `TGT_CUDA_ARCHS`, the `a` entry
#     goes straight into the result and the base is removed from the targets.
#  3. For each remaining target, the highest source arch that is less than or
#     equal to it is selected. The source arch must share the target's major
#     version unless it was requested with `+PTX`, in which case it may match
#     across major versions.
#  4. Duplicates are removed and `+PTX` is re-attached to the selected archs
#     that requested it.
#
# The result keeps insertion order: `a` entries first, then one entry per
# target in target order.
#
# Example:
#   SRC_CUDA_ARCHS="7.5;8.0;8.6;9.0;9.0a"
#   TGT_CUDA_ARCHS="8.0;8.9;9.0"
#   cuda_archs_loose_intersection(OUT_CUDA_ARCHS "${SRC_CUDA_ARCHS}" "${TGT_CUDA_ARCHS}")
#   OUT_CUDA_ARCHS="9.0a;8.0;8.6"
#
# Example With PTX:
#   SRC_CUDA_ARCHS="8.0+PTX"
#   TGT_CUDA_ARCHS="9.0"
#   cuda_archs_loose_intersection(OUT_CUDA_ARCHS "${SRC_CUDA_ARCHS}" "${TGT_CUDA_ARCHS}")
#   OUT_CUDA_ARCHS="8.0+PTX"
#
function(cuda_archs_loose_intersection OUT_CUDA_ARCHS SRC_CUDA_ARCHS TGT_CUDA_ARCHS)
  set(_src_archs "${SRC_CUDA_ARCHS}")
  set(_tgt_archs ${TGT_CUDA_ARCHS})

  # Step 1: replace every `<arch>+PTX` by `<arch>` and record the PTX request.
  set(_ptx_bases)
  foreach(_entry ${_src_archs})
    if(_entry MATCHES "\\+PTX$")
      string(REPLACE "+PTX" "" _bare "${_entry}")
      list(APPEND _ptx_bases "${_bare}")
      list(REMOVE_ITEM _src_archs "${_entry}")
      list(APPEND _src_archs "${_bare}")
    endif()
  endforeach()
  list(REMOVE_DUPLICATES _ptx_bases)
  list(REMOVE_DUPLICATES _src_archs)

  # Step 2: `x.ya` entries are matched exactly against their base `x.y`.
  set(_selected)
  foreach(_entry ${_src_archs})
    if(_entry MATCHES "a$")
      list(REMOVE_ITEM _src_archs "${_entry}")
      string(REPLACE "a" "" _bare "${_entry}")
      if("${_bare}" IN_LIST TGT_CUDA_ARCHS)
        list(REMOVE_ITEM _tgt_archs "${_bare}")
        list(APPEND _selected "${_entry}")
      endif()
    endif()
  endforeach()

  # Ascending order lets the scan below stop at the first too-new source arch.
  list(SORT _src_archs COMPARE NATURAL ORDER ASCENDING)

  # Step 3: best source arch for every remaining target arch.
  foreach(_tgt ${_tgt_archs})
    set(_best)
    string(REGEX REPLACE "^([0-9]+)\\..*$" "\\1" _tgt_major "${_tgt}")

    foreach(_src ${_src_archs})
      if(_src VERSION_GREATER _tgt)
        # Everything from here on is newer than the target.
        break()
      endif()

      string(REGEX REPLACE "^([0-9]+)\\..*$" "\\1" _src_major "${_src}")
      # PTX arches may match across majors; all others need the same major.
      if(_src IN_LIST _ptx_bases OR _src_major STREQUAL _tgt_major)
        set(_best "${_src}")
      endif()
    endforeach()

    if(_best)
      list(APPEND _selected "${_best}")
    endif()
  endforeach()

  list(REMOVE_DUPLICATES _selected)

  # Step 4: reapply the `+PTX` suffix to architectures that requested PTX.
  set(_final)
  foreach(_entry ${_selected})
    if(_entry IN_LIST _ptx_bases)
      list(APPEND _final "${_entry}+PTX")
    else()
      list(APPEND _final "${_entry}")
    endif()
  endforeach()

  set(${OUT_CUDA_ARCHS} ${_final} PARENT_SCOPE)
endfunction()
381
-
382
#
# hip_archs_loose_intersection(OUT_ROCM_ARCHS SRC_ROCM_ARCHS TGT_ROCM_ARCHS)
#
# Store in the variable named by `OUT_ROCM_ARCHS` every architecture of
# `SRC_ROCM_ARCHS` that also appears in `TGT_ROCM_ARCHS`. Despite the name,
# entries are matched exactly (no "nearest lower version" fallback).
# Duplicates are dropped and the result is sorted in ascending string order.
#
# Example:
#   SRC_ROCM_ARCHS="gfx900;gfx906;gfx908;gfx90a"
#   TGT_ROCM_ARCHS="gfx906;gfx908;gfx1030"
#   hip_archs_loose_intersection(OUT_ROCM_ARCHS "${SRC_ROCM_ARCHS}" "${TGT_ROCM_ARCHS}")
#   OUT_ROCM_ARCHS="gfx906;gfx908"
#
function(hip_archs_loose_intersection OUT_ROCM_ARCHS SRC_ROCM_ARCHS TGT_ROCM_ARCHS)
  # ROCm architectures are typically in format gfxNNN or gfxNNNx where N is a
  # digit and x is a letter, so plain string ordering is used.
  set(_candidates "${SRC_ROCM_ARCHS}")
  list(REMOVE_DUPLICATES _candidates)
  list(SORT _candidates COMPARE STRING ORDER ASCENDING)

  # Keep the candidates that are present in the target list.
  set(_common)
  foreach(_candidate ${_candidates})
    list(FIND TGT_ROCM_ARCHS "${_candidate}" _position)
    if(NOT _position EQUAL -1)
      list(APPEND _common ${_candidate})
    endif()
  endforeach()

  list(REMOVE_DUPLICATES _common)
  set(${OUT_ROCM_ARCHS} ${_common} PARENT_SCOPE)
endfunction()
417
-
418
#
# override_gpu_arches(GPU_ARCHES GPU_LANG GPU_SUPPORTED_ARCHES...)
#
# Filter the detected GPU architectures by `GPU_SUPPORTED_ARCHES` (plus any
# extra arguments) and store the surviving ones in the variable named by
# `GPU_ARCHES`. Only the HIP language is handled; for any other `GPU_LANG`
# nothing is done beyond printing the supported list.
#
# For HIP the detected architectures come from the `PYTORCH_ROCM_ARCH`
# environment variable when it is defined, otherwise from
# `CMAKE_HIP_ARCHITECTURES`. A fatal error is raised when none of them is
# supported.
#
# Note: this is a macro, so the variable named by `GPU_ARCHES` (as well as
# `HIP_ARCHITECTURES` and `_GPU_SUPPORTED_ARCHES_LIST`) is set directly in the
# caller's scope.
#
macro(override_gpu_arches GPU_ARCHES GPU_LANG GPU_SUPPORTED_ARCHES)
  set(_GPU_SUPPORTED_ARCHES_LIST ${GPU_SUPPORTED_ARCHES} ${ARGN})
  message(STATUS "${GPU_LANG} supported arches: ${_GPU_SUPPORTED_ARCHES_LIST}")

  if(${GPU_LANG} STREQUAL "HIP")
    #
    # `GPU_ARCHES` controls the `--offload-arch` flags.
    #
    # Without a PYTORCH_ROCM_ARCH env variable we fall back to
    # CMAKE_HIP_ARCHITECTURES, which was generated by calling
    # "rocm_agent_enumerator" in "enable_language(HIP)"
    # (in file Modules/CMakeDetermineHIPCompiler.cmake); otherwise the env
    # variable is taken as the list.
    #
    if(NOT DEFINED ENV{PYTORCH_ROCM_ARCH})
      set(HIP_ARCHITECTURES ${CMAKE_HIP_ARCHITECTURES})
    else()
      set(HIP_ARCHITECTURES $ENV{PYTORCH_ROCM_ARCH})
    endif()

    #
    # The module architecture flags are the intersection of the supported and
    # the detected architectures, in detection order.
    #
    set(${GPU_ARCHES})
    foreach(_ARCH ${HIP_ARCHITECTURES})
      if(_ARCH IN_LIST _GPU_SUPPORTED_ARCHES_LIST)
        list(APPEND ${GPU_ARCHES} ${_ARCH})
      endif()
    endforeach()

    if(NOT ${GPU_ARCHES})
      message(FATAL_ERROR
        "None of the detected ROCm architectures: ${HIP_ARCHITECTURES} is"
        " supported. Supported ROCm architectures are: ${_GPU_SUPPORTED_ARCHES_LIST}.")
    endif()
  endif()
endmacro()
462
-
463
#
# define_gpu_extension_target(GPU_MOD_NAME ...)
#
# Create a Python extension module target named `GPU_MOD_NAME` with
# `Python_add_library`, configure it and add an install rule for it.
#
# Required arguments:
#
# DESTINATION <dest>         - Module destination directory.
# LANGUAGE <lang>            - The GPU language for this module, e.g CUDA, HIP,
#                              etc.
# SOURCES <sources>          - List of source files relative to CMakeLists.txt
#                              directory.
#
# Optional arguments:
#
# ARCHITECTURES <arches>     - A list of target GPU architectures in cmake
#                              format.
#                              Refer `CMAKE_CUDA_ARCHITECTURES` documentation
#                              and `CMAKE_HIP_ARCHITECTURES` for more info.
#                              ARCHITECTURES will use cmake's defaults if
#                              not provided.
# COMPILE_FLAGS <flags>      - Extra compiler flags passed to NVCC/hip.
# INCLUDE_DIRECTORIES <dirs> - Extra include directories.
# LIBRARIES <libraries>      - Extra link libraries.
# WITH_SOABI                 - Generate library with python SOABI suffix name.
# USE_SABI <version>         - Use python stable api <version>
#
# When LANGUAGE is HIP the sources are first routed through
# `hipify_sources_target` and the module depends on the resulting
# `hipify<GPU_MOD_NAME>` target.
#
# Note: optimization level/debug info is set via cmake build type.
#
function(define_gpu_extension_target GPU_MOD_NAME)
  cmake_parse_arguments(
    PARSE_ARGV 1
    GPU
    "WITH_SOABI"
    "DESTINATION;LANGUAGE;USE_SABI"
    "SOURCES;ARCHITECTURES;COMPILE_FLAGS;INCLUDE_DIRECTORIES;LIBRARIES")

  set(_is_hip FALSE)
  if(GPU_LANGUAGE STREQUAL "HIP")
    set(_is_hip TRUE)
  endif()

  # Add hipify preprocessing step when building with HIP/ROCm.
  if(_is_hip)
    hipify_sources_target(GPU_SOURCES ${GPU_MOD_NAME} "${GPU_SOURCES}")
  endif()

  # Optional keywords forwarded to Python_add_library; each list stays empty
  # (and therefore expands to nothing) when the feature was not requested.
  set(_soabi_keyword)
  if(GPU_WITH_SOABI)
    set(_soabi_keyword WITH_SOABI)
  endif()

  set(_sabi_args)
  if(GPU_USE_SABI)
    set(_sabi_args USE_SABI ${GPU_USE_SABI})
  endif()

  Python_add_library(${GPU_MOD_NAME} MODULE ${_sabi_args} ${_soabi_keyword} "${GPU_SOURCES}")

  if(_is_hip)
    # Make this target dependent on the hipify preprocessor step.
    add_dependencies(${GPU_MOD_NAME} hipify${GPU_MOD_NAME})
  endif()

  if(GPU_ARCHITECTURES)
    if(_is_hip)
      # Clear target architectures, we are passing arch flags per source file.
      set_target_properties(${GPU_MOD_NAME} PROPERTIES
        HIP_ARCHITECTURES off)
    else()
      set_property(TARGET ${GPU_MOD_NAME} PROPERTY
        ${GPU_LANGUAGE}_ARCHITECTURES "${GPU_ARCHITECTURES}")
    endif()
  endif()

  set_target_properties(${GPU_MOD_NAME} PROPERTIES CXX_STANDARD 17)

  # Extra flags only apply to translation units of the GPU language.
  target_compile_options(
    ${GPU_MOD_NAME}
    PRIVATE $<$<COMPILE_LANGUAGE:${GPU_LANGUAGE}>:${GPU_COMPILE_FLAGS}>)

  target_compile_definitions(
    ${GPU_MOD_NAME}
    PRIVATE "-DTORCH_EXTENSION_NAME=${GPU_MOD_NAME}")

  target_include_directories(
    ${GPU_MOD_NAME}
    PRIVATE csrc ${GPU_INCLUDE_DIRECTORIES})

  target_link_libraries(${GPU_MOD_NAME} PRIVATE torch ${GPU_LIBRARIES})

  # Don't use `TORCH_LIBRARIES` for CUDA since it pulls in a bunch of
  # dependencies that are not necessary and may not be installed.
  if(GPU_LANGUAGE STREQUAL "CUDA")
    target_link_libraries(${GPU_MOD_NAME} PRIVATE CUDA::cudart)
  else()
    target_link_libraries(${GPU_MOD_NAME} PRIVATE ${TORCH_LIBRARIES})
  endif()

  install(
    TARGETS ${GPU_MOD_NAME}
    LIBRARY DESTINATION ${GPU_DESTINATION}
    COMPONENT ${GPU_MOD_NAME})
endfunction()