|
|
# |
|
|
# vLLM CMakeLists.txt Patch for DGX Spark (GB10) |
|
|
# |
|
|
# This patch removes SM12.0/12.1 (and, on CUDA >= 13.0, SM11.0) architectures from certain CUDA kernel |
|
|
# compilations that have issues on DGX Spark's GB10 GPU. |
|
|
# |
|
|
# The GB10 GPU has compute capability 12.1, but certain FP4 and scaled_mm |
|
|
# kernels compiled for SM12.x cause runtime errors. This patch restricts |
|
|
# those kernels to SM10.x (datacenter Blackwell) architectures only, while still allowing |
|
|
# the main model to run on SM12.x. |
|
|
# |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -594,9 +594,9 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") |
|
|
|
|
|
# FP4 Archs and flags |
|
|
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0) |
|
|
- cuda_archs_loose_intersection(FP4_ARCHS "10.0f;11.0f;12.0f" "${CUDA_ARCHS}") |
|
|
+ cuda_archs_loose_intersection(FP4_ARCHS "10.0f" "${CUDA_ARCHS}") |
|
|
else() |
|
|
- cuda_archs_loose_intersection(FP4_ARCHS "10.0a;10.1a;12.0a;12.1a" "${CUDA_ARCHS}") |
|
|
+ cuda_archs_loose_intersection(FP4_ARCHS "10.0a;10.1a" "${CUDA_ARCHS}") |
|
|
endif() |
|
|
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND FP4_ARCHS) |
|
|
set(SRCS |
|
|
@@ -668,7 +668,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") |
|
|
endif() |
|
|
|
|
|
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0) |
|
|
- cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f" "${CUDA_ARCHS}") |
|
|
+ cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f" "${CUDA_ARCHS}") |
|
|
else() |
|
|
cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a" "${CUDA_ARCHS}") |
|
|
endif() |
|
|
@@ -716,9 +716,9 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") |
|
|
endif() |
|
|
|
|
|
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 13.0) |
|
|
- cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f;11.0f;12.0f" "${CUDA_ARCHS}") |
|
|
+ cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0f" "${CUDA_ARCHS}") |
|
|
else() |
|
|
- cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a;10.1a;10.3a;12.0a;12.1a" "${CUDA_ARCHS}") |
|
|
+ cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a;10.1a;10.3a" "${CUDA_ARCHS}") |
|
|
endif() |
|
|
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND SCALED_MM_ARCHS) |
|
|
set(SRCS "csrc/quantization/w8a8/cutlass/moe/blockwise_scaled_group_mm_sm100.cu") |
|
|
|