# llm_mutil_npu / CMakeLists.txt
# Initial C++ aclnn EAGER inference for Qwen3-235B-A22B MoE on Ascend 910 × 16 NPU
# (commit 4b9fefd, author: xianglarry)
# Top-level build configuration: minimum CMake version, project declaration,
# C++ language standard and the default compiler flags.
cmake_minimum_required(VERSION 3.16)
project(qwen3-moe-aclnn LANGUAGES CXX)

# Require C++17 for every target defined in this directory.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Write compile_commands.json into the build tree for editors and tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Default to a Release build when the user selected nothing.
# CMAKE_BUILD_TYPE has no meaning for multi-config generators, so it is only
# defaulted for single-config ones, and it is stored in the cache so the
# effective value is visible in CMakeCache.txt / ccmake / cmake-gui.
get_property(qwen3_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT qwen3_is_multi_config AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release CACHE STRING
      "Build type (Debug, Release, RelWithDebInfo, MinSizeRel)" FORCE)
endif()

# Release builds are optimized but keep debug info. This assignment replaces
# whatever Release flags CMake would otherwise use, so only -O2 -g apply.
set(CMAKE_CXX_FLAGS_RELEASE "-O2 -g")

# Warnings for all configurations; unused-function warnings are silenced.
string(APPEND CMAKE_CXX_FLAGS " -Wall -Wno-unused-function")
# CANN paths
# Resolve the CANN toolkit root, in order of precedence:
#   1. CANN_INSTALL_DIR already defined (e.g. -DCANN_INSTALL_DIR=<path>)
#   2. the ASCEND_TOOLKIT_HOME environment variable, when set and non-empty
#   3. /usr/local/Ascend/ascend-toolkit/latest
# Note: the environment is read at configure time only.
if(NOT DEFINED CANN_INSTALL_DIR)
  # An exported-but-empty ASCEND_TOOLKIT_HOME still counts as DEFINED; using it
  # would leave CANN_INSTALL_DIR empty and turn every derived path into
  # "/include", "/lib64", ... so it is treated the same as "not set".
  if(DEFINED ENV{ASCEND_TOOLKIT_HOME} AND NOT "$ENV{ASCEND_TOOLKIT_HOME}" STREQUAL "")
    set(CANN_INSTALL_DIR "$ENV{ASCEND_TOOLKIT_HOME}")
  else()
    set(CANN_INSTALL_DIR "/usr/local/Ascend/ascend-toolkit/latest")
  endif()
endif()
message(STATUS "CANN_INSTALL_DIR: ${CANN_INSTALL_DIR}")

# Surface a wrong toolkit path at configure time instead of as missing-header
# or unresolved-library errors later. Only a warning, so configuring on a
# machine without CANN (e.g. to generate compile_commands.json) still works.
if(IS_ABSOLUTE "${CANN_INSTALL_DIR}" AND NOT IS_DIRECTORY "${CANN_INSTALL_DIR}")
  message(WARNING
    "CANN_INSTALL_DIR does not exist: ${CANN_INSTALL_DIR}. "
    "Pass -DCANN_INSTALL_DIR=<path> or set ASCEND_TOOLKIT_HOME.")
endif()
# Header search paths applied to every target defined in this directory:
# the CANN toolkit headers plus this project's own include/ and external/
# trees. PROJECT_SOURCE_DIR (not CMAKE_SOURCE_DIR) is used so the project
# paths stay correct when this project is consumed via add_subdirectory().
include_directories(
  "${CANN_INSTALL_DIR}/include"
  "${CANN_INSTALL_DIR}/include/aclnn"
  "${PROJECT_SOURCE_DIR}/include"
  "${PROJECT_SOURCE_DIR}/external"
)

# Directory in which the linker looks up the libraries named in CANN_LIBS.
link_directories("${CANN_INSTALL_DIR}/lib64")

# CANN libraries, referenced by bare name and resolved through the link
# directory above.
set(CANN_LIBS ascendcl nnopbase opapi opapi_transformer acl_op_compiler hccl)

# Also put include/hccl itself on the search path.
# NOTE(review): the original comment said this is needed for <hccl/hccl.h>,
# but that spelling already resolves through ${CANN_INSTALL_DIR}/include;
# presumably this entry serves un-prefixed HCCL includes - confirm.
include_directories("${CANN_INSTALL_DIR}/include/hccl")
# ---- Library: qwen3-moe-aclnn core ----
# Translation units compiled into the static core library.
set(LCA_SOURCES
  src/safetensors_loader.cpp
  src/model_config.cpp
  src/tokenizer.cpp
  src/device_weights.cpp
  src/runner.cpp
)

# Declare the static library first, then attach its sources.
add_library(qwen3-moe-aclnn-core STATIC)
target_sources(qwen3-moe-aclnn-core
  PRIVATE
    ${LCA_SOURCES}
)

# PUBLIC: anything that links the core library also links the CANN libraries.
target_link_libraries(qwen3-moe-aclnn-core
  PUBLIC
    ${CANN_LIBS}
)
# ---- Binaries ----
# Each name listed below builds tests/<name>.cpp into an executable of the
# same name and links it against the core library. To add a program, drop
# tests/<name>.cpp in place and append <name> to the list.
foreach(test_program IN ITEMS
    hello_acl
    test_safetensors
    test_model_config
    test_tokenizer
    test_rms_norm
    test_weight_load
    test_linear_hf
    test_rope
    test_rope_manual
    test_attention_layer
    test_moe_layer
    test_attention_decode
    test_engine_smoke
    test_layer_forward
    test_runner
)
  add_executable(${test_program} tests/${test_program}.cpp)
  # Explicit PRIVATE keyword instead of the legacy keyword-less signature;
  # an executable has no consumers to propagate the dependency to.
  target_link_libraries(${test_program} PRIVATE qwen3-moe-aclnn-core)
endforeach()
# ---- Main CLI ----
# Command-line executable built from src/main_cli.cpp on top of the core
# library.
add_executable(qwen3-moe-aclnn src/main_cli.cpp)
# Explicit PRIVATE keyword instead of the legacy keyword-less signature;
# an executable has no consumers to propagate the dependency to.
target_link_libraries(qwen3-moe-aclnn PRIVATE qwen3-moe-aclnn-core)
# ---- Additional test binaries ----
# Each name listed below builds tests/<name>.cpp into an executable of the
# same name and links it against the core library.
foreach(extra_test_program IN ITEMS
    test_op_support
    test_rope_fused
    test_batch_decode
    test_batch_correctness
)
  add_executable(${extra_test_program} tests/${extra_test_program}.cpp)
  # Explicit PRIVATE keyword instead of the legacy keyword-less signature;
  # an executable has no consumers to propagate the dependency to.
  target_link_libraries(${extra_test_program} PRIVATE qwen3-moe-aclnn-core)
endforeach()