cmake_minimum_required(VERSION 3.16)
project(qwen3-moe-aclnn LANGUAGES CXX)

# Project-wide language standard; compile_commands.json is exported for tooling.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Default to a Release build, but only for single-config generators:
# multi-config generators (Visual Studio, Xcode, Ninja Multi-Config) choose the
# configuration at build time and do not use CMAKE_BUILD_TYPE.
get_property(is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT is_multi_config AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

# Release builds are optimized but keep debug symbols.
# NOTE(review): this replaces CMake's default Release flags wholesale; the usual
# GNU/Clang default also carries -DNDEBUG, so assert() presumably stays enabled
# here -- confirm that is intended.
set(CMAKE_CXX_FLAGS_RELEASE "-O2 -g")

# ---- CANN paths ----
# Resolution order for the CANN toolkit root:
#   1. -DCANN_INSTALL_DIR=<path> (or a value set by a parent project)
#   2. the ASCEND_TOOLKIT_HOME environment variable, when set and non-empty
#   3. the default install location below
# The environment is read at configure time only.
if(NOT DEFINED CANN_INSTALL_DIR)
  if(NOT "$ENV{ASCEND_TOOLKIT_HOME}" STREQUAL "")
    set(CANN_INSTALL_DIR "$ENV{ASCEND_TOOLKIT_HOME}")
  else()
    set(CANN_INSTALL_DIR /usr/local/Ascend/ascend-toolkit/latest)
  endif()
endif()
message(STATUS "CANN_INSTALL_DIR: ${CANN_INSTALL_DIR}")

# Report a wrong toolkit path at configure time instead of leaving it to
# surface later as missing-header compile errors.
if(NOT IS_DIRECTORY "${CANN_INSTALL_DIR}/include")
  message(WARNING
    "CANN include directory not found: ${CANN_INSTALL_DIR}/include. "
    "Set CANN_INSTALL_DIR or ASCEND_TOOLKIT_HOME to the CANN toolkit root.")
endif()

# CANN libraries every target in this project links against (by name, resolved
# through the lib64 link directory attached to the core library below).
set(CANN_LIBS ascendcl nnopbase opapi opapi_transformer acl_op_compiler hccl)

# ---- Library: qwen3-moe-aclnn core ----
set(LCA_SOURCES
  src/safetensors_loader.cpp
  src/model_config.cpp
  src/tokenizer.cpp
  src/device_weights.cpp
  src/runner.cpp
)
add_library(qwen3-moe-aclnn-core STATIC ${LCA_SOURCES})

# Everything below is PUBLIC on purpose: every binary in this project links the
# core library and thereby inherits the same warning flags, include paths,
# library search path and CANN libraries, without directory-scoped commands.
target_compile_options(qwen3-moe-aclnn-core PUBLIC
  -Wall
  -Wno-unused-function
)
target_include_directories(qwen3-moe-aclnn-core PUBLIC
  "${CANN_INSTALL_DIR}/include"
  "${CANN_INSTALL_DIR}/include/aclnn"
  "${PROJECT_SOURCE_DIR}/include"
  "${PROJECT_SOURCE_DIR}/external"
  # HCCL headers live in the include/hccl subdirectory; adding it explicitly
  # lets them be found without an "hccl/" prefix.
  "${CANN_INSTALL_DIR}/include/hccl"
)
target_link_directories(qwen3-moe-aclnn-core PUBLIC
  "${CANN_INSTALL_DIR}/lib64"
)
target_link_libraries(qwen3-moe-aclnn-core PUBLIC ${CANN_LIBS})

# ---- Binaries ----
# Each binary is built from tests/<name>.cpp and links the core library.
foreach(binary_name IN ITEMS
    hello_acl
    test_safetensors
    test_model_config
    test_tokenizer
    test_rms_norm
    test_weight_load
)
  add_executable(${binary_name} tests/${binary_name}.cpp)
  target_link_libraries(${binary_name} PRIVATE qwen3-moe-aclnn-core)
endforeach()
# ---- Test binaries ----
# Each test is a standalone executable built from tests/<name>.cpp and linked
# against the core static library. PRIVATE is sufficient because nothing links
# against an executable.
foreach(test_name IN ITEMS
    test_linear_hf
    test_rope
    test_rope_manual
    test_attention_layer
    test_moe_layer
    test_attention_decode
    test_engine_smoke
    test_layer_forward
    test_runner
    test_op_support
    test_rope_fused
    test_batch_decode
    test_batch_correctness
)
  add_executable(${test_name} tests/${test_name}.cpp)
  target_link_libraries(${test_name} PRIVATE qwen3-moe-aclnn-core)
endforeach()

# ---- Main CLI ----
add_executable(qwen3-moe-aclnn src/main_cli.cpp)
target_link_libraries(qwen3-moe-aclnn PRIVATE qwen3-moe-aclnn-core)