# File size: 1,882 Bytes
# 17db41a
# Driver used by the test.so link rule.
CC = nvcc

# Extra flags for the link step; empty here.
CFLAGS =

# Forwards -fPIC to the host compiler via nvcc's -Xcompiler option.
fPIC_flag = -Xcompiler=-fPIC

# Every object that is linked into test.so, one per line.
obj_files = \
    reshape_0.obj \
    batch_gather_1.obj \
    batch_gather_3.obj \
    size_5.obj \
    fused_elementwise_376.obj \
    layernorm_7.obj \
    gemm_rcr_bias_permute_9.obj \
    split_11.obj \
    reshape_12.obj \
    mem_eff_attention_15.obj \
    gemm_rcr_bias_add_17.obj \
    layernorm_19.obj \
    gemm_rcr_bias_gelu_20.obj \
    gemm_rcr_bias_add_21.obj \
    constants.obj \
    model_container_base.obj \
    debug_utility.obj \
    model_container.obj \
    model_interface.obj \
    utility.obj
# Root of the CUTLASS tree bundled with the aitemplate package; every include
# path below is derived from it. Override on the command line
# (make cutlass_dir=/other/path) to build against a different location.
cutlass_dir := /home/olab/kirstain/anaconda3/envs/ait/lib/python3.10/site-packages/aitemplate/3rdparty/cutlass

# Flags required to compile one .cu file. They are kept separate from CFLAGS
# so that overriding CFLAGS cannot drop them. Targets compute capability 8.0
# (sm_80 code plus compute_80 PTX), C++17, -O3, host-side -fPIC and hidden
# symbol visibility.
nvcc_compile_flags := \
    -DCUTLASS_ENABLE_TENSOR_CORE_MMA=1 \
    -DCUTLASS_USE_TANH_FOR_SIGMOID=1 \
    -w \
    -gencode=arch=compute_80,code=[sm_80,compute_80] \
    $(fPIC_flag) \
    -Xcompiler=-Wconversion \
    -Xcompiler=-fno-strict-aliasing \
    -Xcompiler -fvisibility=hidden \
    -O3 \
    -std=c++17 \
    --expt-relaxed-constexpr \
    --use_fast_math \
    -I$(cutlass_dir)/include \
    -I$(cutlass_dir)/tools/util/include \
    -I$(cutlass_dir)/examples/35_gemm_softmax \
    -I$(cutlass_dir)/examples/41_fused_multi_head_attention \
    -I$(cutlass_dir)/examples/45_dual_gemm \
    -I$(cutlass_dir)/../../backend/cuda/attention/src/./ \
    -I$(cutlass_dir)/../../backend/cuda/attention/src/fmha \
    -DNDEBUG

# Compile a CUDA source file into an object file.
# $< is the .cu prerequisite, $@ the .obj target. The recipe line must begin
# with a TAB, and it goes through $(CC) so the compiler matches the link rule.
%.obj : %.cu
	$(CC) $(nvcc_compile_flags) -c -o $@ $<
# Wrap a raw .bin file into a relocatable object so it can be linked like any
# other .obj: `ld -r -b binary` reads the input as opaque binary data.
# The recipe line must begin with a TAB.
%.obj : %.bin
	ld -r -b binary -o $@ $<
# These targets are commands, not files; declaring them phony keeps a stray
# file named `all` or `clean` from masking them.
.PHONY: all clean clean_constants

# Top-level target: build the shared library.
all: test.so

# Link every object file into the shared library.
# $^ expands to the prerequisite list, so the objects are named only once.
# The recipe line must begin with a TAB.
test.so: $(obj_files)
	$(CC) -shared $(fPIC_flag) $(CFLAGS) -o $@ $^
# Remove all object files and the linked library.
# constants.bin is left alone; clean_constants handles it.
# The recipe line must begin with a TAB.
clean:
	rm -f *.obj test.so
# Remove the constants blob that constants.obj is built from.
# The recipe line must begin with a TAB and must not end in a dangling pipe.
clean_constants:
	rm -f constants.bin