# Builds test.so: each listed object is compiled from a same-named .cu file
# with nvcc (or wrapped from a same-named .bin file with ld), then all objects
# are linked into one shared library.
#
# User-tunable variables (override on the command line, e.g. `make CC=/path/nvcc`):
#   CC          compiler driver used for BOTH compiling and linking
#   CFLAGS      extra flags appended to the link command only
#   fPIC_flag   position-independent-code flag used for compiling and linking
#   cutlass_dir root of the CUTLASS checkout that provides the include paths

# Pattern rules are never chosen as the default goal, so `all` already was the
# default; stating it keeps that true if rules are reordered later.
.DEFAULT_GOAL := all

# Remove a half-written target when its recipe fails, so a truncated .obj or
# .so is never mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

CC = nvcc
CFLAGS =
fPIC_flag = -Xcompiler=-fPIC

obj_files = \
    reshape_0.obj \
    batch_gather_1.obj \
    batch_gather_3.obj \
    size_5.obj \
    fused_elementwise_376.obj \
    layernorm_7.obj \
    gemm_rcr_bias_permute_9.obj \
    split_11.obj \
    reshape_12.obj \
    mem_eff_attention_15.obj \
    gemm_rcr_bias_add_17.obj \
    layernorm_19.obj \
    gemm_rcr_bias_gelu_20.obj \
    gemm_rcr_bias_add_21.obj \
    constants.obj \
    model_container_base.obj \
    debug_utility.obj \
    model_container.obj \
    model_interface.obj \
    utility.obj

# Single point of truth for the (machine-specific) CUTLASS location.
cutlass_dir = /home/olab/kirstain/anaconda3/envs/ait/lib/python3.10/site-packages/aitemplate/3rdparty/cutlass

# Include search paths, in the same order as the original command line.
include_flags = \
    -I$(cutlass_dir)/include \
    -I$(cutlass_dir)/tools/util/include \
    -I$(cutlass_dir)/examples/35_gemm_softmax \
    -I$(cutlass_dir)/examples/41_fused_multi_head_attention \
    -I$(cutlass_dir)/examples/45_dual_gemm \
    -I$(cutlass_dir)/../../backend/cuda/attention/src/./ \
    -I$(cutlass_dir)/../../backend/cuda/attention/src/fmha

# Full compile flag set, in the original order. The -gencode argument is
# single-quoted so the shell can never treat "[...]" as a glob bracket
# expression; nvcc receives exactly the same text as before.
compile_flags = \
    -DCUTLASS_ENABLE_TENSOR_CORE_MMA=1 \
    -DCUTLASS_USE_TANH_FOR_SIGMOID=1 \
    -w \
    '-gencode=arch=compute_80,code=[sm_80,compute_80]' \
    $(fPIC_flag) \
    -Xcompiler=-Wconversion \
    -Xcompiler=-fno-strict-aliasing \
    -Xcompiler -fvisibility=hidden \
    -O3 \
    -std=c++17 \
    --expt-relaxed-constexpr \
    --use_fast_math \
    $(include_flags) \
    -DNDEBUG

.PHONY: all clean clean_constants

# Default goal: build the shared library.
all: test.so

# Compile one CUDA source into an object file. Uses $(CC) and $(fPIC_flag) so
# that overriding either affects compiling and linking consistently.
%.obj : %.cu
	$(CC) $(compile_flags) -c -o $@ $<

# Wrap a raw binary file into a relocatable object with ld.
# NOTE(review): constants.obj presumably comes from constants.bin through this
# rule (no constants.cu is visible here) - confirm against the generator.
%.obj : %.bin
	ld -r -b binary -o $@ $<

# Link every object into the shared library ($^ expands to $(obj_files)).
test.so: $(obj_files)
	$(CC) -shared $(fPIC_flag) $(CFLAGS) -o $@ $^

# Remove build products; constants.bin is intentionally left in place.
clean:
	rm -f *.obj test.so

# Remove the constants blob that feeds the %.bin rule.
clean_constants:
	rm -f constants.bin