File size: 1,882 Bytes
17db41a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

# Toolchain driver invoked as $(CC) by the rules below.
CC := nvcc
# Extra user-supplied flags for the test.so link step; empty by default.
CFLAGS :=
# Forwards -fPIC to the host compiler through nvcc's -Xcompiler option.
fPIC_flag := -Xcompiler=-fPIC

# Objects linked into test.so, listed in link order. Each one is expected to
# be produced by one of the %.obj pattern rules in this file.
obj_files := \
  reshape_0.obj \
  batch_gather_1.obj \
  batch_gather_3.obj \
  size_5.obj \
  fused_elementwise_376.obj \
  layernorm_7.obj \
  gemm_rcr_bias_permute_9.obj \
  split_11.obj \
  reshape_12.obj \
  mem_eff_attention_15.obj \
  gemm_rcr_bias_add_17.obj \
  layernorm_19.obj \
  gemm_rcr_bias_gelu_20.obj \
  gemm_rcr_bias_add_21.obj \
  constants.obj \
  model_container_base.obj \
  debug_utility.obj \
  model_container.obj \
  model_interface.obj \
  utility.obj

# Root of the CUTLASS tree located under the aitemplate package directory;
# every include path below is derived from it. Override on the command line
# (make cutlass_dir=/other/path) when the package is installed elsewhere.
cutlass_dir := /home/olab/kirstain/anaconda3/envs/ait/lib/python3.10/site-packages/aitemplate/3rdparty/cutlass

# Flags for compiling one CUDA source file. The order matches the original
# single-line command, so the default invocation is unchanged. $(fPIC_flag)
# is reused here so compile and link steps agree on the PIC setting.
nvcc_flags := \
  -DCUTLASS_ENABLE_TENSOR_CORE_MMA=1 \
  -DCUTLASS_USE_TANH_FOR_SIGMOID=1 \
  -w \
  -gencode=arch=compute_80,code=[sm_80,compute_80] \
  $(fPIC_flag) \
  -Xcompiler=-Wconversion \
  -Xcompiler=-fno-strict-aliasing \
  -Xcompiler -fvisibility=hidden \
  -O3 \
  -std=c++17 \
  --expt-relaxed-constexpr \
  --use_fast_math \
  -I$(cutlass_dir)/include \
  -I$(cutlass_dir)/tools/util/include \
  -I$(cutlass_dir)/examples/35_gemm_softmax \
  -I$(cutlass_dir)/examples/41_fused_multi_head_attention \
  -I$(cutlass_dir)/examples/45_dual_gemm \
  -I$(cutlass_dir)/../../backend/cuda/attention/src/./ \
  -I$(cutlass_dir)/../../backend/cuda/attention/src/fmha \
  -DNDEBUG

# Compile a CUDA source ($<) into the matching object ($@). Uses $(CC), the
# same driver as the test.so link rule, so `make CC=...` affects both steps.
%.obj : %.cu
	$(CC) $(nvcc_flags) -c -o $@ $<
# Wrap a raw binary file ($<) in a relocatable object ($@) so it can be linked
# alongside the compiled objects. `-b binary` must stay ahead of the input
# file, because it sets the format of the inputs that follow it.
# NOTE(review): presumably this is how constants.obj is produced from
# constants.bin -- confirm against the generator of this Makefile.
%.obj: %.bin
	ld -r -b binary $< -o $@

# Command-style targets: these names never correspond to files on disk.
.PHONY: all
.PHONY: clean
.PHONY: clean_constants

# Building `all` means building the shared library.
all: test.so

# Link every object into the shared library. $^ expands to the prerequisite
# list of this rule, which is exactly $(obj_files).
test.so: $(obj_files)
	$(CC) -shared $(fPIC_flag) $(CFLAGS) -o $@ $^



# Remove build outputs: the shared library and every .obj file in the current
# directory. constants.bin is not touched by this target.
clean:
	rm -f test.so *.obj

# Remove constants.bin, which `clean` leaves in place.
clean_constants:
	rm -f constants.bin