# Builds the vector_seq benchmark in its default configuration plus a family
# of variants that differ only in the -D macros handed to nvcc.
#
# Usage:
#   make                  build every variant (each binary is an independent
#                         target, so `make -j` compiles them in parallel)
#   make vector_seq_64    build one variant only
#   make clean            remove the binaries listed in ALL_BINS (and *.o)
#
# Binaries are real file targets: they are rebuilt only when a source or a
# header found by HEADERS is newer.  Make cannot see a change to DEF or
# NVCCCFLAGS, so run `make -B` after changing either of them.

# CUDA_DIR, CUPTI_INCLUDE, CUPTI_LIB_DIR and CUPTI_ADD_COMMON are referenced
# below but never assigned in this file; presumably make.config (or the
# environment) provides them -- confirm against make.config.
include ../../../common/make.config

# Remove a partially written binary when its nvcc invocation fails, so it is
# never mistaken for an up-to-date target on the next run.
.DELETE_ON_ERROR:

.PHONY: all clean

NVCC = $(CUDA_DIR)/bin/nvcc
NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3

# Extra flags appended to every nvcc command line, e.g. `make DEF=-DFOO`.
DEF ?=

# Name of the binary built without any variant macros.
EXECUTABLE := vector_seq

# Every variant is compiled from the same three translation units.
CUFILES := vector_seq.cu \
           $(CUPTI_ADD_COMMON)/cupti_add.cpp \
           $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp

# Headers that exist next to the sources; used only as rebuild triggers.
# $(wildcard) yields nothing for patterns that match no file.
HEADERS := $(wildcard *.h *.cuh \
             $(CUPTI_ADD_COMMON)/*.h \
             $(CUPTI_ADD_COMMON)/*.hpp \
             $(CUPTI_ADD_COMMON)/*.cuh)

# --- Variant tables ---------------------------------------------------------

# vector_seq_<NBLOCK>_256: NBLOCK varies, DIM_THREAD_BLOCK=256, BATCH_SIZE=16.
NBLOCK_SWEEP := 4096 2048 1024 512 256 128 64 32 16

# vector_seq_<DIM_THREAD_BLOCK>_<BATCH_SIZE>: NBLOCK is fixed at 64.
# NOTE: vector_seq_256_16 receives exactly the same macros as
# vector_seq_64_256 from the sweep above; both names are kept because
# existing scripts may refer to either one.
SHAPE_SWEEP := 1024_4 512_8 256_16 128_32 64_64 32_128

# vector_seq_<BATCH_SIZE>: DIM_THREAD_BLOCK=128, and NBLOCK is not passed.
BATCH_SWEEP := 2 4 8 16 32 64 128

NBLOCK_BINS := $(NBLOCK_SWEEP:%=vector_seq_%_256)
SHAPE_BINS  := $(SHAPE_SWEEP:%=vector_seq_%)
BATCH_BINS  := $(BATCH_SWEEP:%=vector_seq_%)
ALL_BINS    := $(EXECUTABLE) $(NBLOCK_BINS) $(SHAPE_BINS) $(BATCH_BINS)

# --- Rules ------------------------------------------------------------------

# First rule in this file, hence the default goal.
all: $(ALL_BINS)

# Macros for the default binary: none.
VARIANT_DEFS :=

# Per-target macro sets.  These are recursively expanded (`=`) on purpose:
# $@ only has a value while the recipe of a concrete target is being expanded,
# which is when the numbers are recovered from the target name.
$(NBLOCK_BINS): VARIANT_DEFS = -DNBLOCK=$(patsubst vector_seq_%_256,%,$@) \
                               -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16

# "1024_4" -> "1024 4": word 1 is DIM_THREAD_BLOCK, word 2 is BATCH_SIZE.
$(SHAPE_BINS): VARIANT_DEFS = -DNBLOCK=64 \
    -DDIM_THREAD_BLOCK=$(word 1,$(subst _, ,$(patsubst vector_seq_%,%,$@))) \
    -DBATCH_SIZE=$(word 2,$(subst _, ,$(patsubst vector_seq_%,%,$@)))

$(BATCH_BINS): VARIANT_DEFS = -DDIM_THREAD_BLOCK=128 \
                              -DBATCH_SIZE=$(patsubst vector_seq_%,%,$@)

# One recipe for every binary.  The argument order (flags, sources, variant
# macros, DEF, output) is the same as in the hand-written commands this rule
# replaces.
$(ALL_BINS): $(CUFILES) $(HEADERS)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES) $(VARIANT_DEFS) $(DEF) -o $@

# Delete only what this Makefile can build, rather than a vector_seq_* glob
# that would also match unrelated files sharing the prefix.
clean:
	$(RM) *.o $(ALL_BINS)