# lrh12580
# first commit
# 5cb6c4b
include ../../../common/make.config
# ---------------------------------------------------------------------------
# Toolchain
#
# CUDA_DIR, CUPTI_INCLUDE, CUPTI_LIB_DIR and CUPTI_ADD_COMMON are not defined
# in this file; presumably they are provided by the included make.config
# (TODO confirm).
# ---------------------------------------------------------------------------
NVCC = $(CUDA_DIR)/bin/nvcc

# One flag string shared by every build: CUPTI include/library search paths,
# C++11, linking against libcuda and libcupti, sm_80 code generation, -O3.
NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3

# ---------------------------------------------------------------------------
# Variants
#
# Every variant compiles the same three translation units; only the -D
# overrides differ. CUFILES_<tag> holds "sources + -D flags" for a variant
# and EXECUTABLE_<tag> names the binary it is linked into.
# ---------------------------------------------------------------------------
vector_seq_srcs := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp

# Baseline: no -D overrides are passed.
EXECUTABLE := vector_seq
CUFILES := $(vector_seq_srcs)

# Sweep over NBLOCK with DIM_THREAD_BLOCK=256 and BATCH_SIZE=16 held fixed.
# Tag layout: <NBLOCK>_<DIM_THREAD_BLOCK>.
EXECUTABLE_4096_256 := vector_seq_4096_256
CUFILES_4096_256 := $(vector_seq_srcs) -DNBLOCK=4096 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16

EXECUTABLE_2048_256 := vector_seq_2048_256
CUFILES_2048_256 := $(vector_seq_srcs) -DNBLOCK=2048 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16

EXECUTABLE_1024_256 := vector_seq_1024_256
CUFILES_1024_256 := $(vector_seq_srcs) -DNBLOCK=1024 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16

EXECUTABLE_512_256 := vector_seq_512_256
CUFILES_512_256 := $(vector_seq_srcs) -DNBLOCK=512 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16

EXECUTABLE_256_256 := vector_seq_256_256
CUFILES_256_256 := $(vector_seq_srcs) -DNBLOCK=256 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16

EXECUTABLE_128_256 := vector_seq_128_256
CUFILES_128_256 := $(vector_seq_srcs) -DNBLOCK=128 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16

EXECUTABLE_64_256 := vector_seq_64_256
CUFILES_64_256 := $(vector_seq_srcs) -DNBLOCK=64 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16

EXECUTABLE_32_256 := vector_seq_32_256
CUFILES_32_256 := $(vector_seq_srcs) -DNBLOCK=32 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16

EXECUTABLE_16_256 := vector_seq_16_256
CUFILES_16_256 := $(vector_seq_srcs) -DNBLOCK=16 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16

# Sweep over DIM_THREAD_BLOCK and BATCH_SIZE together with NBLOCK=64 held
# fixed; DIM_THREAD_BLOCK * BATCH_SIZE is 4096 in every entry.
# Tag layout: <DIM_THREAD_BLOCK>_<BATCH_SIZE>.
# Note: the 256_16 entry passes exactly the same -D values as 64_256 above.
EXECUTABLE_1024_4 := vector_seq_1024_4
CUFILES_1024_4 := $(vector_seq_srcs) -DNBLOCK=64 -DDIM_THREAD_BLOCK=1024 -DBATCH_SIZE=4

EXECUTABLE_512_8 := vector_seq_512_8
CUFILES_512_8 := $(vector_seq_srcs) -DNBLOCK=64 -DDIM_THREAD_BLOCK=512 -DBATCH_SIZE=8

EXECUTABLE_256_16 := vector_seq_256_16
CUFILES_256_16 := $(vector_seq_srcs) -DNBLOCK=64 -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16

EXECUTABLE_128_32 := vector_seq_128_32
CUFILES_128_32 := $(vector_seq_srcs) -DNBLOCK=64 -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=32

EXECUTABLE_64_64 := vector_seq_64_64
CUFILES_64_64 := $(vector_seq_srcs) -DNBLOCK=64 -DDIM_THREAD_BLOCK=64 -DBATCH_SIZE=64

EXECUTABLE_32_128 := vector_seq_32_128
CUFILES_32_128 := $(vector_seq_srcs) -DNBLOCK=64 -DDIM_THREAD_BLOCK=32 -DBATCH_SIZE=128

# Sweep over BATCH_SIZE with DIM_THREAD_BLOCK=128 held fixed. NBLOCK is not
# passed for these builds.
# Tag layout: <BATCH_SIZE>.
EXECUTABLE_2 := vector_seq_2
CUFILES_2 := $(vector_seq_srcs) -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=2

EXECUTABLE_4 := vector_seq_4
CUFILES_4 := $(vector_seq_srcs) -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=4

EXECUTABLE_8 := vector_seq_8
CUFILES_8 := $(vector_seq_srcs) -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=8

EXECUTABLE_16 := vector_seq_16
CUFILES_16 := $(vector_seq_srcs) -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=16

EXECUTABLE_32 := vector_seq_32
CUFILES_32 := $(vector_seq_srcs) -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=32

EXECUTABLE_64 := vector_seq_64
CUFILES_64 := $(vector_seq_srcs) -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=64

EXECUTABLE_128 := vector_seq_128
CUFILES_128 := $(vector_seq_srcs) -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=128
# Build the baseline binary and every parameter-sweep variant.
#
# Each line is one complete nvcc compile-and-link: the variant's CUFILES_<tag>
# value (sources plus its -D overrides) is compiled into EXECUTABLE_<tag>.
# The commands run in the order listed and make stops at the first failure.
#
# DEF is not assigned in the visible part of this file; when the caller
# supplies it (for example on the make command line) it is appended to every
# nvcc invocation.
#
# Declared phony because `all` names a command, not a file: without this a
# file called "all" in the directory would make the build a silent no-op.
.PHONY: all
all:
	$(NVCC) $(NVCCCFLAGS) $(CUFILES) $(DEF) -o $(EXECUTABLE)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_4096_256) $(DEF) -o $(EXECUTABLE_4096_256)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_2048_256) $(DEF) -o $(EXECUTABLE_2048_256)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_1024_256) $(DEF) -o $(EXECUTABLE_1024_256)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_512_256) $(DEF) -o $(EXECUTABLE_512_256)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_256_256) $(DEF) -o $(EXECUTABLE_256_256)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_128_256) $(DEF) -o $(EXECUTABLE_128_256)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_64_256) $(DEF) -o $(EXECUTABLE_64_256)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_32_256) $(DEF) -o $(EXECUTABLE_32_256)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_16_256) $(DEF) -o $(EXECUTABLE_16_256)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_1024_4) $(DEF) -o $(EXECUTABLE_1024_4)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_512_8) $(DEF) -o $(EXECUTABLE_512_8)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_256_16) $(DEF) -o $(EXECUTABLE_256_16)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_128_32) $(DEF) -o $(EXECUTABLE_128_32)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_64_64) $(DEF) -o $(EXECUTABLE_64_64)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_32_128) $(DEF) -o $(EXECUTABLE_32_128)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_2) $(DEF) -o $(EXECUTABLE_2)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_4) $(DEF) -o $(EXECUTABLE_4)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_8) $(DEF) -o $(EXECUTABLE_8)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_16) $(DEF) -o $(EXECUTABLE_16)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_32) $(DEF) -o $(EXECUTABLE_32)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_64) $(DEF) -o $(EXECUTABLE_64)
	$(NVCC) $(NVCCCFLAGS) $(CUFILES_128) $(DEF) -o $(EXECUTABLE_128)
# Remove object files, the baseline binary and every vector_seq_<tag> binary.
#
# Declared phony so that a stray file named "clean" cannot make this target
# look up to date and silently skip the removal.
#
# Caution: the vector_seq_* glob matches any file in this directory with that
# prefix, not only the binaries built by `all`.
.PHONY: clean
clean:
	rm -f *.o vector_seq vector_seq_*