# Builds the vector_seq benchmark: one default binary plus three sweeps of
# compile-time variants that differ only in the -D macros handed to nvcc.
#
#   make                 build every binary (default goal: all)
#   make vector_seq_8    build a single variant
#   make -j8             build the variants in parallel
#   make DEF=-DFOO       append extra flags to every nvcc command line
#   make clean           remove objects and all vector_seq binaries

include ../../../common/make.config

# CUDA_DIR, CUPTI_INCLUDE, CUPTI_LIB_DIR and CUPTI_ADD_COMMON are not assigned
# in this file; presumably they come from make.config -- TODO confirm.
NVCC = $(CUDA_DIR)/bin/nvcc
NVCCCFLAGS = -I$(CUPTI_INCLUDE) -L$(CUPTI_LIB_DIR) -std=c++11 -lcuda -lcupti -arch=sm_80 -O3

# Extra flags placed after the sources on every nvcc command line.
# Empty unless set by the caller, the environment or make.config.
DEF ?=

# Default binary: compiled without any sweep macros.
EXECUTABLE := vector_seq

# Sources shared by every binary; each variant recompiles all of them.
CUFILES := vector_seq.cu $(CUPTI_ADD_COMMON)/cupti_add.cpp $(CUPTI_ADD_COMMON)/cpu_timestamps.cpp

# Sweep 1: vector_seq_<NBLOCK>_256
#   NBLOCK varies; DIM_THREAD_BLOCK=256 and BATCH_SIZE=16 are fixed.
NBLOCK_SWEEP := 4096 2048 1024 512 256 128 64 32 16
NBLOCK_BINS  := $(NBLOCK_SWEEP:%=vector_seq_%_256)

# Sweep 2: vector_seq_<DIM_THREAD_BLOCK>_<BATCH_SIZE>
#   NBLOCK=64 is fixed; each entry is a <DIM_THREAD_BLOCK>_<BATCH_SIZE> pair.
SHAPE_SWEEP := 1024_4 512_8 256_16 128_32 64_64 32_128
SHAPE_BINS  := $(SHAPE_SWEEP:%=vector_seq_%)

# Sweep 3: vector_seq_<BATCH_SIZE>
#   DIM_THREAD_BLOCK=128 is fixed; NBLOCK is not passed, so whatever
#   vector_seq.cu does without it applies (not visible from this file).
BATCH_SWEEP := 2 4 8 16 32 64 128
BATCH_BINS  := $(BATCH_SWEEP:%=vector_seq_%)

# Prerequisite order matches the original build order under serial make.
BINARIES := $(EXECUTABLE) $(NBLOCK_BINS) $(SHAPE_BINS) $(BATCH_BINS)

# The binaries are deliberately phony so that every invocation recompiles
# them, exactly as the original single-recipe `all` did: neither DEF nor any
# header the sources may include is tracked, so a timestamp check could
# leave stale binaries behind.
.PHONY: all clean $(BINARIES)

all: $(BINARIES)

$(EXECUTABLE):
	$(NVCC) $(NVCCCFLAGS) $(CUFILES) $(DEF) -o $@

# In the static pattern rules below, $* is the part of the target name matched
# by %, i.e. the sweep entry the binary was generated from.

$(NBLOCK_BINS): vector_seq_%_256:
	$(NVCC) $(NVCCCFLAGS) $(CUFILES) -DNBLOCK=$* -DDIM_THREAD_BLOCK=256 -DBATCH_SIZE=16 $(DEF) -o $@

# $* is "<DIM_THREAD_BLOCK>_<BATCH_SIZE>"; split it on the underscore.
$(SHAPE_BINS): vector_seq_%:
	$(NVCC) $(NVCCCFLAGS) $(CUFILES) -DNBLOCK=64 -DDIM_THREAD_BLOCK=$(word 1,$(subst _, ,$*)) -DBATCH_SIZE=$(word 2,$(subst _, ,$*)) $(DEF) -o $@

$(BATCH_BINS): vector_seq_%:
	$(NVCC) $(NVCCCFLAGS) $(CUFILES) -DDIM_THREAD_BLOCK=128 -DBATCH_SIZE=$* $(DEF) -o $@

# The glob also removes variant binaries left over from older sweeps.
clean:
	rm -f *.o vector_seq vector_seq_*