[ { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/cc -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu11 -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wdouble-promotion -march=native -fopenmp -o CMakeFiles/ggml.dir/ggml.c.o -c /content/tmp/ggml/src/ggml.c", "file": "/content/tmp/ggml/src/ggml.c", "output": "ggml/src/CMakeFiles/ggml.dir/ggml.c.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/cc -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu11 -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wdouble-promotion -march=native -fopenmp -o CMakeFiles/ggml.dir/ggml-alloc.c.o -c /content/tmp/ggml/src/ggml-alloc.c", "file": "/content/tmp/ggml/src/ggml-alloc.c", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/cc -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu11 -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wdouble-promotion -march=native -fopenmp -o CMakeFiles/ggml.dir/ggml-backend.c.o -c /content/tmp/ggml/src/ggml-backend.c", "file": "/content/tmp/ggml/src/ggml-backend.c", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/cc -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu11 -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wdouble-promotion -march=native -fopenmp -o CMakeFiles/ggml.dir/ggml-quants.c.o -c /content/tmp/ggml/src/ggml-quants.c", "file": "/content/tmp/ggml/src/ggml-quants.c", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-quants.c.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/acc.cu -o CMakeFiles/ggml.dir/ggml-cuda/acc.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/acc.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/acc.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/add-id.cu -o CMakeFiles/ggml.dir/ggml-cuda/add-id.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/add-id.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/add-id.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/arange.cu -o CMakeFiles/ggml.dir/ggml-cuda/arange.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/arange.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/arange.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/argsort.cu -o CMakeFiles/ggml.dir/ggml-cuda/argsort.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/argsort.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/argsort.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/binbcast.cu -o CMakeFiles/ggml.dir/ggml-cuda/binbcast.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/binbcast.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/binbcast.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/clamp.cu -o CMakeFiles/ggml.dir/ggml-cuda/clamp.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/clamp.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/clamp.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/concat.cu -o CMakeFiles/ggml.dir/ggml-cuda/concat.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/concat.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/concat.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/conv-transpose-1d.cu -o CMakeFiles/ggml.dir/ggml-cuda/conv-transpose-1d.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/conv-transpose-1d.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/conv-transpose-1d.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/convert.cu -o CMakeFiles/ggml.dir/ggml-cuda/convert.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/convert.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/convert.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/cpy.cu -o CMakeFiles/ggml.dir/ggml-cuda/cpy.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/cpy.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/cpy.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/diagmask.cu -o CMakeFiles/ggml.dir/ggml-cuda/diagmask.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/diagmask.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/diagmask.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/dmmv.cu -o CMakeFiles/ggml.dir/ggml-cuda/dmmv.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/dmmv.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/dmmv.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/fattn-new-mma.cu -o CMakeFiles/ggml.dir/ggml-cuda/fattn-new-mma.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/fattn-new-mma.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/fattn-new-mma.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/fattn-tile-f16.cu -o CMakeFiles/ggml.dir/ggml-cuda/fattn-tile-f16.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/fattn-tile-f16.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/fattn-tile-f16.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/fattn-tile-f32.cu -o CMakeFiles/ggml.dir/ggml-cuda/fattn-tile-f32.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/fattn-tile-f32.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/fattn-tile-f32.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/fattn.cu -o CMakeFiles/ggml.dir/ggml-cuda/fattn.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/fattn.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/fattn.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/getrows.cu -o CMakeFiles/ggml.dir/ggml-cuda/getrows.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/getrows.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/getrows.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/im2col.cu -o CMakeFiles/ggml.dir/ggml-cuda/im2col.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/im2col.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/im2col.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/iqk_mmvq.cu -o CMakeFiles/ggml.dir/ggml-cuda/iqk_mmvq.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/iqk_mmvq.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/iqk_mmvq.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/mmq.cu -o CMakeFiles/ggml.dir/ggml-cuda/mmq.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/mmq.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/mmq.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/mmvq.cu -o CMakeFiles/ggml.dir/ggml-cuda/mmvq.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/mmvq.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/mmvq.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/norm.cu -o CMakeFiles/ggml.dir/ggml-cuda/norm.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/norm.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/norm.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/pad.cu -o CMakeFiles/ggml.dir/ggml-cuda/pad.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/pad.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/pad.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/pool2d.cu -o CMakeFiles/ggml.dir/ggml-cuda/pool2d.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/pool2d.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/pool2d.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/quantize.cu -o CMakeFiles/ggml.dir/ggml-cuda/quantize.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/quantize.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/quantize.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/rope.cu -o CMakeFiles/ggml.dir/ggml-cuda/rope.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/rope.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/rope.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/scale.cu -o CMakeFiles/ggml.dir/ggml-cuda/scale.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/scale.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/scale.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/softcap.cu -o CMakeFiles/ggml.dir/ggml-cuda/softcap.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/softcap.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/softcap.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/softmax.cu -o CMakeFiles/ggml.dir/ggml-cuda/softmax.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/softmax.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/softmax.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/sumrows.cu -o CMakeFiles/ggml.dir/ggml-cuda/sumrows.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/sumrows.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/sumrows.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/tsembd.cu -o CMakeFiles/ggml.dir/ggml-cuda/tsembd.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/tsembd.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/tsembd.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/unary.cu -o CMakeFiles/ggml.dir/ggml-cuda/unary.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/unary.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/unary.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/upscale.cu -o CMakeFiles/ggml.dir/ggml-cuda/upscale.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/upscale.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/upscale.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda.cu -o CMakeFiles/ggml.dir/ggml-cuda.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_kt.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq1_kt.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_kt.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq1_kt.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq1_s.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq1_s.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s_r4.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq1_s_r4.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s_r4.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq1_s_r4.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_k.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_k.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_k.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_k.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_k_r4.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_k_r4.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_k_r4.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_k_r4.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_kl.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_kl.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_kl.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_kl.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_ks.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_ks.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_ks.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_ks.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_kt.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_kt.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_kt.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_kt.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_s.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_s.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_k.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq3_k.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_k.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq3_k.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_k_r4.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq3_k_r4.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_k_r4.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq3_k_r4.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_ks.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq3_ks.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_ks.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq3_ks.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_kt.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq3_kt.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_kt.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq3_kt.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq3_s.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq3_s.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_k.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_k.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_k.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_k.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_k_r4.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_k_r4.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_k_r4.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_k_r4.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_ks.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_ks.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_ks.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_ks.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_ks_r4.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_ks_r4.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_ks_r4.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_ks_r4.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_kss.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_kss.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_kss.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_kss.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_kt.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_kt.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_kt.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_kt.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq5_k.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq5_k.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq5_k.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq5_k.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq5_k_r4.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq5_k_r4.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq5_k_r4.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq5_k_r4.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq5_ks.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq5_ks.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq5_ks.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq5_ks.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq5_ks_r4.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq5_ks_r4.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq5_ks_r4.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq5_ks_r4.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq6_k.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq6_k.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq6_k.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-iq6_k.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q2_k.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q2_k.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q3_k.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q3_k.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q4_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q4_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q4_1.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q4_1.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q4_k.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q4_k.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q5_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q5_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q5_1.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q5_1.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q5_k.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q5_k.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q6_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q6_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q6_k.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q6_k.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q8_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/mmq-instance-q8_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs192-q8_0-q8_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs192-q8_0-q8_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs192-q8_0-q8_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs192-q8_0-q8_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-q8_0-q8_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-q8_0-q8_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-q8_0-q8_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-q8_0-q8_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs192-q8_0-q8_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs192-q8_0-q8_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs192-q8_0-q8_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs192-q8_0-q8_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-q8_0-q8_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-q8_0-q8_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-q8_0-q8_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-q8_0-q8_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs192-f16-f16.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs192-f16-f16.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs192-f16-f16.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs192-f16-f16.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs192-f16-f16.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs192-f16-f16.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs192-f16-f16.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs192-f16-f16.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-iq4_nl.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-iq4_nl.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-iq4_nl.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-iq4_nl.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-iq4_nl.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-iq4_nl.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-iq4_nl.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-iq4_nl.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-iq4_nl-iq4_nl.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-iq4_nl-iq4_nl.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-iq4_nl-iq4_nl.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-iq4_nl-iq4_nl.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-iq4_nl-iq4_nl.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-iq4_nl-iq4_nl.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-iq4_nl-iq4_nl.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-iq4_nl-iq4_nl.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q6_0-q5_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q6_0-q5_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q6_0-q5_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q6_0-q5_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q6_0-q5_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q6_0-q5_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q6_0-q5_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q6_0-q5_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q6_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q6_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q6_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q6_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/local/cuda/bin/nvcc -forward-unknown-to-host-compiler -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=c++17 \"--generate-code=arch=compute_75,code=[compute_75,sm_75]\" -use_fast_math -extended-lambda -Xcompiler \"-Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -Wno-pedantic -march=native\" -x cu -c /content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q6_0.cu -o CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q6_0.cu.o", "file": "/content/tmp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q6_0.cu", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q6_0.cu.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/llamafile/sgemm.cpp.o -c /content/tmp/ggml/src/llamafile/sgemm.cpp", "file": "/content/tmp/ggml/src/llamafile/sgemm.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/llamafile/sgemm.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/iqk_mul_mat.cpp.o -c /content/tmp/ggml/src/iqk/iqk_mul_mat.cpp", "file": "/content/tmp/ggml/src/iqk/iqk_mul_mat.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/iqk_mul_mat.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/iqk_flash_attn.cpp.o -c /content/tmp/ggml/src/iqk/iqk_flash_attn.cpp", "file": "/content/tmp/ggml/src/iqk/iqk_flash_attn.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/iqk_flash_attn.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/fa/iqk_fa_576_512.cpp.o -c /content/tmp/ggml/src/iqk/fa/iqk_fa_576_512.cpp", "file": "/content/tmp/ggml/src/iqk/fa/iqk_fa_576_512.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/fa/iqk_fa_576_512.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/fa/iqk_fa_192_128.cpp.o -c /content/tmp/ggml/src/iqk/fa/iqk_fa_192_128.cpp", "file": "/content/tmp/ggml/src/iqk/fa/iqk_fa_192_128.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/fa/iqk_fa_192_128.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/fa/iqk_fa_256_256.cpp.o -c /content/tmp/ggml/src/iqk/fa/iqk_fa_256_256.cpp", "file": "/content/tmp/ggml/src/iqk/fa/iqk_fa_256_256.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/fa/iqk_fa_256_256.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/fa/iqk_fa_128_128.cpp.o -c /content/tmp/ggml/src/iqk/fa/iqk_fa_128_128.cpp", "file": "/content/tmp/ggml/src/iqk/fa/iqk_fa_128_128.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/fa/iqk_fa_128_128.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/fa/iqk_fa_96_96.cpp.o -c /content/tmp/ggml/src/iqk/fa/iqk_fa_96_96.cpp", "file": "/content/tmp/ggml/src/iqk/fa/iqk_fa_96_96.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/fa/iqk_fa_96_96.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/fa/iqk_fa_64_64.cpp.o -c /content/tmp/ggml/src/iqk/fa/iqk_fa_64_64.cpp", "file": "/content/tmp/ggml/src/iqk/fa/iqk_fa_64_64.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/fa/iqk_fa_64_64.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/iqk_gemm_floats.cpp.o -c /content/tmp/ggml/src/iqk/iqk_gemm_floats.cpp", "file": "/content/tmp/ggml/src/iqk/iqk_gemm_floats.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/iqk_gemm_floats.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/iqk_gemm_kquants.cpp.o -c /content/tmp/ggml/src/iqk/iqk_gemm_kquants.cpp", "file": "/content/tmp/ggml/src/iqk/iqk_gemm_kquants.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/iqk_gemm_kquants.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/iqk_gemm_ktquants.cpp.o -c /content/tmp/ggml/src/iqk/iqk_gemm_ktquants.cpp", "file": "/content/tmp/ggml/src/iqk/iqk_gemm_ktquants.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/iqk_gemm_ktquants.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/iqk_gemm_iquants.cpp.o -c /content/tmp/ggml/src/iqk/iqk_gemm_iquants.cpp", "file": "/content/tmp/ggml/src/iqk/iqk_gemm_iquants.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/iqk_gemm_iquants.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/iqk_gemm_iqk_quants.cpp.o -c /content/tmp/ggml/src/iqk/iqk_gemm_iqk_quants.cpp", "file": "/content/tmp/ggml/src/iqk/iqk_gemm_iqk_quants.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/iqk_gemm_iqk_quants.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/iqk_gemm_1bit.cpp.o -c /content/tmp/ggml/src/iqk/iqk_gemm_1bit.cpp", "file": "/content/tmp/ggml/src/iqk/iqk_gemm_1bit.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/iqk_gemm_1bit.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/iqk_gemm_legacy_quants.cpp.o -c /content/tmp/ggml/src/iqk/iqk_gemm_legacy_quants.cpp", "file": "/content/tmp/ggml/src/iqk/iqk_gemm_legacy_quants.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/iqk_gemm_legacy_quants.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/c++ -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu++17 -Wmissing-declarations -Wmissing-noreturn -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-array-bounds -Wno-format-truncation -Wextra-semi -march=native -fopenmp -o CMakeFiles/ggml.dir/iqk/iqk_quantize.cpp.o -c /content/tmp/ggml/src/iqk/iqk_quantize.cpp", "file": "/content/tmp/ggml/src/iqk/iqk_quantize.cpp", "output": "ggml/src/CMakeFiles/ggml.dir/iqk/iqk_quantize.cpp.o" }, { "directory": "/content/tmp/build/ggml/src", "command": "/usr/bin/cc -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MIN_BATCH_OFFLOAD=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_USE_GRAPHS -DGGML_IQK_FLASH_ATTENTION -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_CUDA -DGGML_USE_IQK_MULMAT -DGGML_USE_LLAMAFILE -DGGML_USE_OPENMP -DK_QUANTS_PER_ITERATION=2 -DNDEBUG -D_GNU_SOURCE -D_XOPEN_SOURCE=600 -I/content/tmp/ggml/src/../include -I/content/tmp/ggml/src/. -isystem /usr/local/cuda/targets/x86_64-linux/include -O3 -DNDEBUG -std=gnu11 -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wdouble-promotion -march=native -fopenmp -o CMakeFiles/ggml.dir/ggml-aarch64.c.o -c /content/tmp/ggml/src/ggml-aarch64.c", "file": "/content/tmp/ggml/src/ggml-aarch64.c", "output": "ggml/src/CMakeFiles/ggml.dir/ggml-aarch64.c.o" }, { "directory": "/content/tmp/build/src", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/src/../ggml/src -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama.dir/llama.cpp.o -c /content/tmp/src/llama.cpp", "file": "/content/tmp/src/llama.cpp", "output": "src/CMakeFiles/llama.dir/llama.cpp.o" }, { "directory": "/content/tmp/build/src", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/src/../ggml/src -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama.dir/llama-vocab.cpp.o -c /content/tmp/src/llama-vocab.cpp", "file": "/content/tmp/src/llama-vocab.cpp", "output": "src/CMakeFiles/llama.dir/llama-vocab.cpp.o" }, { "directory": "/content/tmp/build/src", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/src/../ggml/src -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama.dir/llama-grammar.cpp.o -c /content/tmp/src/llama-grammar.cpp", "file": "/content/tmp/src/llama-grammar.cpp", "output": "src/CMakeFiles/llama.dir/llama-grammar.cpp.o" }, { "directory": "/content/tmp/build/src", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/src/../ggml/src -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama.dir/llama-sampling.cpp.o -c /content/tmp/src/llama-sampling.cpp", "file": "/content/tmp/src/llama-sampling.cpp", "output": "src/CMakeFiles/llama.dir/llama-sampling.cpp.o" }, { "directory": "/content/tmp/build/src", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/src/../ggml/src -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama.dir/llama-mmap.cpp.o -c /content/tmp/src/llama-mmap.cpp", "file": "/content/tmp/src/llama-mmap.cpp", "output": "src/CMakeFiles/llama.dir/llama-mmap.cpp.o" }, { "directory": "/content/tmp/build/src", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/src/../ggml/src -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama.dir/llama-model-loader.cpp.o -c /content/tmp/src/llama-model-loader.cpp", "file": "/content/tmp/src/llama-model-loader.cpp", "output": "src/CMakeFiles/llama.dir/llama-model-loader.cpp.o" }, { "directory": "/content/tmp/build/src", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/src/../ggml/src -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama.dir/unicode.cpp.o -c /content/tmp/src/unicode.cpp", "file": "/content/tmp/src/unicode.cpp", "output": "src/CMakeFiles/llama.dir/unicode.cpp.o" }, { "directory": "/content/tmp/build/src", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/src/../ggml/src -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama.dir/unicode-data.cpp.o -c /content/tmp/src/unicode-data.cpp", "file": "/content/tmp/src/unicode-data.cpp", "output": "src/CMakeFiles/llama.dir/unicode-data.cpp.o" }, { "directory": "/content/tmp/build/common", "command": "/usr/bin/c++ -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/build_info.dir/build-info.cpp.o -c /content/tmp/common/build-info.cpp", "file": "/content/tmp/common/build-info.cpp", "output": "common/CMakeFiles/build_info.dir/build-info.cpp.o" }, { "directory": "/content/tmp/build/common", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/common.dir/common.cpp.o -c /content/tmp/common/common.cpp", "file": "/content/tmp/common/common.cpp", "output": "common/CMakeFiles/common.dir/common.cpp.o" }, { "directory": "/content/tmp/build/common", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/common.dir/chat.cpp.o -c /content/tmp/common/chat.cpp", "file": "/content/tmp/common/chat.cpp", "output": "common/CMakeFiles/common.dir/chat.cpp.o" }, { "directory": "/content/tmp/build/common", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/common.dir/chat-parser.cpp.o -c /content/tmp/common/chat-parser.cpp", "file": "/content/tmp/common/chat-parser.cpp", "output": "common/CMakeFiles/common.dir/chat-parser.cpp.o" }, { "directory": "/content/tmp/build/common", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/common.dir/json-partial.cpp.o -c /content/tmp/common/json-partial.cpp", "file": "/content/tmp/common/json-partial.cpp", "output": "common/CMakeFiles/common.dir/json-partial.cpp.o" }, { "directory": "/content/tmp/build/common", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/common.dir/regex-partial.cpp.o -c /content/tmp/common/regex-partial.cpp", "file": "/content/tmp/common/regex-partial.cpp", "output": "common/CMakeFiles/common.dir/regex-partial.cpp.o" }, { "directory": "/content/tmp/build/common", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/common.dir/sampling.cpp.o -c /content/tmp/common/sampling.cpp", "file": "/content/tmp/common/sampling.cpp", "output": "common/CMakeFiles/common.dir/sampling.cpp.o" }, { "directory": "/content/tmp/build/common", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/common.dir/console.cpp.o -c /content/tmp/common/console.cpp", "file": "/content/tmp/common/console.cpp", "output": "common/CMakeFiles/common.dir/console.cpp.o" }, { "directory": "/content/tmp/build/common", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/common.dir/grammar-parser.cpp.o -c /content/tmp/common/grammar-parser.cpp", "file": "/content/tmp/common/grammar-parser.cpp", "output": "common/CMakeFiles/common.dir/grammar-parser.cpp.o" }, { "directory": "/content/tmp/build/common", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/common.dir/json-schema-to-grammar.cpp.o -c /content/tmp/common/json-schema-to-grammar.cpp", "file": "/content/tmp/common/json-schema-to-grammar.cpp", "output": "common/CMakeFiles/common.dir/json-schema-to-grammar.cpp.o" }, { "directory": "/content/tmp/build/common", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/common.dir/train.cpp.o -c /content/tmp/common/train.cpp", "file": "/content/tmp/common/train.cpp", "output": "common/CMakeFiles/common.dir/train.cpp.o" }, { "directory": "/content/tmp/build/common", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/common.dir/ngram-cache.cpp.o -c /content/tmp/common/ngram-cache.cpp", "file": "/content/tmp/common/ngram-cache.cpp", "output": "common/CMakeFiles/common.dir/ngram-cache.cpp.o" }, { "directory": "/content/tmp/build/common", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/common.dir/speculative.cpp.o -c /content/tmp/common/speculative.cpp", "file": "/content/tmp/common/speculative.cpp", "output": "common/CMakeFiles/common.dir/speculative.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-tokenizer-0.dir/test-tokenizer-0.cpp.o -c /content/tmp/tests/test-tokenizer-0.cpp", "file": "/content/tmp/tests/test-tokenizer-0.cpp", "output": "tests/CMakeFiles/test-tokenizer-0.dir/test-tokenizer-0.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-tokenizer-1-bpe.dir/test-tokenizer-1-bpe.cpp.o -c /content/tmp/tests/test-tokenizer-1-bpe.cpp", "file": "/content/tmp/tests/test-tokenizer-1-bpe.cpp", "output": "tests/CMakeFiles/test-tokenizer-1-bpe.dir/test-tokenizer-1-bpe.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-tokenizer-1-spm.dir/test-tokenizer-1-spm.cpp.o -c /content/tmp/tests/test-tokenizer-1-spm.cpp", "file": "/content/tmp/tests/test-tokenizer-1-spm.cpp", "output": "tests/CMakeFiles/test-tokenizer-1-spm.dir/test-tokenizer-1-spm.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-quantize-fns.dir/test-quantize-fns.cpp.o -c /content/tmp/tests/test-quantize-fns.cpp", "file": "/content/tmp/tests/test-quantize-fns.cpp", "output": "tests/CMakeFiles/test-quantize-fns.dir/test-quantize-fns.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-quantize-fns.dir/get-model.cpp.o -c /content/tmp/tests/get-model.cpp", "file": "/content/tmp/tests/get-model.cpp", "output": "tests/CMakeFiles/test-quantize-fns.dir/get-model.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-quantize-perf.dir/test-quantize-perf.cpp.o -c /content/tmp/tests/test-quantize-perf.cpp", "file": "/content/tmp/tests/test-quantize-perf.cpp", "output": "tests/CMakeFiles/test-quantize-perf.dir/test-quantize-perf.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-quantize-perf.dir/get-model.cpp.o -c /content/tmp/tests/get-model.cpp", "file": "/content/tmp/tests/get-model.cpp", "output": "tests/CMakeFiles/test-quantize-perf.dir/get-model.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-sampling.dir/test-sampling.cpp.o -c /content/tmp/tests/test-sampling.cpp", "file": "/content/tmp/tests/test-sampling.cpp", "output": "tests/CMakeFiles/test-sampling.dir/test-sampling.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-sampling.dir/get-model.cpp.o -c /content/tmp/tests/get-model.cpp", "file": "/content/tmp/tests/get-model.cpp", "output": "tests/CMakeFiles/test-sampling.dir/get-model.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-chat-template.dir/test-chat-template.cpp.o -c /content/tmp/tests/test-chat-template.cpp", "file": "/content/tmp/tests/test-chat-template.cpp", "output": "tests/CMakeFiles/test-chat-template.dir/test-chat-template.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-chat-template.dir/get-model.cpp.o -c /content/tmp/tests/get-model.cpp", "file": "/content/tmp/tests/get-model.cpp", "output": "tests/CMakeFiles/test-chat-template.dir/get-model.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-grammar-parser.dir/test-grammar-parser.cpp.o -c /content/tmp/tests/test-grammar-parser.cpp", "file": "/content/tmp/tests/test-grammar-parser.cpp", "output": "tests/CMakeFiles/test-grammar-parser.dir/test-grammar-parser.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-grammar-parser.dir/get-model.cpp.o -c /content/tmp/tests/get-model.cpp", "file": "/content/tmp/tests/get-model.cpp", "output": "tests/CMakeFiles/test-grammar-parser.dir/get-model.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-llama-grammar.dir/test-llama-grammar.cpp.o -c /content/tmp/tests/test-llama-grammar.cpp", "file": "/content/tmp/tests/test-llama-grammar.cpp", "output": "tests/CMakeFiles/test-llama-grammar.dir/test-llama-grammar.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-llama-grammar.dir/get-model.cpp.o -c /content/tmp/tests/get-model.cpp", "file": "/content/tmp/tests/get-model.cpp", "output": "tests/CMakeFiles/test-llama-grammar.dir/get-model.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-grammar-integration.dir/test-grammar-integration.cpp.o -c /content/tmp/tests/test-grammar-integration.cpp", "file": "/content/tmp/tests/test-grammar-integration.cpp", "output": "tests/CMakeFiles/test-grammar-integration.dir/test-grammar-integration.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-grammar-integration.dir/get-model.cpp.o -c /content/tmp/tests/get-model.cpp", "file": "/content/tmp/tests/get-model.cpp", "output": "tests/CMakeFiles/test-grammar-integration.dir/get-model.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-grad0.dir/test-grad0.cpp.o -c /content/tmp/tests/test-grad0.cpp", "file": "/content/tmp/tests/test-grad0.cpp", "output": "tests/CMakeFiles/test-grad0.dir/test-grad0.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-grad0.dir/get-model.cpp.o -c /content/tmp/tests/get-model.cpp", "file": "/content/tmp/tests/get-model.cpp", "output": "tests/CMakeFiles/test-grad0.dir/get-model.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-backend-ops.dir/test-backend-ops.cpp.o -c /content/tmp/tests/test-backend-ops.cpp", "file": "/content/tmp/tests/test-backend-ops.cpp", "output": "tests/CMakeFiles/test-backend-ops.dir/test-backend-ops.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-backend-ops.dir/get-model.cpp.o -c /content/tmp/tests/get-model.cpp", "file": "/content/tmp/tests/get-model.cpp", "output": "tests/CMakeFiles/test-backend-ops.dir/get-model.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-rope.dir/test-rope.cpp.o -c /content/tmp/tests/test-rope.cpp", "file": "/content/tmp/tests/test-rope.cpp", "output": "tests/CMakeFiles/test-rope.dir/test-rope.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-rope.dir/get-model.cpp.o -c /content/tmp/tests/get-model.cpp", "file": "/content/tmp/tests/get-model.cpp", "output": "tests/CMakeFiles/test-rope.dir/get-model.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-model-load-cancel.dir/test-model-load-cancel.cpp.o -c /content/tmp/tests/test-model-load-cancel.cpp", "file": "/content/tmp/tests/test-model-load-cancel.cpp", "output": "tests/CMakeFiles/test-model-load-cancel.dir/test-model-load-cancel.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-model-load-cancel.dir/get-model.cpp.o -c /content/tmp/tests/get-model.cpp", "file": "/content/tmp/tests/get-model.cpp", "output": "tests/CMakeFiles/test-model-load-cancel.dir/get-model.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-autorelease.dir/test-autorelease.cpp.o -c /content/tmp/tests/test-autorelease.cpp", "file": "/content/tmp/tests/test-autorelease.cpp", "output": "tests/CMakeFiles/test-autorelease.dir/test-autorelease.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-autorelease.dir/get-model.cpp.o -c /content/tmp/tests/get-model.cpp", "file": "/content/tmp/tests/get-model.cpp", "output": "tests/CMakeFiles/test-autorelease.dir/get-model.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/tests/../examples/server -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-json-schema-to-grammar.dir/test-json-schema-to-grammar.cpp.o -c /content/tmp/tests/test-json-schema-to-grammar.cpp", "file": "/content/tmp/tests/test-json-schema-to-grammar.cpp", "output": "tests/CMakeFiles/test-json-schema-to-grammar.dir/test-json-schema-to-grammar.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/tests/../examples/server -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-json-schema-to-grammar.dir/get-model.cpp.o -c /content/tmp/tests/get-model.cpp", "file": "/content/tmp/tests/get-model.cpp", "output": "tests/CMakeFiles/test-json-schema-to-grammar.dir/get-model.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/tests/../examples/server -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-function-calls.dir/test-function-calls.cpp.o -c /content/tmp/tests/test-function-calls.cpp", "file": "/content/tmp/tests/test-function-calls.cpp", "output": "tests/CMakeFiles/test-function-calls.dir/test-function-calls.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/tests/../examples/server -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/test-function-calls.dir/get-model.cpp.o -c /content/tmp/tests/get-model.cpp", "file": "/content/tmp/tests/get-model.cpp", "output": "tests/CMakeFiles/test-function-calls.dir/get-model.cpp.o" }, { "directory": "/content/tmp/build/tests", "command": "/usr/bin/cc -DGGML_USE_CUDA -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -o CMakeFiles/test-c.dir/test-c.c.o -c /content/tmp/tests/test-c.c", "file": "/content/tmp/tests/test-c.c", "output": "tests/CMakeFiles/test-c.dir/test-c.c.o" }, { "directory": "/content/tmp/build/examples/cvector-generator", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-cvector-generator.dir/cvector-generator.cpp.o -c /content/tmp/examples/cvector-generator/cvector-generator.cpp", "file": "/content/tmp/examples/cvector-generator/cvector-generator.cpp", "output": "examples/cvector-generator/CMakeFiles/llama-cvector-generator.dir/cvector-generator.cpp.o" }, { "directory": "/content/tmp/build/examples/baby-llama", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-baby-llama.dir/baby-llama.cpp.o -c /content/tmp/examples/baby-llama/baby-llama.cpp", "file": "/content/tmp/examples/baby-llama/baby-llama.cpp", "output": "examples/baby-llama/CMakeFiles/llama-baby-llama.dir/baby-llama.cpp.o" }, { "directory": "/content/tmp/build/examples/batched-bench", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-batched-bench.dir/batched-bench.cpp.o -c /content/tmp/examples/batched-bench/batched-bench.cpp", "file": "/content/tmp/examples/batched-bench/batched-bench.cpp", "output": "examples/batched-bench/CMakeFiles/llama-batched-bench.dir/batched-bench.cpp.o" }, { "directory": "/content/tmp/build/examples/batched", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-batched.dir/batched.cpp.o -c /content/tmp/examples/batched/batched.cpp", "file": "/content/tmp/examples/batched/batched.cpp", "output": "examples/batched/CMakeFiles/llama-batched.dir/batched.cpp.o" }, { "directory": "/content/tmp/build/examples/benchmark", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/examples/benchmark/../../common -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-bench-matmult.dir/benchmark-matmult.cpp.o -c /content/tmp/examples/benchmark/benchmark-matmult.cpp", "file": "/content/tmp/examples/benchmark/benchmark-matmult.cpp", "output": "examples/benchmark/CMakeFiles/llama-bench-matmult.dir/benchmark-matmult.cpp.o" }, { "directory": "/content/tmp/build/examples/convert-llama2c-to-ggml", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-convert-llama2c-to-ggml.dir/convert-llama2c-to-ggml.cpp.o -c /content/tmp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp", "file": "/content/tmp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp", "output": "examples/convert-llama2c-to-ggml/CMakeFiles/llama-convert-llama2c-to-ggml.dir/convert-llama2c-to-ggml.cpp.o" }, { "directory": "/content/tmp/build/examples/embedding", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-embedding.dir/embedding.cpp.o -c /content/tmp/examples/embedding/embedding.cpp", "file": "/content/tmp/examples/embedding/embedding.cpp", "output": "examples/embedding/CMakeFiles/llama-embedding.dir/embedding.cpp.o" }, { "directory": "/content/tmp/build/examples/eval-callback", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-eval-callback.dir/eval-callback.cpp.o -c /content/tmp/examples/eval-callback/eval-callback.cpp", "file": "/content/tmp/examples/eval-callback/eval-callback.cpp", "output": "examples/eval-callback/CMakeFiles/llama-eval-callback.dir/eval-callback.cpp.o" }, { "directory": "/content/tmp/build/examples/export-lora", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-export-lora.dir/export-lora.cpp.o -c /content/tmp/examples/export-lora/export-lora.cpp", "file": "/content/tmp/examples/export-lora/export-lora.cpp", "output": "examples/export-lora/CMakeFiles/llama-export-lora.dir/export-lora.cpp.o" }, { "directory": "/content/tmp/build/examples/gbnf-validator", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-gbnf-validator.dir/gbnf-validator.cpp.o -c /content/tmp/examples/gbnf-validator/gbnf-validator.cpp", "file": "/content/tmp/examples/gbnf-validator/gbnf-validator.cpp", "output": "examples/gbnf-validator/CMakeFiles/llama-gbnf-validator.dir/gbnf-validator.cpp.o" }, { "directory": "/content/tmp/build/examples/gguf-hash", "command": "/usr/bin/cc -I/content/tmp/examples -I/content/tmp/examples/gguf-hash/deps -O3 -DNDEBUG -o CMakeFiles/xxhash.dir/deps/xxhash/xxhash.c.o -c /content/tmp/examples/gguf-hash/deps/xxhash/xxhash.c", "file": "/content/tmp/examples/gguf-hash/deps/xxhash/xxhash.c", "output": "examples/gguf-hash/CMakeFiles/xxhash.dir/deps/xxhash/xxhash.c.o" }, { "directory": "/content/tmp/build/examples/gguf-hash", "command": "/usr/bin/cc -I/content/tmp/examples -I/content/tmp/examples/gguf-hash/deps -O3 -DNDEBUG -o CMakeFiles/sha1.dir/deps/sha1/sha1.c.o -c /content/tmp/examples/gguf-hash/deps/sha1/sha1.c", "file": "/content/tmp/examples/gguf-hash/deps/sha1/sha1.c", "output": "examples/gguf-hash/CMakeFiles/sha1.dir/deps/sha1/sha1.c.o" }, { "directory": "/content/tmp/build/examples/gguf-hash", "command": "/usr/bin/cc -I/content/tmp/examples -I/content/tmp/examples/gguf-hash/deps -O3 -DNDEBUG -o CMakeFiles/sha256.dir/deps/sha256/sha256.c.o -c /content/tmp/examples/gguf-hash/deps/sha256/sha256.c", "file": "/content/tmp/examples/gguf-hash/deps/sha256/sha256.c", "output": "examples/gguf-hash/CMakeFiles/sha256.dir/deps/sha256/sha256.c.o" }, { "directory": "/content/tmp/build/examples/gguf-hash", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/examples/gguf-hash/deps -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-gguf-hash.dir/gguf-hash.cpp.o -c /content/tmp/examples/gguf-hash/gguf-hash.cpp", "file": "/content/tmp/examples/gguf-hash/gguf-hash.cpp", "output": "examples/gguf-hash/CMakeFiles/llama-gguf-hash.dir/gguf-hash.cpp.o" }, { "directory": "/content/tmp/build/examples/gguf-split", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-gguf-split.dir/gguf-split.cpp.o -c /content/tmp/examples/gguf-split/gguf-split.cpp", "file": "/content/tmp/examples/gguf-split/gguf-split.cpp", "output": "examples/gguf-split/CMakeFiles/llama-gguf-split.dir/gguf-split.cpp.o" }, { "directory": "/content/tmp/build/examples/gguf", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-gguf.dir/gguf.cpp.o -c /content/tmp/examples/gguf/gguf.cpp", "file": "/content/tmp/examples/gguf/gguf.cpp", "output": "examples/gguf/CMakeFiles/llama-gguf.dir/gguf.cpp.o" }, { "directory": "/content/tmp/build/examples/gritlm", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-gritlm.dir/gritlm.cpp.o -c /content/tmp/examples/gritlm/gritlm.cpp", "file": "/content/tmp/examples/gritlm/gritlm.cpp", "output": "examples/gritlm/CMakeFiles/llama-gritlm.dir/gritlm.cpp.o" }, { "directory": "/content/tmp/build/examples/imatrix", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-imatrix.dir/imatrix.cpp.o -c /content/tmp/examples/imatrix/imatrix.cpp", "file": "/content/tmp/examples/imatrix/imatrix.cpp", "output": "examples/imatrix/CMakeFiles/llama-imatrix.dir/imatrix.cpp.o" }, { "directory": "/content/tmp/build/examples/infill", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-infill.dir/infill.cpp.o -c /content/tmp/examples/infill/infill.cpp", "file": "/content/tmp/examples/infill/infill.cpp", "output": "examples/infill/CMakeFiles/llama-infill.dir/infill.cpp.o" }, { "directory": "/content/tmp/build/examples/llama-bench", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-bench.dir/llama-bench.cpp.o -c /content/tmp/examples/llama-bench/llama-bench.cpp", "file": "/content/tmp/examples/llama-bench/llama-bench.cpp", "output": "examples/llama-bench/CMakeFiles/llama-bench.dir/llama-bench.cpp.o" }, { "directory": "/content/tmp/build/examples/llava", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/examples/llava/. -I/content/tmp/examples/llava/../.. -I/content/tmp/examples/llava/../../common -I/content/tmp/ggml/src/../include -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -O3 -DNDEBUG -std=gnu++17 -Wno-cast-qual -o CMakeFiles/llava.dir/llava.cpp.o -c /content/tmp/examples/llava/llava.cpp", "file": "/content/tmp/examples/llava/llava.cpp", "output": "examples/llava/CMakeFiles/llava.dir/llava.cpp.o" }, { "directory": "/content/tmp/build/examples/llava", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/examples/llava/. -I/content/tmp/examples/llava/../.. -I/content/tmp/examples/llava/../../common -I/content/tmp/ggml/src/../include -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -O3 -DNDEBUG -std=gnu++17 -Wno-cast-qual -o CMakeFiles/llava.dir/clip.cpp.o -c /content/tmp/examples/llava/clip.cpp", "file": "/content/tmp/examples/llava/clip.cpp", "output": "examples/llava/CMakeFiles/llava.dir/clip.cpp.o" }, { "directory": "/content/tmp/build/examples/llava", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -I/content/tmp/examples/llava/. -I/content/tmp/examples/llava/../.. -I/content/tmp/examples/llava/../../common -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-llava-cli.dir/llava-cli.cpp.o -c /content/tmp/examples/llava/llava-cli.cpp", "file": "/content/tmp/examples/llava/llava-cli.cpp", "output": "examples/llava/CMakeFiles/llama-llava-cli.dir/llava-cli.cpp.o" }, { "directory": "/content/tmp/build/examples/llava", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -I/content/tmp/examples/llava/. -I/content/tmp/examples/llava/../.. -I/content/tmp/examples/llava/../../common -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-minicpmv-cli.dir/minicpmv-cli.cpp.o -c /content/tmp/examples/llava/minicpmv-cli.cpp", "file": "/content/tmp/examples/llava/minicpmv-cli.cpp", "output": "examples/llava/CMakeFiles/llama-minicpmv-cli.dir/minicpmv-cli.cpp.o" }, { "directory": "/content/tmp/build/examples/lookahead", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-lookahead.dir/lookahead.cpp.o -c /content/tmp/examples/lookahead/lookahead.cpp", "file": "/content/tmp/examples/lookahead/lookahead.cpp", "output": "examples/lookahead/CMakeFiles/llama-lookahead.dir/lookahead.cpp.o" }, { "directory": "/content/tmp/build/examples/lookup", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-lookup.dir/lookup.cpp.o -c /content/tmp/examples/lookup/lookup.cpp", "file": "/content/tmp/examples/lookup/lookup.cpp", "output": "examples/lookup/CMakeFiles/llama-lookup.dir/lookup.cpp.o" }, { "directory": "/content/tmp/build/examples/lookup", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-lookup-create.dir/lookup-create.cpp.o -c /content/tmp/examples/lookup/lookup-create.cpp", "file": "/content/tmp/examples/lookup/lookup-create.cpp", "output": "examples/lookup/CMakeFiles/llama-lookup-create.dir/lookup-create.cpp.o" }, { "directory": "/content/tmp/build/examples/lookup", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-lookup-merge.dir/lookup-merge.cpp.o -c /content/tmp/examples/lookup/lookup-merge.cpp", "file": "/content/tmp/examples/lookup/lookup-merge.cpp", "output": "examples/lookup/CMakeFiles/llama-lookup-merge.dir/lookup-merge.cpp.o" }, { "directory": "/content/tmp/build/examples/lookup", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-lookup-stats.dir/lookup-stats.cpp.o -c /content/tmp/examples/lookup/lookup-stats.cpp", "file": "/content/tmp/examples/lookup/lookup-stats.cpp", "output": "examples/lookup/CMakeFiles/llama-lookup-stats.dir/lookup-stats.cpp.o" }, { "directory": "/content/tmp/build/examples/main", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-cli.dir/main.cpp.o -c /content/tmp/examples/main/main.cpp", "file": "/content/tmp/examples/main/main.cpp", "output": "examples/main/CMakeFiles/llama-cli.dir/main.cpp.o" }, { "directory": "/content/tmp/build/examples/parallel", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-parallel.dir/parallel.cpp.o -c /content/tmp/examples/parallel/parallel.cpp", "file": "/content/tmp/examples/parallel/parallel.cpp", "output": "examples/parallel/CMakeFiles/llama-parallel.dir/parallel.cpp.o" }, { "directory": "/content/tmp/build/examples/passkey", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-passkey.dir/passkey.cpp.o -c /content/tmp/examples/passkey/passkey.cpp", "file": "/content/tmp/examples/passkey/passkey.cpp", "output": "examples/passkey/CMakeFiles/llama-passkey.dir/passkey.cpp.o" }, { "directory": "/content/tmp/build/examples/perplexity", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-perplexity.dir/perplexity.cpp.o -c /content/tmp/examples/perplexity/perplexity.cpp", "file": "/content/tmp/examples/perplexity/perplexity.cpp", "output": "examples/perplexity/CMakeFiles/llama-perplexity.dir/perplexity.cpp.o" }, { "directory": "/content/tmp/build/examples/quantize-stats", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/examples/quantize-stats/../../common -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -march=native -o CMakeFiles/llama-quantize-stats.dir/quantize-stats.cpp.o -c /content/tmp/examples/quantize-stats/quantize-stats.cpp", "file": "/content/tmp/examples/quantize-stats/quantize-stats.cpp", "output": "examples/quantize-stats/CMakeFiles/llama-quantize-stats.dir/quantize-stats.cpp.o" }, { "directory": "/content/tmp/build/examples/quantize", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/examples/quantize/../../common -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -I/content/tmp/common/. -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-quantize.dir/quantize.cpp.o -c /content/tmp/examples/quantize/quantize.cpp", "file": "/content/tmp/examples/quantize/quantize.cpp", "output": "examples/quantize/CMakeFiles/llama-quantize.dir/quantize.cpp.o" }, { "directory": "/content/tmp/build/examples/retrieval", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-retrieval.dir/retrieval.cpp.o -c /content/tmp/examples/retrieval/retrieval.cpp", "file": "/content/tmp/examples/retrieval/retrieval.cpp", "output": "examples/retrieval/CMakeFiles/llama-retrieval.dir/retrieval.cpp.o" }, { "directory": "/content/tmp/build/examples/server", "command": "/usr/bin/c++ -DGGML_USE_CUDA -DSERVER_VERBOSE=1 -I/content/tmp/examples -I/content/tmp/examples/server -I/content/tmp/build/examples/server -I/content/tmp -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-server.dir/server.cpp.o -c /content/tmp/examples/server/server.cpp", "file": "/content/tmp/examples/server/server.cpp", "output": "examples/server/CMakeFiles/llama-server.dir/server.cpp.o" }, { "directory": "/content/tmp/build/examples/save-load-state", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-save-load-state.dir/save-load-state.cpp.o -c /content/tmp/examples/save-load-state/save-load-state.cpp", "file": "/content/tmp/examples/save-load-state/save-load-state.cpp", "output": "examples/save-load-state/CMakeFiles/llama-save-load-state.dir/save-load-state.cpp.o" }, { "directory": "/content/tmp/build/examples/simple", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-simple.dir/simple.cpp.o -c /content/tmp/examples/simple/simple.cpp", "file": "/content/tmp/examples/simple/simple.cpp", "output": "examples/simple/CMakeFiles/llama-simple.dir/simple.cpp.o" }, { "directory": "/content/tmp/build/examples/speculative", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-speculative.dir/speculative.cpp.o -c /content/tmp/examples/speculative/speculative.cpp", "file": "/content/tmp/examples/speculative/speculative.cpp", "output": "examples/speculative/CMakeFiles/llama-speculative.dir/speculative.cpp.o" }, { "directory": "/content/tmp/build/examples/sweep-bench", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-sweep-bench.dir/sweep-bench.cpp.o -c /content/tmp/examples/sweep-bench/sweep-bench.cpp", "file": "/content/tmp/examples/sweep-bench/sweep-bench.cpp", "output": "examples/sweep-bench/CMakeFiles/llama-sweep-bench.dir/sweep-bench.cpp.o" }, { "directory": "/content/tmp/build/examples/tokenize", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/examples -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-tokenize.dir/tokenize.cpp.o -c /content/tmp/examples/tokenize/tokenize.cpp", "file": "/content/tmp/examples/tokenize/tokenize.cpp", "output": "examples/tokenize/CMakeFiles/llama-tokenize.dir/tokenize.cpp.o" }, { "directory": "/content/tmp/build/pocs/vdot", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/pocs -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-vdot.dir/vdot.cpp.o -c /content/tmp/pocs/vdot/vdot.cpp", "file": "/content/tmp/pocs/vdot/vdot.cpp", "output": "pocs/vdot/CMakeFiles/llama-vdot.dir/vdot.cpp.o" }, { "directory": "/content/tmp/build/pocs/vdot", "command": "/usr/bin/c++ -DGGML_USE_CUDA -I/content/tmp/pocs -I/content/tmp/common/. -I/content/tmp/src/. -I/content/tmp/src/../include -I/content/tmp/src/../common -I/content/tmp/ggml/src/../include -O3 -DNDEBUG -std=gnu++17 -o CMakeFiles/llama-q8dot.dir/q8dot.cpp.o -c /content/tmp/pocs/vdot/q8dot.cpp", "file": "/content/tmp/pocs/vdot/q8dot.cpp", "output": "pocs/vdot/CMakeFiles/llama-q8dot.dir/q8dot.cpp.o" } ]