#!/usr/bin/env bash
# Benchmark TriLM models at several quantization types with llama.cpp's
# llama-bench, on CPU and (when available) CUDA GPU.
set -eux


# Run everything relative to the directory containing this script.
cd "$(dirname "$0")"


# Where downloaded/quantized models are stored, and where llama.cpp lives.
MODEL_DIR="bench-TriLMs-models"
LLAMA_CPP_PATH="."
# Model sizes in billions of parameters (must match the HF file names).
sizes=("1.5" "2.4" "3.9")
# Quantization types benchmarked on CPU, and the subset benchmarked on GPU
# (TQ1_0 and BF16 are omitted on GPU).
types=("TQ1_0" "TQ2_0" "Q4_K_M" "Q8_0" "F16" "BF16")
gputypes=("TQ2_0" "Q4_K_M" "Q8_0" "F16")
|
|
function gather_models() {
    # Download the F16-converted TriLM base models from Hugging Face into
    # $MODEL_DIR (created if missing). Files already present are skipped.
    # Globals read: MODEL_DIR, sizes.
    echo Gather the models
    mkdir -p -- "$MODEL_DIR"
    (
        cd "$MODEL_DIR"
        for sz in "${sizes[@]}"; do
            filename="TriLM_${sz}B_Unpacked-TQ1_0-F16.gguf"
            if [ ! -f "$filename" ]; then
                # FIX: the URL previously ended in "$(unknown)" — a command
                # substitution of a non-existent command — so the download
                # could never fetch the intended file. Use the filename.
                wget "https://huggingface.co/compilade/quant-tests/resolve/main/${filename}"
            fi
        done
    )
}
|
|
function build_llama_cpp() {
    # Do a clean CMake build of the llama-bench and llama-quantize tools.
    # All arguments are forwarded verbatim to cmake (e.g. -DGGML_CUDA=ON).
    # Globals read: LLAMA_CPP_PATH.
    # FIX: the message used to say "for CPU" unconditionally, which was
    # misleading when this function is reused for the CUDA build; show the
    # actual configure flags instead.
    echo "Build llama.cpp" "$@"


    (
        cd -- "$LLAMA_CPP_PATH"
        # Always start from a pristine build tree; log where we are before
        # wiping a stale one.
        if [ -d build ]; then
            pwd
            rm -rf build
        fi
        mkdir build
        cd build
        cmake .. "$@"
        make -j llama-bench llama-quantize
    )
}
|
|
function quantize() {
    # Produce every quantization type we intend to benchmark from the F16
    # source models, skipping targets that already exist on disk.
    # Globals read: MODEL_DIR, LLAMA_CPP_PATH, sizes, types.
    echo "Make all model types we'll test"
    (
        for sz in "${sizes[@]}"; do
            for ty in "${types[@]}"; do
                src="${MODEL_DIR}/TriLM_${sz}B_Unpacked-TQ1_0-F16.gguf"
                dst="${MODEL_DIR}/TriLM_${sz}B_Unpacked-${ty}.gguf"
                if [ ! -f "$dst" ]; then
                    "$LLAMA_CPP_PATH"/build/bin/llama-quantize --allow-requantize "$src" "$dst" "$ty"
                fi
            done
        done
    )
}
|
|
function bench() {
    # Benchmark every model size at each quantization type given as an
    # argument, sweeping the thread count. Each llama-bench invocation emits
    # JSON on stdout, followed by a lone "," separator line.
    # Globals read: MODEL_DIR, LLAMA_CPP_PATH, sizes.
    echo "Test each model one by one for different numbers of threads"


    for size in "${sizes[@]}"; do
        for qtype in "$@"; do
            for nthreads in 1 2 4 8; do
                "$LLAMA_CPP_PATH"/build/bin/llama-bench -v \
                    -m "${MODEL_DIR}/TriLM_${size}B_Unpacked-${qtype}.gguf" \
                    -t "${nthreads}" -p 512 -n 128 -r 4 -o json
                printf '%s\n' ","
            done
        done
    done
}
|
|
function bench_cpu() {
    # Run the CPU benchmark over every quant type, appending to file $1.
    local result_file="$1"
    bench "${types[@]}" >> "$result_file"
}
|
|
function bench_gpu() {
    # Run the GPU benchmark over the GPU quant subset, appending to file $1.
    local result_file="$1"
    bench "${gputypes[@]}" >> "$result_file"
}
|
|
# Timestamp the output files so repeated runs never clobber each other.
currentTime="$(date +'%s')"
resultFile="results-${currentTime}.json"
infoFile="results-${currentTime}-info.txt"
# Record the host CPU configuration alongside the results.
lscpu > "$infoFile"


gather_models
build_llama_cpp -DGGML_NATIVE=ON -DGGML_CPU=ON
quantize


# Also record the on-disk sizes of all benchmarked model files.
echo "---" >> "$infoFile"
ls -go "$MODEL_DIR" >> "$infoFile"


bench_cpu "$resultFile"


# If an NVIDIA GPU is present, rebuild with CUDA and benchmark again,
# appending to the same result file.
# (idiomatic presence check; also fixes the "benchark" typo in the message)
if command -v nvidia-smi > /dev/null; then
    echo GPU detected, benchmark with that too.
    build_llama_cpp -DGGML_NATIVE=ON -DGGML_CUDA=ON -DGGML_CUDA_F16=ON
    bench_gpu "$resultFile"
fi
|
|
|
|