#!/usr/bin/env bash
# Launch llama-server serving gemma-3-12b-it (Q8_0 GGUF) on GPU 4,
# listening on all interfaces at port 8085.
#
# Requires: a CUDA-enabled llama.cpp build at ./build/bin/llama-server
# under /home/mshahidul/llama.cpp, and the model file below.
set -euo pipefail

# Run from the llama.cpp checkout so the relative binary path resolves.
cd /home/mshahidul/llama.cpp

# Pin the server to physical GPU 4.
export CUDA_VISIBLE_DEVICES=4

./build/bin/llama-server \
  -m /home/mshahidul/readctrl_model/gguf/gemma-3-12b-it-Q8_0.gguf \
  --host 0.0.0.0 \
  --port 8085 \
  --n-gpu-layers 100 \
  --ctx-size 16384 \
  --batch-size 1024 \
  --threads 16 \
  --flash-attn on