#!/usr/bin/env bash
# Launch llama.cpp's llama-server serving a Gemma-3 12B instruct model
# (Q8_0 GGUF quantization) on a single GPU, listening on 0.0.0.0:8085.
#
# NOTE(review): the original text was wrapped in markdown-table pipes
# ("| … | |") from a copy/paste — those made it invalid shell and are
# removed here; the command itself is unchanged.
set -euo pipefail

# Work from the llama.cpp checkout so the relative ./build path resolves.
cd /home/mshahidul/llama.cpp || exit 1

# Expose only physical GPU 4 to CUDA; the server sees it as device 0.
export CUDA_VISIBLE_DEVICES=4

./build/bin/llama-server \
  -m /home/mshahidul/readctrl_model/gguf/gemma-3-12b-it-Q8_0.gguf \
  --host 0.0.0.0 \
  --port 8085 \
  --n-gpu-layers 100 \
  --ctx-size 16384 \
  --batch-size 1024 \
  --threads 16 \
  --flash-attn on