cd /home/mshahidul/llama.cpp
export CUDA_VISIBLE_DEVICES=4
./build/bin/llama-server \
  -m /home/mshahidul/readctrl_model/gguf/gemma-3-12b-it-Q8_0.gguf \
  --host 0.0.0.0 \
  --port 8085 \
  --n-gpu-layers 100 \
  --ctx-size 16384 \
  --batch-size 1024 \
  --threads 16 \
  --flash-attn on
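
Once the server is up, it can be sanity-checked from the same machine with requests like the following (a minimal sketch: llama-server exposes a health check and an OpenAI-compatible chat endpoint; the "model" value in the payload is illustrative, since the server answers with the single GGUF it loaded, and localhost assumes the request originates on the host itself):

# check that the model finished loading
curl http://localhost:8085/health

# send a small chat completion request
curl http://localhost:8085/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "gemma-3-12b-it-Q8_0",
        "messages": [{"role": "user", "content": "Hello"}],
        "max_tokens": 64
      }'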