# readctrl / script / r_v2.sh
# Uploaded by: shahidul034
# Commit message: Add files using upload-large-folder tool
# Commit: 1db7196 (verified)
# Launch llama.cpp's HTTP server (llama-server) with the Gemma 3 12B
# instruction-tuned Q8_0 GGUF model.
#
# The server binary is referenced relative to the llama.cpp checkout, so the
# script must be inside that directory before launching. Abort if the checkout
# is missing; otherwise ./build/bin/llama-server would be resolved against
# whatever directory the caller happened to be in.
cd /home/mshahidul/llama.cpp || {
  printf 'error: cannot cd to %s\n' /home/mshahidul/llama.cpp >&2
  exit 1
}

# Expose only GPU index 4 to the server process.
export CUDA_VISIBLE_DEVICES=4

# Flags:
#   -m              path to the GGUF model file to load
#   --host 0.0.0.0  listen on all network interfaces
#                   NOTE(review): no API key is configured here, so anyone who
#                   can reach this port can use the server -- confirm intended.
#   --port 8085     TCP port for the HTTP API
#   --n-gpu-layers  number of model layers to offload to the GPU; 100 is
#                   presumably more than the model has, i.e. "offload all"
#   --ctx-size      context window size in tokens
#   --batch-size    logical batch size used for prompt processing
#   --threads       number of CPU threads
#   --flash-attn    enable Flash Attention
#
# `exec` replaces this shell with the server so that signals (Ctrl-C, SIGTERM
# from a process supervisor) are delivered to the server itself and its exit
# status becomes the script's exit status.
exec ./build/bin/llama-server \
  -m /home/mshahidul/readctrl_model/gguf/gemma-3-12b-it-Q8_0.gguf \
  --host 0.0.0.0 \
  --port 8085 \
  --n-gpu-layers 100 \
  --ctx-size 16384 \
  --batch-size 1024 \
  --threads 16 \
  --flash-attn on