# shahidul034
# "Update readCtrl repo"
# 93694bb
# Start a vLLM server for google/gemma-3-27b-it, published to clients under
# the model name "subclaim-extractor" on port 8055.
#
# The arguments are collected in an array so each option sits on its own
# line without backslash continuations; the resulting argv is unchanged.
vllm_serve_args=(
  google/gemma-3-27b-it
  --gpu-memory-utilization 0.95
  --max-model-len 16384
  --enable-prefix-caching
  --kv-cache-dtype fp8
  --max-num-batched-tokens 32768
  --trust-remote-code
  --port 8055
  --served-model-name subclaim-extractor
)

# The two CUDA_* assignments are scoped to this one command only: device
# indices are ordered by PCI bus ID and only device 5 is made visible.
CUDA_DEVICE_ORDER="PCI_BUS_ID" CUDA_VISIBLE_DEVICES=5 \
  vllm serve "${vllm_serve_args[@]}"