# readctrl / code / bash_script / vllm_server.sh
# (Hugging Face page residue, kept as comments so the script parses:)
# shahidul034's picture
# Add files using upload-large-folder tool
# c7a6fe6 verified
#!/bin/bash
# Launch a vLLM OpenAI-compatible API server for the BF16 Qwen3-32B model.
#
# Usage:    bash vllm_server.sh
# Requires: vllm installed; physical GPU 1 free. Blocks until killed.
set -euo pipefail

# 1. Set Device Order and Visibility
# PCI_BUS_ID makes CUDA device numbering match nvidia-smi, so the "1"
# below targets the physical GPU ID 1 as requested.
export CUDA_DEVICE_ORDER="PCI_BUS_ID"
export CUDA_VISIBLE_DEVICES="1"

# 2. Define Paths and Configuration
# Using the path where we just saved the BF16 model.
readonly MODEL_PATH="/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-8b_ctx-bf16"
readonly SERVE_PORT=8015

# 3. Launch the server (foreground; OpenAI-compatible endpoint on $SERVE_PORT).
# Expansions are quoted so paths with spaces cannot word-split (SC2086).
python -m vllm.entrypoints.openai.api_server \
  --model "$MODEL_PATH" \
  --dtype bfloat16 \
  --max-model-len 8192 \
  --gpu-memory-utilization 0.95 \
  --port "$SERVE_PORT" \
  --trust-remote-code

# Example client call against this server (kept for reference):
# python /home/mshahidul/readctrl/code/finetune-inference/api_call_vllm_v2.py \
#     --file1 /home/mshahidul/readctrl/data/hand_create_gpt5_other_model/synthetic_data_es_raw_592.jsonl \
#     --file2 /home/mshahidul/readctrl/data/testing_data/es_testing_data.json