#!/usr/bin/env bash
# Launch a vLLM OpenAI-compatible API server for the BF16 Qwen3-32B
# subclaims-support-check model, pinned to physical GPU 1.
set -euo pipefail

# 1. Set device order and visibility.
# PCI_BUS_ID ordering makes CUDA device numbering match nvidia-smi, so
# CUDA_VISIBLE_DEVICES="1" targets physical GPU ID 1 as requested.
export CUDA_DEVICE_ORDER="PCI_BUS_ID"
export CUDA_VISIBLE_DEVICES="1"

# 2. Define paths and configuration.
# Points at the BF16 model checkpoint saved earlier.
readonly MODEL_PATH="/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-8b_ctx-bf16"
readonly SERVE_PORT=8015

# 3. Start the server (blocks until the process exits).
python -m vllm.entrypoints.openai.api_server \
  --model "$MODEL_PATH" \
  --dtype bfloat16 \
  --max-model-len 8192 \
  --gpu-memory-utilization 0.95 \
  --port "$SERVE_PORT" \
  --trust-remote-code

# Follow-up client call (run separately once the server is up):
# python /home/mshahidul/readctrl/code/finetune-inference/api_call_vllm_v2.py \
#   --file1 /home/mshahidul/readctrl/data/hand_create_gpt5_other_model/synthetic_data_es_raw_592.jsonl \
#   --file2 /home/mshahidul/readctrl/data/testing_data/es_testing_data.json