# File size: 1,034 Bytes
# Merge the FSDP actor checkpoint saved under global_step_45 and write the
# result to converted_model/v1 (see --local_dir / --target_dir below).
#
# The merger script is referenced by a relative path, so the directory change
# must take effect in the current shell. `&&` ensures the merge is skipped
# (rather than run from the wrong directory) if the cd fails.
cd /home/mshahidul/readctrl/code/RL_model/verl/verl_train &&
  python scripts/legacy_model_merger.py merge \
    --backend fsdp \
    --local_dir /home/mshahidul/readctrl/code/RL_model/RL_model_subclaim_classifier/global_step_45/actor \
    --target_dir /home/mshahidul/readctrl/code/RL_model/converted_model/v1
# Start the vLLM OpenAI API server for the model stored in converted_model/v1.
# It is exposed under the served name "inference" on port 8001, loaded as
# bfloat16. The command runs in the foreground.
#
# CUDA_DEVICE_ORDER=PCI_BUS_ID together with CUDA_VISIBLE_DEVICES=2 restricts
# the process to device index 2 (presumably numbered by PCI bus position —
# confirm against `nvidia-smi` on this host).
merged_model=/home/mshahidul/readctrl/code/RL_model/converted_model/v1
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=2 \
  python -m vllm.entrypoints.openai.api_server \
    --model "$merged_model" \
    --served-model-name inference \
    --dtype bfloat16 \
    --port 8001
# Start the vLLM OpenAI API server for Qwen/Qwen3-4B-Instruct-2507, exposed
# under the served name "inference" on port 8001, loaded as float16 with the
# maximum model length set to 16384.
#
# Values the author noted for --model:
#   Qwen/Qwen3-4B-Instruct-2507
#   /home/mshahidul/readctrl/code/RL_model/models/converted_model/v1
#
# NOTE(review): VLLM_USE_V1=0 presumably opts out of vLLM's V1 engine —
# confirm it is still honoured by the installed vLLM version.
base_model=Qwen/Qwen3-4B-Instruct-2507
VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=2 \
  python -m vllm.entrypoints.openai.api_server \
    --model "$base_model" \
    --served-model-name inference \
    --dtype float16 \
    --port 8001 \
    --max-model-len 16384
# Run the inference client script against the endpoint at
# http://127.0.0.1:8001/v1, addressing the model by the served name
# "inference" with a batch size of 8.
# NOTE(review): presumably requires a server already listening on port 8001 —
# confirm before running.
python /home/mshahidul/readctrl/code/rl_inference/run_inference_vllm_server.py \
  --base_url http://127.0.0.1:8001/v1 \
  --served_model_name inference \
  --batch_size 8