#!/usr/bin/env bash
#
# RL checkpoint workflow helper. Each step is a separate subcommand:
#
#   merge            Merge the FSDP actor checkpoint (global_step_45) into the
#                    converted-model directory via legacy_model_merger.py.
#   serve-converted  Serve the converted model with the vLLM OpenAI API server.
#   serve-base [M]   Serve a base model (default Qwen/Qwen3-4B-Instruct-2507,
#                    or model id / path M) with the vLLM OpenAI API server.
#   infer            Run the inference client against the local server.
#
# Both serve-* steps bind the same port and block in the foreground, so run
# exactly one of them in its own terminal, then run `infer` from another.
set -euo pipefail

readonly RL_ROOT=/home/mshahidul/readctrl/code/RL_model
readonly VERL_TRAIN_DIR="${RL_ROOT}/verl/verl_train"
readonly CONVERTED_MODEL="${RL_ROOT}/converted_model/v1"
readonly INFERENCE_SCRIPT=/home/mshahidul/readctrl/code/rl_inference/run_inference_vllm_server.py
readonly SERVED_NAME=inference
readonly PORT=8001

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the accepted subcommands to stderr and exit with status 2.
usage() {
  printf 'Usage: %s {merge|serve-converted|serve-base [MODEL]|infer}\n' \
    "${0##*/}" >&2
  exit 2
}

# Merge the FSDP actor checkpoint into ${CONVERTED_MODEL}.
# The merger script path is relative, so this relies on the working
# directory being ${VERL_TRAIN_DIR} (set in main).
merge_checkpoint() {
  python scripts/legacy_model_merger.py merge \
    --backend fsdp \
    --local_dir "${RL_ROOT}/RL_model_subclaim_classifier/global_step_45/actor" \
    --target_dir "${CONVERTED_MODEL}"
}

# Serve the converted checkpoint on GPU 2 in bfloat16. Blocks until the
# server process exits.
serve_converted() {
  CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=2 \
    python -m vllm.entrypoints.openai.api_server \
    --model "${CONVERTED_MODEL}" \
    --served-model-name "${SERVED_NAME}" \
    --dtype bfloat16 \
    --port "${PORT}"
}

# Serve a base model on GPU 2 in float16 with a 16384-token context limit.
# Arguments: $1 - optional model id or path (default below).
# Alternatives noted in the original file:
#   Qwen/Qwen3-4B-Instruct-2507
#   /home/mshahidul/readctrl/code/RL_model/models/converted_model/v1
# NOTE(review): that second path has an extra "models/" component compared
# with the merge target directory -- confirm which location is correct.
# NOTE(review): VLLM_USE_V1=0 presumably selects vLLM's older engine; confirm
# it is still needed for the installed vLLM version.
serve_base() {
  local model=${1:-Qwen/Qwen3-4B-Instruct-2507}
  VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=2 \
    python -m vllm.entrypoints.openai.api_server \
    --model "${model}" \
    --served-model-name "${SERVED_NAME}" \
    --dtype float16 \
    --port "${PORT}" \
    --max-model-len 16384
}

# Run the inference client against whichever server is listening locally.
run_inference() {
  python "${INFERENCE_SCRIPT}" \
    --base_url "http://127.0.0.1:${PORT}/v1" \
    --served_model_name "${SERVED_NAME}" \
    --batch_size 8
}

# Dispatch one workflow step.
# Arguments: $1 - step name; remaining args are forwarded to serve-base.
main() {
  (( $# >= 1 )) || usage
  local step=$1
  shift

  # Reject unknown steps before touching the filesystem.
  case "${step}" in
    merge | serve-converted | serve-base | infer) ;;
    *) usage ;;
  esac

  # The original command list changed into this directory first and ran
  # everything from there; keep that working directory for every step.
  cd "${VERL_TRAIN_DIR}" || die "cannot cd to ${VERL_TRAIN_DIR}"

  case "${step}" in
    merge) merge_checkpoint ;;
    serve-converted) serve_converted ;;
    serve-base) serve_base "$@" ;;
    infer) run_inference ;;
  esac
}

main "$@"