"""Load a finetuned model with Unsloth and export it as merged 16-bit weights.

The script reads the finetuned model from ``MODEL_PATH`` and writes a merged
checkpoint to ``SAVE_PATH`` (intended for serving with vLLM).
"""

import os

# GPU selection is done through environment variables, which the CUDA runtime
# reads when it initializes. They are therefore set *before* importing
# unsloth, which is why the import below is not at the very top of the file.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

from unsloth import FastLanguageModel  # noqa: E402  (must follow the env setup)

# Path to your finetuned model directory
# (presumably a LoRA finetune, judging by the directory name -- confirm).
MODEL_PATH = "/home/mshahidul/readctrl_model/qwen3-8B_subclaims-verifier_lora_nonreasoning"

# Load without 4-bit/8-bit quantization so the exported weights are not
# derived from quantized parameters.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_PATH,
    max_seq_length=8192,
    load_in_4bit=False,
    load_in_8bit=False,
)

# Save merged 16-bit model for vLLM.
# NOTE: the save method is "merged_16bit"; nothing is written in 4-bit here.
SAVE_PATH = "/home/mshahidul/readctrl_model/support_checking_vllm"
model.save_pretrained_merged(SAVE_PATH, tokenizer, save_method="merged_16bit")