# readCtrl_lambda / code / finetune / convert_qwen3_gguf.py
# Author: mshahidul — "Initial commit of readCtrl code without large models" (commit 030876e)
"""Merge a finetuned Qwen3-8B LoRA checkpoint into standalone 16-bit weights for vLLM."""
import os

# CUDA env vars must be set BEFORE importing torch/unsloth — device visibility
# is fixed at CUDA initialization time, so setting them later has no effect.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # order GPUs by PCI bus id (matches nvidia-smi)
os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # restrict to the second physical GPU

from unsloth import FastLanguageModel

# Path to the finetuned LoRA adapter directory to be merged.
MODEL_PATH = "/home/mshahidul/readctrl_model/qwen3-8B_subclaims-verifier_lora_nonreasoning"

# Load in full precision (no 4-/8-bit quantization) so the adapter merge is lossless.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_PATH,
    max_seq_length=8192,
    load_in_4bit=False,
    load_in_8bit=False,
)

# Merge the LoRA adapters into the base weights and save a standalone 16-bit
# model (save_method="merged_16bit") that vLLM can load directly.
# (The original comment said "4-bit", which contradicted the code — it is 16-bit.)
SAVE_PATH = "/home/mshahidul/readctrl_model/support_checking_vllm"
model.save_pretrained_merged(SAVE_PATH, tokenizer, save_method="merged_16bit")