import os
import threading

import torch
from transformers import AutoProcessor, AutoModelForVision2Seq

MODEL_NAME = "Qwen/Qwen2.5-VL-7B-Instruct"

model = None
processor = None
device = "cuda" if torch.cuda.is_available() else "cpu"
_model_lock = threading.Lock()


def get_model():
    """Lazily load the model and processor, guarded by a lock so concurrent
    callers trigger at most one load (double-checked locking)."""
    global model, processor, device
    if model is None or processor is None:
        with _model_lock:
            if model is None or processor is None:
                print("Loading processor...")
                processor = AutoProcessor.from_pretrained(
                    MODEL_NAME,
                    trust_remote_code=True,
                    use_fast=True,  # fast image processor avoids slow-processor warnings in logs
                    min_pixels=224 * 224,    # lower bound on image resolution fed to the vision encoder
                    max_pixels=1536 * 1536,  # upper bound to cap memory use on large images
                )
                print("Loading model...")
                model = AutoModelForVision2Seq.from_pretrained(
                    MODEL_NAME,
                    trust_remote_code=True,
                    torch_dtype=torch.float16,
                    device_map="auto",
                    low_cpu_mem_usage=True,
                )
                model.eval()
                print("Model loaded successfully")
    return model, processor, device
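

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustration only, not part of the original module).
# It assumes a PIL image and a text prompt; the chat-message format and the
# apply_chat_template / batch_decode calls follow the standard transformers
# processor API for Qwen2.5-VL, but the helper name run_inference and the
# max_new_tokens value are placeholders, not taken from the source.
# ---------------------------------------------------------------------------
from PIL import Image


def run_inference(image: Image.Image, prompt: str, max_new_tokens: int = 256) -> str:
    model, processor, _ = get_model()

    # Build a single-turn chat message containing the image and the prompt.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt},
            ],
        }
    ]

    # Render the chat template to text, then tokenize text and image together.
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(text=[text], images=[image], return_tensors="pt").to(model.device)

    with torch.inference_mode():
        generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # Strip the prompt tokens so only the newly generated answer is decoded.
    trimmed = [
        out[len(inp):] for inp, out in zip(inputs["input_ids"], generated_ids)
    ]
    return processor.batch_decode(trimmed, skip_special_tokens=True)[0]


# Example call (hypothetical file path):
# answer = run_inference(Image.open("example.jpg"), "Describe this image.")
# print(answer)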