Spaces:
Paused
Paused
# --- Earlier eager-loading version, kept for reference ---
# import torch
# from transformers import AutoProcessor, AutoModelForVision2Seq
# MODEL_NAME = "Qwen/Qwen2.5-VL-7B-Instruct"
# device = "cuda" if torch.cuda.is_available() else "cpu"
# print("Loading processor...")
# processor = AutoProcessor.from_pretrained(
#     MODEL_NAME,
#     trust_remote_code=True,
#     use_fast=True)  # use_fast to avoid warnings in logs
# print("Loading model...")
# model = AutoModelForVision2Seq.from_pretrained(
#     MODEL_NAME,
#     trust_remote_code=True,
#     torch_dtype=torch.float16,
#     device_map="auto"
# )
# print("Model loaded successfully")
import os
import threading

import torch
from transformers import AutoProcessor, AutoModelForVision2Seq

# Hugging Face model id served by this app.
MODEL_NAME = "Qwen/Qwen2.5-VL-7B-Instruct"

# Lazily-initialized singletons; populated by get_model() on first call.
model = None
processor = None

# Prefer GPU when available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Guards the one-time model/processor initialization across threads.
_model_lock = threading.Lock()
def get_model():
    """Lazily load and return the global ``(model, processor, device)`` triple.

    Thread-safe via double-checked locking on ``_model_lock``: the unlocked
    fast-path check skips lock acquisition once initialization is complete,
    and the re-check inside the lock prevents duplicate loads when several
    threads race on first use.

    Returns:
        tuple: ``(model, processor, device)`` where ``device`` is
        ``"cuda"`` or ``"cpu"`` (decided at module import time).
    """
    global model, processor  # `device` is only read here; no global needed
    if model is None or processor is None:
        with _model_lock:
            # Re-check under the lock: another thread may have finished
            # loading while this one was waiting.
            if model is None or processor is None:
                print("Loading processor...")
                loaded_processor = AutoProcessor.from_pretrained(
                    MODEL_NAME,
                    trust_remote_code=True,
                    use_fast=True,          # fast processor avoids warnings in logs
                    min_pixels=224 * 224,   # bound image token count (added 8/5/26)
                    max_pixels=1536 * 1536  # added 8/5/26
                )
                print("Loading model...")
                loaded_model = AutoModelForVision2Seq.from_pretrained(
                    MODEL_NAME,
                    trust_remote_code=True,
                    torch_dtype=torch.float16,  # half precision for inference
                    device_map="auto",
                    low_cpu_mem_usage=True
                )
                loaded_model.eval()  # inference mode: disable dropout etc.
                # Publish the globals only after BOTH loads succeed, so a
                # failure mid-load cannot leave partially-initialized state
                # (previously `processor` was set before the model finished
                # loading, so an error left processor set but model None).
                processor = loaded_processor
                model = loaded_model
                print("Model loaded successfully")
    return model, processor, device