"""Lazy, thread-safe loader for the Qwen2.5-VL vision-language model."""

import threading

import torch
from transformers import AutoProcessor, AutoModelForVision2Seq

MODEL_NAME = "Qwen/Qwen2.5-VL-7B-Instruct"

model = None
processor = None
device = "cuda" if torch.cuda.is_available() else "cpu"
_model_lock = threading.Lock()


def get_model():
    """Load the model and processor on first use; return cached handles after."""
    global model, processor
    # Double-checked locking: the unlocked check keeps the hot path cheap once
    # the model is loaded; the re-check under the lock prevents two threads
    # from loading the 7B model concurrently.
    if model is None or processor is None:
        with _model_lock:
            if model is None or processor is None:
                print("Loading processor...")
                processor = AutoProcessor.from_pretrained(
                    MODEL_NAME,
                    trust_remote_code=True,
                    use_fast=True,  # fast image processor; avoids slow-processor warnings
                    min_pixels=224 * 224,    # lower bound on image resolution per image
                    max_pixels=1536 * 1536,  # upper bound; caps memory use on large images
                )
                print("Loading model...")
                model = AutoModelForVision2Seq.from_pretrained(
                    MODEL_NAME,
                    trust_remote_code=True,
                    torch_dtype=torch.float16,  # half precision to halve GPU memory
                    device_map="auto",          # place weights across available devices
                    low_cpu_mem_usage=True,     # avoid materializing full weights on CPU
                )
                model.eval()  # inference mode: disables dropout
                print("Model loaded successfully")
    return model, processor, device
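

# A minimal usage sketch, not part of the original module: it assumes the
# standard transformers chat-template + generate() flow for Qwen2.5-VL, and
# the blank placeholder image and prompt below are hypothetical, included
# purely to illustrate how get_model() is meant to be called.
if __name__ == "__main__":
    from PIL import Image

    mdl, proc, _dev = get_model()
    image = Image.new("RGB", (448, 448), "white")  # placeholder test image
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ]
    prompt = proc.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = proc(text=[prompt], images=[image], return_tensors="pt").to(mdl.device)
    with torch.no_grad():
        output_ids = mdl.generate(**inputs, max_new_tokens=64)
    # Strip the prompt tokens so only the newly generated text is decoded.
    new_tokens = output_ids[:, inputs["input_ids"].shape[1]:]
    print(proc.batch_decode(new_tokens, skip_special_tokens=True)[0])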