# Load the MiniCPM-V-4 int4 checkpoint and its tokenizer, move the model to
# the GPU in eval mode, and then block for a very long time.

import time

import torch
from transformers import AutoModel, AutoTokenizer

# Hugging Face Hub repository that both the model and the tokenizer come from.
model_id = "openbmb/MiniCPM-V-4-int4"

# trust_remote_code=True allows the custom modelling code shipped with the
# checkpoint repository to be executed while loading.
model = AutoModel.from_pretrained(
    model_id,
    trust_remote_code=True,
    attn_implementation="sdpa",
    torch_dtype=torch.bfloat16,
)

# Switch to inference mode first, then transfer the module to the CUDA device.
model = model.eval()
model = model.cuda()

tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    trust_remote_code=True,
)

# Block for 1,000,000 seconds (roughly 11.5 days).
# NOTE(review): presumably this keeps the process (and the loaded model)
# alive for external inspection or warm-up -- confirm the intent.
time.sleep(1_000_000)