File size: 389 Bytes
f8ba0eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from transformers import AutoModel, AutoTokenizer
import torch
import time

# Load the MiniCPM-V-4 (int4-quantized) vision-language model and its
# tokenizer, then park the process so the loaded model stays resident.
model_id = "openbmb/MiniCPM-V-4-int4"

# Fall back to CPU when no CUDA device is present so the script doesn't
# crash at startup on GPU-less machines. NOTE(review): the int4 checkpoint
# may still require a GPU to actually run inference — confirm before
# relying on the CPU path.
device = "cuda" if torch.cuda.is_available() else "cpu"

# trust_remote_code=True is required because MiniCPM-V ships custom
# modeling code on the Hugging Face Hub. This executes code fetched from
# the Hub — a supply-chain risk; pin a specific revision if this runs
# anywhere sensitive.
model = AutoModel.from_pretrained(
    model_id,
    trust_remote_code=True,
    attn_implementation='sdpa',  # scaled-dot-product attention backend
    torch_dtype=torch.bfloat16,
)
model = model.eval().to(device)
tokenizer = AutoTokenizer.from_pretrained(
    model_id, trust_remote_code=True
)

# Keep the process (and the loaded model) alive indefinitely, presumably
# so something external can attach to or inspect it. Ctrl-C / SIGTERM
# still interrupts the sleep.
time.sleep(1000000)