# Load the MiniCPM-V-4 int4 checkpoint and its tokenizer, place the model on
# the default CUDA device in evaluation mode, then block the process.
import time

import torch
from transformers import AutoModel, AutoTokenizer

# Hugging Face Hub identifier shared by the model and tokenizer loads below.
model_id = "openbmb/MiniCPM-V-4-int4"

# `trust_remote_code=True` lets transformers run the Python modelling code
# shipped inside the checkpoint repository, so only use trusted repositories.
# NOTE(review): the repository name suggests pre-quantized int4 weights;
# confirm that `torch_dtype=torch.bfloat16` and the explicit `.cuda()` move are
# supported for this checkpoint with the installed library versions.
model = (
    AutoModel.from_pretrained(
        model_id,
        trust_remote_code=True,
        attn_implementation="sdpa",
        torch_dtype=torch.bfloat16,
    )
    .eval()  # switch to evaluation mode
    .cuda()  # move the model to the current CUDA device
)

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Block for 1,000,000 seconds (about 11.6 days).
# NOTE(review): presumably this keeps the process, and the model it has
# loaded, alive for external inspection; confirm the intent.
time.sleep(1_000_000)