# Example script: image description with the Zero-Vision/Llama-3-MixSenseV1_1
# multimodal model (loaded via HuggingFace transformers with remote code).
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import warnings
import os
# Route HuggingFace Hub downloads through a mirror.
# NOTE(review): some huggingface_hub versions read HF_ENDPOINT at import time;
# if the mirror is ignored, set this env var before launching Python.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

# Silence library chatter so only the model output is printed.
transformers.logging.set_verbosity_error()
transformers.logging.disable_progress_bar()
warnings.filterwarnings("ignore")

# Load model and tokenizer.
# NOTE(review): trust_remote_code executes Python shipped in the hub repo —
# only use with checkpoints you trust.
model = AutoModelForCausalLM.from_pretrained(
    "Zero-Vision/Llama-3-MixSenseV1_1",
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    "Zero-Vision/Llama-3-MixSenseV1_1",
    trust_remote_code=True,
)

qs = "describe the image detailly."
# Fix: move inputs to the device the model actually lives on instead of a
# hard-coded "cuda" string. With device_map="auto" the model may be placed
# on CPU or sharded, and the original crashed on CPU-only machines.
input_ids = model.text_process(qs, tokenizer).to(model.device)

# Fix: close the image file handle deterministically (PIL keeps it open
# until the Image object is garbage-collected otherwise).
with Image.open("example.jpg") as image:
    image_tensor = model.image_process([image]).to(
        dtype=model.dtype, device=model.device
    )

# Generate a description of the image.
with torch.inference_mode():
    output_ids = model.generate(
        input_ids,
        images=image_tensor,
        max_new_tokens=2048,
        use_cache=True,
        eos_token_id=[
            tokenizer.eos_token_id,
            # Llama-3 chat checkpoints emit <|eot_id|> as an end-of-turn
            # marker in addition to the plain EOS token.
            tokenizer.convert_tokens_to_ids(["<|eot_id|>"])[0],
        ],
    )

# Decode and print only the generated text.
print(tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip())