iris / README.md
DavidSeyserHF's picture
checkpoint from run with coco dataset
87cc4c9 verified

Iris

Custom multimodal Iris checkpoint exported in Hugging Face format.

Load and generate

import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForCausalLM, AutoTokenizer

# End-to-end example: load the checkpoint, preprocess one image, and generate
# text conditioned on the image and a short prompt.
model_id = "iris-hf"

# trust_remote_code=True lets transformers execute the model/tokenizer code
# that ships with the checkpoint. Weights are loaded in half precision.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
image_processor = AutoImageProcessor.from_pretrained(model_id)

# convert() returns an independent in-memory image, so the underlying file
# handle can be released as soon as the conversion is done.
with Image.open("example.jpg") as source_image:
    image = source_image.convert("RGB")

# Image processors typically emit float32 tensors while the model weights are
# float16; cast to the model's dtype (not only its device) to avoid a
# float/half mismatch inside the vision layers.
pixel_values = image_processor(images=[image], return_tensors="pt")["pixel_values"]
pixel_values = pixel_values.to(model.device, dtype=model.dtype)

prompt = "Describe this image: "
input_ids = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)["input_ids"].to(model.device)
# Single unpadded prompt, so every position is a real token.
attention_mask = torch.ones_like(input_ids)
prompt_len = input_ids.size(1)

# Generation needs no gradients; this is a no-op if generate() already
# disables them internally.
with torch.no_grad():
    output_ids = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        pixel_values=pixel_values,
        prompt_len=prompt_len,
        max_new_tokens=64,
    )

# Assumes the returned sequence starts with the prompt tokens, so they are
# sliced off before decoding — TODO confirm against the checkpoint's generate().
print(tokenizer.decode(output_ids[0, prompt_len:], skip_special_tokens=True))