# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

# The processor and the model are both loaded from the same checkpoint name.
processor = AutoProcessor.from_pretrained("trunks/blip-image-captioning-base")
model = AutoModelForImageTextToText.from_pretrained("trunks/blip-image-captioning-base")

# Quick Links
Load model
# Import the processor loader and the BlipForConditionalGeneration model class.
from transformers import AutoProcessor, BlipForConditionalGeneration
# The processor and the model are both loaded from the same checkpoint name.
processor = AutoProcessor.from_pretrained("trunks/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("trunks/blip-image-captioning-base")
Prepare image for model
# Image comes from PIL (used to open the file); display comes from IPython.
from PIL import Image
from IPython.display import display

# Open the source image from disk.
img1 = Image.open("imagepath/img.jpeg")

# Resize to 30% of each dimension for display. The result is bound to a new
# name, so img1 itself is not reassigned.
width, height = img1.size
img1_resized = img1.resize((int(0.3 * width), int(0.3 * height)))
display(img1_resized)
Testing image
# Run the processor on img1 (not the resized copy shown above).
# NOTE(review): "pt" presumably requests PyTorch tensors — confirm against the transformers docs.
inputs = processor(images=img1, return_tensors="pt")
pixel_values = inputs.pixel_values
# Generate caption token ids from the pixel values, with max_length set to 50.
generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
# Decode the generated ids with special tokens skipped and keep the first decoded string.
generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(generated_caption)
- Downloads last month
- 7
# Use a pipeline as a high-level helper
# Warning: Pipeline type "image-to-text" is no longer supported in transformers v5.
# You must load the model directly (see below) or downgrade to v4.x with:
# 'pip install "transformers<5.0.0"'
from transformers import pipeline

pipe = pipeline("image-to-text", model="trunks/blip-image-captioning-base")