YAML Metadata Warning: empty or missing YAML metadata in repo card

Check out the documentation for more information.

Fine-tune HuggingFaceM4/idefics2-8b on the nielsr/docvqa_1200_examples dataset

test.py
!pip install -q git+https://github.com/huggingface/transformers.git
!pip install -q accelerate datasets peft bitsandbytes
import torch
from peft import LoraConfig
from transformers import AutoProcessor, BitsAndBytesConfig, Idefics2ForConditionalGeneration

# Toggle for loading the checkpoint with the 4-bit quantization config below.
USE_QLORA = True

# Report which torch device is available on this machine.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print(DEVICE)

# 4-bit NF4 quantization settings, with float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# The processor comes from the base Idefics2 repo, with image splitting
# switched off.
processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b", do_image_splitting=False)

# Only hand the quantization config to the loader when QLoRA is enabled.
if USE_QLORA:
    _quantization_config = bnb_config
else:
    _quantization_config = None

# The weights come from the fine-tuned repo, loaded in float16.
model = Idefics2ForConditionalGeneration.from_pretrained(
    "Thanhstar/Idefics2-8b-multimodal",
    torch_dtype=torch.float16,
    quantization_config=_quantization_config,
)

from PIL import Image

# Run one visual question-answering query against the loaded model:
# load an image, build a chat-style prompt, generate, and decode the answer.
model.eval()

image_path = "output.png"
# Open the file in a context manager so the handle is released promptly.
# convert() loads the pixel data and returns a new image object, so `image`
# remains usable after the file is closed.
with Image.open(image_path) as raw_image:
    image = raw_image.convert("RGB")
query = "What percentage of smokers feel the need to find more excitement and sensation in life?"


# Single user turn: an instruction, the image placeholder, then the question.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Answer briefly."},
            {"type": "image"},
            {"type": "text", "text": query},
        ],
    }
]


text = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(text=[text.strip()], images=[image], return_tensors="pt", padding=True)
# The processor returns CPU tensors; move them to wherever the model's weights
# live, otherwise generate() fails with a device mismatch when the model is on
# the GPU.
inputs = inputs.to(model.device)

# Length of the prompt in tokens, used below to strip the prompt from the output.
prompt_length = inputs["input_ids"].size(1)

generated_ids = model.generate(**inputs, max_new_tokens=64)
# Decode only the tokens that follow the prompt.
generated_texts = processor.batch_decode(generated_ids[:, prompt_length:], skip_special_tokens=True)
print()
print(generated_texts) # ['70']
# Bare expression: in a notebook cell this displays the input image.
image
Downloads last month

-

Downloads are not tracked for this model. How to track
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support