YAML Metadata Warning:empty or missing yaml metadata in repo card
Check out the documentation for more information.
Fine-tune HuggingFaceM4/idefics2-8b on the nielsr/docvqa_1200_examples dataset
!pip install -q git+https://github.com/huggingface/transformers.git
!pip install -q accelerate datasets peft bitsandbytes
import torch
from peft import LoraConfig
from transformers import AutoProcessor, BitsAndBytesConfig, Idefics2ForConditionalGeneration
# Toggle 4-bit quantized loading. When False, the checkpoint is loaded with
# plain float16 weights and no quantization config.
USE_QLORA = True
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(DEVICE)

# The processor (tokenizer + image preprocessing) is taken from the base
# Idefics2 checkpoint, with image splitting turned off.
processor = AutoProcessor.from_pretrained(
    "HuggingFaceM4/idefics2-8b",
    do_image_splitting=False,
)

# 4-bit NF4 quantization with float16 compute; only passed to the model
# when USE_QLORA is enabled.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = Idefics2ForConditionalGeneration.from_pretrained(
    "Thanhstar/Idefics2-8b-multimodal",
    torch_dtype=torch.float16,
    quantization_config=bnb_config if USE_QLORA else None,
)
if not USE_QLORA:
    # Without a quantization config nothing places the weights on DEVICE,
    # so move them explicitly. Quantized (bitsandbytes) models are placed
    # at load time and must not be moved with .to().
    model = model.to(DEVICE)
from PIL import Image
# Put the model in evaluation mode before generating.
model.eval()

# Load the document image. convert() returns a new, fully loaded image, so
# the underlying file handle can be closed as soon as the conversion is done.
image_path = "output.png"
with Image.open(image_path) as img:
    image = img.convert("RGB")

query = "What percentage of smokers feel the need to find more excitement and sensation in life?"

# Single-turn chat prompt: a short instruction, the image placeholder, then
# the actual question.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Answer briefly."},
            {"type": "image"},
            {"type": "text", "text": query},
        ],
    }
]

text = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(text=[text.strip()], images=[image], return_tensors="pt", padding=True)
# The processor returns CPU tensors; generate() needs them on the same
# device as the model weights.
inputs = inputs.to(model.device)

generated_ids = model.generate(**inputs, max_new_tokens=64)
# Drop the prompt tokens so that only the newly generated answer is decoded.
prompt_length = inputs["input_ids"].size(1)
generated_texts = processor.batch_decode(generated_ids[:, prompt_length:], skip_special_tokens=True)
print()
print(generated_texts) # ['70']

# Bare expression: renders the image as the cell output in a notebook.
image
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support