khazic
/

kkk

Model card Files Files and versions

kkk / Unicorn /script /batch_inference.py

khazic's picture

Upload folder using huggingface_hub

f85774e verified about 2 months ago

history blame contribute delete

2.42 kB

	import torch
	import transformers
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from PIL import Image
	import warnings

	# Keep the console output limited to the final answers: ignore every Python
	# warning, and have transformers report errors only and hide progress bars.
	warnings.filterwarnings('ignore')
	transformers.logging.disable_progress_bar()
	transformers.logging.set_verbosity_error()

	# set device: use the GPU when one is available, otherwise fall back to CPU
	# so the script still runs (slowly) on machines without CUDA.
	device = 'cuda' if torch.cuda.is_available() else 'cpu'
	torch.set_default_device(device)

	# Checkpoint to load. Other Bunny checkpoints that can be substituted here:
	#   'BAAI/Bunny-Llama-3-8B-V', 'BAAI/Bunny-v1_1-4B', 'BAAI/Bunny-v1_0-4B',
	#   'BAAI/Bunny-v1_0-3B', 'BAAI/Bunny-v1_0-3B-zh', 'BAAI/Bunny-v1_0-2B-zh'
	model_name = 'BAAI/Bunny-v1_1-Llama-3-8B-V'

	# create model
	# NOTE: trust_remote_code=True runs Python code shipped in the model
	# repository; only use it with checkpoints you trust.
	model = AutoModelForCausalLM.from_pretrained(
	    model_name,
	    # half precision on the GPU, full precision on the CPU
	    torch_dtype=torch.float16 if device == 'cuda' else torch.float32,
	    device_map='auto',
	    trust_remote_code=True)
	tokenizer = AutoTokenizer.from_pretrained(
	    model_name,
	    trust_remote_code=True)

	# for batch inference
	# Every prompt is padded to this many tokens so the batch is rectangular.
	padding_max_length = 128 # customize for your circumstance
	# Reuse the model's generation pad token and pad on the left, so the prompt
	# ends at the last position and the decoding step can slice off the first
	# input_ids.shape[1] columns to keep only the generated tokens.
	tokenizer.pad_token_id = model.generation_config.pad_token_id
	tokenizer.padding_side = "left"
	# Register '<image>' as a single token and remember its id; the script later
	# replaces that id with -200 in input_ids before calling generate().
	tokenizer.add_tokens(['<image>'])
	image_token_id = tokenizer.convert_tokens_to_ids('<image>')

	# text prompts: one question per image in the batch
	prompts = [
	    'What is the astronaut holding in his hand?',
	    'Why is the image funny?',
	    'What is the occupation of the person in the picture?',
	    'What animal is in the picture?',
	]
	# Conversation template: a fixed system preamble, then the user turn with a
	# single <image> placeholder followed by the question, then the assistant cue.
	_prompt_template = (
	    "A chat between a curious user and an artificial intelligence assistant. "
	    "The assistant gives helpful, detailed, and polite answers to the user's questions. "
	    "USER: <image>\n{prompt} ASSISTANT:"
	)
	texts = [_prompt_template.format(prompt=question) for question in prompts]
	# Tokenize each prompt separately, left-padded up to padding_max_length.
	_encoded_prompts = [
	    tokenizer(text, padding='max_length', max_length=padding_max_length).input_ids
	    for text in texts]
	# padding='max_length' only pads and never truncates, so a prompt longer than
	# padding_max_length would give rows of different lengths and an opaque
	# torch.tensor error. Fail early with an actionable message instead.
	_overlong = [
	    index for index, ids in enumerate(_encoded_prompts)
	    if len(ids) > padding_max_length]
	if _overlong:
	    raise ValueError(
	        f"prompts at indices {_overlong} tokenize to more than "
	        f"padding_max_length={padding_max_length} tokens; "
	        "increase padding_max_length")
	input_ids = torch.tensor(_encoded_prompts, dtype=torch.long).to(device)
	# Swap the '<image>' token id for -200, the value passed to the model in
	# place of the image placeholder (presumably the image-token index expected
	# by the model's remote code; verify against the checkpoint).
	input_ids[input_ids == image_token_id] = -200

	# images, sample images can be found in https://huggingface.co/BAAI/Bunny-v1_1-Llama-3-8B-V/tree/main/images
	image_paths = [
	    'example_1.png',
	    'example_2.png',
	    'example_1.png',
	    'example_2.png',
	]
	# Every text prompt contains exactly one <image> placeholder, so the batch
	# needs exactly one image per prompt; catch a mismatch before the model does.
	if len(image_paths) != len(prompts):
	    raise ValueError(
	        f"expected one image per prompt, got {len(image_paths)} image(s) "
	        f"for {len(prompts)} prompt(s)")


	def _load_image(path):
	    """Return a fully decoded copy of the image at *path*, closing the file."""
	    # Image.open is lazy; copy() forces the pixel data to load and detaches
	    # the result from the file, which the context manager then closes.
	    with Image.open(path) as opened:
	        return opened.copy()


	images = [_load_image(image_path) for image_path in image_paths]
	# Preprocess the whole batch with the model's own helper, then match the
	# model's dtype and the selected device.
	image_tensor = model.process_images(images, model.config).to(dtype=model.dtype, device=device)

	# generate
	_generation_options = dict(
	    images=image_tensor,
	    max_new_tokens=100,
	    use_cache=True,
	    repetition_penalty=1.0,  # increase this to avoid chattering
	)
	output_ids = model.generate(input_ids, **_generation_options)

	# The first input_ids.shape[1] columns of the output are skipped so that
	# only the newly generated continuation is decoded.
	_prompt_width = input_ids.shape[1]
	_new_token_ids = output_ids[:, _prompt_width:]
	_decoded_answers = tokenizer.batch_decode(_new_token_ids, skip_special_tokens=True)
	print([answer.strip() for answer in _decoded_answers])