import torch
from PIL import Image
from torchvision import transforms

from model import load_model


# Preprocessing pipeline; the mean/std values are the (rounded) CLIP
# normalization statistics.
_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.4815, 0.4578, 0.4082],
        std=[0.2686, 0.2613, 0.2758],
    ),
])


def load_for_inference(repo_id, filename="model.pt"):
    """Load the model onto the best available device and return it
    together with its tokenizer and the chosen device."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = load_model(repo_id=repo_id, filename=filename, device=device)
    model.eval()  # disable dropout/batch-norm updates for inference
    tokenizer = model.tokenizer
    return model, tokenizer, device


def predict(model, tokenizer, device, image: Image.Image, question: str):
    """Answer a free-form question about a PIL image."""
    # Convert to RGB so grayscale/RGBA inputs match the 3-channel normalization.
    image_tensor = _transform(image.convert("RGB")).unsqueeze(0).to(device)

    # Tokenize the question; truncate long inputs to the model's 64-token budget.
    q = tokenizer(
        question,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=64,
    ).to(device)

    # Beam-search decoding with gradients disabled.
    with torch.no_grad():
        output_ids = model.generate(
            images=image_tensor,
            input_ids=q.input_ids,
            attention_mask=q.attention_mask,
            max_length=64,
            num_beams=4,
        )

    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
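

if __name__ == "__main__":
    # Minimal usage sketch. The repo id and image path below are
    # placeholders for illustration, not real artifacts from this project.
    model, tokenizer, device = load_for_inference("your-username/your-model")
    img = Image.open("example.jpg")
    print(predict(model, tokenizer, device, img, "What is in this image?"))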