| | from typing import Dict, List, Any |
| | import torch |
| | from transformers import AutoProcessor, LlavaForConditionalGeneration |
| |
|
class EndpointHandler():
    """Inference endpoint handler for the NousResearch/Obsidian-3B-V0.5 vision-language model.

    Loads the LLaVA-style model and its processor once at startup, then serves
    text+image prompts via ``__call__``.
    """

    def __init__(self, path="", vision_model="obsidian3b"):
        # BUG FIX: the original called torch.is_cuda_available(), which does not
        # exist (AttributeError on every start); the correct API is
        # torch.cuda.is_available(). Device is stored so __call__ can move
        # inputs onto it.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # fp16 is only reliably supported on GPU; fall back to fp32 on CPU so
        # generation does not fail on half-precision CPU ops.
        dtype = torch.float16 if self.device == "cuda" else torch.float32
        # NOTE(review): `path` and `vision_model` are accepted for interface
        # compatibility with the endpoint runtime but are unused here — the
        # checkpoint id is hard-coded.
        self.model = LlavaForConditionalGeneration.from_pretrained(
            "NousResearch/Obsidian-3B-V0.5",
            torch_dtype=dtype,
            low_cpu_mem_usage=True,
        ).to(self.device)
        self.processor = AutoProcessor.from_pretrained("NousResearch/Obsidian-3B-V0.5")

    def __call__(self, data: Dict[str, Any]) -> str:
        """Run one generation request.

        data args:
            inputs (:obj:`str`): the text prompt (defaults to "").
            image (:obj:`Image`): the image to condition on (defaults to None).
        Return:
            :obj:`str`: the decoded generation (the original annotation of
            ``List[Dict]`` was wrong — ``processor.decode`` returns a string).
        """
        prompt = data.pop("inputs", "")
        image = data.pop("image", None)

        # BUG FIX: inputs must live on the same device as the model; the
        # original left them on CPU, which fails when the model is on CUDA.
        model_inputs = self.processor(prompt, image, return_tensors="pt").to(self.device)
        # Greedy decoding (do_sample=False), capped at 4096 new tokens.
        output_ids = self.model.generate(
            **model_inputs, do_sample=False, max_new_tokens=4096
        )
        return self.processor.decode(output_ids[0], skip_special_tokens=True)
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| |
|
| | |
| | |
| |
|
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| | |