florentgbelidji
/

layoutlmv2-base-uncased

Model card Files Files and versions

layoutlmv2-base-uncased / handler.py

florentgbelidji's picture

florentgbelidji

Update handler.py

4a51a9f over 3 years ago

history blame contribute delete

2.91 kB

	import os
	import numpy as np
	from typing import Dict, List, Any
	from PIL import Image, ImageDraw, ImageFont
	from transformers import LayoutLMv2Processor, LayoutLMv2ForTokenClassification
	from transformers import pipeline, AutoTokenizer

	os.system('apt-get install gcc -y')
	os.system('pip3 install pycocotools')
	os.system('pip3 install -q detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/index.html')

	import detectron2

	print(f"DETECTRON2 {detectron2.__version__}")

	class EndpointHandler():
	def __init__(self, path=""):
	# load the processor and model

	self.processor = LayoutLMv2Processor.from_pretrained("microsoft/layoutlmv2-base-uncased")
	self.model = LayoutLMv2ForTokenClassification.from_pretrained("nielsr/layoutlmv2-finetuned-funsd")
	self.id2label = {
	0: 'O',
	1: 'B-HEADER',
	2: 'I-HEADER',
	3: 'B-QUESTION',
	4: 'I-QUESTION',
	5: 'B-ANSWER',
	6: 'I-ANSWER'
	}


	def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
	"""
	Args:
	data (:obj:):
	includes the input data and the parameters for the inference.
	Return:
	A :obj:`list`:. The object returned should be a list of one list like [[{"label": 0.9939950108528137}]] containing :
	- "label": A string representing what the label/class is. There can be multiple labels.
	- "score": A score between 0 and 1 describing how confident the model is for this label/class.
	"""

	def unnormalize_box(bbox, width, height):
	return [
	width * (bbox[0] / 1000),
	height * (bbox[1] / 1000),
	width * (bbox[2] / 1000),
	height * (bbox[3] / 1000),
	]

	image = data.pop("inputs", data)
	# encode
	encoding = self.processor(image, truncation=True, return_offsets_mapping=True, return_tensors="pt")
	offset_mapping = encoding.pop('offset_mapping')

	# forward pass
	outputs = self.model(**encoding)

	# get predictions
	predictions = outputs.logits.argmax(-1).squeeze().tolist()
	token_boxes = encoding.bbox.squeeze().tolist()

	# only keep non-subword predictions
	#is_subword = np.array(offset_mapping.squeeze().tolist())[:,0] != 0
	width, height = image.size

	true_predictions = [self.id2label[prediction] for prediction in predictions]
	true_boxes = [unnormalize_box(box, width, height) for box in token_boxes]
	is_subword = np.array(offset_mapping.squeeze().tolist())[:,0] != 0

	# postprocess the prediction
	return {"labels": true_predictions, "boxes": true_boxes, "is_subword": is_subword}