Spaces:

z-uo
/

LETR

Running

App Files Files Community

LETR / test.py

z-uo

add raw text output

b4d55e3 almost 4 years ago

raw

history blame contribute delete

2.01 kB

	from PIL import Image, ImageDraw

	import torch.nn.functional as F
	import torch

	from models.letr import build
	from models.misc import nested_tensor_from_tensor_list
	from models.preprocessing import Compose, ToTensor, Resize, Normalize

	def create_letr(path):
	    """Rebuild a LETR model from a checkpoint file, ready for CPU inference.

	    Args:
	        path: Checkpoint location handed to ``torch.load``. The loaded
	            object is indexed with ``'args'`` (passed to ``build``) and
	            ``'model'`` (passed to ``load_state_dict``).

	    Returns:
	        The first of the three values returned by ``build``, with the
	        checkpoint weights loaded and ``eval()`` applied.
	    """
	    # NOTE(review): torch.load unpickles the file, so only trusted
	    # checkpoints should be passed in.
	    state = torch.load(path, map_location='cpu')

	    # Reuse the stored build arguments, but pin the device to CPU
	    # regardless of what the checkpoint was saved with.
	    build_args = state['args']
	    build_args.device = 'cpu'

	    # build() yields three values; only the first (the network) is used.
	    net, _, _ = build(build_args)
	    net.load_state_dict(state['model'])
	    net.eval()
	    return net

	def get_lines_and_draw(image, outputs, orig_size, score_threshold=0.7):
	    """Keep confident line predictions and draw them onto ``image``.

	    Args:
	        image: Object accepted by ``ImageDraw.Draw``. It is drawn on in
	            place, and only when at least one line passes the threshold.
	        outputs: Mapping with ``'pred_logits'`` and ``'pred_lines'``
	            tensors for a single image. The last logit column is left out
	            when scoring (presumably a "no line" class -- confirm against
	            the model). Each prediction holds 4 values that are scaled by
	            ``(width, height, width, height)``, i.e. they are treated as
	            x1, y1, x2, y2 relative to the image size.
	        orig_size: 1-D tensor ``[height, width]`` of the original image.
	        score_threshold: Minimum softmax probability for a line to be kept.

	    Returns:
	        Tensor of shape ``(K, 4)`` with the kept lines in pixel units,
	        each row ordered ``y1, x1, y2, x2``. ``K`` is 0 when no prediction
	        reaches the threshold.
	    """
	    out_logits, out_line = outputs['pred_logits'], outputs['pred_lines']

	    # Best probability per prediction over all classes but the last one.
	    prob = F.softmax(out_logits, -1)
	    scores = prob[..., :-1].max(-1).values

	    # Scale the relative coordinates to pixels; the (4,) factor broadcasts
	    # over every prediction.
	    img_h, img_w = orig_size.unbind(0)
	    scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=0)
	    lines = out_line * scale_fct

	    # One (2, 2) block of endpoints per prediction. -1 lets the number of
	    # predictions follow the model output instead of assuming 1000.
	    lines = lines.reshape(-1, 2, 2)
	    lines = lines.flip([-1])  # (x, y) -> (y, x): this is yxyx format

	    # Boolean mask stays a tensor, so no numpy round-trip is needed.
	    keep = (scores.detach() >= score_threshold).reshape(-1)

	    # reshape(-1, 4) is also valid for zero rows, so the result has the
	    # same layout whether or not anything was kept.
	    lines = lines[keep].reshape(-1, 4)

	    if lines.shape[0] == 0:
	        # Nothing to draw; leave the image untouched.
	        return lines

	    draw = ImageDraw.Draw(image)
	    # tolist() hands plain Python floats to PIL instead of 0-d tensors.
	    for y1, x1, y2, x2 in lines.tolist():
	        # PIL takes points as (x, y).
	        # NOTE(review): fill=500 is kept from the original; how an integer
	        # ink value renders depends on the image mode -- confirm intent.
	        draw.line((x1, y1, x2, y2), fill=500)
	    return lines

	if __name__ == '__main__':
	    # Demo run: detect lines in demo.png and write the annotated image
	    # to output.png.
	    letr = create_letr('resnet50/checkpoint0024.pth')

	    # Preprocessing applied to the image before it is fed to the model.
	    resize_to = 256
	    preprocess = Compose([
	        ToTensor(),
	        Normalize([0.538, 0.494, 0.453], [0.257, 0.263, 0.273]),
	        Resize([resize_to]),
	    ])

	    image = Image.open('demo.png')
	    # Original (height, width); get_lines_and_draw uses it to scale the
	    # predicted coordinates to pixels.
	    orig_size = torch.as_tensor([int(image.height), int(image.width)])

	    batch = nested_tensor_from_tensor_list([preprocess(image)])

	    # Inference only, so gradient tracking is switched off. The first
	    # element of the model's return value is the prediction mapping.
	    with torch.no_grad():
	        predictions = letr(batch)[0]

	    # Draws onto `image` in place and returns the kept lines.
	    lines = get_lines_and_draw(image, predictions, orig_size)

	    image.save('output.png')