Spaces:
Running
Running
| from PIL import Image, ImageDraw | |
| import torch.nn.functional as F | |
| import torch | |
| from models.letr import build | |
| from models.misc import nested_tensor_from_tensor_list | |
| from models.preprocessing import Compose, ToTensor, Resize, Normalize | |
def create_letr(path):
    """Build a LETR model on CPU from a saved checkpoint and put it in eval mode.

    The checkpoint at ``path`` is expected to be a mapping with an ``'args'``
    entry (the configuration object handed to ``build``) and a ``'model'``
    entry (the state dict loaded into the built model).

    NOTE(review): ``torch.load`` unpickles the file, so only load checkpoints
    from a trusted source.
    """
    ckpt = torch.load(path, map_location='cpu')

    # Reuse the stored configuration, but force CPU.
    config = ckpt['args']
    config.device = 'cpu'

    # build() returns three values; only the first (the model) is used here.
    letr, _, _ = build(config)
    letr.load_state_dict(ckpt['model'])
    letr.eval()
    return letr
def get_lines_and_draw(image, outputs, orig_size, threshold=0.7):
    """Select confident line predictions, draw them on ``image``, and return them.

    Args:
        image: PIL image that is drawn on in place.
        outputs: mapping with ``'pred_logits'`` and ``'pred_lines'`` tensors.
            Assumes a single image, i.e. shapes ``(1, N, C)`` and ``(1, N, 4)``
            with normalized ``x1, y1, x2, y2`` coordinates - TODO confirm
            against the model.
        orig_size: tensor holding ``(height, width)`` of the original image.
        threshold: minimum class probability for a line to be kept.

    Returns:
        Tensor of shape ``(K, 4)`` holding the kept lines in pixel coordinates,
        ordered ``y1, x1, y2, x2``. ``K`` is 0 when nothing passes the threshold.
    """
    out_logits, out_line = outputs['pred_logits'], outputs['pred_lines']

    # Best probability per query over every class except the last one
    # (presumably the "no object" class - verify against the model).
    prob = F.softmax(out_logits, -1)
    scores, _ = prob[..., :-1].max(-1)

    # Scale normalized coordinates up to pixel coordinates.
    img_h, img_w = orig_size.unbind(0)
    scale = torch.stack([img_w, img_h, img_w, img_h]).to(out_line.device)
    lines = out_line * scale

    # Two (x, y) end points per line; flipping the last axis gives (y, x).
    # -1 instead of a fixed query count so any number of queries works.
    lines = lines.reshape(-1, 2, 2).flip([-1])

    # Flat boolean mask; reshape (not squeeze) keeps it 1-D even for one query.
    keep = (scores >= threshold).reshape(-1)
    lines = lines[keep].reshape(-1, 4)

    if lines.shape[0] == 0:
        # Nothing to draw; still return a consistently shaped (0, 4) tensor.
        return lines

    draw = ImageDraw.Draw(image)
    # Plain Python floats are what PIL expects as coordinates.
    for y1, x1, y2, x2 in lines.tolist():
        draw.line((x1, y1, x2, y2), fill=500)
    return lines
if __name__ == '__main__':
    # Demo: detect lines in demo.png with a pretrained LETR checkpoint and
    # save the annotated picture as output.png.
    model = create_letr('resnet50/checkpoint0024.pth')

    test_size = 256
    normalize = Compose([
        ToTensor(),
        Normalize([0.538, 0.494, 0.453], [0.257, 0.263, 0.273]),
        Resize([test_size]),
    ])

    # Force three-channel RGB: the normalization above carries three
    # per-channel statistics, and a PNG may otherwise open as RGBA, palette
    # or grayscale. NOTE(review): with an alpha channel the integer fill used
    # for drawing would likely yield a fully transparent line - confirm.
    image = Image.open('demo.png').convert('RGB')
    orig_size = torch.as_tensor([image.height, image.width])

    img = normalize(image)
    inputs = nested_tensor_from_tensor_list([img])

    # Inference only, so gradients are not tracked.
    with torch.no_grad():
        outputs = model(inputs)[0]

    # Draws onto `image` in place and returns the kept lines.
    lines = get_lines_and_draw(image, outputs, orig_size)
    image.save('output.png')