Spaces:
Runtime error
Runtime error
| from img2table.ocr import DocTR | |
| from torchvision import transforms | |
| from transformers import AutoModelForObjectDetection | |
| import torch | |
| # attribution: KalbeDigitalLab/nutrigenme-paper-extractor | |
def box_cxcywh_to_xyxy(x):
    """Convert boxes from center format to corner format.

    Args:
        x: Tensor whose last dimension holds the four values
            (center_x, center_y, width, height). Any number of leading
            dimensions is accepted, including none (a single box).

    Returns:
        A tensor with the same shape as ``x`` whose last dimension holds
        (x_min, y_min, x_max, y_max).
    """
    x_c, y_c, w, h = x.unbind(-1)
    half_w, half_h = 0.5 * w, 0.5 * h
    corners = [x_c - half_w, y_c - half_h, x_c + half_w, y_c + half_h]
    # Stack on the last axis -- the same axis that unbind() split -- so the
    # output layout mirrors the input for 1-D, 2-D and batched tensors alike.
    # Stacking on dim=1 is only correct for exactly two-dimensional input.
    return torch.stack(corners, dim=-1)
def rescale_bboxes(out_bbox, size):
    """Convert center-format boxes to corner format and scale them to an image.

    Args:
        out_bbox: Tensor of boxes as (center_x, center_y, width, height);
            presumably expressed as fractions of the image size, since the
            result is multiplied by the image width and height.
        size: ``(width, height)`` of the target image.

    Returns:
        Tensor of (x_min, y_min, x_max, y_max) boxes scaled by ``size``.
    """
    width, height = size
    boxes = box_cxcywh_to_xyxy(out_bbox)
    # Build the scale factors on the same device as the boxes so that the
    # multiplication also works when the boxes were not moved to the CPU.
    scale = torch.tensor(
        [width, height, width, height],
        dtype=torch.float32,
        device=boxes.device,
    )
    return boxes * scale
def outputs_to_objects(outputs, img_size, id2label):
    """Turn raw detection outputs into a list of labelled boxes.

    Only the first element of the batch is processed. For every prediction
    the highest softmax probability and its class index are taken, the box is
    rescaled with ``rescale_bboxes`` and predictions whose label is
    ``"no object"`` are dropped.

    Args:
        outputs: Model output exposing ``logits`` as an attribute and
            ``"pred_boxes"`` by key.
        img_size: ``(width, height)`` forwarded to ``rescale_bboxes``.
        id2label: Mapping from integer class index to label string.

    Returns:
        A list of dicts with the keys ``"label"``, ``"score"`` and ``"bbox"``.
    """
    best = outputs.logits.softmax(-1).max(-1)
    first_labels = best.indices.detach().cpu().numpy()[0]
    first_scores = best.values.detach().cpu().numpy()[0]
    first_boxes = outputs["pred_boxes"].detach().cpu()[0]
    scaled_boxes = rescale_bboxes(first_boxes, img_size).tolist()

    detections = []
    for class_index, confidence, corners in zip(
        first_labels, first_scores, scaled_boxes
    ):
        name = id2label[int(class_index)]
        if name == "no object":
            # Background prediction: nothing to report.
            continue
        detections.append(
            {
                "label": name,
                "score": float(confidence),
                "bbox": [float(value) for value in corners],
            }
        )
    return detections
class MaxResize:
    """Callable transform that resizes an image so its longer side equals ``max_size``.

    The aspect ratio is kept: both sides are multiplied by the same factor
    and rounded to the nearest integer.
    """

    def __init__(self, max_size=800):
        # Target length, in pixels, of the longer image side after resizing.
        self.max_size = max_size

    def __call__(self, image):
        """Return ``image`` resized so that ``max(width, height) == max_size``.

        Args:
            image: Object exposing ``size`` as ``(width, height)`` and a
                ``resize((new_width, new_height))`` method.

        Returns:
            Whatever ``image.resize`` returns for the computed target size.
        """
        width, height = image.size
        factor = self.max_size / max(width, height)
        target = tuple(int(round(factor * side)) for side in (width, height))
        return image.resize(target)
# Preprocessing applied to an image before detection: limit the longer side
# to 800 pixels, convert to a tensor, then normalize each channel (the mean
# and std values match the widely used ImageNet statistics).
detection_transform = transforms.Compose(
    [
        MaxResize(800),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
def init():
    """Create the OCR engine and the table-detection model.

    The detection model is loaded from the
    ``microsoft/table-transformer-detection`` checkpoint (revision
    ``no_timm``) and moved to the module-level ``device``.

    Returns:
        A tuple ``(ocr, model)``: the ``DocTR`` instance and the loaded
        detection model.
    """
    detector = AutoModelForObjectDetection.from_pretrained(
        "microsoft/table-transformer-detection",
        revision="no_timm",
    )
    detector = detector.to(device)
    reader = DocTR()
    return reader, detector