Spaces:
Runtime error
Runtime error
| # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| import random | |
| import ast | |
| from PIL import Image, ImageDraw, ImageFont | |
| import numpy as np | |
| from tools.infer.utility import draw_ocr_box_txt, str2bool, str2int_tuple, init_args as infer_args | |
| def init_args(): | |
| parser = infer_args() | |
| # params for output | |
| parser.add_argument("--output", type=str, default='./output') | |
| # params for table structure | |
| parser.add_argument("--table_max_len", type=int, default=488) | |
| parser.add_argument("--table_algorithm", type=str, default='TableAttn') | |
| parser.add_argument("--table_model_dir", type=str) | |
| parser.add_argument( | |
| "--merge_no_span_structure", type=str2bool, default=True) | |
| parser.add_argument( | |
| "--table_char_dict_path", | |
| type=str, | |
| default="../ppocr/utils/dict/table_structure_dict_ch.txt") | |
| # params for layout | |
| parser.add_argument("--layout_model_dir", type=str) | |
| parser.add_argument( | |
| "--layout_dict_path", | |
| type=str, | |
| default="../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt") | |
| parser.add_argument( | |
| "--layout_score_threshold", | |
| type=float, | |
| default=0.5, | |
| help="Threshold of score.") | |
| parser.add_argument( | |
| "--layout_nms_threshold", | |
| type=float, | |
| default=0.5, | |
| help="Threshold of nms.") | |
| # params for kie | |
| parser.add_argument("--kie_algorithm", type=str, default='LayoutXLM') | |
| parser.add_argument("--ser_model_dir", type=str) | |
| parser.add_argument("--re_model_dir", type=str) | |
| parser.add_argument("--use_visual_backbone", type=str2bool, default=True) | |
| parser.add_argument( | |
| "--ser_dict_path", | |
| type=str, | |
| default="../train_data/XFUND/class_list_xfun.txt") | |
| # need to be None or tb-yx | |
| parser.add_argument("--ocr_order_method", type=str, default=None) | |
| # params for inference | |
| parser.add_argument( | |
| "--mode", | |
| type=str, | |
| choices=['structure', 'kie'], | |
| default='structure', | |
| help='structure and kie is supported') | |
| parser.add_argument( | |
| "--image_orientation", | |
| type=bool, | |
| default=False, | |
| help='Whether to enable image orientation recognition') | |
| parser.add_argument( | |
| "--layout", | |
| type=str2bool, | |
| default=True, | |
| help='Whether to enable layout analysis') | |
| parser.add_argument( | |
| "--table", | |
| type=str2bool, | |
| default=True, | |
| help='In the forward, whether the table area uses table recognition') | |
| parser.add_argument( | |
| "--ocr", | |
| type=str2bool, | |
| default=True, | |
| help='In the forward, whether the non-table area is recognition by ocr') | |
| # param for recovery | |
| parser.add_argument( | |
| "--recovery", | |
| type=str2bool, | |
| default=False, | |
| help='Whether to enable layout of recovery') | |
| parser.add_argument( | |
| "--use_pdf2docx_api", | |
| type=str2bool, | |
| default=False, | |
| help='Whether to use pdf2docx api') | |
| parser.add_argument( | |
| "--invert", | |
| type=str2bool, | |
| default=False, | |
| help='Whether to invert image before processing') | |
| parser.add_argument( | |
| "--binarize", | |
| type=str2bool, | |
| default=False, | |
| help='Whether to threshold binarize image before processing') | |
| parser.add_argument( | |
| "--alphacolor", | |
| type=str2int_tuple, | |
| default=(255, 255, 255), | |
| help='Replacement color for the alpha channel, if the latter is present; R,G,B integers') | |
| return parser | |
| def parse_args(): | |
| parser = init_args() | |
| return parser.parse_args() | |
| def draw_structure_result(image, result, font_path): | |
| if isinstance(image, np.ndarray): | |
| image = Image.fromarray(image) | |
| boxes, txts, scores = [], [], [] | |
| img_layout = image.copy() | |
| draw_layout = ImageDraw.Draw(img_layout) | |
| text_color = (255, 255, 255) | |
| text_background_color = (80, 127, 255) | |
| catid2color = {} | |
| font_size = 15 | |
| font = ImageFont.truetype(font_path, font_size, encoding="utf-8") | |
| for region in result: | |
| if region['type'] not in catid2color: | |
| box_color = (random.randint(0, 255), random.randint(0, 255), | |
| random.randint(0, 255)) | |
| catid2color[region['type']] = box_color | |
| else: | |
| box_color = catid2color[region['type']] | |
| box_layout = region['bbox'] | |
| draw_layout.rectangle( | |
| [(box_layout[0], box_layout[1]), (box_layout[2], box_layout[3])], | |
| outline=box_color, | |
| width=3) | |
| left, top, right, bottom = font.getbbox(region['type']) | |
| text_w, text_h = right - left, bottom - top | |
| draw_layout.rectangle( | |
| [(box_layout[0], box_layout[1]), | |
| (box_layout[0] + text_w, box_layout[1] + text_h)], | |
| fill=text_background_color) | |
| draw_layout.text( | |
| (box_layout[0], box_layout[1]), | |
| region['type'], | |
| fill=text_color, | |
| font=font) | |
| if region['type'] == 'table': | |
| pass | |
| else: | |
| for text_result in region['res']: | |
| boxes.append(np.array(text_result['text_region'])) | |
| txts.append(text_result['text']) | |
| scores.append(text_result['confidence']) | |
| im_show = draw_ocr_box_txt( | |
| img_layout, boxes, txts, scores, font_path=font_path, drop_score=0) | |
| return im_show | |