Spaces:
Paused
Paused
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # | |
| import os, sys | |
| sys.path.insert( | |
| 0, | |
| os.path.abspath( | |
| os.path.join( | |
| os.path.dirname( | |
| os.path.abspath(__file__)), | |
| '../../'))) | |
| from deepdoc.vision.seeit import draw_box | |
| from deepdoc.vision import Recognizer, LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out | |
| from api.utils.file_utils import get_project_base_directory | |
| import argparse | |
| import re | |
| import numpy as np | |
def main(args):
    """Run layout or table-structure recognition and save the results.

    For every input image the detections are passed to ``draw_box`` and the
    returned image is saved to the matching output path. In ``tsr`` mode an
    HTML rendering of the table is also written to ``<output path>.html``.

    Args:
        args: Parsed command-line namespace. ``mode`` and ``threshold`` are
            read here; the whole namespace is forwarded to ``init_in_out``.

    Raises:
        ValueError: If ``args.mode`` is neither ``layout`` nor ``tsr``
            (case-insensitive).
    """
    images, outputs = init_in_out(args)
    mode = args.mode.lower()
    threshold = float(args.threshold)

    if mode == "layout":
        labels = LayoutRecognizer.labels
        detr = Recognizer(
            labels,
            "layout",
            os.path.join(get_project_base_directory(), "rag/res/deepdoc/"))
        ocr = None  # OCR is only needed to fill table cells in tsr mode.
    elif mode == "tsr":
        labels = TableStructureRecognizer.labels
        detr = TableStructureRecognizer()
        ocr = OCR()
    else:
        # Fail with a clear message instead of an UnboundLocalError on `detr`.
        raise ValueError(
            "Unsupported mode %r; expected 'layout' or 'tsr'" % args.mode)

    layouts = detr(images, threshold)
    for i, lyt in enumerate(layouts):
        if mode == "tsr":
            html = get_table_html(images[i], lyt, ocr)
            # OCR text is frequently non-ASCII; write UTF-8 explicitly rather
            # than relying on the platform's locale encoding.
            with open(outputs[i] + ".html", "w", encoding="utf-8") as f:
                f.write(html)
            # Convert table components to the type/bbox/score dicts handed to
            # draw_box. NOTE(review): presumably the layout recognizer already
            # yields this shape, which is why only tsr output is remapped.
            lyt = [{
                "type": t["label"],
                "bbox": [t["x0"], t["top"], t["x1"], t["bottom"]],
                "score": t["score"]
            } for t in lyt]
        img = draw_box(images[i], lyt, labels, threshold)
        img.save(outputs[i], quality=95)
        print("save result to: " + outputs[i])
def get_table_html(img, tb_cpns, ocr):
    """Build a standalone HTML page rendering the table found in ``img``.

    OCR is run on the image, each recognized text box is tagged with the
    index and extent of the row ("R"), header ("H"), column ("C") and
    spanning cell ("SP") it falls into, and the tagged boxes are handed to
    ``TableStructureRecognizer.construct_table`` whose output is embedded in
    a styled HTML document.

    Args:
        img: Image to OCR; it is converted with ``np.array`` before the call.
        tb_cpns: Detected table components, dicts carrying at least a
            ``"label"`` string and an ``"x0"`` coordinate.
            NOTE(review): ``top``/``bottom``/``x1`` are read from the cleaned
            components as well -- presumably the same dicts; confirm.
        ocr: Callable returning an iterable of ``(box, text_info)`` pairs,
            where ``box`` is a sequence of ``[x, y]`` points and
            ``text_info[0]`` is the recognized text.

    Returns:
        The HTML document as a string.
    """
    ocr_results = ocr(np.array(img))

    # Keep only boxes whose first point is not to the right of the second
    # and not below the last one.
    text_boxes = [{"x0": b[0][0], "x1": b[1][0],
                   "top": b[0][1], "text": t[0],
                   "bottom": b[-1][1],
                   "layout_type": "table",
                   "page_number": 0}
                  for b, t in ocr_results
                  if b[0][0] <= b[1][0] and b[0][1] <= b[-1][1]]

    # A third of the mean box height is used as the Y-sort tolerance.
    # With no OCR output np.mean([]) would warn and yield nan, so fall
    # back to a zero tolerance.
    heights = [b[-1][1] - b[0][1] for b, _ in ocr_results]
    y_tolerance = np.mean(heights) / 3 if heights else 0
    boxes = Recognizer.sort_Y_firstly(text_boxes, y_tolerance)

    def gather(kwd, fzy=10, ption=0.6):
        """Select components whose label matches ``kwd``, cleaned and sorted."""
        eles = Recognizer.sort_Y_firstly(
            [r for r in tb_cpns if re.match(kwd, r["label"])], fzy)
        eles = Recognizer.layouts_cleanup(boxes, eles, 5, ption)
        return Recognizer.sort_Y_firstly(eles, 0)

    def tag_header_extent(box, region):
        """Copy the bounding extent of ``region`` into the ``H_*`` keys."""
        box["H_top"] = region["top"]
        box["H_bott"] = region["bottom"]
        box["H_left"] = region["x0"]
        box["H_right"] = region["x1"]

    headers = gather(r".*header$")
    rows = gather(r".* (row|header)")
    spans = gather(r".*spanning")
    clmns = sorted([r for r in tb_cpns if re.match(
        r"table column$", r["label"])], key=lambda x: x["x0"])
    clmns = Recognizer.layouts_cleanup(boxes, clmns, 5, 0.5)

    for b in boxes:
        ii = Recognizer.find_overlapped_with_threashold(b, rows, thr=0.3)
        if ii is not None:
            b["R"] = ii
            b["R_top"] = rows[ii]["top"]
            b["R_bott"] = rows[ii]["bottom"]

        ii = Recognizer.find_overlapped_with_threashold(b, headers, thr=0.3)
        if ii is not None:
            tag_header_extent(b, headers[ii])
            b["H"] = ii

        ii = Recognizer.find_horizontally_tightest_fit(b, clmns)
        if ii is not None:
            b["C"] = ii
            b["C_left"] = clmns[ii]["x0"]
            b["C_right"] = clmns[ii]["x1"]

        ii = Recognizer.find_overlapped_with_threashold(b, spans, thr=0.3)
        if ii is not None:
            # A spanning cell overrides the header extent set above.
            tag_header_extent(b, spans[ii])
            b["SP"] = ii

    # "%%" is a literal percent sign under %-formatting; "%s" receives the
    # table markup.
    html = """
<html>
<head>
<style>
._table_1nkzy_11 {
margin: auto;
width: 70%%;
padding: 10px;
}
._table_1nkzy_11 p {
margin-bottom: 50px;
border: 1px solid #e1e1e1;
}
caption {
color: #6ac1ca;
font-size: 20px;
height: 50px;
line-height: 50px;
font-weight: 600;
margin-bottom: 10px;
}
._table_1nkzy_11 table {
width: 100%%;
border-collapse: collapse;
}
th {
color: #fff;
background-color: #6ac1ca;
}
td:hover {
background: #c1e8e8;
}
tr:nth-child(even) {
background-color: #f2f2f2;
}
._table_1nkzy_11 th,
._table_1nkzy_11 td {
text-align: center;
border: 1px solid #ddd;
padding: 8px;
}
</style>
</head>
<body>
%s
</body>
</html>
""" % TableStructureRecognizer.construct_table(boxes, html=True)
    return html
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--inputs',
        help="Directory where to store images or PDFs, or a file path to a single image or PDF",
        required=True)
    parser.add_argument(
        '--output_dir',
        help="Directory where to store the output images. Default: './layouts_outputs'",
        default="./layouts_outputs")
    # type=float lets argparse reject a malformed threshold immediately with
    # a usage error instead of failing later inside main().
    parser.add_argument(
        '--threshold',
        type=float,
        help="A threshold to filter out detections. Default: 0.5",
        default=0.5)
    parser.add_argument(
        '--mode',
        help="Task mode: layout recognition or table structure recognition",
        choices=["layout", "tsr"],
        default="layout")
    args = parser.parse_args()
    main(args)