File size: 1,561 Bytes
f3270e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Copyright (C) 2021-2025, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.


from doctr.io import DocumentFile
from doctr.models import ocr_predictor


def main(args):
    """Run the OCR predictor on the document at ``args.path`` and show every page.

    Args:
        args: parsed command-line namespace; the attributes read here are ``path``,
            ``detection``, ``recognition``, ``noblock`` and ``static``.
    """
    predictor = ocr_predictor(args.detection, args.recognition, pretrained=True)

    # A case-insensitive ".pdf" suffix selects the PDF loader; any other path is
    # handed to the image loader.
    is_pdf = args.path.lower().endswith(".pdf")
    document = DocumentFile.from_pdf(args.path) if is_pdf else DocumentFile.from_images(args.path)

    result = predictor(document)

    # The CLI flags are negative switches, so they are inverted before being passed on.
    for analyzed_page in result.pages:
        analyzed_page.show(block=not args.noblock, interactive=not args.static)


def parse_args(argv=None):
    """Parse the command-line options of the analysis script.

    Args:
        argv: optional sequence of argument strings to parse. When left to None,
            argparse falls back to the process command line (``sys.argv[1:]``),
            which is the behavior of a plain ``parse_args()`` call.

    Returns:
        The ``argparse.Namespace`` holding ``path``, ``detection``, ``recognition``,
        ``noblock`` and ``static``.
    """
    # Kept function-local, as in the original script, so that the module's
    # top-level import block is left untouched.
    import argparse

    parser = argparse.ArgumentParser(
        description="DocTR end-to-end analysis", formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument("path", type=str, help="Path to the input document (PDF or image)")
    parser.add_argument("--detection", type=str, default="fast_base", help="Text detection model to use for analysis")
    parser.add_argument(
        "--recognition", type=str, default="crnn_vgg16_bn", help="Text recognition model to use for analysis"
    )
    # No explicit `dest` needed: argparse derives "noblock" / "static" from the option strings.
    parser.add_argument("--noblock", help="Disables blocking visualization. Used only for CI.", action="store_true")
    parser.add_argument("--static", help="Switches to static visualization", action="store_true")

    return parser.parse_args(argv)


if __name__ == "__main__":
    # Script entry point: parse the command line, then run the analysis.
    main(parse_args())