Spaces:
Running
Running
File size: 1,561 Bytes
f3270e6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# Copyright (C) 2021-2025, Mindee.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
def main(args):
    """Run the OCR predictor on one document and visualize every resulting page.

    Args:
        args: parsed command-line namespace; the attributes read here are
            ``detection``, ``recognition``, ``path``, ``noblock`` and ``static``.
    """
    predictor = ocr_predictor(args.detection, args.recognition, pretrained=True)

    # The loader is selected from the file extension, compared case-insensitively:
    # ".pdf" goes through the PDF reader, anything else through the image reader.
    load_document = (
        DocumentFile.from_pdf if args.path.lower().endswith(".pdf") else DocumentFile.from_images
    )
    result = predictor(load_document(args.path))

    for page in result.pages:
        # The CLI flags are negative switches, so they are inverted for `show`.
        page.show(block=not args.noblock, interactive=not args.static)
def parse_args(argv=None):
    """Parse the command-line options of the analysis script.

    Args:
        argv: optional sequence of argument strings to parse instead of the
            process command line. ``None`` (the default) lets argparse read
            ``sys.argv[1:]``, which is what the script entry point relies on.

    Returns:
        The ``argparse.Namespace`` holding ``path``, ``detection``,
        ``recognition``, ``noblock`` and ``static``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="DocTR end-to-end analysis",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("path", type=str, help="Path to the input document (PDF or image)")
    parser.add_argument(
        "--detection", type=str, default="fast_base", help="Text detection model to use for analysis"
    )
    parser.add_argument(
        "--recognition", type=str, default="crnn_vgg16_bn", help="Text recognition model to use for analysis"
    )
    parser.add_argument(
        "--noblock", dest="noblock", help="Disables blocking visualization. Used only for CI.", action="store_true"
    )
    parser.add_argument("--static", dest="static", help="Switches to static visualization", action="store_true")
    # Forwarding `argv` makes the parser callable programmatically and from tests
    # without having to mutate sys.argv.
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Script entry point: parse the command-line options, then run the analysis.
    main(parse_args())
|