# Copyright (C) 2021-2025, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

"""Command-line demo: run a docTR OCR predictor on a document and display each page.

The input is a single path. Paths ending in ``.pdf`` (case-insensitive) are
loaded with ``DocumentFile.from_pdf``; anything else is loaded with
``DocumentFile.from_images``.
"""

import argparse
from typing import Optional, Sequence

from doctr.io import DocumentFile
from doctr.models import ocr_predictor


def main(args: argparse.Namespace) -> None:
    """Run the OCR predictor on ``args.path`` and show every resulting page.

    Args:
        args: parsed command-line options. The attributes read here are
            ``path``, ``detection``, ``recognition``, ``noblock`` and
            ``static`` (see :func:`parse_args`).
    """
    # Build the predictor from the requested detection / recognition model names.
    model = ocr_predictor(args.detection, args.recognition, pretrained=True)

    # The loader is chosen purely from the file extension.
    if args.path.lower().endswith(".pdf"):
        doc = DocumentFile.from_pdf(args.path)
    else:
        doc = DocumentFile.from_images(args.path)

    out = model(doc)

    for page in out.pages:
        # The CLI flags are negative switches ("--noblock", "--static"), while
        # ``show`` takes the positive form, hence the two negations.
        page.show(block=not args.noblock, interactive=not args.static)


def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse the command-line options of the analysis script.

    Args:
        argv: argument strings to parse. When ``None`` (the default),
            ``argparse`` falls back to ``sys.argv[1:]``, which is the
            behaviour callers that pass no argument have always had.

    Returns:
        The populated namespace with ``path``, ``detection``, ``recognition``,
        ``noblock`` and ``static`` attributes.
    """
    parser = argparse.ArgumentParser(
        description="DocTR end-to-end analysis",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument("path", type=str, help="Path to the input document (PDF or image)")
    parser.add_argument(
        "--detection",
        type=str,
        default="fast_base",
        help="Text detection model to use for analysis",
    )
    parser.add_argument(
        "--recognition",
        type=str,
        default="crnn_vgg16_bn",
        help="Text recognition model to use for analysis",
    )
    parser.add_argument(
        "--noblock",
        dest="noblock",
        help="Disables blocking visualization. Used only for CI.",
        action="store_true",
    )
    parser.add_argument(
        "--static",
        dest="static",
        help="Switches to static visualization",
        action="store_true",
    )

    return parser.parse_args(argv)


if __name__ == "__main__":
    parsed_args = parse_args()
    main(parsed_args)