| | |
| | |
| | |
| |
|
| | import argparse |
| |
|
| | from nemotron_ocr.inference.pipeline import NemotronOCR |
| |
|
| |
|
def main(image_path, merge_level, no_visualize, model_dir):
    """Run the OCR pipeline on a single image and print the region count.

    Args:
        image_path: Input image; handed to the pipeline call unchanged.
        merge_level: Forwarded to the pipeline as ``merge_level``.
        no_visualize: When truthy the pipeline is invoked with
            ``visualize=False``; otherwise with ``visualize=True``.
        model_dir: Forwarded to ``NemotronOCR`` as ``model_dir``.
    """
    pipeline = NemotronOCR(model_dir=model_dir)

    # The flag arrives negated; flip it once so the call reads positively.
    visualize = not no_visualize
    regions = pipeline(image_path, merge_level=merge_level, visualize=visualize)

    region_count = len(regions)
    print(f"Found {region_count} text regions.")
| |
|
| |
|
if __name__ == "__main__":
    # Command-line entry point: collect the options, then delegate to main().
    cli = argparse.ArgumentParser(description="Run OCR inference and annotate image.")

    # Required positional argument.
    cli.add_argument("image_path", help="Path to the input image.", type=str)

    # Optional switches; each default applies when the flag is omitted.
    cli.add_argument(
        "--merge-level",
        default="paragraph",
        choices=["word", "sentence", "paragraph"],
        type=str,
        help="Merge level for OCR output (word, sentence, paragraph).",
    )
    cli.add_argument(
        "--no-visualize",
        help="Do not save the annotated image.",
        action="store_true",
    )
    cli.add_argument(
        "--model-dir",
        default="./checkpoints",
        type=str,
        help="Path to the model checkpoints.",
    )

    opts = cli.parse_args()

    main(
        opts.image_path,
        merge_level=opts.merge_level,
        no_visualize=opts.no_visualize,
        model_dir=opts.model_dir,
    )
| |
|