File size: 2,164 Bytes
b28505d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | #!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import argparse
from nemotron_ocr.inference.pipeline import NemotronOCR
def main(image_path, merge_level, no_visualize, model_dir, lang):
    """Run the OCR pipeline on a single image and print each prediction.

    Args:
        image_path: Forwarded unchanged as the first positional argument
            of the pipeline call.
        merge_level: Forwarded to the pipeline call as ``merge_level``.
        no_visualize: Inverted and forwarded to the pipeline call as
            ``visualize``.
        model_dir: When not ``None``, the pipeline is constructed with
            ``model_dir=model_dir`` and ``lang`` is not used.
        lang: Passed as ``lang=lang`` to the pipeline constructor only
            when ``model_dir`` is ``None``.

    The function returns nothing; it prints the number of predictions
    followed by one line per prediction with its text, confidence and
    bounding-box edges.
    """
    # An explicit model directory takes precedence over the language selector.
    if model_dir is None:
        constructor_kwargs = {"lang": lang}
    else:
        constructor_kwargs = {"model_dir": model_dir}
    ocr_pipeline = NemotronOCR(**constructor_kwargs)

    should_visualize = not no_visualize
    predictions = ocr_pipeline(
        image_path,
        merge_level=merge_level,
        visualize=should_visualize,
    )

    print(f"Found {len(predictions)} text regions.")
    for region in predictions:
        # Text and confidence are read and formatted before the box edges so
        # the lookup order matches a single left-to-right formatted string.
        summary = f" - Text: '{region['text']}', Confidence: {region['confidence']:.2f}, "
        edges = ", ".join(
            f"{edge}={region[edge]:.4f}"
            for edge in ("left", "upper", "right", "lower")
        )
        print(f"{summary}Bbox: [{edges}]")
if __name__ == "__main__":
    # Script entry point: declare the command-line interface, parse the
    # arguments, and pass the parsed values on to main().
    cli = argparse.ArgumentParser(description="Run OCR inference and annotate image.")

    # Each entry is (argument name, add_argument keyword options). The order
    # here is the registration order, which also drives the --help layout.
    argument_table = (
        (
            "image_path",
            {"type": str, "help": "Path to the input image."},
        ),
        (
            "--merge-level",
            {
                "type": str,
                "choices": ["word", "sentence", "paragraph"],
                "default": "paragraph",
                "help": "Merge level for OCR output (word, sentence, paragraph).",
            },
        ),
        (
            "--no-visualize",
            {"action": "store_true", "help": "Do not save the annotated image."},
        ),
        (
            "--model-dir",
            {
                "type": str,
                "default": None,
                "help": (
                    "Path to a directory with detector.pth, recognizer.pth, relational.pth, charset.txt. "
                    "If omitted, weights are downloaded from Hugging Face (default: v2 multilingual)."
                ),
            },
        ),
        (
            "--lang",
            {
                "type": str,
                "choices": ["en", "multi", "v1"],
                "default": None,
                "help": (
                    "Hub checkpoint when --model-dir is omitted: en=v2 English, "
                    "multi=v2 multilingual (default), v1=legacy."
                ),
            },
        ),
    )
    for argument_name, argument_options in argument_table:
        cli.add_argument(argument_name, **argument_options)

    cli_args = cli.parse_args()
    main(
        cli_args.image_path,
        merge_level=cli_args.merge_level,
        no_visualize=cli_args.no_visualize,
        model_dir=cli_args.model_dir,
        lang=cli_args.lang,
    )
|