BoLiu

jdye64 committed on Nov 20, 2025

Commit

29d49e1

verified ·

1 Parent(s): a5c0f8c

pip-install (#7)

Browse files

- Introduce facilities for building a simple wheel for the nemotron-page-elements-v3 project (b452d69c702459cb68cb5c49e03e81a40622f1f8)
- Update Demo, README, and model with local tweaks (d2e30b3efa91e91124125044bdf8fd44f833277f)
- updated model path, readme and demo (adb5d1e25d3e6367a17a2681efbdd22527ea0952)

Co-authored-by: Jeremy <jdye64@users.noreply.huggingface.co>

Files changed (22) hide show

.gitignore +8 -0
Demo.ipynb +2 -2
MANIFEST.in +5 -0
README.md +8 -3
config.json → nemotron_page_elements_v3/config.json +0 -0
model.py → nemotron_page_elements_v3/model.py +12 -8
page_element_v3.py → nemotron_page_elements_v3/page_element_v3.py +3 -1
nemotron_page_elements_v3/post_processing/__init__.py +60 -0
{post_processing → nemotron_page_elements_v3/post_processing}/page_elt_pp.py +0 -0
{post_processing → nemotron_page_elements_v3/post_processing}/text_pp.py +0 -0
{post_processing → nemotron_page_elements_v3/post_processing}/wbf.py +0 -0
utils.py → nemotron_page_elements_v3/utils.py +0 -0
weights.pth → nemotron_page_elements_v3/weights.pth +0 -0
{yolox → nemotron_page_elements_v3/yolox}/boxes.py +0 -0
{yolox → nemotron_page_elements_v3/yolox}/darknet.py +0 -0
{yolox → nemotron_page_elements_v3/yolox}/network_blocks.py +0 -0
{yolox → nemotron_page_elements_v3/yolox}/yolo_fpn.py +0 -0
{yolox → nemotron_page_elements_v3/yolox}/yolo_head.py +0 -0
{yolox → nemotron_page_elements_v3/yolox}/yolo_pafpn.py +0 -0
{yolox → nemotron_page_elements_v3/yolox}/yolox.py +0 -0
pyproject.toml +87 -0
yolox/__init__.py → setup.py +13 -7

.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+dist/
+build/
+*.egg-info/
+*.pyc
+*.pyo
+*.pyd
+*.pyw
+*.pyz

Demo.ipynb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1f448b52f30c3bdb41d9db1c3fea0274ac39726338c19a8b48005017245d1dd
-size 2363478

 version https://git-lfs.github.com/spec/v1
+oid sha256:1abc855769ac788f46bc709cd33405fe02c816c68ad500a75ae82444998fbb61
+size 1812701

MANIFEST.in ADDED Viewed

	@@ -0,0 +1,5 @@

+include README.md
+include THIRD_PARTY_NOTICES.md
+recursive-include nemotron_page_elements_v3 *.json *.pth *.png

README.md CHANGED Viewed

@@ -121,7 +121,12 @@ git clone https://huggingface.co/nvidia/nemotron-page-elements-v3
 ```
 git clone git@hf.co:nvidia/nemotron-page-elements-v3
 ```
 2. Run the model using the following code:
 ```
@@ -130,8 +135,8 @@ import numpy as np
 import matplotlib.pyplot as plt
 from PIL import Image
-from model import define_model
-from utils import plot_sample, postprocess_preds_page_element, reformat_for_plotting
 # Load image
 path = "./example.png"

 ```
 git clone git@hf.co:nvidia/nemotron-page-elements-v3
 ```
+Optional:
+This can be installed as a package using pip
+```
+cd nemotron-page-elements-v3
+pip install -e .
+```
 2. Run the model using the following code:
 ```
 import matplotlib.pyplot as plt
 from PIL import Image
+from nemotron_page_elements_v3.model import define_model
+from nemotron_page_elements_v3.utils import plot_sample, postprocess_preds_page_element, reformat_for_plotting
 # Load image
 path = "./example.png"

config.json → nemotron_page_elements_v3/config.json RENAMED Viewed

File without changes

model.py → nemotron_page_elements_v3/model.py RENAMED Viewed

@@ -4,12 +4,13 @@ import os
 import sys
 import torch
 import importlib
 import numpy as np
 import numpy.typing as npt
 import torch.nn as nn
 import torch.nn.functional as F
 from typing import Dict, List, Tuple, Union
-from yolox.boxes import postprocess
 def define_model(config_name: str = "page_element_v3", verbose: bool = True) -> nn.Module:
@@ -24,8 +25,9 @@ def define_model(config_name: str = "page_element_v3", verbose: bool = True) ->
         torch.nn.Module: The initialized YOLOX model.
     """
     # Load model from exp_file
-    sys.path.append(os.path.dirname(config_name))
-    exp_module = importlib.import_module(os.path.basename(config_name).split(".")[0])
     config = exp_module.Exp()
     model = config.get_model()
@@ -34,13 +36,15 @@ def define_model(config_name: str = "page_element_v3", verbose: bool = True) ->
     if verbose:
         print(" -> Loading weights from", config.ckpt)
-    ckpt = torch.load(config.ckpt, map_location="cpu", weights_only=False)
-    model.load_state_dict(ckpt["model"], strict=True)
     model = YoloXWrapper(model, config)
     return model.eval().to(config.device)
 def resize_pad(img: torch.Tensor, size: tuple) -> torch.Tensor:
     """
     Resizes and pads an image to a given size.

 import sys
 import torch
 import importlib
+import importlib.resources
 import numpy as np
 import numpy.typing as npt
 import torch.nn as nn
 import torch.nn.functional as F
 from typing import Dict, List, Tuple, Union
+from nemotron_page_elements_v3.yolox.boxes import postprocess
 def define_model(config_name: str = "page_element_v3", verbose: bool = True) -> nn.Module:
         torch.nn.Module: The initialized YOLOX model.
     """
     # Load model from exp_file
+    # page_element_v3.py is in the same directory as model.py
+    sys.path.append(os.path.dirname(__file__))
+    exp_module = importlib.import_module("page_element_v3")
     config = exp_module.Exp()
     model = config.get_model()
     if verbose:
         print(" -> Loading weights from", config.ckpt)
+    # Find package directory and load weights (nemotron_page_elements_v3)
+    package_dir = os.path.dirname(os.path.abspath(__file__))
+    weights_path = os.path.join(package_dir, "weights.pth")
+    state_dict = torch.load(weights_path, map_location="cpu", weights_only=False)
+    model.load_state_dict(state_dict["model"], strict=True)
     model = YoloXWrapper(model, config)
     return model.eval().to(config.device)
 def resize_pad(img: torch.Tensor, size: tuple) -> torch.Tensor:
     """
     Resizes and pads an image to a given size.

page_element_v3.py → nemotron_page_elements_v3/page_element_v3.py RENAMED Viewed

@@ -63,7 +63,9 @@ class Exp:
         Returns:
             nn.Module: The YOLOX model with configured parameters.
         """
-        from yolox import YOLOX, YOLOPAFPN, YOLOXHead
         # Build model
         if getattr(self, "model", None) is None:

         Returns:
             nn.Module: The YOLOX model with configured parameters.
         """
+        from nemotron_page_elements_v3.yolox.yolox import YOLOX
+        from nemotron_page_elements_v3.yolox.yolo_pafpn import YOLOPAFPN
+        from nemotron_page_elements_v3.yolox.yolo_head import YOLOXHead
         # Build model
         if getattr(self, "model", None) is None:

nemotron_page_elements_v3/post_processing/__init__.py ADDED Viewed

	@@ -0,0 +1,60 @@

+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""
+Post-processing utilities for page element predictions.
+This module provides utilities for advanced post-processing of page element
+detection results, including box expansion, matching with titles, and
+weighted box fusion.
+"""
+# Import from page_elt_pp
+from .page_elt_pp import (
+    expand_boxes,
+    merge_boxes,
+    bb_iou_array,
+    match_with_title,
+    match_boxes_with_title,
+)
+# Import from text_pp
+from .text_pp import (
+    get_overlaps,
+    get_distances,
+    find_titles,
+    postprocess_included,
+)
+# Import from wbf
+from .wbf import (
+    weighted_boxes_fusion,
+    prefilter_boxes,
+    merge_labels,
+    get_weighted_box,
+    get_biggest_box,
+    find_matching_box_fast,
+)
+__all__ = [
+    # page_elt_pp
+    "expand_boxes",
+    "merge_boxes",
+    "bb_iou_array",
+    "match_with_title",
+    "match_boxes_with_title",
+    # text_pp
+    "get_overlaps",
+    "get_distances",
+    "find_titles",
+    "postprocess_included",
+    # wbf
+    "weighted_boxes_fusion",
+    "prefilter_boxes",
+    "merge_labels",
+    "get_weighted_box",
+    "get_biggest_box",
+    "find_matching_box_fast",
+]

{post_processing → nemotron_page_elements_v3/post_processing}/page_elt_pp.py RENAMED Viewed

File without changes

{post_processing → nemotron_page_elements_v3/post_processing}/text_pp.py RENAMED Viewed

File without changes

{post_processing → nemotron_page_elements_v3/post_processing}/wbf.py RENAMED Viewed

File without changes

utils.py → nemotron_page_elements_v3/utils.py RENAMED Viewed

File without changes

weights.pth → nemotron_page_elements_v3/weights.pth RENAMED Viewed

File without changes

{yolox → nemotron_page_elements_v3/yolox}/boxes.py RENAMED Viewed

File without changes

{yolox → nemotron_page_elements_v3/yolox}/darknet.py RENAMED Viewed

File without changes

{yolox → nemotron_page_elements_v3/yolox}/network_blocks.py RENAMED Viewed

File without changes

{yolox → nemotron_page_elements_v3/yolox}/yolo_fpn.py RENAMED Viewed

File without changes

{yolox → nemotron_page_elements_v3/yolox}/yolo_head.py RENAMED Viewed

File without changes

{yolox → nemotron_page_elements_v3/yolox}/yolo_pafpn.py RENAMED Viewed

File without changes

{yolox → nemotron_page_elements_v3/yolox}/yolox.py RENAMED Viewed

File without changes

pyproject.toml ADDED Viewed

	@@ -0,0 +1,87 @@

+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "nemotron-page-elements-v3"
+version = "3.0.0"
+description = "NVIDIA Nemotron Page Elements v3: A specialized YOLOX-based object detection model for identifying tables, charts, infographics, titles, headers/footers, and text in document pages"
+readme = "README.md"
+requires-python = ">=3.8"
+license = {text = "NVIDIA Open Model License"}
+authors = [
+    {name = "NVIDIA Corporation", email = "boli@nvidia.com"},
+    {name = "Theo Viel", email = "tviel@nvidia.com"},
+    {name = "Bo Liu", email = "boli@nvidia.com"},
+]
+maintainers = [
+    {name = "Theo Viel", email = "tviel@nvidia.com"},
+    {name = "Bo Liu", email = "boli@nvidia.com"},
+]
+keywords = [
+    "object-detection",
+    "document-understanding",
+    "pdf-extraction",
+    "yolox",
+    "page-layout",
+    "nvidia",
+    "nemotron",
+    "deep-learning",
+    "computer-vision",
+]
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Scientific/Engineering :: Image Recognition",
+    "License :: Other/Proprietary License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+]
+dependencies = [
+    "numpy",
+    "torch>=1.7",
+    "opencv-python",
+    "loguru",
+    "tqdm",
+    "torchvision",
+    "thop",
+    "ninja",
+    "tabulate",
+    "psutil",
+    "tensorboard",
+    "pycocotools>=2.0.2",
+    "onnx>=1.13.0",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0",
+    "black>=22.0",
+    "flake8>=4.0",
+    "mypy>=0.950",
+]
+[project.urls]
+Homepage = "https://huggingface.co/nvidia/nemotron-page-elements-v3"
+Documentation = "https://huggingface.co/nvidia/nemotron-page-elements-v3"
+Repository = "https://huggingface.co/nvidia/nemotron-page-elements-v3"
+"Bug Tracker" = "https://huggingface.co/nvidia/nemotron-page-elements-v3/discussions"
+"Model Card" = "https://huggingface.co/nvidia/nemotron-page-elements-v3"
+[tool.setuptools]
+include-package-data = true
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["nemotron_page_elements_v3*"]
+[tool.setuptools.package-data]
+"nemotron_page_elements_v3" = ["*.json", "*.pth", "*.png"]

yolox/__init__.py → setup.py RENAMED Viewed

@@ -1,11 +1,17 @@
-#!/usr/bin/env python3
-# -*- coding:utf-8 -*-
 # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
-# Copyright (c) Megvii Inc. All rights reserved.
-from .yolo_head import YOLOXHead
-from .yolo_pafpn import YOLOPAFPN
-from .yolox import YOLOX

+#!/usr/bin/env python
 # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
+"""
+Setup script for backward compatibility.
+This project uses pyproject.toml for configuration (PEP 621).
+"""
+from setuptools import setup
+# Configuration is in pyproject.toml
+setup()