BoLiu jdye64 committed on
Commit
29d49e1
·
verified ·
1 Parent(s): a5c0f8c

pip-install (#7)

Browse files

- Introduce facilities for building a simple wheel for the nemotron-page-elements-v3 project (b452d69c702459cb68cb5c49e03e81a40622f1f8)
- Update Demo, README, and model with local tweaks (d2e30b3efa91e91124125044bdf8fd44f833277f)
- updated model path, readme and demo (adb5d1e25d3e6367a17a2681efbdd22527ea0952)


Co-authored-by: Jeremy <jdye64@users.noreply.huggingface.co>

.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ dist/
2
+ build/
3
+ *.egg-info/
4
+ *.pyc
5
+ *.pyo
6
+ *.pyd
7
+ *.pyw
8
+ *.pyz
Demo.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1f448b52f30c3bdb41d9db1c3fea0274ac39726338c19a8b48005017245d1dd
3
- size 2363478
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1abc855769ac788f46bc709cd33405fe02c816c68ad500a75ae82444998fbb61
3
+ size 1812701
MANIFEST.in ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ include README.md
2
+ include THIRD_PARTY_NOTICES.md
3
+ recursive-include nemotron_page_elements_v3
4
+
5
+
README.md CHANGED
@@ -121,7 +121,12 @@ git clone https://huggingface.co/nvidia/nemotron-page-elements-v3
121
  ```
122
  git clone git@hf.co:nvidia/nemotron-page-elements-v3
123
  ```
124
-
 
 
 
 
 
125
  2. Run the model using the following code:
126
 
127
  ```
@@ -130,8 +135,8 @@ import numpy as np
130
  import matplotlib.pyplot as plt
131
  from PIL import Image
132
 
133
- from model import define_model
134
- from utils import plot_sample, postprocess_preds_page_element, reformat_for_plotting
135
 
136
  # Load image
137
  path = "./example.png"
 
121
  ```
122
  git clone git@hf.co:nvidia/nemotron-page-elements-v3
123
  ```
124
+ Optional:
125
+ This can be installed as a package using pip
126
+ ```
127
+ cd nemotron-page-elements-v3
128
+ pip install -e .
129
+ ```
130
  2. Run the model using the following code:
131
 
132
  ```
 
135
  import matplotlib.pyplot as plt
136
  from PIL import Image
137
 
138
+ from nemotron_page_elements_v3.model import define_model
139
+ from nemotron_page_elements_v3.utils import plot_sample, postprocess_preds_page_element, reformat_for_plotting
140
 
141
  # Load image
142
  path = "./example.png"
config.json → nemotron_page_elements_v3/config.json RENAMED
File without changes
model.py → nemotron_page_elements_v3/model.py RENAMED
@@ -4,12 +4,13 @@ import os
4
  import sys
5
  import torch
6
  import importlib
 
7
  import numpy as np
8
  import numpy.typing as npt
9
  import torch.nn as nn
10
  import torch.nn.functional as F
11
  from typing import Dict, List, Tuple, Union
12
- from yolox.boxes import postprocess
13
 
14
 
15
  def define_model(config_name: str = "page_element_v3", verbose: bool = True) -> nn.Module:
@@ -24,8 +25,9 @@ def define_model(config_name: str = "page_element_v3", verbose: bool = True) ->
24
  torch.nn.Module: The initialized YOLOX model.
25
  """
26
  # Load model from exp_file
27
- sys.path.append(os.path.dirname(config_name))
28
- exp_module = importlib.import_module(os.path.basename(config_name).split(".")[0])
 
29
 
30
  config = exp_module.Exp()
31
  model = config.get_model()
@@ -34,13 +36,15 @@ def define_model(config_name: str = "page_element_v3", verbose: bool = True) ->
34
  if verbose:
35
  print(" -> Loading weights from", config.ckpt)
36
 
37
- ckpt = torch.load(config.ckpt, map_location="cpu", weights_only=False)
38
- model.load_state_dict(ckpt["model"], strict=True)
39
-
 
 
 
40
  model = YoloXWrapper(model, config)
41
  return model.eval().to(config.device)
42
-
43
-
44
  def resize_pad(img: torch.Tensor, size: tuple) -> torch.Tensor:
45
  """
46
  Resizes and pads an image to a given size.
 
4
  import sys
5
  import torch
6
  import importlib
7
+ import importlib.resources
8
  import numpy as np
9
  import numpy.typing as npt
10
  import torch.nn as nn
11
  import torch.nn.functional as F
12
  from typing import Dict, List, Tuple, Union
13
+ from nemotron_page_elements_v3.yolox.boxes import postprocess
14
 
15
 
16
  def define_model(config_name: str = "page_element_v3", verbose: bool = True) -> nn.Module:
 
25
  torch.nn.Module: The initialized YOLOX model.
26
  """
27
  # Load model from exp_file
28
+ # page_element_v3.py is in the same directory as model.py
29
+ sys.path.append(os.path.dirname(__file__))
30
+ exp_module = importlib.import_module("page_element_v3")
31
 
32
  config = exp_module.Exp()
33
  model = config.get_model()
 
36
  if verbose:
37
  print(" -> Loading weights from", config.ckpt)
38
 
39
+ # Find package directory and load weights (nemotron_page_elements_v3)
40
+ package_dir = os.path.dirname(os.path.abspath(__file__))
41
+ weights_path = os.path.join(package_dir, "weights.pth")
42
+ state_dict = torch.load(weights_path, map_location="cpu", weights_only=False)
43
+ model.load_state_dict(state_dict["model"], strict=True)
44
+
45
  model = YoloXWrapper(model, config)
46
  return model.eval().to(config.device)
47
+
 
48
  def resize_pad(img: torch.Tensor, size: tuple) -> torch.Tensor:
49
  """
50
  Resizes and pads an image to a given size.
page_element_v3.py → nemotron_page_elements_v3/page_element_v3.py RENAMED
@@ -63,7 +63,9 @@ class Exp:
63
  Returns:
64
  nn.Module: The YOLOX model with configured parameters.
65
  """
66
- from yolox import YOLOX, YOLOPAFPN, YOLOXHead
 
 
67
 
68
  # Build model
69
  if getattr(self, "model", None) is None:
 
63
  Returns:
64
  nn.Module: The YOLOX model with configured parameters.
65
  """
66
+ from nemotron_page_elements_v3.yolox.yolox import YOLOX
67
+ from nemotron_page_elements_v3.yolox.yolo_pafpn import YOLOPAFPN
68
+ from nemotron_page_elements_v3.yolox.yolo_head import YOLOXHead
69
 
70
  # Build model
71
  if getattr(self, "model", None) is None:
nemotron_page_elements_v3/post_processing/__init__.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """
5
+ Post-processing utilities for page element predictions.
6
+
7
+ This module provides utilities for advanced post-processing of page element
8
+ detection results, including box expansion, matching with titles, and
9
+ weighted box fusion.
10
+ """
11
+
12
+ # Import from page_elt_pp
13
+ from .page_elt_pp import (
14
+ expand_boxes,
15
+ merge_boxes,
16
+ bb_iou_array,
17
+ match_with_title,
18
+ match_boxes_with_title,
19
+ )
20
+
21
+ # Import from text_pp
22
+ from .text_pp import (
23
+ get_overlaps,
24
+ get_distances,
25
+ find_titles,
26
+ postprocess_included,
27
+ )
28
+
29
+ # Import from wbf
30
+ from .wbf import (
31
+ weighted_boxes_fusion,
32
+ prefilter_boxes,
33
+ merge_labels,
34
+ get_weighted_box,
35
+ get_biggest_box,
36
+ find_matching_box_fast,
37
+ )
38
+
39
+ __all__ = [
40
+ # page_elt_pp
41
+ "expand_boxes",
42
+ "merge_boxes",
43
+ "bb_iou_array",
44
+ "match_with_title",
45
+ "match_boxes_with_title",
46
+ # text_pp
47
+ "get_overlaps",
48
+ "get_distances",
49
+ "find_titles",
50
+ "postprocess_included",
51
+ # wbf
52
+ "weighted_boxes_fusion",
53
+ "prefilter_boxes",
54
+ "merge_labels",
55
+ "get_weighted_box",
56
+ "get_biggest_box",
57
+ "find_matching_box_fast",
58
+ ]
59
+
60
+
{post_processing → nemotron_page_elements_v3/post_processing}/page_elt_pp.py RENAMED
File without changes
{post_processing → nemotron_page_elements_v3/post_processing}/text_pp.py RENAMED
File without changes
{post_processing → nemotron_page_elements_v3/post_processing}/wbf.py RENAMED
File without changes
utils.py → nemotron_page_elements_v3/utils.py RENAMED
File without changes
weights.pth → nemotron_page_elements_v3/weights.pth RENAMED
File without changes
{yolox → nemotron_page_elements_v3/yolox}/boxes.py RENAMED
File without changes
{yolox → nemotron_page_elements_v3/yolox}/darknet.py RENAMED
File without changes
{yolox → nemotron_page_elements_v3/yolox}/network_blocks.py RENAMED
File without changes
{yolox → nemotron_page_elements_v3/yolox}/yolo_fpn.py RENAMED
File without changes
{yolox → nemotron_page_elements_v3/yolox}/yolo_head.py RENAMED
File without changes
{yolox → nemotron_page_elements_v3/yolox}/yolo_pafpn.py RENAMED
File without changes
{yolox → nemotron_page_elements_v3/yolox}/yolox.py RENAMED
File without changes
pyproject.toml ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "nemotron-page-elements-v3"
7
+ version = "3.0.0"
8
+ description = "NVIDIA Nemotron Page Elements v3: A specialized YOLOX-based object detection model for identifying tables, charts, infographics, titles, headers/footers, and text in document pages"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = {text = "NVIDIA Open Model License"}
12
+ authors = [
13
+ {name = "NVIDIA Corporation", email = "boli@nvidia.com"},
14
+ {name = "Theo Viel", email = "tviel@nvidia.com"},
15
+ {name = "Bo Liu", email = "boli@nvidia.com"},
16
+ ]
17
+ maintainers = [
18
+ {name = "Theo Viel", email = "tviel@nvidia.com"},
19
+ {name = "Bo Liu", email = "boli@nvidia.com"},
20
+ ]
21
+ keywords = [
22
+ "object-detection",
23
+ "document-understanding",
24
+ "pdf-extraction",
25
+ "yolox",
26
+ "page-layout",
27
+ "nvidia",
28
+ "nemotron",
29
+ "deep-learning",
30
+ "computer-vision",
31
+ ]
32
+ classifiers = [
33
+ "Development Status :: 5 - Production/Stable",
34
+ "Intended Audience :: Developers",
35
+ "Intended Audience :: Science/Research",
36
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
37
+ "Topic :: Scientific/Engineering :: Image Recognition",
38
+ "License :: Other/Proprietary License",
39
+ "Programming Language :: Python :: 3",
40
+ "Programming Language :: Python :: 3.8",
41
+ "Programming Language :: Python :: 3.9",
42
+ "Programming Language :: Python :: 3.10",
43
+ "Programming Language :: Python :: 3.11",
44
+ "Programming Language :: Python :: 3.12",
45
+ ]
46
+
47
+ dependencies = [
48
+ "numpy",
49
+ "torch>=1.7",
50
+ "opencv-python",
51
+ "loguru",
52
+ "tqdm",
53
+ "torchvision",
54
+ "thop",
55
+ "ninja",
56
+ "tabulate",
57
+ "psutil",
58
+ "tensorboard",
59
+ "pycocotools>=2.0.2",
60
+ "onnx>=1.13.0",
61
+ ]
62
+
63
+ [project.optional-dependencies]
64
+ dev = [
65
+ "pytest>=7.0",
66
+ "black>=22.0",
67
+ "flake8>=4.0",
68
+ "mypy>=0.950",
69
+ ]
70
+
71
+ [project.urls]
72
+ Homepage = "https://huggingface.co/nvidia/nemotron-page-elements-v3"
73
+ Documentation = "https://huggingface.co/nvidia/nemotron-page-elements-v3"
74
+ Repository = "https://huggingface.co/nvidia/nemotron-page-elements-v3"
75
+ "Bug Tracker" = "https://huggingface.co/nvidia/nemotron-page-elements-v3/discussions"
76
+ "Model Card" = "https://huggingface.co/nvidia/nemotron-page-elements-v3"
77
+
78
+ [tool.setuptools]
79
+ include-package-data = true
80
+
81
+ [tool.setuptools.packages.find]
82
+ where = ["."]
83
+ include = ["nemotron_page_elements_v3*"]
84
+
85
+ [tool.setuptools.package-data]
86
+ "nemotron_page_elements_v3" = ["*.json", "*.pth", "*.png"]
87
+
yolox/__init__.py → setup.py RENAMED
@@ -1,11 +1,17 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
-
4
  # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5
  # SPDX-License-Identifier: Apache-2.0
6
 
7
- # Copyright (c) Megvii Inc. All rights reserved.
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- from .yolo_head import YOLOXHead
10
- from .yolo_pafpn import YOLOPAFPN
11
- from .yolox import YOLOX
 
1
+ #!/usr/bin/env python
 
 
2
  # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
  # SPDX-License-Identifier: Apache-2.0
4
 
5
+ """
6
+ Setup script for backward compatibility.
7
+ This project uses pyproject.toml for configuration (PEP 621).
8
+ """
9
+
10
+ from setuptools import setup
11
+
12
+ # Configuration is in pyproject.toml
13
+ setup()
14
+
15
+
16
+
17