jdye64 committed on
Commit
b452d69
·
1 Parent(s): a5c0f8c

Introduce facilities for building a simple wheel for the nemotron-page-elements-v3 project

Browse files
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ dist/
2
+ build/
3
+ *.egg-info/
4
+ *.pyc
5
+ *.pyo
6
+ *.pyd
7
+ *.pyw
8
+ *.pyz
MANIFEST.in ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ include README.md
2
+ include THIRD_PARTY_NOTICES.md
3
+ recursive-include nemotron_page_elements_v3
4
+
5
+
config.json → nemotron_page_elements_v3/config.json RENAMED
File without changes
model.py → nemotron_page_elements_v3/model.py RENAMED
@@ -4,12 +4,13 @@ import os
4
  import sys
5
  import torch
6
  import importlib
 
7
  import numpy as np
8
  import numpy.typing as npt
9
  import torch.nn as nn
10
  import torch.nn.functional as F
11
  from typing import Dict, List, Tuple, Union
12
- from yolox.boxes import postprocess
13
 
14
 
15
  def define_model(config_name: str = "page_element_v3", verbose: bool = True) -> nn.Module:
@@ -24,8 +25,9 @@ def define_model(config_name: str = "page_element_v3", verbose: bool = True) ->
24
  torch.nn.Module: The initialized YOLOX model.
25
  """
26
  # Load model from exp_file
27
- sys.path.append(os.path.dirname(config_name))
28
- exp_module = importlib.import_module(os.path.basename(config_name).split(".")[0])
 
29
 
30
  config = exp_module.Exp()
31
  model = config.get_model()
@@ -34,7 +36,9 @@ def define_model(config_name: str = "page_element_v3", verbose: bool = True) ->
34
  if verbose:
35
  print(" -> Loading weights from", config.ckpt)
36
 
37
- ckpt = torch.load(config.ckpt, map_location="cpu", weights_only=False)
 
 
38
  model.load_state_dict(ckpt["model"], strict=True)
39
 
40
  model = YoloXWrapper(model, config)
 
4
  import sys
5
  import torch
6
  import importlib
7
+ import importlib.resources
8
  import numpy as np
9
  import numpy.typing as npt
10
  import torch.nn as nn
11
  import torch.nn.functional as F
12
  from typing import Dict, List, Tuple, Union
13
+ from nemotron_page_elements_v3.yolox.boxes import postprocess
14
 
15
 
16
  def define_model(config_name: str = "page_element_v3", verbose: bool = True) -> nn.Module:
 
25
  torch.nn.Module: The initialized YOLOX model.
26
  """
27
  # Load model from exp_file
28
+ # page_element_v3.py is in the same directory as model.py
29
+ sys.path.append(os.path.dirname(__file__))
30
+ exp_module = importlib.import_module("page_element_v3")
31
 
32
  config = exp_module.Exp()
33
  model = config.get_model()
 
36
  if verbose:
37
  print(" -> Loading weights from", config.ckpt)
38
 
39
+ # Use importlib.resources to locate 'weights.pth' inside the module's directory (nemotron_page_elements_v3)
40
+ with importlib.resources.path("nemotron_page_elements_v3", "weights.pth") as weights_path:
41
+ ckpt = torch.load(str(weights_path), map_location="cpu", weights_only=False)
42
  model.load_state_dict(ckpt["model"], strict=True)
43
 
44
  model = YoloXWrapper(model, config)
page_element_v3.py → nemotron_page_elements_v3/page_element_v3.py RENAMED
@@ -63,7 +63,9 @@ class Exp:
63
  Returns:
64
  nn.Module: The YOLOX model with configured parameters.
65
  """
66
- from yolox import YOLOX, YOLOPAFPN, YOLOXHead
 
 
67
 
68
  # Build model
69
  if getattr(self, "model", None) is None:
 
63
  Returns:
64
  nn.Module: The YOLOX model with configured parameters.
65
  """
66
+ from nemotron_page_elements_v3.yolox.yolox import YOLOX
67
+ from nemotron_page_elements_v3.yolox.yolo_pafpn import YOLOPAFPN
68
+ from nemotron_page_elements_v3.yolox.yolo_head import YOLOXHead
69
 
70
  # Build model
71
  if getattr(self, "model", None) is None:
nemotron_page_elements_v3/post_processing/__init__.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """
5
+ Post-processing utilities for page element predictions.
6
+
7
+ This module provides utilities for advanced post-processing of page element
8
+ detection results, including box expansion, matching with titles, and
9
+ weighted box fusion.
10
+ """
11
+
12
+ # Import from page_elt_pp
13
+ from .page_elt_pp import (
14
+ expand_boxes,
15
+ merge_boxes,
16
+ bb_iou_array,
17
+ match_with_title,
18
+ match_boxes_with_title,
19
+ )
20
+
21
+ # Import from text_pp
22
+ from .text_pp import (
23
+ get_overlaps,
24
+ get_distances,
25
+ find_titles,
26
+ postprocess_included,
27
+ )
28
+
29
+ # Import from wbf
30
+ from .wbf import (
31
+ weighted_boxes_fusion,
32
+ prefilter_boxes,
33
+ merge_labels,
34
+ get_weighted_box,
35
+ get_biggest_box,
36
+ find_matching_box_fast,
37
+ )
38
+
39
+ __all__ = [
40
+ # page_elt_pp
41
+ "expand_boxes",
42
+ "merge_boxes",
43
+ "bb_iou_array",
44
+ "match_with_title",
45
+ "match_boxes_with_title",
46
+ # text_pp
47
+ "get_overlaps",
48
+ "get_distances",
49
+ "find_titles",
50
+ "postprocess_included",
51
+ # wbf
52
+ "weighted_boxes_fusion",
53
+ "prefilter_boxes",
54
+ "merge_labels",
55
+ "get_weighted_box",
56
+ "get_biggest_box",
57
+ "find_matching_box_fast",
58
+ ]
59
+
60
+
{post_processing → nemotron_page_elements_v3/post_processing}/page_elt_pp.py RENAMED
File without changes
{post_processing → nemotron_page_elements_v3/post_processing}/text_pp.py RENAMED
File without changes
{post_processing → nemotron_page_elements_v3/post_processing}/wbf.py RENAMED
File without changes
utils.py → nemotron_page_elements_v3/utils.py RENAMED
File without changes
weights.pth → nemotron_page_elements_v3/weights.pth RENAMED
File without changes
{yolox → nemotron_page_elements_v3/yolox}/boxes.py RENAMED
File without changes
{yolox → nemotron_page_elements_v3/yolox}/darknet.py RENAMED
File without changes
{yolox → nemotron_page_elements_v3/yolox}/network_blocks.py RENAMED
File without changes
{yolox → nemotron_page_elements_v3/yolox}/yolo_fpn.py RENAMED
File without changes
{yolox → nemotron_page_elements_v3/yolox}/yolo_head.py RENAMED
File without changes
{yolox → nemotron_page_elements_v3/yolox}/yolo_pafpn.py RENAMED
File without changes
{yolox → nemotron_page_elements_v3/yolox}/yolox.py RENAMED
File without changes
pyproject.toml ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "nemotron-page-elements-v3"
7
+ version = "3.0.0"
8
+ description = "NVIDIA Nemotron Page Elements v3: A specialized YOLOX-based object detection model for identifying tables, charts, infographics, titles, headers/footers, and text in document pages"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = {text = "NVIDIA Open Model License"}
12
+ authors = [
13
+ {name = "NVIDIA Corporation", email = "boli@nvidia.com"},
14
+ {name = "Theo Viel", email = "tviel@nvidia.com"},
15
+ {name = "Bo Liu", email = "boli@nvidia.com"},
16
+ ]
17
+ maintainers = [
18
+ {name = "Theo Viel", email = "tviel@nvidia.com"},
19
+ {name = "Bo Liu", email = "boli@nvidia.com"},
20
+ ]
21
+ keywords = [
22
+ "object-detection",
23
+ "document-understanding",
24
+ "pdf-extraction",
25
+ "yolox",
26
+ "page-layout",
27
+ "nvidia",
28
+ "nemotron",
29
+ "deep-learning",
30
+ "computer-vision",
31
+ ]
32
+ classifiers = [
33
+ "Development Status :: 5 - Production/Stable",
34
+ "Intended Audience :: Developers",
35
+ "Intended Audience :: Science/Research",
36
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
37
+ "Topic :: Scientific/Engineering :: Image Recognition",
38
+ "License :: Other/Proprietary License",
39
+ "Programming Language :: Python :: 3",
40
+ "Programming Language :: Python :: 3.8",
41
+ "Programming Language :: Python :: 3.9",
42
+ "Programming Language :: Python :: 3.10",
43
+ "Programming Language :: Python :: 3.11",
44
+ "Programming Language :: Python :: 3.12",
45
+ ]
46
+
47
+ dependencies = [
48
+ "numpy",
49
+ "torch>=1.7",
50
+ "opencv-python",
51
+ "loguru",
52
+ "tqdm",
53
+ "torchvision",
54
+ "thop",
55
+ "ninja",
56
+ "tabulate",
57
+ "psutil",
58
+ "tensorboard",
59
+ "pycocotools>=2.0.2",
60
+ "onnx>=1.13.0",
61
+ ]
62
+
63
+ [project.optional-dependencies]
64
+ dev = [
65
+ "pytest>=7.0",
66
+ "black>=22.0",
67
+ "flake8>=4.0",
68
+ "mypy>=0.950",
69
+ ]
70
+
71
+ [project.urls]
72
+ Homepage = "https://huggingface.co/nvidia/nemotron-page-elements-v3"
73
+ Documentation = "https://huggingface.co/nvidia/nemotron-page-elements-v3"
74
+ Repository = "https://huggingface.co/nvidia/nemotron-page-elements-v3"
75
+ "Bug Tracker" = "https://huggingface.co/nvidia/nemotron-page-elements-v3/discussions"
76
+ "Model Card" = "https://huggingface.co/nvidia/nemotron-page-elements-v3"
77
+
78
+ [tool.setuptools]
79
+ include-package-data = true
80
+
81
+ [tool.setuptools.packages.find]
82
+ where = ["."]
83
+ include = ["nemotron_page_elements_v3*"]
84
+
85
+ [tool.setuptools.package-data]
86
+ "nemotron_page_elements_v3" = ["*.json", "*.pth", "*.png"]
87
+
yolox/__init__.py → setup.py RENAMED
@@ -1,11 +1,17 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
-
4
  # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5
  # SPDX-License-Identifier: Apache-2.0
6
 
7
- # Copyright (c) Megvii Inc. All rights reserved.
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- from .yolo_head import YOLOXHead
10
- from .yolo_pafpn import YOLOPAFPN
11
- from .yolox import YOLOX
 
1
+ #!/usr/bin/env python
 
 
2
  # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
  # SPDX-License-Identifier: Apache-2.0
4
 
5
+ """
6
+ Setup script for backward compatibility.
7
+ This project uses pyproject.toml for configuration (PEP 621).
8
+ """
9
+
10
+ from setuptools import setup
11
+
12
+ # Configuration is in pyproject.toml
13
+ setup()
14
+
15
+
16
+
17