Simplify weight download to use hf_hub_download consistently
- Remove get_weights_path helper, inline hf_hub_download in define_model
- Fix WEIGHTS_FILENAME to use subdirectory path
- Fix copy-paste bug in define_model default config_name
- Remove get_weights_path from __init__.py exports

Files changed (2) hide show

nemotron_table_structure_v1/__init__.py +1 -2
nemotron_table_structure_v1/model.py +17 -31

nemotron_table_structure_v1/__init__.py CHANGED Viewed

@@ -9,7 +9,7 @@ A specialized object detection model for table structure extraction based on YOL
 __version__ = "1.0.0"
-from .model import define_model, YoloXWrapper, get_weights_path
 from .utils import (
     plot_sample,
     postprocess_preds_table_structure,
@@ -19,7 +19,6 @@ from .utils import (
 __all__ = [
     "define_model",
-    "get_weights_path",
     "YoloXWrapper",
     "plot_sample",
     "postprocess_preds_table_structure",

 __version__ = "1.0.0"
+from .model import define_model, YoloXWrapper
 from .utils import (
     plot_sample,
     postprocess_preds_table_structure,
 __all__ = [
     "define_model",
     "YoloXWrapper",
     "plot_sample",
     "postprocess_preds_table_structure",

nemotron_table_structure_v1/model.py CHANGED Viewed

@@ -13,56 +13,42 @@ from typing import Dict, List, Tuple, Union
 from huggingface_hub import hf_hub_download
 from .yolox.boxes import postprocess
-# HuggingFace repository for weights
 HF_REPO_ID = "nvidia/nemotron-table-structure-v1"
-WEIGHTS_FILENAME = "weights.pth"
-def get_weights_path(verbose: bool = True) -> str:
-    """
-    Get the path to the model weights, downloading from HuggingFace if necessary.
-    The weights are cached in the HuggingFace cache directory after the first download.
-    Args:
-        verbose (bool): Whether to print download progress. Defaults to True.
-    Returns:
-        str: Path to the weights file.
-    """
-    if verbose:
-        print(f" -> Downloading/loading weights from HuggingFace: {HF_REPO_ID}")
-    weights_path = hf_hub_download(
-        repo_id=HF_REPO_ID,
-        filename=WEIGHTS_FILENAME,
-        repo_type="model",
-    )
-    return weights_path
-def define_model(config_name: str = "page_element_v3", verbose: bool = True) -> nn.Module:
     """
     Defines and initializes the model based on the configuration.
     Args:
-        config_name (str): Configuration name. Defaults to "page_element_v3".
         verbose (bool): Whether to print verbose output. Defaults to True.
     Returns:
         torch.nn.Module: The initialized YOLOX model.
     """
     # Load model from exp_file
-    # page_element_v3.py is in the same directory as model.py
     sys.path.append(os.path.dirname(__file__))
     exp_module = importlib.import_module("table_structure_v1")
     config = exp_module.Exp()
     model = config.get_model()
-    # Load weights (downloaded from HuggingFace if not cached)
-    weights_path = get_weights_path(verbose=verbose)
     state_dict = torch.load(weights_path, map_location="cpu", weights_only=False)
     model.load_state_dict(state_dict["model"], strict=True)

 from huggingface_hub import hf_hub_download
 from .yolox.boxes import postprocess
+# HuggingFace repository for downloading model weights
 HF_REPO_ID = "nvidia/nemotron-table-structure-v1"
+WEIGHTS_FILENAME = "nemotron_table_structure_v1/weights.pth"
+def define_model(config_name: str = "table_structure_v1", verbose: bool = True) -> nn.Module:
     """
     Defines and initializes the model based on the configuration.
     Args:
+        config_name (str): Configuration name. Defaults to "table_structure_v1".
         verbose (bool): Whether to print verbose output. Defaults to True.
     Returns:
         torch.nn.Module: The initialized YOLOX model.
     """
     # Load model from exp_file
+    # table_structure_v1.py is in the same directory as model.py
     sys.path.append(os.path.dirname(__file__))
     exp_module = importlib.import_module("table_structure_v1")
     config = exp_module.Exp()
     model = config.get_model()
+    # Download weights from HuggingFace Hub (cached locally after first download)
+    if verbose:
+        print(f" -> Downloading/loading weights from HuggingFace: {HF_REPO_ID}")
+    weights_path = hf_hub_download(
+        repo_id=HF_REPO_ID,
+        filename=WEIGHTS_FILENAME,
+    )
+    if verbose:
+        print(f" -> Weights cached at: {weights_path}")
     state_dict = torch.load(weights_path, map_location="cpu", weights_only=False)
     model.load_state_dict(state_dict["model"], strict=True)

Simplify weight download to use hf_hub_download consistently
- Remove get_weights_path helper, inline hf_hub_download in define_model
- Fix WEIGHTS_FILENAME to use subdirectory path
- Fix copy-paste bug in define_model default config_name
- Remove get_weights_path from __init__.py exports

Simplify weight download to use hf_hub_download consistently
- Remove get_weights_path helper, inline hf_hub_download in define_model
- Fix WEIGHTS_FILENAME to use subdirectory path
- Fix copy-paste bug in define_model default config_name
- Remove get_weights_path from __init__.py exports