Simplify weight download to use hf_hub_download consistently

- Remove weight_downloader.py module
- Inline hf_hub_download calls in pipeline.py
- Remove hf_token and force_download params from NemotronOCR
- Simplify example.py

Files changed (3)

example.py +1 -2
nemotron-ocr/src/nemotron_ocr/inference/pipeline.py +24 -32
nemotron-ocr/src/nemotron_ocr/inference/weight_downloader.py +0 -168

example.py CHANGED Viewed

@@ -8,8 +8,7 @@ from nemotron_ocr.inference.pipeline import NemotronOCR
 def main(image_path, merge_level, no_visualize, model_dir):
-    # model_dir can be None to use HuggingFace cache, or a path to local checkpoints
-    ocr_pipeline = NemotronOCR(model_dir=model_dir if model_dir else None)
     predictions = ocr_pipeline(image_path, merge_level=merge_level, visualize=not no_visualize)

 def main(image_path, merge_level, no_visualize, model_dir):
+    ocr_pipeline = NemotronOCR(model_dir=model_dir)
     predictions = ocr_pipeline(image_path, merge_level=merge_level, visualize=not no_visualize)

nemotron-ocr/src/nemotron_ocr/inference/pipeline.py CHANGED Viewed

@@ -21,7 +21,7 @@ from nemotron_ocr.inference.post_processing.data.text_region import TextBlock
 from nemotron_ocr.inference.post_processing.quad_rectify import QuadRectify
 from nemotron_ocr.inference.post_processing.research_ops import parse_relational_results, reorder_boxes
 from nemotron_ocr.inference.pre_processing import interpolate_and_pad, pad_to_square
-from nemotron_ocr.inference.weight_downloader import ensure_weights_available
 from nemotron_ocr_cpp import quad_non_maximal_suppression, region_counts_to_indices, rrect_to_quads
 from PIL import Image, ImageDraw, ImageFont
 from torch import amp
@@ -39,54 +39,46 @@ MERGE_LEVELS = {"word", "sentence", "paragraph"}
 DEFAULT_MERGE_LEVEL = "paragraph"
 class NemotronOCR:
     """
     A high-level pipeline for performing OCR on images.
     Model weights are automatically downloaded from Hugging Face Hub
     (nvidia/nemotron-ocr-v1) if not found locally.
-    Args:
-        model_dir: Path to directory containing model checkpoints.
-                   If None, weights are downloaded to HuggingFace cache.
-                   If provided path exists and contains weights, uses them directly.
-                   If provided path doesn't have weights, downloads to HF cache.
-        hf_token: Hugging Face authentication token (optional).
-        force_download: If True, re-download weights even if they exist.
     """
-    def __init__(
-        self,
-        model_dir: Optional[str] = None,
-        hf_token: Optional[str] = None,
-        force_download: bool = False,
-    ):
-        # Resolve model directory - download from HuggingFace if needed
         if model_dir is not None:
             local_path = Path(model_dir)
-            # Check if the provided path has all required files
-            required_files = ["detector.pth", "recognizer.pth", "relational.pth", "charset.txt"]
-            if all((local_path / f).is_file() for f in required_files) and not force_download:
                 self._model_dir = local_path
             else:
-                # Download from HuggingFace
-                self._model_dir = ensure_weights_available(
-                    model_dir=local_path,
-                    force_download=force_download,
-                    token=hf_token,
-                )
         else:
-            # No model_dir specified - download to HuggingFace cache
-            self._model_dir = ensure_weights_available(
-                model_dir=None,
-                force_download=force_download,
-                token=hf_token,
-            )
         self._load_models()
         self._load_charset()
         self._initialize_processors()
     def _load_models(self):
         """Loads all necessary models into memory."""
         self.detector = FOTSDetector(coordinate_mode="RBOX", backbone="regnet_y_8gf", verbose=False)

 from nemotron_ocr.inference.post_processing.quad_rectify import QuadRectify
 from nemotron_ocr.inference.post_processing.research_ops import parse_relational_results, reorder_boxes
 from nemotron_ocr.inference.pre_processing import interpolate_and_pad, pad_to_square
+from huggingface_hub import hf_hub_download
 from nemotron_ocr_cpp import quad_non_maximal_suppression, region_counts_to_indices, rrect_to_quads
 from PIL import Image, ImageDraw, ImageFont
 from torch import amp
 DEFAULT_MERGE_LEVEL = "paragraph"
+# HuggingFace repository for downloading model weights
+HF_REPO_ID = "nvidia/nemotron-ocr-v1"
+CHECKPOINT_FILES = ["detector.pth", "recognizer.pth", "relational.pth", "charset.txt"]
 class NemotronOCR:
     """
     A high-level pipeline for performing OCR on images.
     Model weights are automatically downloaded from Hugging Face Hub
     (nvidia/nemotron-ocr-v1) if not found locally.
     """
+    def __init__(self, model_dir: Optional[str] = None):
+        # If model_dir is provided and contains all required files, use it directly
         if model_dir is not None:
             local_path = Path(model_dir)
+            if all((local_path / f).is_file() for f in CHECKPOINT_FILES):
                 self._model_dir = local_path
             else:
+                self._model_dir = self._download_checkpoints()
         else:
+            self._model_dir = self._download_checkpoints()
         self._load_models()
         self._load_charset()
         self._initialize_processors()
+    @staticmethod
+    def _download_checkpoints() -> Path:
+        """Download model checkpoints from HuggingFace Hub (cached locally after first download)."""
+        downloaded_path = None
+        for filename in CHECKPOINT_FILES:
+            downloaded_path = hf_hub_download(
+                repo_id=HF_REPO_ID,
+                filename=f"checkpoints/{filename}",
+            )
+        # All checkpoint files are in the same directory
+        return Path(downloaded_path).parent
     def _load_models(self):
         """Loads all necessary models into memory."""
         self.detector = FOTSDetector(coordinate_mode="RBOX", backbone="regnet_y_8gf", verbose=False)

nemotron-ocr/src/nemotron_ocr/inference/weight_downloader.py DELETED Viewed

@@ -1,168 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-"""
-Utility for downloading model weights from Hugging Face Hub.
-This module provides functionality to automatically download the Nemotron OCR
-model weights from the Hugging Face repository if they are not present locally.
-"""
-from pathlib import Path
-from typing import Optional
-from huggingface_hub import hf_hub_download, snapshot_download
-# Hugging Face repository for Nemotron OCR weights
-HF_REPO_ID = "nvidia/nemotron-ocr-v1"
-# List of required checkpoint files
-CHECKPOINT_FILES = [
-    "checkpoints/detector.pth",
-    "checkpoints/recognizer.pth",
-    "checkpoints/relational.pth",
-    "checkpoints/charset.txt",
-]
-def get_default_cache_dir() -> Path:
-    """
-    Get the default cache directory for storing downloaded weights.
-    Uses the standard HuggingFace cache location.
-    Returns:
-        Path to the cache directory.
-    """
-    from huggingface_hub import constants
-    return Path(constants.HF_HUB_CACHE)
-def ensure_weights_available(
-    model_dir: Optional[Path] = None,
-    repo_id: str = HF_REPO_ID,
-    force_download: bool = False,
-    token: Optional[str] = None,
-) -> Path:
-    """
-    Ensure model weights are available, downloading them if necessary.
-    This function checks if the required checkpoint files exist in the specified
-    model directory. If any files are missing, it downloads them from the
-    Hugging Face Hub.
-    Args:
-        model_dir: Path to the directory containing model weights.
-                   If None, uses the HuggingFace cache directory.
-        repo_id: Hugging Face repository ID.
-        force_download: If True, re-download even if files exist.
-        token: Hugging Face authentication token (optional, for private repos).
-    Returns:
-        Path to the directory containing the model checkpoints.
-    Raises:
-        RuntimeError: If download fails.
-    """
-    # If model_dir is provided and all files exist, use it directly
-    if model_dir is not None and not force_download:
-        model_path = Path(model_dir)
-        if _all_checkpoints_present(model_path):
-            return model_path
-    # Download to HuggingFace cache if no local path provided or files missing
-    try:
-        # Download only the checkpoints folder from the repo
-        cache_dir = snapshot_download(
-            repo_id=repo_id,
-            allow_patterns=["checkpoints/*"],
-            force_download=force_download,
-            token=token,
-        )
-        checkpoint_dir = Path(cache_dir) / "checkpoints"
-        if not _all_checkpoints_present_flat(checkpoint_dir):
-            raise RuntimeError(
-                f"Downloaded weights are incomplete. Expected files in {checkpoint_dir}"
-            )
-        return checkpoint_dir
-    except Exception as e:
-        raise RuntimeError(
-            f"Failed to download model weights from {repo_id}. "
-            f"Please ensure you have internet access and the repository exists. "
-            f"Error: {e}"
-        ) from e
-def _all_checkpoints_present(base_path: Path) -> bool:
-    """Check if all required checkpoint files are present in the given directory."""
-    required_files = ["detector.pth", "recognizer.pth", "relational.pth", "charset.txt"]
-    return all((base_path / f).is_file() for f in required_files)
-def _all_checkpoints_present_flat(checkpoint_dir: Path) -> bool:
-    """Check if all required checkpoint files are present in a flat directory."""
-    required_files = ["detector.pth", "recognizer.pth", "relational.pth", "charset.txt"]
-    return all((checkpoint_dir / f).is_file() for f in required_files)
-def download_weights(
-    output_dir: Optional[Path] = None,
-    repo_id: str = HF_REPO_ID,
-    force_download: bool = False,
-    token: Optional[str] = None,
-) -> Path:
-    """
-    Explicitly download model weights to a specified directory.
-    This is a convenience function for users who want to pre-download
-    weights to a specific location.
-    Args:
-        output_dir: Directory to save the weights. If None, uses HuggingFace cache.
-        repo_id: Hugging Face repository ID.
-        force_download: If True, re-download even if files exist.
-        token: Hugging Face authentication token (optional).
-    Returns:
-        Path to the directory containing the downloaded checkpoints.
-    Example:
-        >>> from nemotron_ocr.inference.weight_downloader import download_weights
-        >>> checkpoint_dir = download_weights(output_dir=Path("./my_checkpoints"))
-        >>> # Use checkpoint_dir with NemotronOCR
-        >>> from nemotron_ocr.inference.pipeline import NemotronOCR
-        >>> ocr = NemotronOCR(model_dir=checkpoint_dir)
-    """
-    if output_dir is not None:
-        output_path = Path(output_dir)
-        output_path.mkdir(parents=True, exist_ok=True)
-        # Download individual files to the output directory
-        required_files = ["detector.pth", "recognizer.pth", "relational.pth", "charset.txt"]
-        for filename in required_files:
-            hf_hub_download(
-                repo_id=repo_id,
-                filename=f"checkpoints/{filename}",
-                local_dir=output_path.parent,
-                force_download=force_download,
-                token=token,
-            )
-        # The files are downloaded to output_path.parent/checkpoints/
-        checkpoint_dir = output_path.parent / "checkpoints"
-        if output_path != checkpoint_dir:
-            # If user specified a different path, we downloaded to parent/checkpoints
-            # Return the actual location
-            return checkpoint_dir
-        return output_path
-    else:
-        return ensure_weights_available(
-            model_dir=None,
-            repo_id=repo_id,
-            force_download=force_download,
-            token=token,
-        )