nemotron-ocr-v1 / nemotron-ocr /hatch_build.py
BoLiu's picture
Rename NeMo Retriever references to Nemotron (#3)
ea7747b verified
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import os
import sys
import subprocess
from pathlib import Path
from hatchling.builders.hooks.plugin.interface import BuildHookInterface
def _extension_up_to_date(project_root: Path) -> bool:
    """Report whether a compiled extension exists that is at least as new as its sources.

    The compiled artifact is any ``_nemotron_ocr_cpp*.so`` file located directly
    in ``src/nemotron_ocr_cpp``. Its modification time is compared against the
    most recently modified tracked file (``.cu``, ``.cpp``, ``.cuh``, ``.h``,
    ``.py``) found recursively under:

    - ``cpp/``
    - ``scripts/``
    - ``src/nemotron_ocr_cpp``

    Directories that do not exist are skipped. When several ``.so`` files are
    present, the newest one is used for the comparison.

    Returns:
        ``False`` when no ``.so`` is present or a tracked source file is
        strictly newer than the newest ``.so``; ``True`` otherwise.
    """
    shim_dir = project_root / "src" / "nemotron_ocr_cpp"

    built_mtimes = [lib.stat().st_mtime for lib in shim_dir.glob("_nemotron_ocr_cpp*.so")]
    if not built_mtimes:
        return False

    tracked_suffixes = {".cu", ".cpp", ".cuh", ".h", ".py"}
    scan_roots = (project_root / "cpp", project_root / "scripts", shim_dir)
    source_mtimes = (
        entry.stat().st_mtime
        for root in scan_roots
        if root.exists()
        for entry in root.rglob("*")
        if entry.is_file() and entry.suffix in tracked_suffixes
    )

    # The 0.0 floor means "no tracked sources found" counts as up to date.
    latest_source = max([0.0, *source_mtimes])
    return max(built_mtimes) >= latest_source
class CustomBuildHook(BuildHookInterface):
    """Hatch build hook that invokes the extension build script during packaging."""

    def initialize(self, version: str, build_data: dict) -> None:
        """Run ``scripts/build-extension.py`` unless a cached build can be reused.

        The child process inherits the current environment, with
        ``BUILD_CPP_EXTENSION`` defaulting to ``"1"`` when it is not already set.

        Environment variables consulted:
            BUILD_CPP_EXTENSION: when ``"1"``, an up-to-date cached build lets
                the hook return without running the script.
            BUILD_CPP_FORCE: when ``"1"``, the script is run even if the cached
                build is up to date.

        Raises:
            subprocess.CalledProcessError: if the build script exits non-zero.
        """
        root = Path(__file__).parent
        build_script = root / "scripts" / "build-extension.py"

        child_env = dict(os.environ)
        # Make sure the extension is actually built as part of a package build.
        if "BUILD_CPP_EXTENSION" not in child_env:
            child_env["BUILD_CPP_EXTENSION"] = "1"

        wants_build = child_env["BUILD_CPP_EXTENSION"] == "1"
        forced = child_env.get("BUILD_CPP_FORCE", "0") == "1"

        # Reuse the cached artifact only when building is enabled, no rebuild
        # was forced, and no tracked source is newer than the existing .so.
        # NOTE(review): when BUILD_CPP_EXTENSION is not "1" the script is still
        # launched; presumably it inspects the variable itself — confirm.
        can_reuse_cache = wants_build and not forced
        if can_reuse_cache and _extension_up_to_date(root):
            return

        command = [os.fspath(sys.executable), os.fspath(build_script)]
        subprocess.run(command, cwd=os.fspath(root), env=child_env, check=True)