Spaces:
Running
Running
| """Shared lazy singleton for SAM 2.1 Tiny (model + processor). | |
| Both card detection (prompt-based) and hand segmentation use the same | |
| HuggingFace weights, so loading them once per process halves cold-start | |
| cost and keeps only one copy of the encoder in memory. | |
| """ | |
| from __future__ import annotations | |
| import logging | |
| import os | |
| import time | |
| from typing import Tuple | |
| # Bump the default HF Hub HEAD/download timeout (10s) before transformers | |
| # reads the env var. On flaky networks the 10s HEAD check fires a retry storm | |
| # even when the weights are already cached locally. | |
| os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", "60") | |
| logger = logging.getLogger(__name__) | |
| SAM2_MODEL_ID = "facebook/sam2.1-hiera-small" | |
| # SAM resizes internally to 1024 — feeding >1024 wastes CPU on image encoding. | |
| INFERENCE_MAX_SIDE = 1024 | |
| _model = None | |
| _processor = None | |
| def get_sam2() -> Tuple[object, object]: | |
| """Return (model, processor) singletons, loading on first call. | |
| Tries the local HF cache first (``local_files_only=True``). This avoids | |
| the HEAD-request retry storm that happens when huggingface.co is slow or | |
| unreachable but the weights are already on disk. On a true cache miss we | |
| fall through to a normal online load. | |
| """ | |
| global _model, _processor | |
| if _model is None or _processor is None: | |
| from transformers import Sam2Model, Sam2Processor | |
| t0 = time.time() | |
| logger.info("loading SAM 2.1 (%s)", SAM2_MODEL_ID) | |
| try: | |
| _processor = Sam2Processor.from_pretrained(SAM2_MODEL_ID, local_files_only=True) | |
| _model = Sam2Model.from_pretrained(SAM2_MODEL_ID, local_files_only=True).to("cpu").eval() | |
| logger.info("SAM 2.1 loaded (offline cache) in %.1fs", time.time() - t0) | |
| except (OSError, ValueError): | |
| # Cache miss — fall back to online download. | |
| _processor = Sam2Processor.from_pretrained(SAM2_MODEL_ID) | |
| _model = Sam2Model.from_pretrained(SAM2_MODEL_ID).to("cpu").eval() | |
| logger.info("SAM 2.1 loaded (online) in %.1fs", time.time() - t0) | |
| return _model, _processor | |