"""Shared lazy singleton for SAM 2.1 Tiny (model + processor).

Both card detection (prompt-based) and hand segmentation use the same
HuggingFace weights, so loading them once per process halves cold-start
cost and keeps only one copy of the encoder in memory.
"""

from __future__ import annotations

import logging
import os
import time
from typing import Tuple

# Bump the default HF Hub network timeouts (10s each) before transformers /
# huggingface_hub read the env vars. On flaky networks the 10s HEAD check
# fires a retry storm even when the weights are already cached locally.
#
# Two separate variables are involved: HF_HUB_DOWNLOAD_TIMEOUT covers the file
# download itself, while the HEAD/metadata (etag) check is governed by
# HF_HUB_ETAG_TIMEOUT. Raise both so the HEAD check is actually affected.
# ``setdefault`` keeps any value the operator has already exported.
os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", "60")
os.environ.setdefault("HF_HUB_ETAG_TIMEOUT", "60")

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Identifier passed to ``from_pretrained`` for both the model and the
# processor in ``get_sam2()``.
# NOTE(review): this ID names the "small" Hiera checkpoint — confirm that is
# the intended variant.
SAM2_MODEL_ID = "facebook/sam2.1-hiera-small"

# SAM resizes internally to 1024 — feeding >1024 wastes CPU on image encoding.
# Not referenced in the visible part of this module; presumably imported by
# callers that downscale images before inference — verify against callers.
INFERENCE_MAX_SIDE = 1024

# Process-wide singleton slots. Both stay ``None`` until ``get_sam2()`` has
# loaded the weights; after that they hold the shared model and processor.
_model = None
_processor = None


def _load_sam2(*, local_files_only: bool) -> Tuple[object, object]:
    """Load a fresh (model, processor) pair for ``SAM2_MODEL_ID``.

    Args:
        local_files_only: Forwarded to both ``from_pretrained`` calls. When
            true, only the local HF cache is consulted.

    Returns:
        ``(model, processor)`` with the model moved to CPU and put in eval
        mode.
    """
    # Deferred import: transformers is only imported when a load is actually
    # requested, not when this module is imported.
    from transformers import Sam2Model, Sam2Processor

    processor = Sam2Processor.from_pretrained(
        SAM2_MODEL_ID, local_files_only=local_files_only
    )
    model = Sam2Model.from_pretrained(
        SAM2_MODEL_ID, local_files_only=local_files_only
    )
    return model.to("cpu").eval(), processor


def get_sam2() -> Tuple[object, object]:
    """Return (model, processor) singletons, loading on first call.

    Tries the local HF cache first (``local_files_only=True``). This avoids
    the HEAD-request retry storm that happens when huggingface.co is slow or
    unreachable but the weights are already on disk. On a true cache miss we
    fall through to a normal online load.

    Returns:
        The shared ``(model, processor)`` pair. Subsequent calls return the
        same objects without reloading.

    Raises:
        Whatever ``from_pretrained`` raises on the online path; only the
        offline attempt's ``OSError`` / ``ValueError`` is handled here. After
        a failed load the module-level singletons are left untouched, so the
        next call retries from scratch.
    """
    global _model, _processor
    if _model is not None and _processor is not None:
        return _model, _processor

    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # wall-clock adjustments during a long download.
    started = time.perf_counter()
    logger.info("loading SAM 2.1 (%s)", SAM2_MODEL_ID)
    try:
        model, processor = _load_sam2(local_files_only=True)
        source = "offline cache"
    except (OSError, ValueError) as exc:
        # Cache miss — fall back to online download. Record why the offline
        # attempt failed so a genuine configuration error is not hidden
        # behind the fallback.
        logger.info(
            "SAM 2.1 offline load failed (%s: %s); falling back to online load",
            type(exc).__name__,
            exc,
        )
        model, processor = _load_sam2(local_files_only=False)
        source = "online"

    # Publish both singletons together, only once the pair is fully loaded.
    _model, _processor = model, processor
    logger.info("SAM 2.1 loaded (%s) in %.1fs", source, time.perf_counter() - started)
    return _model, _processor