Forrest Wargo committed on
Commit ·
34b89db
1
Parent(s): 5daee26
Support gated model: pass HF token to from_pretrained
Browse files- handler.py +10 -3
handler.py
CHANGED
|
@@ -53,11 +53,18 @@ class EndpointHandler:
|
|
| 53 |
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
|
| 54 |
|
| 55 |
# Load local repo (or remote if MODEL_ID points to hub id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 57 |
model_id,
|
| 58 |
-
|
| 59 |
-
torch_dtype=torch.bfloat16,
|
| 60 |
-
device_map="auto",
|
| 61 |
)
|
| 62 |
|
| 63 |
# Optional compilation for speed if exposed by remote code
|
|
|
|
| 53 |
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
|
| 54 |
|
| 55 |
# Load local repo (or remote if MODEL_ID points to hub id)
|
| 56 |
+
# Pass token when accessing gated repos
|
| 57 |
+
hub_token = os.environ.get("HUGGINGFACE_HUB_TOKEN") or os.environ.get("HF_HUB_TOKEN") or os.environ.get("HF_TOKEN")
|
| 58 |
+
load_kwargs = {
|
| 59 |
+
"trust_remote_code": True,
|
| 60 |
+
"torch_dtype": torch.bfloat16,
|
| 61 |
+
"device_map": "auto",
|
| 62 |
+
}
|
| 63 |
+
if hub_token:
|
| 64 |
+
load_kwargs["token"] = hub_token
|
| 65 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 66 |
model_id,
|
| 67 |
+
**load_kwargs,
|
|
|
|
|
|
|
| 68 |
)
|
| 69 |
|
| 70 |
# Optional compilation for speed if exposed by remote code
|