Instructions to use openai/clip-vit-large-patch14 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use openai/clip-vit-large-patch14 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("zero-shot-image-classification", model="openai/clip-vit-large-patch14") pipe( "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/parrots.png", candidate_labels=["animals", "humans", "landscape"], )# Load model directly from transformers import AutoProcessor, AutoModelForZeroShotImageClassification processor = AutoProcessor.from_pretrained("openai/clip-vit-large-patch14") model = AutoModelForZeroShotImageClassification.from_pretrained("openai/clip-vit-large-patch14") - Notebooks
- Google Colab
- Kaggle
| from typing import Dict, Any | |
| from PIL import Image | |
| from transformers import CLIPProcessor, CLIPModel | |
| import torch | |
| import base64 | |
| from io import BytesIO | |
| class EndpointHandler: | |
| def __init__(self, path=""): | |
| self.model = CLIPModel.from_pretrained(path) | |
| self.processor = CLIPProcessor.from_pretrained(path) | |
| self.model.eval() | |
| def _to_image(self, x) -> Image.Image: | |
| if isinstance(x, Image.Image): | |
| return x.convert("RGB") | |
| if isinstance(x, (bytes, bytearray)): | |
| return Image.open(BytesIO(x)).convert("RGB") | |
| if isinstance(x, str): | |
| return Image.open(BytesIO(base64.b64decode(x))).convert("RGB") | |
| if isinstance(x, dict) and "image" in x: | |
| return self._to_image(x["image"]) | |
| raise ValueError("Unsupported image input") | |
| def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: | |
| inputs = data.get("inputs", data) | |
| image = self._to_image(inputs) | |
| proc = self.processor(images=image, return_tensors="pt") | |
| with torch.no_grad(): | |
| feats = self.model.get_image_features(**proc) | |
| feats = feats / feats.norm(p=2, dim=-1, keepdim=True) # L2 normalize | |
| return {"embedding": feats[0].tolist(), "dim": int(feats.shape[-1])} |