#!/usr/bin/env python3
"""Minimal end-to-end example: image-to-image similarity with this model.
Loads the 8-bit Core ML SigLIP2 image encoder, embeds two images, prints
cosine similarity. ANE-accelerated on Apple Silicon.
pip install coremltools pillow numpy
python example_usage.py path/to/image1.jpg path/to/image2.jpg
"""
import sys
from pathlib import Path
import coremltools as ct
import numpy as np
from PIL import Image
# Pick whichever variant suits you — see README "Available variants" table.
# main() resolves this filename relative to this script's own directory.
DEFAULT_MODEL = "ViT-B-16-SigLIP2_image_8bit.mlpackage"
def embed(model, image_path: Path) -> np.ndarray:
    """Embed one image with the Core ML encoder.

    PIL → 224×224 RGB → Core ML predict → L2-normalized float32 vector
    (768-d for the ViT-B backbone).

    Args:
        model: a loaded ``ct.models.MLModel`` expecting a single "image" input.
        image_path: path to any PIL-readable image file.

    Returns:
        1-D float32 numpy array with unit L2 norm. If the model ever emitted
        an all-zero vector, it is returned unscaled rather than as NaNs.
    """
    img = Image.open(image_path).convert("RGB").resize((224, 224), Image.BICUBIC)
    out = model.predict({"image": img})
    # The output dict has a single entry; grab it regardless of its key name.
    emb = next(iter(out.values()))[0].astype(np.float32)
    # Model already L2-normalizes internally; this is belt-and-suspenders.
    # Guard the division so a degenerate zero vector can't produce NaNs.
    norm = float(np.linalg.norm(emb))
    return emb / norm if norm > 0.0 else emb
def main():
    """CLI entry point: embed two images and report their cosine similarity."""
    argv = sys.argv
    if len(argv) != 3:
        sys.exit(f"usage: {argv[0]} <image1> <image2>")
    paths = [Path(arg) for arg in argv[1:3]]
    img1, img2 = paths
    model_path = Path(__file__).parent / DEFAULT_MODEL
    if not model_path.exists():
        sys.exit(f"model not found: {model_path}")
    print(f"loading {model_path.name} on ANE …")
    model = ct.models.MLModel(str(model_path), compute_units=ct.ComputeUnit.CPU_AND_NE)
    print(f"embedding {img1.name} + {img2.name} …")
    vec_a, vec_b = (embed(model, p) for p in paths)
    similarity = float(np.dot(vec_a, vec_b))
    print(f"\ncosine similarity: {similarity:.4f}")
    # Descending thresholds; first match wins, for/else covers the floor case.
    verdicts = (
        (0.7, " → very similar (likely same scene/subject)"),
        (0.4, " → moderately similar"),
        (0.2, " → loosely related"),
    )
    for threshold, label in verdicts:
        if similarity > threshold:
            print(label)
            break
    else:
        print(" → unrelated")
if __name__ == "__main__":  # lets `embed` be imported without running the CLI
    main()
|