#!/usr/bin/env python3
"""Minimal end-to-end example: image-to-image similarity with this model.

Loads the 8-bit Core ML SigLIP2 image encoder, embeds two images, prints
cosine similarity. ANE-accelerated on Apple Silicon.

    pip install coremltools pillow numpy
    python example_usage.py path/to/image1.jpg path/to/image2.jpg
"""
import sys
from pathlib import Path

import coremltools as ct
import numpy as np
from PIL import Image

# Pick whichever variant suits you — see README "Available variants" table.
DEFAULT_MODEL = "ViT-B-16-SigLIP2_image_8bit.mlpackage"


def embed(model, image_path: Path) -> np.ndarray:
    """PIL → 224×224 RGB → Core ML predict → L2-normalized 768-d embedding.

    Args:
        model: A loaded ``coremltools.models.MLModel`` expecting an "image" input.
        image_path: Path to any image PIL can open.

    Returns:
        1-D float32 embedding (first row of the model's sole output),
        L2-normalized. Presumably 768-d per the docstring — depends on variant.
    """
    img = Image.open(image_path).convert("RGB").resize((224, 224), Image.BICUBIC)
    out = model.predict({"image": img})
    # Single-output model: grab whatever the output key is, take batch row 0.
    emb = next(iter(out.values()))[0].astype(np.float32)
    # Model already L2-normalizes internally; this is belt-and-suspenders.
    # Guard the degenerate zero-vector case so we never divide by zero.
    norm = np.linalg.norm(emb)
    return emb / norm if norm > 0.0 else emb


def main() -> None:
    """CLI entry point: load model, embed both images, print cosine similarity."""
    if len(sys.argv) != 3:
        # Fixed: the argument placeholders were missing from the usage string.
        sys.exit(f"usage: {sys.argv[0]} <image1> <image2>")

    img1, img2 = Path(sys.argv[1]), Path(sys.argv[2])
    model_path = Path(__file__).parent / DEFAULT_MODEL
    if not model_path.exists():
        sys.exit(f"model not found: {model_path}")

    print(f"loading {model_path.name} on ANE …")
    model = ct.models.MLModel(str(model_path), compute_units=ct.ComputeUnit.CPU_AND_NE)

    print(f"embedding {img1.name} + {img2.name} …")
    e1 = embed(model, img1)
    e2 = embed(model, img2)

    # Both embeddings are unit-norm, so the dot product IS cosine similarity.
    similarity = float(np.dot(e1, e2))
    print(f"\ncosine similarity: {similarity:.4f}")

    # Rough interpretation bands — heuristic only, not calibrated thresholds.
    if similarity > 0.7:
        print("  → very similar (likely same scene/subject)")
    elif similarity > 0.4:
        print("  → moderately similar")
    elif similarity > 0.2:
        print("  → loosely related")
    else:
        print("  → unrelated")


if __name__ == "__main__":
    main()