# File size: 1,695 Bytes | 59830d4
from pathlib import Path
import numpy as np
from PIL import Image
from sentence_transformers import SentenceTransformer
from utils.utils import SRC_PATH
from config import cnf
# Folder that holds the JPEG files to compare.
IMAGE_DIR = SRC_PATH / "data" / "images"

# glob() makes no ordering promise; sorting keeps the numeric indices
# printed further down deterministic.
image_paths = list(IMAGE_DIR.glob("*.jpg"))
image_paths.sort()

# A pairwise comparison needs at least two images.
if len(image_paths) <= 1:
    raise FileNotFoundError(f"Need at least 2 JPG images in {IMAGE_DIR}")
# Load the embedding model from the path configured as cnf.local_emb_path.
# local_files_only=True restricts loading to files already on disk, and
# device="cpu" keeps inference on the CPU.
model = SentenceTransformer(
    str(cnf.local_emb_path),
    local_files_only=True,
    device="cpu",
)
def _load_rgb(path):
    """Open the image at *path* and return an in-memory RGB copy.

    The context manager closes the underlying file as soon as the pixels
    have been converted, and also when decoding fails part-way through.
    """
    with Image.open(path) as source:
        return source.convert("RGB")


# Decode every image up front; encode() receives the whole list at once.
images = [_load_rgb(path) for path in image_paths]

# normalize_embeddings=True asks for unit-length vectors and
# convert_to_numpy=True for a NumPy result, so the similarity matrix can be
# computed later with a plain matrix product.
embeddings = model.encode(
    images,
    batch_size=16,
    normalize_embeddings=True,
    convert_to_numpy=True,
    show_progress_bar=True,
)
# The embeddings are unit-length, so the matrix of pairwise dot products is
# also the matrix of pairwise cosine similarities.
similarities = np.matmul(embeddings, embeddings.T)

# Short label (at most 8 characters) for each image, taken from the file stem.
names = [image_path.stem[:8] for image_path in image_paths]

# Glyph ramp for the ASCII heatmap: a score of 0 maps to the first character
# and a score of 1 to the last.
heat_chars = " .:-=+*#%@"
# Numbered listing of the files; the same numbers label the heatmap axes.
print("\nImages:")
listing = (
    f"{number:2}: {image_path.name}"
    for number, image_path in enumerate(image_paths)
)
for entry in listing:
    print(entry)
def _heat_char(score):
    """Return the glyph from heat_chars that represents *score*."""
    # Clamp to the typical cosine range 0–1, then scale to a glyph index.
    clamped = float(np.clip(score, 0, 1))
    return heat_chars[round(clamped * (len(heat_chars) - 1))]


print("\nCLI similarity heatmap:")

# Every column is as wide as the widest index label (never narrower than 2),
# so the grid stays aligned even with 100 or more images.
column_width = max(2, len(str(len(image_paths) - 1)))

# Indent the header by the width of the "<index> |" row prefix so each column
# number sits directly above its column of cells.
header_indent = " " * (column_width + 2)
column_labels = " ".join(
    f"{column:{column_width}}" for column in range(len(image_paths))
)
print(header_indent + column_labels)

for row_number, row in enumerate(similarities):
    # Right-align each glyph in a cell of the same width as the header labels.
    cells = " ".join(f"{_heat_char(score):>{column_width}}" for score in row)
    print(f"{row_number:{column_width}} |" + cells)
print("\nNearest neighbour for each image:")

# Work on a copy with the diagonal masked out, so an image is never reported
# as its own nearest neighbour and the similarity matrix stays untouched.
off_diagonal = similarities.copy()
np.fill_diagonal(off_diagonal, -np.inf)

for image_path, scores in zip(image_paths, off_diagonal):
    neighbour = int(np.argmax(scores))
    source_name = image_path.name
    neighbour_name = image_paths[neighbour].name
    print(f"{source_name:16} -> {neighbour_name:16} {scores[neighbour]:.3f}")