"""Print an ASCII similarity heatmap and nearest neighbours for local images.

Every ``*.jpg`` file in ``SRC_PATH / "data" / "images"`` is embedded with the
SentenceTransformer model found at ``cnf.local_emb_path``. The script then
prints the indexed file list, a pairwise similarity heatmap drawn with ASCII
characters, and, for each image, the most similar *other* image.

Raises:
    FileNotFoundError: if fewer than two ``*.jpg`` files are found.
"""

from pathlib import Path

import numpy as np
from PIL import Image
from sentence_transformers import SentenceTransformer

from utils.utils import SRC_PATH
from config import cnf

IMAGE_DIR = SRC_PATH / "data" / "images"

# Sorted so the indices printed below refer to the same files on every run.
image_paths = sorted(IMAGE_DIR.glob("*.jpg"))
if len(image_paths) < 2:
    raise FileNotFoundError(f"Need at least 2 JPG images in {IMAGE_DIR}")

model = SentenceTransformer(
    str(cnf.local_emb_path),
    device="cpu",
    local_files_only=True,
)

images = []
for path in image_paths:
    # Image.open() is lazy and keeps the file open; convert() reads the pixel
    # data into a new image, so the source file can be closed right after.
    with Image.open(path) as source:
        images.append(source.convert("RGB"))

embeddings = model.encode(
    images,
    batch_size=16,
    normalize_embeddings=True,
    convert_to_numpy=True,
    show_progress_bar=True,
)

# Normalized embeddings: dot product == cosine similarity
similarities = embeddings @ embeddings.T

# Short labels (first 8 characters of each file stem); not used by the
# output below but kept as a module-level name.
names = [path.stem[:8] for path in image_paths]

# Characters ordered from lowest to highest similarity.
heat_chars = " .:-=+*#%@"

print("\nImages:")
for i, path in enumerate(image_paths):
    print(f"{i:2}: {path.name}")

# One shared column width for the header and the rows keeps every heat
# character directly below its column index, also with more than 99 images.
index_width = max(2, len(str(len(image_paths) - 1)))
row_prefix_width = index_width + len(" |")

# Clamp scores to 0-1 and bucket them into indices of heat_chars. The float64
# cast and np.rint (round half to even) mirror Python's round() on floats.
heat_levels = np.rint(
    np.clip(similarities.astype(np.float64), 0.0, 1.0) * (len(heat_chars) - 1)
).astype(int)

print("\nCLI similarity heatmap:")
print(
    " " * row_prefix_width
    + " ".join(f"{i:{index_width}}" for i in range(len(image_paths)))
)
for i, level_row in enumerate(heat_levels):
    cells = " ".join(
        f"{heat_chars[level]:>{index_width}}" for level in level_row
    )
    print(f"{i:{index_width}} |{cells}")

print("\nNearest neighbour for each image:")
for i, row in enumerate(similarities):
    # Work on a copy and mask the diagonal so an image never matches itself.
    row = row.copy()
    row[i] = -np.inf
    best_index = int(np.argmax(row))
    print(
        f"{image_paths[i].name:16} -> "
        f"{image_paths[best_index].name:16} "
        f"{row[best_index]:.3f}"
    )