| from pathlib import Path |
|
|
| import numpy as np |
| from PIL import Image |
| from sentence_transformers import SentenceTransformer |
|
|
| from utils.utils import SRC_PATH |
| from config import cnf |
|
|
# Folder holding the JPG files whose embeddings are compared below.
IMAGE_DIR = SRC_PATH / "data" / "images"

# Sort the matches so every image keeps the same index in each printout that
# follows.
image_paths = list(IMAGE_DIR.glob("*.jpg"))
image_paths.sort()

# A pairwise comparison needs at least two images; stop early otherwise.
if len(image_paths) <= 1:
    raise FileNotFoundError(f"Need at least 2 JPG images in {IMAGE_DIR}")
|
|
# Load the embedding model from the locally configured path, pinned to the CPU.
# `local_files_only=True` restricts loading to files already on disk.
model = SentenceTransformer(
    str(cnf.local_emb_path),
    device="cpu",
    local_files_only=True,
)


def _load_rgb(image_path: Path) -> Image.Image:
    """Return an in-memory RGB copy of the image stored at *image_path*.

    Pillow opens image files lazily. Converting inside a ``with`` block
    produces a new image that no longer depends on the file, and the source
    file is then closed deterministically instead of being left to implicit
    cleanup.
    """
    with Image.open(image_path) as source_image:
        return source_image.convert("RGB")


images = [_load_rgb(path) for path in image_paths]

# One embedding per image, returned as a NumPy array of unit-length vectors.
embeddings = model.encode(
    images,
    batch_size=16,
    normalize_embeddings=True,
    convert_to_numpy=True,
    show_progress_bar=True,
)

# Because the embeddings are normalised, this matrix product is the pairwise
# cosine similarity: entry [i, j] compares image i with image j.
similarities = embeddings @ embeddings.T
|
|
# Stems truncated to 8 characters. Not referenced in the visible part of this
# script; kept in case code outside this view relies on it.
names = [path.stem[:8] for path in image_paths]

# Ten glyphs ordered from lowest similarity (blank) to highest ("@").
heat_chars = " .:-=+*#%@"

# Legend: maps the numeric index used in the heatmap to the file name.
print("\nImages:")
for i, path in enumerate(image_paths):
    print(f"{i:2}: {path.name}")

print("\nCLI similarity heatmap:")
# Header and rows share one layout so each index sits directly above its glyph:
# a 4-character prefix (the width of the "NN |" row label) followed by
# 3-character, right-aligned columns.
header_cells = "".join(f"{i:>3}" for i in range(len(image_paths)))
print("    " + header_cells)

# Index of the last glyph; hoisted because it does not change per cell.
last_bucket = len(heat_chars) - 1

for i, row in enumerate(similarities):
    # Scores outside [0, 1] are clamped before being bucketed into a glyph.
    cells = [
        f"{heat_chars[round(float(np.clip(score, 0, 1)) * last_bucket)]:>3}"
        for score in row
    ]
    print(f"{i:2} |" + "".join(cells))
|
|
print("\nNearest neighbour for each image:")

# Mask the diagonal once, on a copy, so an image can never be reported as its
# own closest match; `similarities` itself is left untouched.
off_diagonal = similarities.copy()
np.fill_diagonal(off_diagonal, -np.inf)
closest = off_diagonal.argmax(axis=1)

for source_index, source_path in enumerate(image_paths):
    match_index = int(closest[source_index])
    match_score = off_diagonal[source_index, match_index]
    print(
        f"{source_path.name:16} -> "
        f"{image_paths[match_index].name:16} "
        f"{match_score:.3f}"
    )