wardrobe-ai / src /scripts /test_local_embedding.py
elalber2000's picture
first commit
59830d4 verified
from pathlib import Path
import numpy as np
from PIL import Image
from sentence_transformers import SentenceTransformer
from utils.utils import SRC_PATH
from config import cnf
# Directory that is scanned for the sample images, located under SRC_PATH.
IMAGE_DIR = SRC_PATH / "data" / "images"

# Only files matching "*.jpg" are picked up; sorting gives a deterministic
# order, so the numeric indices printed further down are stable between runs.
image_paths = sorted(IMAGE_DIR.glob("*.jpg"))

# A pairwise comparison is meaningless with fewer than two images.
if len(image_paths) <= 1:
    raise FileNotFoundError(
        f"Need at least 2 JPG images in {IMAGE_DIR}"
    )
# Load the embedding model from the path given in the project config.
# The model is pinned to the CPU, and local_files_only=True is passed so the
# loader is asked to use files already on disk rather than fetch anything.
_model_dir = str(cnf.local_emb_path)
model = SentenceTransformer(_model_dir, device="cpu", local_files_only=True)
def _load_rgb(path):
    """Return the image stored at *path* as an in-memory RGB image.

    The file is opened inside a ``with`` block so its handle is released
    deterministically, including when decoding fails part-way. ``convert``
    returns a new image object, so the result stays usable after the source
    file has been closed.
    """
    with Image.open(path) as source:
        return source.convert("RGB")


# Decode every image up front; the whole list is handed to the model at once.
images = [_load_rgb(path) for path in image_paths]

# One embedding per image, returned as a NumPy array. normalize_embeddings=True
# asks the model for unit-length vectors, which the similarity computation
# later in the script relies on.
embeddings = model.encode(
    images,
    batch_size=16,
    normalize_embeddings=True,
    convert_to_numpy=True,
    show_progress_bar=True,
)
# The embeddings were requested with normalize_embeddings=True, so the product
# of the embedding matrix with its own transpose holds the cosine similarity
# of every image pair (entry [i, j] compares image i with image j).
# Assumes `embeddings` is a 2-D array with one row per image.
similarities = np.matmul(embeddings, embeddings.T)
# Short labels: the first 8 characters of each file stem.
# NOTE(review): not referenced by the visible part of this script.
names = [path.stem[:8] for path in image_paths]

# Density ramp for the heatmap, ordered from lowest similarity (blank) to
# highest similarity ("@").
heat_chars = " .:-=+*#%@"

# Index legend: the numbers printed here are the row/column labels used by the
# heatmap below.
print("\nImages:")
for i, path in enumerate(image_paths):
    print(f"{i:2}: {path.name}")

print("\nCLI similarity heatmap:")
# Every row starts with a 4-character label (f"{i:2} |"), so the header is
# padded by the same width. Header entries and cells share a pitch of three
# characters (2-wide, right-aligned, joined by one space) so that each column
# index sits directly above its cells.
print(" " * 4 + " ".join(f"{i:2}" for i in range(len(image_paths))))
top_level = len(heat_chars) - 1
for i, row in enumerate(similarities):
    cells = []
    for score in row:
        # Cosine similarity can be negative; anything below 0 is clamped to
        # the lowest ramp character and anything above 1 to the highest.
        normalized = float(np.clip(score, 0, 1))
        index = round(normalized * top_level)
        cells.append(f"{heat_chars[index]:>2}")
    print(f"{i:2} |" + " ".join(cells))
print("\nNearest neighbour for each image:")

# Work on a copy so the similarity matrix itself is left untouched. Setting the
# diagonal to -inf removes each image's similarity with itself from the search.
masked = similarities.copy()
np.fill_diagonal(masked, -np.inf)

# For every row, the column holding the largest remaining similarity.
closest = masked.argmax(axis=1)

for i, best_index in enumerate(closest):
    best_index = int(best_index)
    source_name = image_paths[i].name
    match_name = image_paths[best_index].name
    score = masked[i, best_index]
    print(f"{source_name:16} -> {match_name:16} {score:.3f}")