"""Tests for high-level explorer workflows.""" from __future__ import annotations from pathlib import Path from PIL import Image from mathvision_explorer.dataset import MathVisionRecord from mathvision_explorer.embeddings import ColorStatsEmbedder from mathvision_explorer.explorer import build_image_index, find_similar_records def test_build_image_index_skips_records_without_images(tmp_path: Path) -> None: """Only records with image paths are embedded.""" red_path = tmp_path / "red.png" blue_path = tmp_path / "blue.png" Image.new("RGB", (3, 3), color=(255, 0, 0)).save(red_path) Image.new("RGB", (3, 3), color=(0, 0, 255)).save(blue_path) records = [ MathVisionRecord(problem_id="red", question="Q", answer="A", image_path=red_path), MathVisionRecord(problem_id="blue", question="Q", answer="A", image_path=blue_path), MathVisionRecord(problem_id="missing", question="Q", answer="A"), ] index = build_image_index(records, ColorStatsEmbedder()) assert len(index) == 2 def test_find_similar_records_returns_record_metadata(tmp_path: Path) -> None: """Nearest-neighbor output keeps the original dataset record alongside the score.""" red_path = tmp_path / "red.png" near_red_path = tmp_path / "near-red.png" blue_path = tmp_path / "blue.png" Image.new("RGB", (3, 3), color=(255, 0, 0)).save(red_path) Image.new("RGB", (3, 3), color=(240, 10, 10)).save(near_red_path) Image.new("RGB", (3, 3), color=(0, 0, 255)).save(blue_path) records = [ MathVisionRecord(problem_id="red", question="Red", answer="A", image_path=red_path), MathVisionRecord( problem_id="near-red", question="Near red", answer="A", image_path=near_red_path, ), MathVisionRecord(problem_id="blue", question="Blue", answer="A", image_path=blue_path), ] embedder = ColorStatsEmbedder() index = build_image_index(records, embedder) matches = find_similar_records( records, index, "red", embedder.embed_image(red_path), limit=1, ) assert matches[0][0].problem_id == "near-red"