# mathvision-jepa-explorer/tests/test_explorer.py
# Author avatar: ddebree's picture
# Commit message: Prepare Hugging Face Space deploy
# Commit: 3e67073
"""Tests for high-level explorer workflows."""
from __future__ import annotations
from pathlib import Path
from PIL import Image
from mathvision_explorer.dataset import MathVisionRecord
from mathvision_explorer.embeddings import ColorStatsEmbedder
from mathvision_explorer.explorer import build_image_index, find_similar_records
def test_build_image_index_skips_records_without_images(tmp_path: Path) -> None:
    """Records that carry no image path are left out of the built index."""
    # Write two tiny solid-color images for the records that do have a picture.
    red_file = tmp_path / "red.png"
    blue_file = tmp_path / "blue.png"
    for target, rgb in ((red_file, (255, 0, 0)), (blue_file, (0, 0, 255))):
        Image.new("RGB", (3, 3), color=rgb).save(target)

    with_red = MathVisionRecord(
        problem_id="red",
        question="Q",
        answer="A",
        image_path=red_file,
    )
    with_blue = MathVisionRecord(
        problem_id="blue",
        question="Q",
        answer="A",
        image_path=blue_file,
    )
    # Third record deliberately omits image_path.
    without_image = MathVisionRecord(problem_id="missing", question="Q", answer="A")
    dataset = [with_red, with_blue, without_image]

    image_index = build_image_index(dataset, ColorStatsEmbedder())

    # Two of the three records have images, so the index should hold two entries.
    assert len(image_index) == 2
def test_find_similar_records_returns_record_metadata(tmp_path: Path) -> None:
    """Each match exposes its source dataset record as the first element."""
    red_file = tmp_path / "red.png"
    near_red_file = tmp_path / "near-red.png"
    blue_file = tmp_path / "blue.png"

    # Three 3x3 swatches: pure red, a slightly-off red, and pure blue.
    swatches = (
        (red_file, (255, 0, 0)),
        (near_red_file, (240, 10, 10)),
        (blue_file, (0, 0, 255)),
    )
    for target, rgb in swatches:
        Image.new("RGB", (3, 3), color=rgb).save(target)

    red_record = MathVisionRecord(
        problem_id="red",
        question="Red",
        answer="A",
        image_path=red_file,
    )
    near_red_record = MathVisionRecord(
        problem_id="near-red",
        question="Near red",
        answer="A",
        image_path=near_red_file,
    )
    blue_record = MathVisionRecord(
        problem_id="blue",
        question="Blue",
        answer="A",
        image_path=blue_file,
    )
    dataset = [red_record, near_red_record, blue_record]

    color_embedder = ColorStatsEmbedder()
    image_index = build_image_index(dataset, color_embedder)

    # Query with the red image's own embedding, passing "red" as the third argument.
    query_vector = color_embedder.embed_image(red_file)
    results = find_similar_records(dataset, image_index, "red", query_vector, limit=1)

    # The single returned match is expected to be the near-red record.
    top_match = results[0]
    assert top_match[0].problem_id == "near-red"