"""Tests for hosted Space VLM validation tooling.""" from __future__ import annotations import tempfile import unittest from datetime import datetime, timezone from pathlib import Path from scripts.check_space_vlm import ( DEFAULT_HARDWARE, TEST_ASSETS, ValidationResult, extract_trace_record, parse_space_repo_id, render_report, space_client_url, update_failure_notes, validate_prediction, write_json_results, write_trace_record, ) from src.models.schema import DiaryEntry, ObjectInfo, ObjectUnderstanding, Persona, PersonaEnvelope, TraceRecord from src.utils.zero_gpu import zero_gpu class SpaceVlmToolingTest(unittest.TestCase): def test_asset_manifest_covers_three_validation_objects(self) -> None: keys = {asset.key for asset in TEST_ASSETS} self.assertEqual(keys, {"mug", "keyboard", "shoe"}) self.assertTrue(all(asset.source_page.startswith("https://commons.wikimedia.org/") for asset in TEST_ASSETS)) self.assertTrue(all(asset.download_url.startswith("https://commons.wikimedia.org/") for asset in TEST_ASSETS)) def test_parse_space_repo_id_from_space_url(self) -> None: repo_id = parse_space_repo_id("https://huggingface.co/spaces/build-small-hackathon/ObjectverseDiary") self.assertEqual(repo_id, "build-small-hackathon/ObjectverseDiary") def test_space_client_url_uses_direct_hf_space_host(self) -> None: client_url = space_client_url("https://huggingface.co/spaces/build-small-hackathon/ObjectverseDiary") self.assertEqual(client_url, "https://build-small-hackathon-objectversediary.hf.space") def test_default_hardware_targets_zero_gpu_live_space(self) -> None: self.assertEqual(DEFAULT_HARDWARE, "zero-a10g") def test_zero_gpu_decorator_is_noop_without_spaces_package(self) -> None: def sample(value: int) -> int: return value + 1 decorated = zero_gpu(duration=10)(sample) self.assertEqual(decorated(2), 3) def test_validate_prediction_accepts_minicpm_runtime(self) -> None: asset = TEST_ASSETS[0] trace = _trace_record( object_name="striped coffee mug", visible_features=["ceramic cup", "handle", "striped surface"], runtime_vision="minicpm-v object understanding", fallbacks=["mock-text-runtime"], ) response = [None, {}, {}, "", "", "", trace.model_dump(mode="json")] result = validate_prediction(asset, Path("/tmp/mug.jpg"), response) self.assertTrue(result.passed) self.assertEqual(result.object_name, "striped coffee mug") self.assertEqual(result.runtime_text, "mock persona and diary generation") def test_validate_prediction_rejects_vision_fallback(self) -> None: asset = TEST_ASSETS[1] trace = _trace_record( object_name="computer keyboard", visible_features=["black keys"], runtime_vision="minicpm-v object understanding", fallbacks=["vision-fallback-to-mock", "mock-text-runtime"], ) response = [None, {}, {}, "", "", "", trace.model_dump(mode="json")] result = validate_prediction(asset, Path("/tmp/keyboard.jpg"), response) self.assertFalse(result.passed) self.assertIn("vision fallback marker", result.error) def test_extract_trace_record_accepts_gradio_response(self) -> None: trace = _trace_record( object_name="running shoe", visible_features=["laces", "rubber sole"], runtime_vision="minicpm-v object understanding", fallbacks=["mock-text-runtime"], ) response = [None, {}, {}, "", "", "", trace.model_dump(mode="json")] extracted = extract_trace_record(response) self.assertEqual(extracted.object_understanding.object.name, "running shoe") self.assertEqual(extracted.model_runtime["vision"], "minicpm-v object understanding") def test_write_trace_record_writes_valid_public_json(self) -> None: trace = _trace_record( object_name="striped coffee mug", visible_features=["ceramic cup", "handle"], runtime_vision="minicpm-v object understanding", fallbacks=["mock-text-runtime"], ) with tempfile.TemporaryDirectory() as tmp_dir: output_path = write_trace_record(trace, Path(tmp_dir) / "mug.json") payload = output_path.read_text(encoding="utf-8") parsed = TraceRecord.model_validate_json(payload) self.assertEqual(parsed.trace_id, trace.trace_id) self.assertNotIn("HUGGINGFACE_TOKEN", payload) self.assertNotIn("HF_TOKEN", payload) self.assertNotIn("hf_", payload) def test_write_trace_record_rejects_sensitive_token_markers(self) -> None: trace = _trace_record( object_name="computer keyboard", visible_features=["black keys"], runtime_vision="minicpm-v object understanding", fallbacks=["mock-text-runtime"], ) trace.model_runtime["runtime"] = "vision model id: openbmb/MiniCPM-V-2_6; token hf_forbidden" with tempfile.TemporaryDirectory() as tmp_dir: output_path = Path(tmp_dir) / "keyboard.json" with self.assertRaises(ValueError): write_trace_record(trace, output_path) self.assertFalse(output_path.exists()) def test_render_report_includes_results_and_safe_config(self) -> None: result = ValidationResult( key="shoe", label="Running shoe", source_page="https://commons.wikimedia.org/wiki/File:Running_shoes.jpg", image_path="/tmp/shoe.jpg", passed=True, object_name="running shoe", visible_features=["laces", "athletic sole"], likely_context="sports gear", confidence=0.86, runtime_vision="minicpm-v object understanding", runtime_text="mock persona and diary generation", fallbacks=["mock-text-runtime"], ) report = render_report( space_url="https://huggingface.co/spaces/build-small-hackathon/ObjectverseDiary", repo_id="build-small-hackathon/ObjectverseDiary", results=[result], probe_result=_probe_result(minicpm_load_ok=True), configured={"hardware": "zero-a10g", "OBJECTVERSE_VISION_BACKEND": "minicpm-v"}, ) self.assertIn("Overall status: PASS", report) self.assertIn("Vision Runtime Probe", report) self.assertIn("minicpm_load_ok", report) self.assertIn("Running shoe", report) self.assertIn("OBJECTVERSE_VISION_BACKEND", report) self.assertIn("live MiniCPM-V configuration remains active", report) self.assertNotIn("hf_", report.lower()) self.assertNotIn("HUGGINGFACE_TOKEN", report) def test_render_report_includes_configuration_error(self) -> None: report = render_report( space_url="https://huggingface.co/spaces/build-small-hackathon/ObjectverseDiary", repo_id="build-small-hackathon/ObjectverseDiary", results=[], rollback={"hardware": "cpu-basic", "OBJECTVERSE_VISION_BACKEND": "mock"}, configuration_error="HfHubHTTPError: 402 Payment Required", ) self.assertIn("Overall status: FAIL", report) self.assertIn("Configuration Error", report) self.assertIn("402 Payment Required", report) def test_write_json_results_includes_probe_when_present(self) -> None: result = ValidationResult( key="mug", label="Coffee mug", source_page="https://commons.wikimedia.org/wiki/File:Striped_coffee_mug.jpg", image_path="/tmp/mug.jpg", passed=False, object_name="coffee mug", visible_features=["uploaded photo provided"], likely_context="everyday human environment", confidence=0.42, runtime_vision="minicpm-v object understanding", runtime_text="mock persona and diary generation", fallbacks=["vision-fallback-to-mock", "mock-text-runtime"], error="vision fallback marker was present", ) with tempfile.TemporaryDirectory() as tmp_dir: output_path = write_json_results( [result], Path(tmp_dir) / "report.json", probe_result=_probe_result(minicpm_load_ok=False), ) payload = output_path.read_text(encoding="utf-8") parsed = output_path.read_text(encoding="utf-8") self.assertIn('"probe"', payload) self.assertIn('"results"', payload) self.assertNotIn("hf_", parsed) self.assertNotIn("HF_TOKEN", parsed) def test_update_failure_notes_replaces_latest_failure_section(self) -> None: failed = ValidationResult( key="keyboard", label="Computer keyboard", source_page="https://commons.wikimedia.org/wiki/File:Computer_keyboard.jpg", image_path="/tmp/keyboard.jpg", passed=False, object_name="keyboard", visible_features=["uploaded photo provided"], likely_context="everyday human environment", confidence=0.42, runtime_vision="minicpm-v object understanding", runtime_text="mock persona and diary generation", fallbacks=["vision-fallback-to-mock", "mock-text-runtime"], error="vision fallback marker was present", ) with tempfile.TemporaryDirectory() as tmp_dir: notes_path = Path(tmp_dir) / "FAILURES.md" notes_path.write_text("# Failure Notes\n\n## Current Status\n\nStable.\n", encoding="utf-8") update_failure_notes(results=[failed], probe_result=_probe_result(False), output_path=notes_path) update_failure_notes(results=[failed], probe_result=_probe_result(False), output_path=notes_path) content = notes_path.read_text(encoding="utf-8") self.assertEqual(content.count("## Latest Space VLM Validation Failure"), 1) self.assertIn("keyboard: vision fallback marker was present", content) self.assertNotIn("hf_", content) def _trace_record( *, object_name: str, visible_features: list[str], runtime_vision: str, fallbacks: list[str], ) -> TraceRecord: persona = PersonaEnvelope( persona=Persona( object_name=object_name, character_name="Test Object", mood="watchful", secret_fear="being ignored", core_memory="It remembers the test bench.", complaint="I am more than a fixture.", tags=["test", "object", "archive"], ) ) return TraceRecord( trace_id="space-vlm-test", created_at=datetime.now(timezone.utc), mode="Cynical", input={"has_image": True, "image_filename": "asset.jpg", "description": "public test asset"}, object_understanding=ObjectUnderstanding( object=ObjectInfo( name=object_name, visible_features=visible_features, likely_context="test environment", confidence=0.9, ) ), persona=persona, diary=DiaryEntry( title="Secret Diary - Day 1", english="I was tested today.", chinese="今天我被测试了。", ), model_runtime={ "vision": runtime_vision, "text": "mock persona and diary generation", "runtime": "vision model id: openbmb/MiniCPM-V-2_6; no llama.cpp model connected yet", }, fallbacks=fallbacks, ) def _probe_result(minicpm_load_ok: bool) -> dict[str, object]: return { "backend": "minicpm-v", "vision_model_id": "openbmb/MiniCPM-V-2_6", "torch_import": True, "transformers_import": True, "cuda_available": True, "device_count": 1, "device_name": "NVIDIA test device", "mps_available": False, "minicpm_load_attempted": True, "minicpm_load_ok": minicpm_load_ok, "errors": [] if minicpm_load_ok else [{"stage": "minicpm_load", "type": "RuntimeError", "summary": "test failure"}], } if __name__ == "__main__": unittest.main()