"""Tests for the recommender module.""" import json from unittest.mock import MagicMock, patch import pytest # Import the units under test from the recommender module from src.services.recommender import ( _enrich_one_item, _fetch_wiki_image, _haversine_km, _is_media_entertainment_page, _parse_json_response, name_key, ) # ────────────────────────────────────────────────────────────────────── # 1. _parse_json_response # ────────────────────────────────────────────────────────────────────── class TestParseJsonResponse: """Tests for _parse_json_response — robust LLM JSON extraction.""" def test_valid_json_array(self): """Parses a standard JSON array.""" result = _parse_json_response('[{"name": "Eiffel Tower"}, {"name": "Louvre"}]') assert result == [{"name": "Eiffel Tower"}, {"name": "Louvre"}] def test_valid_json_object_wraps_to_list(self): """A bare JSON object is wrapped into a single-element list.""" result = _parse_json_response('{"name": "Sagrada Familia", "city": "Barcelona"}') assert result == [{"name": "Sagrada Familia", "city": "Barcelona"}] def test_json_in_markdown_fences(self): """Strips ```json ... ``` fences before parsing.""" raw = '```json\n[{"name": "Colosseum"}]\n```' result = _parse_json_response(raw) assert result == [{"name": "Colosseum"}] def test_truncated_json_last_object_missing_brace(self): """Truncated JSON without closing bracket returns None — parser requires a ']'. The function only attempts bracket-delimited recovery when both [ and ] are present. A truly truncated stream with no closing ] is unresolvable. 
""" raw = '[{"name": "Eiffel"}, {"name": "Louvre"' result = _parse_json_response(raw) assert result is None def test_empty_string_returns_none(self): """Empty string yields None.""" assert _parse_json_response("") is None def test_whitespace_only_returns_none(self): """Whitespace-only string yields None.""" assert _parse_json_response(" \n\t ") is None def test_garbage_string_returns_none(self): """Non-JSON garbage returns None.""" assert _parse_json_response("not json at all") is None def test_none_like_text_returns_none(self): """Random text that happens to have braces but is not valid JSON.""" result = _parse_json_response("just some random { text } with [ brackets") assert result is None # ────────────────────────────────────────────────────────────────────── # 2. _haversine_km # ────────────────────────────────────────────────────────────────────── class TestHaversineKm: """Tests for haversine distance calculation.""" def test_london_to_paris(self): """London → Paris is approximately 344 km.""" # London: 51.5074° N, 0.1278° W # Paris: 48.8566° N, 2.3522° E dist = _haversine_km(51.5074, -0.1278, 48.8566, 2.3522) assert 330 <= dist <= 360, f"Expected ~344 km, got {dist:.1f}" def test_tokyo_to_osaka(self): """Tokyo → Osaka is approximately 403 km.""" # Tokyo: 35.6762° N, 139.6503° E # Osaka: 34.6937° N, 135.5023° E dist = _haversine_km(35.6762, 139.6503, 34.6937, 135.5023) assert 390 <= dist <= 420, f"Expected ~403 km, got {dist:.1f}" def test_same_point_zero_distance(self): """Distance from a point to itself is zero.""" dist = _haversine_km(48.8566, 2.3522, 48.8566, 2.3522) assert dist == pytest.approx(0.0, abs=0.001) def test_symmetric(self): """Haversine is symmetric (A→B equals B→A).""" a = (40.7128, -74.0060) # NYC b = (34.0522, -118.2437) # LA assert _haversine_km(*a, *b) == pytest.approx(_haversine_km(*b, *a)) # ────────────────────────────────────────────────────────────────────── # 3. 
#    _is_media_entertainment_page
# ──────────────────────────────────────────────────────────────────────


class TestIsMediaEntertainmentPage:
    """Checks that media/entertainment Wikipedia pages are told apart from places."""

    # _is_media_entertainment_page is called as (title, extract).
    # The first group of tests passes an empty extract so that only the
    # title (with its parenthetical disambiguation) can drive the result;
    # the later tests use a clean title and rely on the extract text.

    def test_star_wars_film(self):
        """A '(film)' disambiguation in the title is reported as media."""
        verdict = _is_media_entertainment_page("Star Wars (film)", "")
        assert verdict is True

    def test_dark_knight_movie(self):
        """A '(movie)' disambiguation in the title is reported as media."""
        verdict = _is_media_entertainment_page("The Dark Knight (movie)", "")
        assert verdict is True

    def test_friends_tv_series(self):
        """A '(TV series)' disambiguation in the title is reported as media."""
        verdict = _is_media_entertainment_page("Friends (TV series)", "")
        assert verdict is True

    def test_short_film_media(self):
        """A '(short film)' disambiguation in the title is reported as media."""
        verdict = _is_media_entertainment_page("Colosseum (short film)", "")
        assert verdict is True

    def test_eiffel_tower_not_media(self):
        """A landmark title without any media marker is not media."""
        verdict = _is_media_entertainment_page("Eiffel Tower", "")
        assert verdict is False

    def test_central_park_not_media(self):
        """A park title without any media marker is not media."""
        verdict = _is_media_entertainment_page("Central Park", "")
        assert verdict is False

    def test_extract_based_detection(self):
        """A clean title is still flagged when the extract describes a film."""
        title = "Inception"
        extract = "Inception is a 2010 science fiction film directed by Christopher Nolan..."
        assert _is_media_entertainment_page(title, extract) is True

    def test_extract_tv_series_detection(self):
        """A clean title is still flagged when the extract describes a TV series."""
        title = "Breaking Bad"
        extract = "Breaking Bad is an American television series created by Vince Gilligan..."
        assert _is_media_entertainment_page(title, extract) is True

    def test_clean_title_and_extract(self):
        """A real place in both title and extract is not flagged."""
        title = "Machu Picchu"
        extract = "Machu Picchu is a 15th-century Inca citadel situated on a mountain ridge..."
        assert _is_media_entertainment_page(title, extract) is False


# ──────────────────────────────────────────────────────────────────────
# 4. name_key
# ──────────────────────────────────────────────────────────────────────


class TestNameKey:
    """Checks the normalized deduplication key that name_key derives from an item."""

    def test_eiffel_tower(self):
        """The trailing 'Tower' is expected to be dropped from the key."""
        # NOTE(review): presumably " tower" is listed in the recommender's
        # _ATTRACTION_SUFFIXES (not visible from this file) — confirm there.
        item = {"name": "Eiffel Tower"}
        assert name_key(item) == "eiffel"

    def test_louvre_museum(self):
        """The trailing 'Museum' is expected to be dropped from the key."""
        item = {"name": "Louvre Museum"}
        assert name_key(item) == "louvre"

    def test_notre_dame_with_parenthetical_and_church_suffix(self):
        """Parenthetical, ' Church' suffix and the hyphen all disappear from the key."""
        # NOTE(review): presumably ' church' is in _ATTRACTION_SUFFIXES while
        # ' cathedral' is not — confirm against the recommender module.
        item = {"name": "Notre-Dame Church (Paris)"}
        key = name_key(item)
        assert key == "notredame"

    def test_empty_name(self):
        """An empty name maps to an empty key."""
        item = {"name": ""}
        assert name_key(item) == ""

    def test_missing_name_key(self):
        """An item without a 'name' entry maps to an empty key."""
        item = {}
        assert name_key(item) == ""

    def test_same_name_with_different_punctuation(self):
        """Names that differ only in punctuation share one key."""
        punctuated = name_key({"name": "St. Peter's Basilica"})
        plain = name_key({"name": "St Peters Basilica"})
        assert punctuated == plain

    def test_same_name_different_casing(self):
        """Names that differ only in letter case share one key."""
        upper = name_key({"name": "COLOSSEUM"})
        lower = name_key({"name": "colosseum"})
        assert upper == lower


# ──────────────────────────────────────────────────────────────────────
# 5.
#    _fetch_wiki_image
# ──────────────────────────────────────────────────────────────────────


class TestFetchWikiImage:
    """Integration-style tests of _fetch_wiki_image with the HTTP layer mocked.

    Every test replaces ``src.services.recommender._http_get_json`` with a
    fake that picks its canned response from substrings of the requested URL.
    """

    def test_returns_thumbnail_url_when_present(self):
        """When the summary response carries a thumbnail, its URL is returned."""

        def fake_http(url, **_kwargs):
            # Wikipedia search API request
            if "action=query" in url and "list=search" in url:
                return {
                    "query": {
                        "search": [{"title": "Eiffel Tower"}]
                    }
                }
            # REST summary API request, answered with a thumbnail
            if "rest_v1/page/summary" in url:
                return {
                    "title": "Eiffel Tower",
                    "extract": "The Eiffel Tower is a wrought-iron lattice tower...",
                    "thumbnail": {"source": "https://upload.wikimedia.org/thumb_eiffel.jpg"},
                }
            return None

        with patch("src.services.recommender._http_get_json", side_effect=fake_http):
            image_url = _fetch_wiki_image("Eiffel Tower", city="Paris")

        assert image_url == "https://upload.wikimedia.org/thumb_eiffel.jpg"

    def test_returns_empty_string_when_no_thumbnail(self):
        """With no thumbnail in any mocked response, the result is ''."""

        def fake_http(url, **_kwargs):
            if "action=query" in url and "list=search" in url:
                return {
                    "query": {
                        "search": [{"title": "Some Obscure Place"}]
                    }
                }
            if "rest_v1/page/summary" in url:
                # The summary exists but carries no thumbnail
                return {
                    "title": "Some Obscure Place",
                    "extract": "Some Obscure Place is a location...",
                }
            if "prop=pageimages" in url:
                # The pageimages response has no thumbnail either
                return {
                    "query": {
                        "pages": {
                            "12345": {
                                "pageid": 12345,
                                "title": "Some Obscure Place",
                            }
                        }
                    }
                }
            return None

        with patch("src.services.recommender._http_get_json", side_effect=fake_http):
            image_url = _fetch_wiki_image("Some Obscure Place")

        assert image_url == ""

    def test_returns_empty_string_when_search_finds_nothing(self):
        """With an empty search result list, the result is ''."""

        def fake_http(url, **_kwargs):
            if "action=query" in url and "list=search" in url:
                return {"query": {"search": []}}
            return None

        with patch("src.services.recommender._http_get_json", side_effect=fake_http):
            image_url = _fetch_wiki_image("XyzzyNonexistentPlace")

        assert image_url == ""


# ──────────────────────────────────────────────────────────────────────
# 6. _enrich_one_item — Pixabay fallback re-check
# ──────────────────────────────────────────────────────────────────────


class TestEnrichOneItemPixabayRecheck:
    """Tests for the Wikipedia/Wikidata re-check after a Pixabay fallback.

    Scenario: Pixabay supplies a generic stock photo after every other tier
    returned ''. The Wikipedia and Wikidata fetchers are then expected to be
    consulted a second time (the first pass may have been rate-limited), and
    a specific Wikimedia image found on that second pass should win over the
    Pixabay URL.

    In every test all image tiers of ``src.services.recommender`` are patched,
    the image cache is replaced by an empty dict and ``_save_image_cache`` is
    stubbed out.
    """

    def test_prefers_wikipedia_over_pixabay_on_recheck(self):
        """Wikipedia fails first, succeeds on the re-check -> its URL is used."""
        item = {"name": "Adashino Nenbutsuji"}
        # First call (Tier 1): Wikipedia fails (rate-limited)
        # Second call (second chance after Pixabay): Wikipedia succeeds
        wiki_answers = ["", "https://upload.wikimedia.org/specific.jpg"]

        with (
            patch("src.services.recommender._IMAGE_CACHE", {}),
            patch("src.services.recommender._fetch_wiki_image", side_effect=wiki_answers),
            patch("src.services.recommender._fetch_wiki_image_multilang", return_value=""),
            patch("src.services.recommender._fetch_wikidata_image", return_value=""),
            patch("src.services.recommender._fetch_commons_image", return_value=""),
            patch("src.services.recommender._fetch_local_name_image", return_value=""),
            patch(
                "src.services.recommender._fetch_pixabay_api_image",
                return_value="https://pixabay.com/generic.jpg",
            ),
            patch("src.services.recommender._fetch_unsplash_api_image", return_value=""),
            patch("src.services.recommender._save_image_cache"),
        ):
            _enrich_one_item(item, city="Kyoto")

        assert item["image_url"] == "https://upload.wikimedia.org/specific.jpg"

    def test_prefers_wikidata_over_pixabay_on_recheck(self):
        """Wikipedia keeps failing, Wikidata succeeds on the re-check -> its URL is used."""
        item = {"name": "Some Temple"}
        # First call (Tier 3): Wikidata fails (rate-limited)
        # Second call (second chance after Pixabay): Wikidata succeeds
        wikidata_answers = ["", "https://upload.wikimedia.org/commons/specific.jpg"]

        with (
            patch("src.services.recommender._IMAGE_CACHE", {}),
            patch("src.services.recommender._fetch_wiki_image", return_value=""),
            patch("src.services.recommender._fetch_wiki_image_multilang", return_value=""),
            patch("src.services.recommender._fetch_wikidata_image", side_effect=wikidata_answers),
            patch("src.services.recommender._fetch_commons_image", return_value=""),
            patch("src.services.recommender._fetch_local_name_image", return_value=""),
            patch(
                "src.services.recommender._fetch_pixabay_api_image",
                return_value="https://pixabay.com/generic.jpg",
            ),
            patch("src.services.recommender._fetch_unsplash_api_image", return_value=""),
            patch("src.services.recommender._save_image_cache"),
        ):
            _enrich_one_item(item, city="Kyoto")

        assert item["image_url"] == "https://upload.wikimedia.org/commons/specific.jpg"

    def test_keeps_pixabay_when_both_wikipedia_and_wikidata_still_fail(self):
        """If neither re-check finds anything, the Pixabay URL stays in place."""
        item = {"name": "Obscure Place"}

        with (
            patch("src.services.recommender._IMAGE_CACHE", {}),
            patch("src.services.recommender._fetch_wiki_image", return_value=""),
            patch("src.services.recommender._fetch_wiki_image_multilang", return_value=""),
            patch("src.services.recommender._fetch_wikidata_image", return_value=""),
            patch("src.services.recommender._fetch_commons_image", return_value=""),
            patch("src.services.recommender._fetch_local_name_image", return_value=""),
            patch(
                "src.services.recommender._fetch_pixabay_api_image",
                return_value="https://pixabay.com/generic.jpg",
            ),
            patch("src.services.recommender._fetch_unsplash_api_image", return_value=""),
            patch("src.services.recommender._save_image_cache"),
        ):
            _enrich_one_item(item, city="Kyoto")

        assert item["image_url"] == "https://pixabay.com/generic.jpg"