| """Tests for the recommender module.""" |
|
|
| import json |
| from unittest.mock import MagicMock, patch |
|
|
| import pytest |
|
|
| |
| from src.services.recommender import ( |
| _enrich_one_item, |
| _fetch_wiki_image, |
| _haversine_km, |
| _is_media_entertainment_page, |
| _parse_json_response, |
| name_key, |
| ) |
|
|
|
|
| |
| |
| |
class TestParseJsonResponse:
    """Exercise _parse_json_response on well-formed and malformed LLM output."""

    def test_valid_json_array(self):
        """A well-formed JSON array comes back as the equivalent list."""
        payload = '[{"name": "Eiffel Tower"}, {"name": "Louvre"}]'
        expected = [{"name": "Eiffel Tower"}, {"name": "Louvre"}]
        assert _parse_json_response(payload) == expected

    def test_valid_json_object_wraps_to_list(self):
        """A lone JSON object is returned wrapped in a one-item list."""
        payload = '{"name": "Sagrada Familia", "city": "Barcelona"}'
        expected = [{"name": "Sagrada Familia", "city": "Barcelona"}]
        assert _parse_json_response(payload) == expected

    def test_json_in_markdown_fences(self):
        """A payload inside ```json ... ``` fences parses to its inner array."""
        fenced = '```json\n[{"name": "Colosseum"}]\n```'
        parsed = _parse_json_response(fenced)
        assert parsed == [{"name": "Colosseum"}]

    def test_truncated_json_last_object_missing_brace(self):
        """A stream cut off before the closing ']' yields None.

        Bracket-delimited recovery is only attempted by the parser when both
        '[' and ']' occur in the text, so input that ends mid-object with no
        closing ']' cannot be salvaged.
        """
        cut_off = '[{"name": "Eiffel"}, {"name": "Louvre"'
        parsed = _parse_json_response(cut_off)
        assert parsed is None

    def test_empty_string_returns_none(self):
        """The empty string parses to None."""
        parsed = _parse_json_response("")
        assert parsed is None

    def test_whitespace_only_returns_none(self):
        """Input made only of whitespace parses to None."""
        parsed = _parse_json_response(" \n\t ")
        assert parsed is None

    def test_garbage_string_returns_none(self):
        """Plain text with no JSON in it parses to None."""
        parsed = _parse_json_response("not json at all")
        assert parsed is None

    def test_none_like_text_returns_none(self):
        """Prose that merely contains braces/brackets is not treated as JSON."""
        noisy_text = "just some random { text } with [ brackets"
        assert _parse_json_response(noisy_text) is None
|
|
|
|
| |
| |
| |
class TestHaversineKm:
    """Sanity checks for the _haversine_km great-circle distance helper."""

    def test_london_to_paris(self):
        """London → Paris should land in the 330-360 km band (~344 km)."""
        london = (51.5074, -0.1278)
        paris = (48.8566, 2.3522)
        dist = _haversine_km(*london, *paris)
        assert 330 <= dist <= 360, f"Expected ~344 km, got {dist:.1f}"

    def test_tokyo_to_osaka(self):
        """Tokyo → Osaka should land in the 390-420 km band (~403 km)."""
        tokyo = (35.6762, 139.6503)
        osaka = (34.6937, 135.5023)
        dist = _haversine_km(*tokyo, *osaka)
        assert 390 <= dist <= 420, f"Expected ~403 km, got {dist:.1f}"

    def test_same_point_zero_distance(self):
        """Measuring from a point to itself gives (approximately) zero."""
        paris = (48.8566, 2.3522)
        assert _haversine_km(*paris, *paris) == pytest.approx(0.0, abs=0.001)

    def test_symmetric(self):
        """Swapping origin and destination does not change the distance."""
        new_york = (40.7128, -74.0060)
        los_angeles = (34.0522, -118.2437)
        outbound = _haversine_km(*new_york, *los_angeles)
        inbound = _haversine_km(*los_angeles, *new_york)
        assert outbound == pytest.approx(inbound)
|
|
|
|
| |
| |
| |
class TestIsMediaEntertainmentPage:
    """Checks that film/TV Wikipedia pages are flagged and real places are not."""

    def test_star_wars_film(self):
        """A '(film)' qualifier in the title is flagged as media."""
        verdict = _is_media_entertainment_page("Star Wars (film)", "")
        assert verdict is True

    def test_dark_knight_movie(self):
        """A '(movie)' qualifier in the title is flagged as media."""
        verdict = _is_media_entertainment_page("The Dark Knight (movie)", "")
        assert verdict is True

    def test_friends_tv_series(self):
        """A '(TV series)' qualifier in the title is flagged as media."""
        verdict = _is_media_entertainment_page("Friends (TV series)", "")
        assert verdict is True

    def test_short_film_media(self):
        """A '(short film)' qualifier in the title is flagged as media."""
        verdict = _is_media_entertainment_page("Colosseum (short film)", "")
        assert verdict is True

    def test_eiffel_tower_not_media(self):
        """A landmark title with no media qualifier is not flagged."""
        verdict = _is_media_entertainment_page("Eiffel Tower", "")
        assert verdict is False

    def test_central_park_not_media(self):
        """A park title with no media qualifier is not flagged."""
        verdict = _is_media_entertainment_page("Central Park", "")
        assert verdict is False

    def test_extract_based_detection(self):
        """A neutral title is still flagged when the extract describes a film."""
        title = "Inception"
        extract = "Inception is a 2010 science fiction film directed by Christopher Nolan..."
        assert _is_media_entertainment_page(title, extract) is True

    def test_extract_tv_series_detection(self):
        """A neutral title is still flagged when the extract describes a TV series."""
        title = "Breaking Bad"
        extract = "Breaking Bad is an American television series created by Vince Gilligan..."
        assert _is_media_entertainment_page(title, extract) is True

    def test_clean_title_and_extract(self):
        """A real place with a place-describing extract is not flagged."""
        title = "Machu Picchu"
        extract = "Machu Picchu is a 15th-century Inca citadel situated on a mountain ridge..."
        assert _is_media_entertainment_page(title, extract) is False
|
|
|
|
| |
| |
| |
class TestNameKey:
    """Checks the deduplication key that name_key derives from an item's name."""

    def test_eiffel_tower(self):
        """The trailing 'Tower' is dropped, leaving just 'eiffel'."""
        attraction = {"name": "Eiffel Tower"}
        assert name_key(attraction) == "eiffel"

    def test_louvre_museum(self):
        """The trailing 'Museum' is dropped, leaving just 'louvre'."""
        attraction = {"name": "Louvre Museum"}
        assert name_key(attraction) == "louvre"

    def test_notre_dame_with_parenthetical_and_church_suffix(self):
        """'(Paris)', the 'Church' suffix and the hyphen all disappear from the key."""
        attraction = {"name": "Notre-Dame Church (Paris)"}
        assert name_key(attraction) == "notredame"

    def test_empty_name(self):
        """An empty name maps to an empty key."""
        attraction = {"name": ""}
        assert name_key(attraction) == ""

    def test_missing_name_key(self):
        """A dict without a 'name' entry maps to an empty key."""
        attraction = {}
        assert name_key(attraction) == ""

    def test_same_name_with_different_punctuation(self):
        """Names differing only in punctuation share one key."""
        punctuated = name_key({"name": "St. Peter's Basilica"})
        plain = name_key({"name": "St Peters Basilica"})
        assert punctuated == plain

    def test_same_name_different_casing(self):
        """Names differing only in letter case share one key."""
        shouted = name_key({"name": "COLOSSEUM"})
        lowered = name_key({"name": "colosseum"})
        assert shouted == lowered
|
|
|
|
| |
| |
| |
class TestFetchWikiImage:
    """Drive _fetch_wiki_image with _http_get_json replaced by canned responses."""

    def test_returns_thumbnail_url_when_present(self):
        """The summary response carries a thumbnail, so its URL is returned."""

        def fake_http(url, **kwargs):
            # Route on URL fragments: search first, then page summary.
            is_search = "action=query" in url and "list=search" in url
            if is_search:
                return {"query": {"search": [{"title": "Eiffel Tower"}]}}
            if "rest_v1/page/summary" in url:
                return {
                    "title": "Eiffel Tower",
                    "extract": "The Eiffel Tower is a wrought-iron lattice tower...",
                    "thumbnail": {"source": "https://upload.wikimedia.org/thumb_eiffel.jpg"},
                }
            return None

        with patch("src.services.recommender._http_get_json", side_effect=fake_http):
            image_url = _fetch_wiki_image("Eiffel Tower", city="Paris")

        assert image_url == "https://upload.wikimedia.org/thumb_eiffel.jpg"

    def test_returns_empty_string_when_no_thumbnail(self):
        """No canned response contains an image, so '' is returned."""

        def fake_http(url, **kwargs):
            is_search = "action=query" in url and "list=search" in url
            if is_search:
                return {"query": {"search": [{"title": "Some Obscure Place"}]}}
            if "rest_v1/page/summary" in url:
                # Summary payload deliberately has no "thumbnail" entry.
                return {
                    "title": "Some Obscure Place",
                    "extract": "Some Obscure Place is a location...",
                }
            if "prop=pageimages" in url:
                # Page record deliberately has no image fields.
                page = {"pageid": 12345, "title": "Some Obscure Place"}
                return {"query": {"pages": {"12345": page}}}
            return None

        with patch("src.services.recommender._http_get_json", side_effect=fake_http):
            image_url = _fetch_wiki_image("Some Obscure Place")

        assert image_url == ""

    def test_returns_empty_string_when_search_finds_nothing(self):
        """An empty search result list leads to '' being returned."""

        def fake_http(url, **kwargs):
            is_search = "action=query" in url and "list=search" in url
            if is_search:
                return {"query": {"search": []}}
            return None

        with patch("src.services.recommender._http_get_json", side_effect=fake_http):
            image_url = _fetch_wiki_image("XyzzyNonexistentPlace")

        assert image_url == ""
|
|
|
|
| |
| |
| |
class TestEnrichOneItemPixabayRecheck:
    """Tests for Pixabay fallback re-checking Wikipedia/Wikidata for specific images.

    When Pixabay returns a generic stock photo, the function re-checks
    Wikipedia/Wikidata tiers (which may have been rate-limited on first pass).
    If a specific Wikimedia image is now available, it should be preferred.
    """

    # Module whose attributes are patched by every test in this class.
    _RECOMMENDER = "src.services.recommender"
    # Generic stock-photo URL handed back by the patched Pixabay tier.
    _PIXABAY_URL = "https://pixabay.com/generic.jpg"

    def _enrich_with_patched_tiers(self, item, **overrides):
        """Call ``_enrich_one_item(item, city="Kyoto")`` with all image tiers patched.

        The image cache is swapped for an empty dict, ``_save_image_cache`` is
        replaced by a mock, and each ``_fetch_*`` image function is replaced by
        a mock. By default every fetcher returns ``""`` except Pixabay, which
        returns ``_PIXABAY_URL``.

        Args:
            item: The attraction dict passed to ``_enrich_one_item``.
            **overrides: Maps a fetcher's attribute name to its scripted
                outcome. A ``list`` becomes the mock's ``side_effect`` (one
                entry per successive call); any other value becomes its
                ``return_value``.
        """
        # Local import keeps this block self-contained within the file.
        from contextlib import ExitStack

        tier_outcomes = {
            "_fetch_wiki_image": "",
            "_fetch_wiki_image_multilang": "",
            "_fetch_wikidata_image": "",
            "_fetch_commons_image": "",
            "_fetch_local_name_image": "",
            "_fetch_pixabay_api_image": self._PIXABAY_URL,
            "_fetch_unsplash_api_image": "",
        }
        # A misspelled override name fails loudly: patch() raises on a
        # non-existent attribute.
        tier_outcomes.update(overrides)

        with ExitStack() as stack:
            stack.enter_context(patch(f"{self._RECOMMENDER}._IMAGE_CACHE", {}))
            stack.enter_context(patch(f"{self._RECOMMENDER}._save_image_cache"))
            for name, outcome in tier_outcomes.items():
                fetcher = stack.enter_context(patch(f"{self._RECOMMENDER}.{name}"))
                if isinstance(outcome, list):
                    fetcher.side_effect = outcome
                else:
                    fetcher.return_value = outcome
            _enrich_one_item(item, city="Kyoto")

    def test_prefers_wikipedia_over_pixabay_on_recheck(self):
        """After Pixabay returns a URL, re-check Wikipedia finds a specific
        image -> use Wikipedia URL instead of Pixabay generic."""
        item = {"name": "Adashino Nenbutsuji"}

        # First Wikipedia call comes back empty; the second call succeeds.
        self._enrich_with_patched_tiers(
            item,
            _fetch_wiki_image=["", "https://upload.wikimedia.org/specific.jpg"],
        )

        assert item["image_url"] == "https://upload.wikimedia.org/specific.jpg"

    def test_prefers_wikidata_over_pixabay_on_recheck(self):
        """After Pixabay returns a URL and Wikipedia still fails, re-check
        Wikidata finds a specific image -> use Wikidata URL instead."""
        item = {"name": "Some Temple"}

        # First Wikidata call comes back empty; the second call succeeds.
        self._enrich_with_patched_tiers(
            item,
            _fetch_wikidata_image=["", "https://upload.wikimedia.org/commons/specific.jpg"],
        )

        assert item["image_url"] == "https://upload.wikimedia.org/commons/specific.jpg"

    def test_keeps_pixabay_when_both_wikipedia_and_wikidata_still_fail(self):
        """When both re-checks still fail, Pixabay URL is used as-is."""
        item = {"name": "Obscure Place"}

        self._enrich_with_patched_tiers(item)

        assert item["image_url"] == "https://pixabay.com/generic.jpg"
|
|