import os import unittest from unittest.mock import MagicMock import numpy as np import pandas as pd # FastAPI related imports from fastapi.testclient import TestClient import src.book_recommender.core.config as config from src.book_recommender.api.dependencies import ( # Import actual dependencies to override get_clusters_data, get_recommender, get_sentence_transformer_model, ) from src.book_recommender.api.main import app as fastapi_app # Import the FastAPI app instance from src.book_recommender.ml.recommender import BookRecommender class TestFastAPIEndpoints(unittest.TestCase): dummy_processed_data = pd.DataFrame( { "id": ["1", "2", "3", "4", "5"], "title": ["Test Book 1", "Test Book 2", "Related Test Book 1", "Unrelated Book", "Sci-Fi Classic"], "authors": ["Author A", "Author B", "Author A", "Author C", "Author D"], "genres": ["Fiction", "Science Fiction", "Fiction", "Fantasy", "Science Fiction"], "description": [ "Description for book 1", "Description for book 2 with sci-fi themes", "Another book by Author A", "A magical adventure", "Deep space exploration", ], "title_lower": ["test book 1", "test book 2", "related test book 1", "unrelated book", "sci-fi classic"], "authors_lower": ["author a", "author b", "author a", "author c", "author d"], "combined_text": [ "test book 1 by author a. genres: fiction. " "description: description for book 1. tags: ", "test book 2 by author b. genres: science fiction. " "description: description for book 2 with sci-fi themes. tags: ", "related test book 1 by author a. genres: fiction. " "description: another book by author a. tags: ", "unrelated book by author c. genres: fantasy. " "description: a magical adventure. tags: ", "sci-fi classic by author d. genres: science fiction. " "description: deep space exploration. tags: ", ], "cluster_id": [0, 1, 0, 2, 1], } ) dummy_embeddings = np.random.rand(len(dummy_processed_data), config.EMBEDDING_DIMENSION).astype("float32") dummy_cluster_names = {0: "Fiction Collection", 1: "Sci-Fi Collection", 2: "Fantasy Collection"} def setUp(self): super().setUp() # Clear lru_cache for dependencies get_recommender.cache_clear() get_sentence_transformer_model.cache_clear() get_clusters_data.cache_clear() # 1. Create mock objects for dependencies self.mock_model = MagicMock() self.mock_recommender_instance = MagicMock( spec=BookRecommender ) # Note: this is the instance returned by get_recommender self.mock_clusters_data_value = ( # Note: this is the actual value returned by get_clusters_data self.dummy_processed_data["cluster_id"].values, self.dummy_cluster_names, self.dummy_processed_data, ) # 2. Configure mock behaviors self.mock_model.encode.side_effect = lambda x, **kwargs: ( np.array([np.random.rand(config.EMBEDDING_DIMENSION)]).astype("float32") if isinstance(x, str) else np.random.rand(len(x), config.EMBEDDING_DIMENSION).astype("float32") ) self.mock_recommender_instance.book_data = self.dummy_processed_data self.mock_recommender_instance.embeddings = self.dummy_embeddings self.mock_recommender_instance.get_recommendations.return_value = [ { "id": "3", "title": "Related Test Book 1", "authors": "Author A", "description": "Another book by Author A", "genres": "Fiction", "similarity": 0.8, }, { "id": "1", "title": "Test Book 1", "authors": "Author A", "description": "Description for book 1", "genres": "Fiction", "similarity": 0.7, }, ] self.mock_recommender_instance.get_recommendations_from_vector.return_value = [ { "id": "5", "title": "Sci-Fi Classic", "authors": "Author D", "description": "Deep space exploration", "genres": "Science Fiction", "similarity": 0.9, }, { "id": "2", "title": "Test Book 2", "authors": "Author B", "description": "Description for book 2 with sci-fi themes", "genres": "Science Fiction", "similarity": 0.8, }, ] # 3. Override dependencies for the FastAPI app fastapi_app.dependency_overrides[get_sentence_transformer_model] = lambda: self.mock_model fastapi_app.dependency_overrides[get_recommender] = ( lambda: self.mock_recommender_instance ) # Return the instance fastapi_app.dependency_overrides[get_clusters_data] = ( lambda: self.mock_clusters_data_value ) # Return the actual value # self.client needs to be created *after* the dependencies are mocked self.client = TestClient(fastapi_app) # Ensure feedback file is clean before each test feedback_file_path = os.path.join(config.BASE_DIR, "data", "feedback", "user_feedback.jsonl") if os.path.exists(feedback_file_path): os.remove(feedback_file_path) def tearDown(self): # Clear dependency overrides after each test fastapi_app.dependency_overrides = {} super().tearDown() def test_health_check(self): response = self.client.get("/health") self.assertEqual(response.status_code, 200) self.assertEqual( response.json(), {"status": "OK", "message": "BookFinder API is healthy and core services are loaded."} ) def test_recommend_by_query(self): response = self.client.post("/recommend/query", json={"query": "sci-fi books", "top_k": 2}) self.assertEqual(response.status_code, 200) recommendations = response.json() self.assertEqual(len(recommendations), 2) self.assertEqual(recommendations[0]["book"]["title"], "Sci-Fi Classic") self.assertEqual(recommendations[1]["book"]["title"], "Test Book 2") def test_recommend_by_title(self): response = self.client.post("/recommend/title", json={"title": "Test Book 1", "top_k": 2}) self.assertEqual(response.status_code, 200) recommendations = response.json() self.assertEqual(len(recommendations), 2) self.assertEqual(recommendations[0]["book"]["title"], "Related Test Book 1") self.assertEqual(recommendations[1]["book"]["title"], "Test Book 1") def test_list_books(self): response = self.client.get("/books?page=1&page_size=3") self.assertEqual(response.status_code, 200) data = response.json() self.assertEqual(data["total"], 5) self.assertEqual(len(data["books"]), 3) self.assertEqual(data["books"][0]["title"], "Test Book 1") def test_search_books(self): response = self.client.get("/books/search?query=author A&page=1&page_size=5") self.assertEqual(response.status_code, 200) data = response.json() self.assertEqual(data["total"], 2) self.assertEqual(len(data["books"]), 2) self.assertEqual(data["books"][0]["title"], "Test Book 1") self.assertEqual(data["books"][1]["title"], "Related Test Book 1") def test_get_stats(self): response = self.client.get("/stats") self.assertEqual(response.status_code, 200) data = response.json() self.assertEqual(data["total_books"], 5) self.assertIn("fiction", data["genres_count"]) self.assertIn("author a", data["authors_count"]) def test_list_clusters(self): response = self.client.get("/clusters") self.assertEqual(response.status_code, 200) clusters = response.json() self.assertEqual(len(clusters), 3) self.assertEqual(clusters[0]["name"], "Fiction Collection") self.assertEqual(clusters[0]["size"], 2) # Test Book 1, Related Test Book 1 self.assertEqual(len(clusters[0]["top_books"]), 2) # Should sample up to 3, but there are only 2 def test_get_books_in_cluster(self): response = self.client.get("/clusters/0?page=1&page_size=10") self.assertEqual(response.status_code, 200) data = response.json() self.assertEqual(data["total"], 2) self.assertEqual(len(data["books"]), 2) self.assertEqual(data["books"][0]["title"], "Test Book 1") def test_get_cluster_sample(self): response = self.client.get("/clusters/1/sample?sample_size=1") self.assertEqual(response.status_code, 200) books = response.json() self.assertEqual(len(books), 1) self.assertEqual(books[0]["genres"], ["Science Fiction"]) # Only Sci-Fi books in cluster 1 def test_explain_recommendation_endpoint(self): sample_book = { "id": "1", "title": "Test Book 1", "authors": ["Author A"], "description": "Description for book 1", "genres": ["Fiction"], "cover_image_url": None, } response = self.client.post( "/explain", json={"query_text": "A book about fiction", "recommended_book": sample_book, "similarity_score": 0.75}, ) self.assertEqual(response.status_code, 200) explanation = response.json() self.assertIn("match_score", explanation) self.assertIn("confidence", explanation) self.assertIn("summary", explanation) self.assertIn("details", explanation) def test_submit_feedback_and_get_stats(self): # Submit feedback feedback_payload = { "query": "fantasy adventure", "book_id": "4", # Unrelated Book "feedback_type": "positive", "session_id": "test_session_1", } response = self.client.post("/feedback", json=feedback_payload) self.assertEqual(response.status_code, 204) # No Content feedback_payload_2 = { "query": "sci-fi classic", "book_id": "5", # Sci-Fi Classic "feedback_type": "negative", "session_id": "test_session_1", } response = self.client.post("/feedback", json=feedback_payload_2) self.assertEqual(response.status_code, 204) # No Content # Get stats response = self.client.get("/feedback/stats") self.assertEqual(response.status_code, 200) stats = response.json() self.assertEqual(stats["total_feedback"], 2) self.assertEqual(stats["positive_feedback"], 1) self.assertEqual(stats["negative_feedback"], 1) self.assertIn("Unrelated Book", stats["feedback_by_book_title"]) self.assertIn("fantasy adventure", stats["feedback_by_query"])