Spaces:
Running
Running
| # tests/test_integration.py | |
| import os | |
| import tempfile | |
| import unittest | |
| import pandas as pd | |
| # Project modules exercised by the integration test | |
| import src.book_recommender.core.config as config | |
| from src.book_recommender.data.processor import process_dataframe | |
| from src.book_recommender.ml.embedder import generate_embeddings | |
| from src.book_recommender.ml.recommender import BookRecommender | |
class TestIntegration(unittest.TestCase):
    """Integration test chaining CSV loading, processing, embedding and recommendation."""

    def setUp(self):
        """Create a temporary directory containing a four-row sample books CSV."""
        self.test_dir = tempfile.TemporaryDirectory()
        # Register the cleanup immediately: unlike tearDown, cleanups still run
        # when a later statement in setUp raises, so the directory cannot leak.
        self.addCleanup(self.test_dir.cleanup)
        self.raw_csv_path = os.path.join(self.test_dir.name, "test_books.csv")

        # Create a dummy CSV file for the test
        sample_data = {
            "title": ["The Sun Also Rises", "A Farewell to Arms", "For Whom the Bell Tolls", "The Old Man and the Sea"],
            "authors": ["Ernest Hemingway", "Ernest Hemingway", "Ernest Hemingway", "Ernest Hemingway"],
            "genres": ["Fiction", "War, Fiction", "War, Fiction", "Fiction"],
            "description": [
                "A story of American and British expatriates in Paris.",
                "A love story during World War I.",
                "An American in the Spanish Civil War.",
                "An old fisherman struggles with a giant marlin.",
            ],
            "tags": ["lost generation", "war", "spain", "cuba"],
        }
        pd.DataFrame(sample_data).to_csv(self.raw_csv_path, index=False)

    def test_full_pipeline(self):
        """
        Tests the full data processing and recommendation pipeline end-to-end.
        """
        # --- 1. Load Raw Data ---
        raw_df = pd.read_csv(self.raw_csv_path)
        self.assertEqual(len(raw_df), 4)

        # --- 2. Process DataFrame ---
        processed_df = process_dataframe(raw_df)
        self.assertEqual(len(processed_df), 4)
        self.assertIn("combined_text", processed_df.columns)

        # --- 3. Generate Embeddings ---
        embeddings = generate_embeddings(df=processed_df, model_name=config.EMBEDDING_MODEL)
        self.assertEqual(embeddings.shape, (4, config.EMBEDDING_DIMENSION))

        # --- 4. Initialize Recommender ---
        recommender = BookRecommender(book_data=processed_df, embeddings=embeddings)
        self.assertIsNotNone(recommender)

        # --- 5. Get Recommendations ---
        # "A Farewell to Arms" and "For Whom the Bell Tolls" are both war novels by the same author,
        # so they should be highly similar.
        recommendations = recommender.get_recommendations("A Farewell to Arms", top_k=1)

        # --- 6. Assert Results ---
        self.assertEqual(len(recommendations), 1)
        top_recommendation = recommendations[0]

        # The exact top book can vary with model updates, so we check for reasonableness.
        # It should be another Hemingway novel from our list.
        expected_titles = ["The Sun Also Rises", "For Whom the Bell Tolls", "The Old Man and the Sea"]
        self.assertIn(top_recommendation["title"], expected_titles)

        # The similarity should be high, indicating a strong match.
        # NOTE: The threshold is set lower (e.g., > 0.3) because with normalized
        # text and the 'all-MiniLM-L6-v2' model, scores for even closely
        # related documents might not be extremely high. This ensures the test
        # is robust to minor model variations.
        # assertGreater reports both operands on failure, unlike assertTrue(a > b).
        self.assertGreater(
            top_recommendation["similarity"],
            0.3,
            f"Similarity score {top_recommendation['similarity']} is not > 0.3",
        )
# Allow running this file directly (python tests/test_integration.py) in
# addition to discovery through a test runner.
if __name__ == "__main__":
    unittest.main()