deepshelf-api / tests /test_recommender.py
nice-bill's picture
initial commit
cdb73a8
# tests/test_recommender.py
import unittest
import numpy as np
import pandas as pd
import src.book_recommender.core.config as config # Import config for EMBEDDING_DIMENSION
from src.book_recommender.ml.recommender import BookRecommender
class TestRecommender(unittest.TestCase):
def setUp(self):
# Create dummy data directly in memory
self.book_data = pd.DataFrame(
{
"id": ["1", "2", "3", "4", "5"],
"title": ["Alpha Book", "Beta Book", "Gamma Book", "Delta Book", "Epsilon Book"],
"title_lower": ["alpha book", "beta book", "gamma book", "delta book", "epsilon book"],
"authors": ["Auth1", "Auth2", "Auth3", "Auth1", "Auth5"],
"authors_lower": ["auth1", "auth2", "auth3", "auth1", "auth5"],
"description": ["Desc1", "Desc2", "Desc3", "Desc1", "Desc5"],
"genres": ["Fiction", "Sci-Fi", "History", "Fiction", "Fantasy"],
"tags": ["tag1", "tag2", "tag3", "tag4", "tag5"],
"rating": [4.5, 4.0, 4.8, 4.3, 4.6],
"combined_text": ["alpha", "beta", "gamma", "delta", "epsilon"],
}
)
# Create dummy embeddings (simple, not from a real model)
# Make 'alpha book' very similar to 'delta book'
# Make 'beta book' very similar to 'epsilon book'
self.embeddings = np.array(
[
[0.9, 0.1, 0.1, 0.1, 0.1], # alpha
[0.1, 0.9, 0.1, 0.1, 0.1], # beta
[0.1, 0.1, 0.9, 0.1, 0.1], # gamma
[0.85, 0.15, 0.1, 0.1, 0.1], # delta (similar to alpha)
[0.1, 0.8, 0.1, 0.1, 0.1], # epsilon (similar to beta)
]
)
# Pad embeddings to match expected dimension if necessary
if self.embeddings.shape[1] < config.EMBEDDING_DIMENSION:
padding = np.zeros((self.embeddings.shape[0], config.EMBEDDING_DIMENSION - self.embeddings.shape[1]))
self.embeddings = np.hstack((self.embeddings, padding))
# Initialize the recommender with in-memory data
self.recommender = BookRecommender(book_data=self.book_data, embeddings=self.embeddings)
def test_get_recommendations(self):
# Test with a book that exists
recs = self.recommender.get_recommendations("alpha book", top_k=1, similarity_threshold=0.0)
# 1. Should return 1 recommendation
self.assertEqual(len(recs), 1)
# 2. The input book itself should not be in the recommendations
rec_titles = [r["title"] for r in recs]
self.assertNotIn("alpha book", rec_titles)
# 3. The most similar book ('delta book') should be first
self.assertEqual(recs[0]["title"], "Delta Book")
# 4. Check the structure of the output
self.assertIsInstance(recs[0], dict)
self.assertIn("title", recs[0])
self.assertIn("authors", recs[0])
self.assertIn("description", recs[0])
self.assertIn("genres", recs[0])
self.assertIn("tags", recs[0])
self.assertIn("rating", recs[0])
self.assertIn("similarity", recs[0])
def test_get_recommendations_non_existent_title(self):
# Test with a book title that does not exist
recs = self.recommender.get_recommendations("non existent book")
self.assertEqual(len(recs), 0)
def test_recommender_accuracy(self):
"""
Tests if the recommender returns expected similar books based on dummy embeddings.
"""
# Test for 'beta book', expecting 'epsilon book' as the most similar
recs = self.recommender.get_recommendations("beta book", top_k=1)
self.assertEqual(len(recs), 1)
self.assertEqual(recs[0]["title"], "Epsilon Book")
self.assertTrue(recs[0]["similarity"] > 0.7) # Expect a high similarity for a known similar book
if __name__ == "__main__":
unittest.main()