Spaces:

nice-bill
/

deepshelf-api

Running

App Files Files Community

deepshelf-api / tests /test_integration.py

nice-bill

initial commit

cdb73a8 3 months ago

raw

history blame contribute delete

3.57 kB

	# tests/test_integration.py

	import os
	import tempfile
	import unittest

	import pandas as pd

	# Project modules under test
	import src.book_recommender.core.config as config
	from src.book_recommender.data.processor import process_dataframe
	from src.book_recommender.ml.embedder import generate_embeddings
	from src.book_recommender.ml.recommender import BookRecommender


	class TestIntegration(unittest.TestCase):
	    """End-to-end test of the recommendation pipeline on a tiny CSV fixture.

	    The fixture holds four books; the test loads it, runs it through
	    ``process_dataframe``, ``generate_embeddings`` and ``BookRecommender``,
	    and checks that the top recommendation is plausible.
	    """

	    # Lower bound for the top recommendation's similarity score. Kept
	    # deliberately modest: with normalized text and the 'all-MiniLM-L6-v2'
	    # model, scores for even closely related documents might not be
	    # extremely high, and the test should survive minor model variations.
	    MIN_SIMILARITY = 0.3

	    def setUp(self):
	        """Create a temporary directory containing a four-row books CSV."""
	        self.test_dir = tempfile.TemporaryDirectory()
	        # Register the cleanup immediately: unlike tearDown, cleanups still
	        # run when a later statement in setUp raises, so the directory is
	        # removed even if writing the fixture fails.
	        self.addCleanup(self.test_dir.cleanup)

	        self.raw_csv_path = os.path.join(self.test_dir.name, "test_books.csv")

	        # Dummy catalogue written to disk so the test starts from a raw CSV.
	        sample_data = {
	            "title": [
	                "The Sun Also Rises",
	                "A Farewell to Arms",
	                "For Whom the Bell Tolls",
	                "The Old Man and the Sea",
	            ],
	            "authors": [
	                "Ernest Hemingway",
	                "Ernest Hemingway",
	                "Ernest Hemingway",
	                "Ernest Hemingway",
	            ],
	            "genres": ["Fiction", "War, Fiction", "War, Fiction", "Fiction"],
	            "description": [
	                "A story of American and British expatriates in Paris.",
	                "A love story during World War I.",
	                "An American in the Spanish Civil War.",
	                "An old fisherman struggles with a giant marlin.",
	            ],
	            "tags": ["lost generation", "war", "spain", "cuba"],
	        }
	        pd.DataFrame(sample_data).to_csv(self.raw_csv_path, index=False)

	    def test_full_pipeline(self):
	        """Run load -> process -> embed -> recommend and sanity-check the result."""
	        # --- 1. Load Raw Data ---
	        raw_df = pd.read_csv(self.raw_csv_path)
	        self.assertEqual(len(raw_df), 4)

	        # --- 2. Process DataFrame ---
	        processed_df = process_dataframe(raw_df)
	        self.assertEqual(len(processed_df), 4)
	        self.assertIn("combined_text", processed_df.columns)

	        # --- 3. Generate Embeddings ---
	        embeddings = generate_embeddings(df=processed_df, model_name=config.EMBEDDING_MODEL)
	        self.assertEqual(embeddings.shape, (4, config.EMBEDDING_DIMENSION))

	        # --- 4. Initialize Recommender ---
	        recommender = BookRecommender(book_data=processed_df, embeddings=embeddings)
	        self.assertIsNotNone(recommender)

	        # --- 5. Get Recommendations ---
	        # "A Farewell to Arms" and "For Whom the Bell Tolls" are both war
	        # novels by the same author, so they should be highly similar.
	        recommendations = recommender.get_recommendations("A Farewell to Arms", top_k=1)

	        # --- 6. Assert Results ---
	        self.assertEqual(len(recommendations), 1)
	        top_recommendation = recommendations[0]

	        # The exact top book can vary with model updates, so only require
	        # that it is one of the other titles in the fixture (i.e. not the
	        # query book itself).
	        expected_titles = [
	            "The Sun Also Rises",
	            "For Whom the Bell Tolls",
	            "The Old Man and the Sea",
	        ]
	        self.assertIn(top_recommendation["title"], expected_titles)

	        # assertGreater reports both operands on failure, so no custom
	        # message is needed.
	        self.assertGreater(top_recommendation["similarity"], self.MIN_SIMILARITY)

	# Run this module's tests via unittest when the file is executed directly.
	if __name__ == "__main__":
	unittest.main()