Spaces:
Running
Running
File size: 2,287 Bytes
# tests/test_embedder.py (revision cdb73a8)
import os
import tempfile
import unittest
import numpy as np # Changed from pickle
import pandas as pd
import src.book_recommender.core.config as config
from src.book_recommender.data.processor import clean_and_prepare_data
from src.book_recommender.ml.embedder import generate_embeddings
class TestEmbedder(unittest.TestCase):
    """Check that ``generate_embeddings`` returns a correctly shaped array.

    Each test runs against a fresh temporary directory holding a three-book
    CSV that has been passed through ``clean_and_prepare_data``.
    """

    def setUp(self):
        """Create the temp directory, write the sample CSV and process it."""
        self.test_dir = tempfile.TemporaryDirectory()
        # Register the cleanup immediately: unittest skips tearDown() when
        # setUp() raises, but functions added with addCleanup() still run,
        # so the directory is removed even if data preparation below fails.
        self.addCleanup(self.test_dir.cleanup)

        self.raw_path = os.path.join(self.test_dir.name, "test_books.csv")
        self.processed_path = os.path.join(
            self.test_dir.name, "test_books_cleaned.parquet"
        )
        self.embeddings_path = os.path.join(
            self.test_dir.name, "test_embeddings.npy"
        )

        # Create dummy data and process it.
        sample_data = {
            "title": ["Book A", "Book B", "Book C"],
            "authors": ["Author A", "Author B", "Author C"],
            "genres": ["Fiction", "Sci-Fi", "History"],
            "description": ["Desc A", "Desc B", "Desc C"],
            "tags": ["tag1", "tag2", "tag3"],
        }
        pd.DataFrame(sample_data).to_csv(self.raw_path, index=False)
        clean_and_prepare_data(self.raw_path, self.processed_path)

    def test_generate_embeddings(self):
        """Embeddings have one row per book and survive a ``.npy`` round trip."""
        # generate_embeddings takes the processed DataFrame, not a file path.
        processed_df = pd.read_parquet(self.processed_path)
        embeddings = generate_embeddings(processed_df, show_progress_bar=False)
        np.save(self.embeddings_path, embeddings)

        # 1. The output file is created.
        self.assertTrue(os.path.exists(self.embeddings_path))

        # 2. The embeddings shape is correct: three sample books in, three
        #    rows out (assumes processing keeps every sample row).
        expected_shape = (3, config.EMBEDDING_DIMENSION)
        self.assertEqual(embeddings.shape, expected_shape)

        # 3. The saved file can be loaded back with the same shape AND the
        #    same values; a shape-only check would miss corrupted contents.
        loaded_embeddings = np.load(self.embeddings_path)
        self.assertEqual(loaded_embeddings.shape, expected_shape)
        np.testing.assert_array_equal(loaded_embeddings, embeddings)
# Allow running this file directly (python tests/test_embedder.py) in
# addition to discovery by a test runner.
if __name__ == "__main__":
    unittest.main()
|