File size: 2,287 Bytes
cdb73a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# tests/test_embedder.py

import os
import tempfile
import unittest

import numpy as np  # Changed from pickle
import pandas as pd

import src.book_recommender.core.config as config
from src.book_recommender.data.processor import clean_and_prepare_data
from src.book_recommender.ml.embedder import generate_embeddings


class TestEmbedder(unittest.TestCase):
    """Exercise ``generate_embeddings`` on a small three-book dataset.

    Each test runs against its own temporary directory, which holds the raw
    CSV, the processed Parquet file and the saved ``.npy`` embeddings.
    """

    def setUp(self):
        """Write a three-row sample CSV and run it through data preparation."""
        self.test_dir = tempfile.TemporaryDirectory()
        # Register the cleanup right away: unittest does not call tearDown()
        # when setUp() raises, but functions registered through addCleanup()
        # still run, so the directory is removed even if the CSV write or the
        # data preparation step below fails.
        self.addCleanup(self.test_dir.cleanup)

        self.raw_path = os.path.join(self.test_dir.name, "test_books.csv")
        self.processed_path = os.path.join(self.test_dir.name, "test_books_cleaned.parquet")
        self.embeddings_path = os.path.join(self.test_dir.name, "test_embeddings.npy")

        # Create dummy data and process it
        sample_data = {
            "title": ["Book A", "Book B", "Book C"],
            "authors": ["Author A", "Author B", "Author C"],
            "genres": ["Fiction", "Sci-Fi", "History"],
            "description": ["Desc A", "Desc B", "Desc C"],
            "tags": ["tag1", "tag2", "tag3"],
        }
        pd.DataFrame(sample_data).to_csv(self.raw_path, index=False)
        # The test below reads self.processed_path back as Parquet, so this
        # call is expected to leave a Parquet file at that location.
        clean_and_prepare_data(self.raw_path, self.processed_path)

    def test_generate_embeddings(self):
        """Embeddings have shape (3, EMBEDDING_DIMENSION) in memory and on disk."""
        # Load the processed DataFrame first
        processed_df = pd.read_parquet(self.processed_path)

        # generate_embeddings takes the DataFrame itself, not a file path
        embeddings = generate_embeddings(processed_df, show_progress_bar=False)

        # Save embeddings
        np.save(self.embeddings_path, embeddings)

        # 1. Test that the output file is created
        self.assertTrue(os.path.exists(self.embeddings_path))

        # 2. Test that the embeddings shape is correct
        expected_rows = 3  # one row per sample book written in setUp
        expected_dims = config.EMBEDDING_DIMENSION
        self.assertEqual(embeddings.shape, (expected_rows, expected_dims))

        # 3. Test that the saved file can be loaded and has the same shape
        loaded_embeddings = np.load(self.embeddings_path)
        self.assertEqual(loaded_embeddings.shape, (expected_rows, expected_dims))


# When this file is executed as a script (rather than imported), hand control
# to unittest's command-line entry point to run the tests in this module.
if __name__ == "__main__":
    unittest.main()