MovieApp / tests /test_vector_database_setup.py
Marija Maneva
Add files via upload
565af7a unverified
Raw
History Blame Contribute Delete
1.99 kB
import pandas as pd
import pytest
from unittest.mock import patch, MagicMock
from src.vector_database_setup import prepare_movie_descriptions, create_vector_database
@pytest.fixture
def sample_movies_df():
    """Provide the one-row movies DataFrame shared by the tests in this module.

    The single record describes "The Matrix" and carries the columns the
    tests read: ``movieId``, ``title``, ``clean_title``, ``year`` (stored as
    a string), ``genres`` (a list of genre names), ``avg_rating`` and
    ``rating_count``.
    """
    matrix_record = {
        'movieId': 1,
        'title': 'The Matrix (1999)',
        'clean_title': 'The Matrix',
        'year': '1999',
        'genres': ['Action', 'Sci-Fi'],
        'avg_rating': 4.5,
        'rating_count': 1200,
    }
    # Wrap each value in a one-element list so every key becomes a column
    # holding exactly one row (the genres list stays a single cell).
    column_data = {column: [value] for column, value in matrix_record.items()}
    return pd.DataFrame(column_data)
def test_prepare_movie_descriptions(sample_movies_df):
    """Check that prepare_movie_descriptions adds a usable 'description' column.

    The description generated for the sample row must mention the cleaned
    title, the release year and the "Action" genre.
    """
    # Hand over a copy so the fixture frame itself is not passed to the
    # function under test.
    result_df = prepare_movie_descriptions(sample_movies_df.copy())

    assert 'description' in result_df.columns

    first_description = result_df['description'].iloc[0]
    for expected_fragment in ("The Matrix", "1999", "Action"):
        assert expected_fragment in first_description
@patch("src.vector_database_setup.chromadb.PersistentClient")
@patch("src.vector_database_setup.embedding_functions.SentenceTransformerEmbeddingFunction")
@patch("src.vector_database_setup.SentenceTransformer")
def test_create_vector_database(mock_model, mock_embed_func, mock_client, sample_movies_df):
    """Check that create_vector_database creates one collection and fills it once.

    The Chroma persistent client, the sentence-transformer embedding function
    and the SentenceTransformer name are replaced by mocks in
    ``src.vector_database_setup``. Patch decorators are applied bottom-up, so
    the mock arguments arrive in the reverse order of the decorators:
    ``mock_model``, ``mock_embed_func``, ``mock_client``.
    """

    def describe(row):
        # Hand-built description text, intended to mimic what
        # prepare_movie_descriptions would write for the same row.
        return (
            f"Title: {row['clean_title']}. "
            f"Year: {row['year']}. "
            f"Genres: {', '.join(row['genres'])}. "
            f"Average Rating: {row['avg_rating']:.1f}/5 from {int(row['rating_count'])} users."
        )

    # The column is added to the fixture frame in place; that same frame is
    # what gets passed to the function under test.
    sample_movies_df['description'] = sample_movies_df.apply(describe, axis=1)

    # Wire the patched client so create_collection() hands back a known mock.
    fake_collection = MagicMock()
    client_instance = mock_client.return_value
    client_instance.create_collection.return_value = fake_collection

    returned_collection = create_vector_database(sample_movies_df)

    # Exactly one collection is created, it receives exactly one add() call,
    # and it is the object returned to the caller.
    client_instance.create_collection.assert_called_once()
    fake_collection.add.assert_called_once()
    assert returned_collection == fake_collection