import copy import pytest from bertopic.vectorizers import OnlineCountVectorizer @pytest.mark.parametrize('model', [('kmeans_pca_topic_model'), ('custom_topic_model'), ('merged_topic_model'), ('reduced_topic_model'), ('online_topic_model')]) def test_online_cv(model, documents, request): topic_model = copy.deepcopy(request.getfixturevalue(model)) vectorizer_model = OnlineCountVectorizer(stop_words="english", ngram_range=(2, 2)) topics = [topic_model.get_topic(topic) for topic in set(topic_model.topics_)] topic_model.update_topics(documents, vectorizer_model=vectorizer_model) new_topics = [topic_model.get_topic(topic) for topic in set(topic_model.topics_)] for old_topic, new_topic in zip(topics, new_topics): if old_topic[0][0] != "": assert old_topic != new_topic @pytest.mark.parametrize('model', [('online_topic_model')]) def test_clean_bow(model, request): topic_model = copy.deepcopy(request.getfixturevalue(model)) original_shape = topic_model.vectorizer_model.X_.shape topic_model.vectorizer_model.delete_min_df = 2 topic_model.vectorizer_model._clean_bow() assert original_shape[0] == topic_model.vectorizer_model.X_.shape[0] assert original_shape[1] > topic_model.vectorizer_model.X_.shape[1]