File size: 1,287 Bytes
19b102a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

import copy
import pytest
from bertopic.vectorizers import OnlineCountVectorizer


@pytest.mark.parametrize(
    "model",
    [
        "kmeans_pca_topic_model",
        "custom_topic_model",
        "merged_topic_model",
        "reduced_topic_model",
        "online_topic_model",
    ],
)
def test_online_cv(model, documents, request):
    """Check that updating topics with a bigram vectorizer changes them.

    The fixture named by ``model`` is deep-copied so the update does not
    touch the fixture object itself. Topics are read before and after
    ``update_topics`` is called with an ``OnlineCountVectorizer`` restricted
    to bigrams, and each before/after pair is expected to differ.
    """
    fitted = copy.deepcopy(request.getfixturevalue(model))
    bigram_vectorizer = OnlineCountVectorizer(stop_words="english", ngram_range=(2, 2))

    def current_topics():
        # One get_topic() result per distinct id found in topics_.
        return [fitted.get_topic(topic_id) for topic_id in set(fitted.topics_)]

    before = current_topics()
    fitted.update_topics(documents, vectorizer_model=bigram_vectorizer)
    after = current_topics()

    for previous, updated in zip(before, after):
        # Topics whose first entry starts with an empty string are skipped;
        # presumably these are placeholder/empty topics -- confirm.
        if previous[0][0] == "":
            continue
        assert previous != updated


@pytest.mark.parametrize("model", ["online_topic_model"])
def test_clean_bow(model, request):
    """Check that ``_clean_bow`` drops columns but keeps every row of ``X_``.

    Works on a deep copy of the fixture named by ``model``. After setting
    ``delete_min_df`` to 2 on its vectorizer and calling ``_clean_bow()``,
    the first dimension of ``X_`` must be unchanged and the second must be
    strictly smaller.
    """
    fitted = copy.deepcopy(request.getfixturevalue(model))
    vectorizer = fitted.vectorizer_model

    shape_before = vectorizer.X_.shape

    # Set the threshold, then trigger the clean-up directly.
    vectorizer.delete_min_df = 2
    vectorizer._clean_bow()

    shape_after = vectorizer.X_.shape

    # Row count is preserved; column count must shrink.
    assert shape_after[0] == shape_before[0]
    assert shape_after[1] < shape_before[1]