File size: 951 Bytes
d4d8ed5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from sklearn.cluster import KMeans, MiniBatchKMeans, AgglomerativeClustering, SpectralClustering
from sklearn.metrics.pairwise import cosine_similarity
import hdbscan


def k_means(docs, k=5, random_state=42):
    """Cluster ``docs`` with scikit-learn's ``KMeans`` and return the labels.

    Args:
        docs: Feature data handed unchanged to ``KMeans.fit_predict``
            (presumably one row per document -- confirm against callers).
        k: Number of clusters to form.
        random_state: Seed for centroid initialisation. Defaults to 42 so
            repeated runs yield the same labels, in line with the other
            seeded helpers in this module; pass ``None`` to restore
            non-deterministic initialisation.

    Returns:
        The cluster-label array produced by ``fit_predict``.
    """
    model = KMeans(n_clusters=k, random_state=random_state)
    return model.fit_predict(docs)

def mini_batch_means(docs, n_clusters=5, batch_size=256, random_state=42):
    """Cluster ``docs`` with scikit-learn's ``MiniBatchKMeans``.

    Args:
        docs: Feature data handed unchanged to
            ``MiniBatchKMeans.fit_predict`` (presumably one row per
            document -- confirm against callers).
        n_clusters: Number of clusters to form. Defaults to 5, the same
            default the other clustering helpers in this module use.
        batch_size: Size of each mini-batch passed to the estimator.
        random_state: Seed passed to the estimator; the fixed default keeps
            runs reproducible. Pass ``None`` for non-deterministic runs.

    Returns:
        The cluster-label array produced by ``fit_predict``.
    """
    model = MiniBatchKMeans(
        n_clusters=n_clusters,
        batch_size=batch_size,
        random_state=random_state,
    )
    return model.fit_predict(docs)

def use_hdbscan(docs, min_cluster_size=3, metric="euclidean"):
    """Cluster ``docs`` with ``hdbscan.HDBSCAN`` and return the labels.

    Unlike the other helpers in this module, no cluster count is supplied;
    the estimator is configured only by ``min_cluster_size`` and ``metric``.

    Args:
        docs: Feature data handed unchanged to ``HDBSCAN.fit_predict``
            (presumably one row per document -- confirm against callers).
        min_cluster_size: Smallest group size the estimator should treat as
            a cluster. Defaults to 3, the value previously hard-coded here.
        metric: Distance metric name forwarded to ``HDBSCAN``.

    Returns:
        The label array produced by ``fit_predict``.
    """
    model = hdbscan.HDBSCAN(metric=metric, min_cluster_size=min_cluster_size)
    return model.fit_predict(docs)

def agglomerative_clustering(docs, n_clusters=5):
    """Cluster ``docs`` hierarchically and return the labels.

    Uses scikit-learn's ``AgglomerativeClustering`` with cosine metric and
    average linkage.

    Args:
        docs: Feature data handed unchanged to ``fit_predict`` (presumably
            one row per document -- confirm against callers).
        n_clusters: Number of clusters to form.

    Returns:
        The cluster-label array produced by ``fit_predict``.
    """
    return AgglomerativeClustering(
        linkage="average",
        metric="cosine",
        n_clusters=n_clusters,
    ).fit_predict(docs)

def spectral_clustering(docs, n_clusters=5):
    """Cluster ``docs`` with spectral clustering on a cosine affinity.

    The pairwise cosine similarity of ``docs`` is computed, negative entries
    are clipped to zero, and the result is given to scikit-learn's
    ``SpectralClustering`` as a precomputed affinity matrix.

    Args:
        docs: Feature data handed to ``cosine_similarity`` (presumably one
            row per document -- confirm against callers).
        n_clusters: Number of clusters to form.

    Returns:
        The cluster-label array produced by ``fit_predict``.
    """
    sim = cosine_similarity(docs)
    # Cosine similarity ranges over [-1, 1], but a precomputed affinity is
    # treated as non-negative graph weights; negative entries make the
    # spectral problem ill-posed. Treat "opposed" vectors as simply
    # unconnected. Inputs with no negative similarities are unaffected.
    sim = sim.clip(min=0.0)
    model = SpectralClustering(
        n_clusters=n_clusters,
        affinity='precomputed',
        random_state=42,
    )
    return model.fit_predict(sim)