File size: 951 Bytes
d4d8ed5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
from sklearn.cluster import KMeans, MiniBatchKMeans, AgglomerativeClustering, SpectralClustering
from sklearn.metrics.pairwise import cosine_similarity
import hdbscan
def k_means(docs, k=5, random_state=42):
    """Cluster ``docs`` with scikit-learn's ``KMeans`` and return the labels.

    Args:
        docs: Feature matrix passed unchanged to ``KMeans.fit_predict``
            (presumably one row per document -- confirm against callers).
        k: Number of clusters to form.
        random_state: Seed for the centroid initialisation. Defaults to 42,
            the same fixed seed used by the other seeded clustering helpers
            in this module, so repeated runs on the same data give the same
            labels. Pass ``None`` to get an unseeded run.

    Returns:
        The result of ``KMeans.fit_predict(docs)``.
    """
    model = KMeans(n_clusters=k, random_state=random_state)
    return model.fit_predict(docs)
def mini_batch_means(docs, n_clusters):
    """Cluster ``docs`` with ``MiniBatchKMeans`` and return the labels.

    The estimator is configured with a batch size of 256 and a fixed
    ``random_state`` of 42.

    Args:
        docs: Feature matrix passed unchanged to ``fit_predict``.
        n_clusters: Number of clusters to form.

    Returns:
        The result of ``MiniBatchKMeans.fit_predict(docs)``.
    """
    return MiniBatchKMeans(
        random_state=42,
        batch_size=256,
        n_clusters=n_clusters,
    ).fit_predict(docs)
def use_hdbscan(docs):
    """Cluster ``docs`` with HDBSCAN and return the labels.

    The clusterer uses the Euclidean metric and a minimum cluster size of 3;
    the number of clusters is not specified by the caller.

    Args:
        docs: Feature matrix passed unchanged to ``fit_predict``.

    Returns:
        The result of ``HDBSCAN.fit_predict(docs)``.
        NOTE(review): HDBSCAN is documented to label noise points as -1;
        confirm that callers handle that value.
    """
    clusterer = hdbscan.HDBSCAN(min_cluster_size=3, metric="euclidean")
    labels = clusterer.fit_predict(docs)
    return labels
def agglomerative_clustering(docs, n_clusters=5):
    """Cluster ``docs`` with average-linkage agglomerative clustering.

    Distances between samples are measured with the cosine metric.

    Args:
        docs: Feature matrix passed unchanged to ``fit_predict``.
        n_clusters: Number of clusters to form.

    Returns:
        The result of ``AgglomerativeClustering.fit_predict(docs)``.
    """
    options = {
        "n_clusters": n_clusters,
        "metric": "cosine",
        "linkage": "average",
    }
    return AgglomerativeClustering(**options).fit_predict(docs)
def spectral_clustering(docs, n_clusters=5):
    """Cluster ``docs`` with spectral clustering on cosine similarities.

    The pairwise cosine similarity of ``docs`` is computed and passed to
    ``SpectralClustering`` as a precomputed affinity matrix, with a fixed
    ``random_state`` of 42.

    Args:
        docs: Feature matrix passed to ``cosine_similarity``.
        n_clusters: Number of clusters to form.

    Returns:
        The result of ``SpectralClustering.fit_predict`` on the affinity
        matrix.
    """
    sim = cosine_similarity(docs)
    # Cosine similarity ranges over [-1, 1], but a precomputed affinity is
    # used as graph edge weights and must be non-negative. Clamp negative
    # similarities to zero ("no edge"); this is a no-op when every
    # similarity is already non-negative.
    affinity = sim.clip(min=0.0)
    model = SpectralClustering(
        n_clusters=n_clusters,
        affinity="precomputed",
        random_state=42,
    )
    return model.fit_predict(affinity)