File size: 638 Bytes
45fe8b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from data_loader import load_documents
from preprocess import clean_text
from embedder import embed_documents
from fuzzy_cluster import build_gmm_clusters, save_gmm_model


# Pipeline driver. Runs five steps in order, announcing each on stdout:
# load -> clean -> embed -> cluster -> save, then reports the result shape.

# Step 1: fetch the documents and their labels from the dataset path.
# NOTE: `labels` is bound here but not used again in this script.
print("Loading dataset...")
docs, labels = load_documents("data/20_newsgroups")

# Step 2: pass every document through clean_text, keeping the order.
print("Cleaning documents...")
docs = list(map(clean_text, docs))

# Step 3: turn the cleaned documents into embeddings.
print("Generating embeddings...")
embeddings = embed_documents(docs)

# Step 4: build the clusters; the helper hands back the model object and
# the per-document cluster probabilities.
print("Building fuzzy clusters...")
gmm, cluster_probs = build_gmm_clusters(embeddings)

# Step 5: persist the model via save_gmm_model (destination is decided by
# that helper, not by this script).
print("Saving clustering model...")
save_gmm_model(gmm)

# Final report: show the shape of the probability array and signal the end.
print("Cluster probabilities shape:", cluster_probs.shape)
print("Fuzzy clustering completed.")