# Spaces: Sleeping
# File size: 638 Bytes
# 45fe8b6
from data_loader import load_documents
from preprocess import clean_text
from embedder import embed_documents
from fuzzy_cluster import build_gmm_clusters, save_gmm_model
# Fuzzy-clustering pipeline: load -> clean -> embed -> cluster -> save.
# Every step below runs at module level, so importing this file executes
# the whole pipeline; progress is reported on stdout via print().

print("Loading dataset...")
# load_documents returns a pair. The second element is bound to `labels`
# but is not used anywhere in the visible part of this script.
docs, labels = load_documents("data/20_newsgroups")

print("Cleaning documents...")
# Apply clean_text to each document, replacing the raw list.
docs = [clean_text(d) for d in docs]

print("Generating embeddings...")
embeddings = embed_documents(docs)

print("Building fuzzy clusters...")
# build_gmm_clusters returns the fitted model together with a second value
# that exposes `.shape` (presumably per-document cluster membership
# probabilities -- confirm against fuzzy_cluster.build_gmm_clusters).
gmm, cluster_probs = build_gmm_clusters(embeddings)

print("Saving clustering model...")
# Only the model is handed to save_gmm_model; cluster_probs is not persisted here.
save_gmm_model(gmm)

print("Cluster probabilities shape:", cluster_probs.shape)
print("Fuzzy clustering completed.")