Spaces:
Sleeping
Sleeping
| import faiss | |
| import numpy as np | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
def semantic_clusters(documents, k=5):
    """Group documents into at most ``k`` clusters by TF-IDF similarity.

    Each document is embedded as a TF-IDF vector (English stop words
    removed, vocabulary capped at 512 terms), the vectors are clustered
    with faiss k-means, and every document is labelled with the id of its
    nearest centroid.

    Args:
        documents: list[str] of raw document texts.
        k: Requested number of clusters. It is clamped to the range
            ``[1, len(documents)]`` because k-means cannot be trained with
            more centroids than points.

    Returns:
        list[int] with one cluster id per document, in input order. Ids
        are centroid indices in ``range(k)``; not every id is guaranteed
        to be used. Inputs with fewer than two documents are trivially
        assigned to cluster 0.

    Raises:
        ValueError: propagated from ``TfidfVectorizer`` when no usable
            vocabulary can be built (e.g. every document is empty or
            contains only stop words).
    """
    n_docs = len(documents)
    if n_docs < 2:
        # Nothing to cluster: zero or one document share the single id 0.
        return [0] * n_docs

    vectorizer = TfidfVectorizer(max_features=512, stop_words="english")
    tfidf = vectorizer.fit_transform(documents)
    # faiss requires dense, C-contiguous float32 input.
    vectors = np.ascontiguousarray(tfidf.toarray(), dtype="float32")

    # Searching an index built from the documents themselves would return
    # each document as its own nearest neighbour (labels 0..n-1), which is
    # not a clustering. Train k-means centroids and search those instead.
    n_clusters = max(1, min(int(k), n_docs))
    kmeans = faiss.Kmeans(vectors.shape[1], n_clusters)
    kmeans.train(vectors)

    # kmeans.index holds the trained centroids; the top-1 hit for each
    # document is the id of the cluster it belongs to.
    _, labels = kmeans.index.search(vectors, 1)
    return labels.ravel().tolist()