Spaces:
Sleeping
Sleeping
# Pipeline script: load a document collection from disk, clean every
# document, then embed a leading sample and report the resulting shape.
# Progress is reported on stdout after each stage.

from data_loader import load_documents
from preprocess import clean_text
from embedder import embed_documents

# Number of leading documents passed to the embedder.
# NOTE(review): presumably capped to keep the run quick -- confirm before
# raising it or embedding the full collection.
EMBED_SAMPLE_SIZE = 100

dataset_path = "data/20_newsgroups"

# load_documents returns a (documents, labels) pair; `labels` is not used
# anywhere below in this script.
docs, labels = load_documents(dataset_path)
print("Loaded:", len(docs))

# Clean every document, not just the sample that gets embedded.
docs = [clean_text(d) for d in docs]
print("Preprocessing done")

# Slicing is safe when fewer than EMBED_SAMPLE_SIZE documents were loaded:
# the slice simply yields whatever is available.
embeddings = embed_documents(docs[:EMBED_SAMPLE_SIZE])
print("Embedding shape:", embeddings.shape)