Spaces:
Sleeping
Sleeping
# Index-building script: load the corpus, clean it, embed it, build a FAISS
# index from the embeddings, and persist that index. Every stage announces
# itself on stdout before it runs.
from data_loader import load_documents
from preprocess import clean_text
from embedder import embed_documents
from vector_store import (
    build_faiss_index,
    save_index,
)

# Location of the corpus handed to load_documents.
dataset_path = "data/20_newsgroups"

# Stage 1: read the corpus. load_documents returns two values; `labels` is
# not used by any of the steps below.
print("Loading dataset...")
docs, labels = load_documents(dataset_path)

# Stage 2: run clean_text over every document, keeping the original order.
print("Cleaning documents...")
docs = list(map(clean_text, docs))

# Stage 3: embed the cleaned documents in one call.
print("Generating embeddings...")
embeddings = embed_documents(docs)

# Stage 4: build the index from the embeddings.
print("Building FAISS index...")
index = build_faiss_index(embeddings)

# Stage 5: persist the index. No path is passed here, so the destination is
# decided inside save_index.
# NOTE(review): the final message says models/ — confirm it matches
# vector_store.save_index.
print("Saving FAISS index...")
save_index(index)
print("FAISS index saved in models/")