"""Build a FAISS index over the documents under ``data/20_newsgroups``.

The script runs a linear pipeline, printing a progress message before each
stage: load documents -> clean text -> embed -> build index -> save index.
"""

from data_loader import load_documents
from preprocess import clean_text
from embedder import embed_documents
from vector_store import build_faiss_index, save_index

# Location of the raw dataset handed to ``load_documents``.
dataset_path = "data/20_newsgroups"

print("Loading dataset...")
# ``labels`` is returned alongside the documents but is not used further
# in this script.
docs, labels = load_documents(dataset_path)

print("Cleaning documents...")
# Clean every document before embedding; the list is rebound to the
# cleaned versions.
docs = [clean_text(d) for d in docs]

print("Generating embeddings...")
embeddings = embed_documents(docs)

print("Building FAISS index...")
index = build_faiss_index(embeddings)

print("Saving FAISS index...")
# NOTE(review): the output location is decided inside ``save_index``; the
# message below presumes it writes under ``models/`` -- confirm.
save_index(index)

print("FAISS index saved in models/")