File size: 575 Bytes
45fe8b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from data_loader import load_documents
from preprocess import clean_text
from embedder import embed_documents
from vector_store import build_faiss_index, save_index


def _run_stage(message, step, *args):
    """Print *message*, then call ``step(*args)`` and return its result."""
    print(message)
    return step(*args)


def _clean_all(raw_docs):
    """Return a list with ``clean_text`` applied to each item of *raw_docs*."""
    return [clean_text(text) for text in raw_docs]


# Pipeline script: load -> clean -> embed -> build index -> save.
# Every statement below runs at module top level, in this order.
dataset_path = "data/20_newsgroups"

# The loader's result is unpacked into two names; `labels` is not used again
# in the visible part of this script.
docs, labels = _run_stage("Loading dataset...", load_documents, dataset_path)
docs = _run_stage("Cleaning documents...", _clean_all, docs)
embeddings = _run_stage("Generating embeddings...", embed_documents, docs)
index = _run_stage("Building FAISS index...", build_faiss_index, embeddings)

# save_index receives only the index, so the destination is whatever that
# helper defaults to; the final message presumably mirrors it (TODO confirm).
_run_stage("Saving FAISS index...", save_index, index)
print("FAISS index saved in models/")