# Semantic-search-cache/src/test_pipeline.py
# YENUGU SUJITH REDDY
# Initial commit with Git LFS models
# 45fe8b6
from data_loader import load_documents
from preprocess import clean_text
from embedder import embed_documents
# End-to-end smoke run of the pipeline: load -> clean -> embed.
# Runs at import time and reports progress on stdout after each stage.

# Location of the corpus handed to the loader (relative to the working
# directory the script is launched from).
dataset_path = "data/20_newsgroups"

# Stage 1: load. The loader yields a (docs, labels) pair; `labels` is not
# used anywhere else in this script.
docs, labels = load_documents(dataset_path)
print("Loaded:", len(docs))

# Stage 2: clean every loaded document, replacing the raw list in place.
docs = list(map(clean_text, docs))
print("Preprocessing done")

# Stage 3: embed only the first 100 cleaned documents.
# NOTE(review): presumably the cap keeps the smoke run fast — confirm.
_sample = docs[:100]
embeddings = embed_documents(_sample)
# The result is expected to expose a `.shape` attribute (presumably an
# array-like of embeddings — verify against `embedder`).
print("Embedding shape:", embeddings.shape)