import os

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

from Chunk.chunkingData import chunkData, loadData


def create_vector_database(
    *,
    model_name: str = 'intfloat/multilingual-e5-large',
    output_dir: str = "VectorDatabase",
):
    """Embed the project's document chunks and persist them as ``.npy`` files.

    The documents are loaded with ``loadData()``, split with ``chunkData()``,
    and the ``page_content`` of every chunk is encoded with a
    ``SentenceTransformer`` model. Two files are written into *output_dir*:
    ``embeddings.npy`` (the encoded vectors) and ``texts.npy`` (the chunk
    texts, in the same order as the vectors).

    Args:
        model_name: Name of the SentenceTransformer model to load.
        output_dir: Directory that receives the two ``.npy`` files. It is
            created if it does not exist yet.

    Returns:
        A ``(model, texts, doc_embedding)`` tuple: the loaded model, the list
        of chunk texts, and whatever ``model.encode(texts)`` returned for them.
    """
    # Create the target directory first: np.save() does not create missing
    # parent directories, and failing here is much cheaper than failing after
    # the model has been loaded and every chunk has been encoded.
    os.makedirs(output_dir, exist_ok=True)

    model = SentenceTransformer(model_name)

    documents = loadData()
    chunks = chunkData(documents)
    texts = [chunk.page_content for chunk in chunks]

    # NOTE(review): the E5 model card recommends prefixing inputs with
    # "passage: " / "query: "; the chunks are encoded as-is here. Confirm this
    # matches how queries are encoded at search time.
    doc_embedding = model.encode(texts)

    np.save(os.path.join(output_dir, "embeddings.npy"), doc_embedding)
    # np.save() accepts array-like input, so the list of strings is converted
    # to a NumPy array on write.
    np.save(os.path.join(output_dir, "texts.npy"), texts)

    return model, texts, doc_embedding


if __name__ == "__main__":
    create_vector_database()