botconming's picture
Cập nhật dữ liệu RAG và logic chatbot
936810c
raw
history blame contribute delete
648 Bytes
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from Chunk.chunkingData import chunkData, loadData
def create_vector_database():
    """Build the embedding store for the RAG corpus and persist it to disk.

    Loads the documents with ``loadData``, splits them with ``chunkData``,
    encodes each chunk's ``page_content`` with the
    ``intfloat/multilingual-e5-large`` sentence-transformer, and writes two
    files under ``VectorDatabase/``:

    * ``embeddings.npy`` -- the embeddings returned by ``model.encode``
    * ``texts.npy``      -- the chunk texts, in the same order

    Returns:
        A ``(model, texts, doc_embedding)`` tuple: the loaded
        SentenceTransformer, the list of chunk texts, and their embeddings.
    """
    # Local import so this block does not depend on the file-level imports.
    import os

    model = SentenceTransformer('intfloat/multilingual-e5-large')

    documents = loadData()
    chunks = chunkData(documents)
    texts = [chunk.page_content for chunk in chunks]

    # NOTE(review): the E5 model card recommends prefixing indexed passages
    # with "passage: " (and queries with "query: "). Left unchanged here
    # because the query side is not visible from this file -- confirm.
    doc_embedding = model.encode(texts)

    # np.save does not create missing parent directories; without this the
    # first run on a fresh checkout fails with FileNotFoundError.
    os.makedirs("VectorDatabase", exist_ok=True)
    np.save("VectorDatabase/embeddings.npy", doc_embedding)
    # Texts are stored at the same indices as their embeddings so a row
    # index from a similarity search maps straight back to its chunk.
    np.save("VectorDatabase/texts.npy", texts)

    return model, texts, doc_embedding
# Script entry point: rebuild the on-disk vector database when this file is
# executed directly (not when it is imported as a module).
if __name__ == "__main__":
    create_vector_database()