import pandas as pd
from langchain_community.vectorstores import FAISS
from sentence_transformers import SentenceTransformer
from langchain.document_loaders import DataFrameLoader
from langchain.embeddings.base import Embeddings
from langchain.text_splitter import CharacterTextSplitter


class _SentenceTransformerEmbeddings(Embeddings):
    """Adapt a ``SentenceTransformer`` to LangChain's ``Embeddings`` interface.

    LangChain vector stores call ``embed_documents`` / ``embed_query`` on the
    embedding object, whereas ``SentenceTransformer`` only exposes ``encode``.
    """

    def __init__(self, model):
        self._model = model

    def embed_documents(self, texts):
        """Return one embedding (list of floats) per input text."""
        return self._model.encode(list(texts)).tolist()

    def embed_query(self, text):
        """Return the embedding of a single query string as a list of floats."""
        return self._model.encode(text).tolist()


class VectorStore:
    """In-memory FAISS index built from pandas DataFrames.

    Each DataFrame passed to :meth:`add_dataframe` is kept in ``self.dfs``
    under its source name and its rows are chunked, embedded and added to a
    single FAISS store.
    """

    def __init__(self, persist_dir: str = "vector_db") -> None:
        """Create an empty store and load the sentence-embedding model.

        Args:
            persist_dir: Directory name kept on the instance. No method of
                this class reads it; it is stored for callers.
        """
        self.persist_dir = persist_dir
        # Stays None until the first non-empty DataFrame is indexed.
        self.store = None
        # Load the SentenceTransformer model
        self.embeddings_model = SentenceTransformer(
            "sentence-transformers/all-MiniLM-L6-v2"
        )
        # FAISS needs an object with embed_documents/embed_query, so the raw
        # model is wrapped rather than passed directly.
        self._embeddings = _SentenceTransformerEmbeddings(self.embeddings_model)
        self.dfs = {}

    def add_dataframe(
        self,
        df: pd.DataFrame,
        source_name: str,
        page_content_column: str = "text",
    ) -> None:
        """Register ``df`` under ``source_name`` and index its rows.

        Args:
            df: DataFrame whose rows become documents.
            source_name: Key under which ``df`` is stored in ``self.dfs``.
            page_content_column: Column used as the document text; the
                remaining columns are handled by ``DataFrameLoader``.
        """
        self.dfs[source_name] = df

        docs = DataFrameLoader(
            df, page_content_column=page_content_column
        ).load()
        splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = splitter.split_documents(docs)
        if not chunks:
            # Nothing to embed (e.g. an empty DataFrame); building a FAISS
            # index from zero documents would fail.
            return

        new_index = FAISS.from_documents(chunks, self._embeddings)
        if self.store is None:
            self.store = new_index
        else:
            self.store.merge_from(new_index)

    def as_retriever(self):
        """Return a retriever over the store, or ``None`` if nothing is indexed."""
        if self.store is None:
            return None
        return self.store.as_retriever()

    def get_all_dataframes(self) -> dict:
        """Return the mapping of source name to registered DataFrame."""
        return self.dfs

    def encode_sentences(self, sentences):
        """Return the model's embeddings for the given sentence(s)."""
        # Generate embeddings for the given sentences
        return self.embeddings_model.encode(sentences)

    def calculate_similarity(self, embeddings):
        """Return the pairwise cosine-similarity matrix of ``embeddings``."""
        # Imported lazily so scikit-learn is only needed when this is called.
        from sklearn.metrics.pairwise import cosine_similarity

        return cosine_similarity(embeddings)