Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| from langchain_community.vectorstores import FAISS | |
| from sentence_transformers import SentenceTransformer | |
| from langchain.document_loaders import DataFrameLoader | |
| from langchain.text_splitter import CharacterTextSplitter | |
try:
    # Prefer the real LangChain base class so the adapter below is recognised
    # as an Embeddings object; fall back to a plain class if it is unavailable.
    from langchain.embeddings.base import Embeddings as _EmbeddingsBase
except ImportError:
    _EmbeddingsBase = object


class _SentenceTransformerEmbeddings(_EmbeddingsBase):
    """Adapt an object with ``encode(list_of_str)`` to LangChain's embeddings API.

    LangChain vector stores call ``embed_documents`` / ``embed_query`` on the
    embeddings object they are given, whereas a SentenceTransformer model only
    exposes ``encode``. This wrapper bridges the two.
    """

    def __init__(self, model):
        self._model = model

    def embed_documents(self, texts):
        """Return one embedding (a list of floats) per input text."""
        vectors = self._model.encode(list(texts))
        # Convert whatever ``encode`` returned (array rows or lists) to plain
        # Python floats so the result is a list of lists of float.
        return [[float(value) for value in row] for row in vectors]

    def embed_query(self, text):
        """Return the embedding of a single query string."""
        return self.embed_documents([text])[0]

    def __call__(self, text):
        # Lets the adapter also be used where a plain callable
        # ``text -> vector`` is expected instead of an Embeddings object.
        return self.embed_query(text)


class VectorStore:
    """Collect DataFrames and index their documents in a FAISS vector store."""

    def __init__(self, persist_dir="vector_db"):
        """Create an empty store and load the sentence-embedding model.

        Args:
            persist_dir: Stored on the instance as ``self.persist_dir``; the
                methods of this class do not read it.
        """
        self.persist_dir = persist_dir
        # FAISS index; created lazily by the first add_dataframe() call.
        self.store = None
        # Load the SentenceTransformer model
        self.embeddings_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
        # Maps source_name -> the DataFrame registered under that name.
        self.dfs = {}

    def _langchain_embeddings(self):
        """Wrap ``self.embeddings_model`` in a LangChain-compatible adapter."""
        return _SentenceTransformerEmbeddings(self.embeddings_model)

    def add_dataframe(self, df: "pd.DataFrame", source_name) -> None:
        """Register ``df`` under ``source_name`` and add its rows to the index.

        The DataFrame is loaded with ``DataFrameLoader`` using its default
        settings (NOTE(review): by default the loader reads page content from
        a ``"text"`` column - confirm callers provide one), split into chunks
        of 500 characters with an overlap of 50, and embedded into FAISS.

        Args:
            df: DataFrame to index.
            source_name: Key under which ``df`` is kept in ``self.dfs``.
        """
        self.dfs[source_name] = df
        docs = DataFrameLoader(df).load()
        chunks = CharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(docs)
        if not chunks:
            # Nothing to embed (e.g. an empty DataFrame); building a FAISS
            # index from zero documents would fail, so keep the store as is.
            return
        # FAISS needs an object exposing embed_documents/embed_query, which the
        # raw SentenceTransformer model does not provide.
        new_index = FAISS.from_documents(chunks, self._langchain_embeddings())
        if self.store is None:
            self.store = new_index
        else:
            self.store.merge_from(new_index)

    def as_retriever(self):
        """Return a retriever over the index, or ``None`` if nothing is indexed."""
        if self.store is None:
            return None
        return self.store.as_retriever()

    def get_all_dataframes(self):
        """Return the dict mapping each source name to its registered DataFrame."""
        return self.dfs

    def encode_sentences(self, sentences):
        """Return the embedding model's ``encode`` output for ``sentences``."""
        # Generate embeddings for the given sentences
        return self.embeddings_model.encode(sentences)

    def calculate_similarity(self, embeddings):
        """Return the pairwise cosine-similarity matrix of ``embeddings``."""
        # Imported here so scikit-learn is only needed when this method is called.
        from sklearn.metrics.pairwise import cosine_similarity

        return cosine_similarity(embeddings)