Spaces:
Sleeping
Sleeping
| from imports import * | |
| loader = CSVLoader(file_path=r'Seed_Dataset.csv') | |
| data = loader.load() | |
| embeddings = HuggingFaceEmbeddings() | |
| # persist_directory = 'docs/chroma_db/' | |
| # !rm -rf ./docs/chroma_db | |
| vector_db = FAISS.from_documents( | |
| documents=data, | |
| embedding=embeddings, | |
| # persist_directory=persist_directory | |
| ) | |
| # vector_db.persist() | |
| def store_to_df(store): | |
| v_dict = store.docstore._dict | |
| data_rows = [] | |
| for k in v_dict.keys(): | |
| content = v_dict[k].page_content | |
| data_rows.append({"chunk_id" : k, "content" : content}) | |
| vector_df = pd.DataFrame(data_rows) | |
| return vector_df | |
| def show_vstore(store): | |
| vector_df = store_to_df(store) | |
| # display(vector_df) | |
| def delete_tool_examples(store, tool_name, arg_name = None): | |
| vector_df = store_to_df(store) | |
| if (arg_name is not None): | |
| mask = (vector_df['content'].str.contains(tool_name) & vector_df['content'].str.contains(arg_name)) | |
| else: | |
| mask = vector_df['content'].str.contains(tool_name) | |
| chunk_ids_to_delete = vector_df.loc[mask, 'chunk_id'] | |
| print(chunk_ids_to_delete) | |
| try: | |
| store.delete(chunk_ids_to_delete) | |
| except: | |
| print("Unable to delete") | |
| def add_to_vector_store(store, examples): | |
| for example in examples: | |
| doc = Document(page_content = example) | |
| extension = FAISS.from_documents([doc], embeddings) | |
| store.merge_from(extension) | |