Spaces:
Runtime error
Runtime error
| import os | |
| import chromadb | |
| from chromadb.utils import embedding_functions | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.vectorstores import Chroma | |
| from langchain.embeddings import SentenceTransformerEmbeddings | |
| from langchain.docstore.document import Document | |
| import uuid | |
class VectorStore:
    """Persistent Chroma-backed store that chunks documents and searches them.

    Two handles onto the same collection are kept:

    * ``self.collection`` - the raw chromadb collection.
    * ``self.vector_store`` - the LangChain ``Chroma`` wrapper used for
      inserts and similarity search.

    Both are created through the single ``self.client`` so that the
    persistence directory is opened only once per instance.

    Every public method returns a dict with a ``"status"`` key that is either
    ``"success"`` or ``"error"``; on error a ``"message"`` key carries the
    stringified exception instead of the exception being raised.
    """

    def __init__(
        self,
        persist_directory="./chroma_db",
        collection_name="research_documents",
        model_name="all-MiniLM-L6-v2",
        chunk_size=1000,
        chunk_overlap=200,
    ):
        """Open (or create) the collection and set up the text splitter.

        Args:
            persist_directory: Directory handed to ``chromadb.PersistentClient``.
            collection_name: Name of the collection to create or reuse.
            model_name: Sentence-transformers model used for embeddings.
            chunk_size: Maximum chunk length, measured with ``len``.
            chunk_overlap: Overlap between consecutive chunks, measured with
                ``len``.
        """
        # Remembered so delete_collection() can rebuild identical handles.
        self._persist_directory = persist_directory
        self._collection_name = collection_name
        self._model_name = model_name

        # Embedding function used by the LangChain wrapper.
        self.embedding_function = SentenceTransformerEmbeddings(model_name=model_name)
        # Single chromadb client shared by both collection handles.
        self.client = chromadb.PersistentClient(path=persist_directory)
        self._open_collection()

        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
        )

    def _open_collection(self):
        """Create or fetch the collection and bind the LangChain wrapper to it."""
        self.collection = self.client.get_or_create_collection(
            name=self._collection_name,
            embedding_function=embedding_functions.SentenceTransformerEmbeddingFunction(
                model_name=self._model_name
            ),
        )
        # Pass the existing client so the wrapper does not open the same
        # directory through a second, independently configured client.
        self.vector_store = Chroma(
            client=self.client,
            collection_name=self._collection_name,
            embedding_function=self.embedding_function,
            persist_directory=self._persist_directory,
        )

    def add_documents(self, documents):
        """Split documents into chunks and add the chunks to the vector store.

        Each chunk keeps its source document's metadata plus a ``"chunk"`` key
        holding the chunk's index within that document. Every chunk is stored
        under a freshly generated UUID4 id.

        Args:
            documents: Iterable of objects exposing ``page_content`` and
                ``metadata`` (e.g. LangChain ``Document`` instances).

        Returns:
            ``{"status": "success", "count": <chunks added>}`` on success, or
            ``{"status": "error", "message": <str>}`` if anything raised.
            Input that yields no chunks is a success with count 0.
        """
        try:
            chunks = []
            for doc in documents:
                # A missing/None metadata mapping is treated as empty.
                base_metadata = doc.metadata or {}
                pieces = self.text_splitter.split_text(doc.page_content)
                for index, text in enumerate(pieces):
                    chunks.append(
                        Document(
                            page_content=text,
                            metadata={**base_metadata, "chunk": index},
                        )
                    )

            # Nothing to insert: skip the store call rather than sending it
            # empty id/document lists.
            if not chunks:
                return {"status": "success", "count": 0}

            ids = [str(uuid.uuid4()) for _ in chunks]
            self.vector_store.add_documents(chunks, ids=ids)
            return {"status": "success", "count": len(chunks)}
        except Exception as e:
            return {"status": "error", "message": str(e)}

    def search(self, query, k=5):
        """Run a similarity search against the vector store.

        Args:
            query: Text to search for.
            k: Number of results requested from the store.

        Returns:
            ``{"status": "success", "documents": <results>}`` on success, or
            ``{"status": "error", "message": <str>}`` if anything raised.
        """
        try:
            docs = self.vector_store.similarity_search(query, k=k)
            return {"status": "success", "documents": docs}
        except Exception as e:
            return {"status": "error", "message": str(e)}

    def delete_collection(self):
        """Delete the entire collection and recreate it empty.

        Both ``self.collection`` and ``self.vector_store`` are rebuilt so that
        neither keeps pointing at the collection that was just deleted.

        Returns:
            ``{"status": "success"}`` on success, or
            ``{"status": "error", "message": <str>}`` if anything raised.
        """
        try:
            self.client.delete_collection(self._collection_name)
            self._open_collection()
            return {"status": "success"}
        except Exception as e:
            return {"status": "error", "message": str(e)}