Spaces:
Runtime error
Runtime error
| import logging | |
| import os | |
| from typing import List, Optional, Tuple | |
| from langchain_chroma import Chroma | |
| from langchain_core.documents import Document | |
| from langchain_core.embeddings import Embeddings | |
| logger = logging.getLogger(__name__) | |
class VectorStoreManager:
    """Convenience wrapper around a Chroma vector store persisted on disk.

    The manager creates the persist directory on construction, but the
    underlying ``Chroma`` instance is only built when
    ``initialize_vector_store`` is called. Query and mutation helpers log
    failures and return a neutral value (``False`` / ``[]``) instead of
    raising; ``initialize_vector_store`` and ``get_retriever`` re-raise.
    """

    # Number of ids sent per delete call when clearing the collection, so a
    # very large collection is not removed in one oversized request.
    _DELETE_BATCH_SIZE = 5000

    # Maximum number of query characters echoed into log messages.
    _QUERY_PREVIEW_LENGTH = 50

    def __init__(self, persist_directory: str = "./chroma_db", embedding_function: Optional[Embeddings] = None):
        """Store the configuration and make sure the persist directory exists.

        Args:
            persist_directory: Directory handed to Chroma for persistence.
            embedding_function: Embeddings used by the store; may also be
                supplied later through ``initialize_vector_store``.

        Raises:
            Exception: Whatever ``os.makedirs`` raised if the directory
                could not be created.
        """
        self.persist_directory = persist_directory
        self.embedding_function = embedding_function
        self.vector_store: Optional[Chroma] = None
        self._ensure_persist_directory()

    def _ensure_persist_directory(self):
        """Create the persist directory if it does not exist yet."""
        try:
            os.makedirs(self.persist_directory, exist_ok=True)
            logger.info("Persist directory ensured: %s", self.persist_directory)
        except Exception as e:
            logger.error("Error creating persist directory: %s", e)
            # Bare raise keeps the original traceback intact.
            raise

    def _require_store(self):
        """Return the vector store, raising ``ValueError`` if it is not initialized."""
        if self.vector_store is None:
            raise ValueError("Vector store not initialized")
        return self.vector_store

    @classmethod
    def _preview(cls, query: str) -> str:
        """Return ``query`` shortened for logging, with ``...`` only when truncated."""
        if len(query) <= cls._QUERY_PREVIEW_LENGTH:
            return query
        return query[:cls._QUERY_PREVIEW_LENGTH] + "..."

    def initialize_vector_store(self, embedding_function: Optional[Embeddings] = None):
        """Build the Chroma store for the configured persist directory.

        Args:
            embedding_function: Optional replacement for the embeddings
                given to the constructor.

        Raises:
            ValueError: If no embedding function is available.
            Exception: Whatever the ``Chroma`` constructor raised.
        """
        # Compare against None so a falsy-but-valid embeddings object is kept.
        if embedding_function is not None:
            self.embedding_function = embedding_function
        if self.embedding_function is None:
            raise ValueError("Embedding function must be provided")
        try:
            logger.info("Initializing vector store")
            self.vector_store = Chroma(
                persist_directory=self.persist_directory,
                embedding_function=self.embedding_function,
            )
            logger.info("Vector store initialized successfully")
        except Exception as e:
            logger.error("Error initializing vector store: %s", e)
            raise

    def add_documents(self, documents: List[Document]) -> bool:
        """Add documents to the store.

        Returns:
            ``True`` on success (including an empty ``documents`` list, which
            is a no-op), ``False`` if the store is not initialized or the
            underlying call failed.
        """
        try:
            store = self._require_store()
            if not documents:
                # Nothing to embed or write; skip the call to the store.
                logger.info("No documents to add to vector store")
                return True
            logger.info("Adding %d document(s) to vector store", len(documents))
            store.add_documents(documents)
            logger.info("Documents added successfully")
            return True
        except Exception as e:
            # logger.exception keeps the traceback that is otherwise lost
            # because the error is swallowed here.
            logger.exception("Error adding documents to vector store: %s", e)
            return False

    def similarity_search(self, query: str, k: int = 5) -> List[Document]:
        """Return up to ``k`` documents similar to ``query``.

        Returns an empty list if the store is not initialized or the search
        fails; the failure is logged.
        """
        try:
            store = self._require_store()
            logger.info("Performing similarity search for query: '%s'", self._preview(query))
            results = store.similarity_search(query, k=k)
            logger.info("Found %d similar documents", len(results))
            return results
        except Exception as e:
            logger.exception("Error performing similarity search: %s", e)
            return []

    def similarity_search_with_score(self, query: str, k: int = 5) -> List[Tuple[Document, float]]:
        """Return up to ``k`` ``(document, score)`` pairs for ``query``.

        Returns an empty list if the store is not initialized or the search
        fails; the failure is logged.
        """
        try:
            store = self._require_store()
            logger.info(
                "Performing similarity search with scores for query: '%s'",
                self._preview(query),
            )
            results = store.similarity_search_with_score(query, k=k)
            logger.info("Found %d similar documents with scores", len(results))
            return results
        except Exception as e:
            logger.exception("Error performing similarity search with scores: %s", e)
            return []

    def get_retriever(self, search_kwargs: Optional[dict] = None):
        """Create a retriever over the store.

        Args:
            search_kwargs: Overrides merged on top of the default ``{"k": 5}``.

        Raises:
            ValueError: If the store is not initialized.
            Exception: Whatever ``as_retriever`` raised.
        """
        try:
            store = self._require_store()
            # Caller-supplied keys win over the default; the caller's dict is
            # not mutated.
            merged_kwargs = {"k": 5, **(search_kwargs or {})}
            retriever = store.as_retriever(search_kwargs=merged_kwargs)
            logger.info("Retriever created successfully")
            return retriever
        except Exception as e:
            logger.error("Error creating retriever: %s", e)
            raise

    def get_collection_stats(self) -> dict:
        """Return document count, collection name and persist directory.

        When the store is not initialized or the lookup fails, the count is
        ``0`` and the collection name is ``None``; every path returns the
        same set of keys.
        """
        stats = {
            'total_documents': 0,
            'collection_name': None,
            'persist_directory': self.persist_directory,
        }
        if self.vector_store is None:
            return stats
        try:
            # NOTE(review): relies on the private ``_collection`` attribute of
            # the Chroma wrapper -- confirm it exists in the pinned version.
            collection = self.vector_store._collection
            stats['total_documents'] = collection.count()
            stats['collection_name'] = collection.name
            return stats
        except Exception as e:
            logger.exception("Error getting collection stats: %s", e)
            return {
                'total_documents': 0,
                'collection_name': None,
                'persist_directory': self.persist_directory,
            }

    def clear_vector_store(self) -> bool:
        """Delete every record from the collection.

        Returns:
            ``True`` if the store is uninitialized (nothing to clear) or all
            records were deleted, ``False`` if an error occurred.
        """
        if self.vector_store is None:
            return True
        try:
            logger.info("Clearing vector store")
            collection = self.vector_store._collection
            # An empty ``where={}`` filter is rejected by Chroma's filter
            # validation, so enumerate the ids and delete them explicitly.
            # ``include=[]`` asks for ids only.
            record_ids = collection.get(include=[])["ids"]
            for start in range(0, len(record_ids), self._DELETE_BATCH_SIZE):
                collection.delete(ids=record_ids[start:start + self._DELETE_BATCH_SIZE])
            logger.info("Vector store cleared successfully")
            return True
        except Exception as e:
            logger.exception("Error clearing vector store: %s", e)
            return False

    def is_initialized(self) -> bool:
        """Return ``True`` once ``initialize_vector_store`` has built the store."""
        return self.vector_store is not None