Spaces:
Runtime error
Runtime error
| from llama_index import StorageContext | |
| from typing import List | |
| from abc import abstractmethod, ABC | |
| from llama_index import Document | |
| from core.lifecycle import Lifecycle | |
| from llama.data_loader import DirectoryLoader | |
| from llama.service_context import ServiceContextManager | |
class StorageContextManager(Lifecycle, ABC):
    """Lifecycle component that owns a llama_index ``StorageContext``.

    Subclasses supply the storage-specific hooks (readiness check, document
    loading, indexing, loading and persisting); this base class wires them
    together in ``do_init`` and ``do_dispose``.
    """

    # Backing field for the ``storage_context`` property; set by ``do_init``.
    _storage_context: StorageContext

    @property
    def storage_context(self) -> StorageContext:
        """Return the storage context currently held by this manager."""
        return self._storage_context

    @storage_context.setter
    def storage_context(self, value: StorageContext) -> None:
        """Replace the storage context held by this manager."""
        self._storage_context = value

    @abstractmethod
    def _is_embedding_ready(self) -> bool:
        """Return True when a storage context can be loaded instead of built."""

    @abstractmethod
    def _load_data(self) -> List[Document]:
        """Return the documents that should be indexed."""

    @abstractmethod
    def _indexing_embedding(self, docs: List[Document]) -> StorageContext:
        """Index ``docs`` and return the resulting storage context."""

    @abstractmethod
    def _load_storage_context(self) -> StorageContext:
        """Load and return an already existing storage context."""

    @abstractmethod
    def _persist(self) -> None:
        """Persist the current storage context."""

    def do_init(self) -> None:
        """Load the storage context if it is ready, otherwise build and persist it."""
        if self._is_embedding_ready():
            self.storage_context = self._load_storage_context()
        else:
            self.storage_context = self._indexing_embedding(self._load_data())
            # NOTE(review): persisting only the freshly built context; the
            # loaded one was just read from storage, so re-writing it would be
            # redundant -- confirm against the original nesting.
            self._persist()

    def do_dispose(self) -> None:
        """Persist the storage context on disposal, if embeddings are ready."""
        if self._is_embedding_ready():
            self._persist()

    def do_start(self) -> None:
        """No work is performed on start."""
        # self.logger.info("[do_start]%", self.storage_context.__str__())
        pass

    def do_stop(self) -> None:
        """No work is performed on stop."""
        # self.logger.info("[do_stop]%", self.storage_context.__str__())
        pass
class LocalStorageContextManager(StorageContextManager):
    """Storage context manager that persists to a local directory."""

    def __init__(
        self,
        service_context_manager: ServiceContextManager,
        dataset_path: str = "./dataset",
        docs_path: str = "./docs/faq",
    ) -> None:
        """Create the manager.

        Args:
            service_context_manager: Provides the service context passed to
                the vector index when documents are indexed.
            dataset_path: Directory the storage context is loaded from and
                persisted to.
            docs_path: Directory scanned for ``.pdf`` documents to index.
        """
        super().__init__()
        self._dataset_path = dataset_path
        self._docs_path = docs_path
        self._service_context_manager = service_context_manager

    def _is_embedding_ready(self) -> bool:
        """Return whether the local storage files under ``dataset_path`` are ready."""
        # Imported locally, as in the original code.
        from llama.utils import is_local_storage_files_ready

        return is_local_storage_files_ready(self._dataset_path)

    def _load_data(self) -> List[Document]:
        """Load the ``.pdf`` documents found under ``docs_path``."""
        loader = DirectoryLoader(
            dir_path=self._docs_path, required_exts=[".pdf"], exclude_glob=[]
        )
        return loader.load()

    def _indexing_embedding(self, docs: List[Document]) -> StorageContext:
        """Build a vector index over ``docs`` and return its storage context."""
        from llama_index import GPTVectorStoreIndex

        service_context = self._service_context_manager.get_service_context()
        index = GPTVectorStoreIndex.from_documents(
            docs, service_context=service_context
        )
        return index.storage_context

    def _load_storage_context(self) -> StorageContext:
        """Load the storage context persisted under ``dataset_path``."""
        return StorageContext.from_defaults(persist_dir=self._dataset_path)

    def _persist(self) -> None:
        """Persist the current storage context to ``dataset_path``."""
        self.storage_context.persist(self._dataset_path)