from abc import abstractmethod, ABC
from typing import List
from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding, Document
from llama_index import StorageContext
from core.lifecycle import Lifecycle
from langchain_manager.manager import BaseLangChainManager
class ServiceContextManager(Lifecycle, ABC):
    """Lifecycle-aware provider of a llama_index ``ServiceContext``.

    Concrete subclasses build the context during their lifecycle init
    phase and expose it through :meth:`get_service_context`.
    """

    @abstractmethod
    def get_service_context(self) -> ServiceContext:
        """Return the managed ``ServiceContext`` instance."""
        ...
class AzureServiceContextManager(ServiceContextManager):
    """ServiceContextManager backed by a LangChain manager (Azure models).

    Builds a llama_index ``ServiceContext`` from the LLM and embedding
    model supplied by ``lc_manager`` during ``do_init``, and logs token
    usage at each lifecycle transition.
    """

    lc_manager: BaseLangChainManager
    service_context: ServiceContext

    def __init__(self, lc_manager: BaseLangChainManager):
        super().__init__()
        self.lc_manager = lc_manager
        # BUGFIX: previously only a class-level annotation existed, so
        # reading self.service_context before do_init() raised
        # AttributeError instead of the intended ValueError guard below.
        self.service_context = None

    def get_service_context(self) -> ServiceContext:
        """Return the initialized ``ServiceContext``.

        Raises:
            ValueError: if ``do_init()`` has not been executed yet.
        """
        if self.service_context is None:
            raise ValueError(
                "service context is not ready, check for lifecycle statement"
            )
        return self.service_context

    def do_init(self) -> None:
        """Build the ServiceContext from the manager's embedding and LLM."""
        # define embedding
        embedding = LangchainEmbedding(self.lc_manager.get_embedding())
        # define LLM
        llm_predictor = LLMPredictor(llm=self.lc_manager.get_llm())
        # configure service context
        self.service_context = ServiceContext.from_defaults(
            llm_predictor=llm_predictor, embed_model=embedding
        )

    def do_start(self) -> None:
        """Log token usage of embedding and predictor at start."""
        self.logger.info(
            "[do_start][embedding] last used usage: %d",
            self.service_context.embed_model.total_tokens_used,
        )
        self.logger.info(
            "[do_start][predict] last used usage: %d",
            self.service_context.llm_predictor.total_tokens_used,
        )

    def do_stop(self) -> None:
        """Log token usage of embedding and predictor at stop."""
        self.logger.info(
            "[do_stop][embedding] last used usage: %d",
            self.service_context.embed_model.total_tokens_used,
        )
        self.logger.info(
            "[do_stop][predict] last used usage: %d",
            self.service_context.llm_predictor.total_tokens_used,
        )

    def do_dispose(self) -> None:
        """Log the total token usage accumulated by the predictor."""
        self.logger.info(
            "[do_dispose] total used token: %d",
            self.service_context.llm_predictor.total_tokens_used,
        )
class StorageContextManager(Lifecycle, ABC):
    """Lifecycle-aware provider of a llama_index ``StorageContext``.

    Concrete subclasses prepare the storage backend during their
    lifecycle phases and expose it through :meth:`get_storage_context`.
    """

    @abstractmethod
    def get_storage_context(self) -> StorageContext:
        """Return the managed ``StorageContext`` instance."""
        ...
class LocalStorageContextManager(StorageContextManager):
    """StorageContextManager that persists indexes to a local directory.

    On init, loads an existing persisted index from ``dataset_path`` if
    present; otherwise downloads documents and builds a fresh index. The
    index is persisted back to disk on dispose.
    """

    storage_context: StorageContext

    def __init__(
        self,
        service_context_manager: ServiceContextManager,
        dataset_path: str = "./dataset",
    ) -> None:
        super().__init__()
        self.dataset_path = dataset_path
        self.service_context_manager = service_context_manager
        # BUGFIX: previously only a class-level annotation existed, so
        # reading self.storage_context before do_init() raised
        # AttributeError; now guarded like AzureServiceContextManager.
        self.storage_context = None

    def get_storage_context(self) -> StorageContext:
        """Return the initialized ``StorageContext``.

        Raises:
            ValueError: if ``do_init()`` has not been executed yet.
        """
        if self.storage_context is None:
            raise ValueError(
                "storage context is not ready, check for lifecycle statement"
            )
        return self.storage_context

    def do_init(self) -> None:
        """Load a persisted index if present, else download and build one."""
        from llama.utils import is_local_storage_files_ready

        if is_local_storage_files_ready(self.dataset_path):
            self.storage_context = StorageContext.from_defaults(
                persist_dir=self.dataset_path
            )
        else:
            docs = self._download()
            self._indexing(docs)

    def do_start(self) -> None:
        # self.logger.info("[do_start]%", **self.storage_context.to_dict())
        pass

    def do_stop(self) -> None:
        # self.logger.info("[do_stop]%", **self.storage_context.to_dict())
        pass

    def do_dispose(self) -> None:
        """Persist the storage context back to the dataset directory."""
        # keyword arg for consistency with _indexing
        self.storage_context.persist(persist_dir=self.dataset_path)

    def _download(self) -> List[Document]:
        """Fetch source documents via the GitHub loader."""
        from llama.data_loader import GithubLoader

        loader = GithubLoader()
        return loader.load()

    def _indexing(self, docs: List[Document]) -> None:
        """Build a vector index over docs, persist it, and keep its context."""
        from llama_index import GPTVectorStoreIndex

        index = GPTVectorStoreIndex.from_documents(
            docs, service_context=self.service_context_manager.get_service_context()
        )
        index.storage_context.persist(persist_dir=self.dataset_path)
        self.storage_context = index.storage_context
|