Spaces:
Sleeping
Sleeping
add chinese-large embedding optimize
#4
by
NickNYU
- opened
- .gitattributes +35 -0
- .gitignore +0 -1
- .idea/.gitignore +0 -8
- .idea/inspectionProfiles/profiles_settings.xml +0 -6
- .idea/llama-xpipe.iml +0 -11
- .idea/misc.xml +0 -4
- .idea/modules.xml +0 -8
- .idea/vcs.xml +0 -6
- app.py +43 -45
- core/__pycache__/__init__.cpython-310.pyc +0 -0
- core/__pycache__/lifecycle.cpython-310.pyc +0 -0
- core/__pycache__/logger_factory.cpython-310.pyc +0 -0
- core/helper.py +5 -12
- core/lifecycle.py +184 -184
- core/test_lifecycle.py +3 -0
- dataset/docstore.json +0 -0
- dataset/index_store.json +1 -1
- dataset/vector_store.json +0 -0
- docs/docs.pkl +0 -0
- github_retriever.py +63 -0
- langchain_manager/__pycache__/__init__.cpython-310.pyc +0 -0
- langchain_manager/manager.py +4 -35
- llama/{storage_context.py → context.py} +60 -20
- llama/data_loader.py +5 -6
- llama/index.py +18 -0
- llama/service_context.py +0 -142
- llama/vector_storage.py +18 -0
- local-requirements.txt +0 -1
- requirements.txt +6 -10
- xpipe_wiki/manager_factory.py +10 -45
- xpipe_wiki/robot_manager.py +4 -9
.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
CHANGED
|
@@ -56,7 +56,6 @@ coverage.xml
|
|
| 56 |
.hypothesis/
|
| 57 |
.pytest_cache/
|
| 58 |
.ruff_cache
|
| 59 |
-
wandb/
|
| 60 |
|
| 61 |
# Translations
|
| 62 |
*.mo
|
|
|
|
| 56 |
.hypothesis/
|
| 57 |
.pytest_cache/
|
| 58 |
.ruff_cache
|
|
|
|
| 59 |
|
| 60 |
# Translations
|
| 61 |
*.mo
|
.idea/.gitignore
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
# Default ignored files
|
| 2 |
-
/shelf/
|
| 3 |
-
/workspace.xml
|
| 4 |
-
# Editor-based HTTP Client requests
|
| 5 |
-
/httpRequests/
|
| 6 |
-
# Datasource local storage ignored files
|
| 7 |
-
/dataSources/
|
| 8 |
-
/dataSources.local.xml
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.idea/inspectionProfiles/profiles_settings.xml
DELETED
|
@@ -1,6 +0,0 @@
|
|
| 1 |
-
<component name="InspectionProjectProfileManager">
|
| 2 |
-
<settings>
|
| 3 |
-
<option name="USE_PROJECT_PROFILE" value="false" />
|
| 4 |
-
<version value="1.0" />
|
| 5 |
-
</settings>
|
| 6 |
-
</component>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.idea/llama-xpipe.iml
DELETED
|
@@ -1,11 +0,0 @@
|
|
| 1 |
-
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
-
<module type="PYTHON_MODULE" version="4">
|
| 3 |
-
<component name="NewModuleRootManager">
|
| 4 |
-
<content url="file://$MODULE_DIR$">
|
| 5 |
-
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
| 6 |
-
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
| 7 |
-
</content>
|
| 8 |
-
<orderEntry type="inheritedJdk" />
|
| 9 |
-
<orderEntry type="sourceFolder" forTests="false" />
|
| 10 |
-
</component>
|
| 11 |
-
</module>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.idea/misc.xml
DELETED
|
@@ -1,4 +0,0 @@
|
|
| 1 |
-
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
-
<project version="4">
|
| 3 |
-
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (llama-xpipe)" project-jdk-type="Python SDK" />
|
| 4 |
-
</project>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.idea/modules.xml
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
-
<project version="4">
|
| 3 |
-
<component name="ProjectModuleManager">
|
| 4 |
-
<modules>
|
| 5 |
-
<module fileurl="file://$PROJECT_DIR$/.idea/llama-xpipe.iml" filepath="$PROJECT_DIR$/.idea/llama-xpipe.iml" />
|
| 6 |
-
</modules>
|
| 7 |
-
</component>
|
| 8 |
-
</project>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.idea/vcs.xml
DELETED
|
@@ -1,6 +0,0 @@
|
|
| 1 |
-
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
-
<project version="4">
|
| 3 |
-
<component name="VcsDirectoryMappings">
|
| 4 |
-
<mapping directory="" vcs="Git" />
|
| 5 |
-
</component>
|
| 6 |
-
</project>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
|
@@ -1,45 +1,43 @@
|
|
| 1 |
-
import logging
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import streamlit as st
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
from xpipe_wiki.
|
| 8 |
-
|
| 9 |
-
logging.basicConfig(
|
| 10 |
-
stream=sys.stdout, level=logging.
|
| 11 |
-
) # logging.DEBUG for more verbose output
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
#
|
| 15 |
-
with st.sidebar:
|
| 16 |
-
st.title("🤗💬 LLM Chat App")
|
| 17 |
-
st.markdown(
|
| 18 |
-
"""
|
| 19 |
-
## About
|
| 20 |
-
This app is an LLM-powered chatbot built using:
|
| 21 |
-
- [Streamlit](https://streamlit.io/)
|
| 22 |
-
- [LangChain](https://python.langchain.com/)
|
| 23 |
-
- [X-Pipe](https://github.com/ctripcorp/x-pipe)
|
| 24 |
-
"""
|
| 25 |
-
)
|
| 26 |
-
# add_vertical_space(5)
|
| 27 |
-
st.write("Made by Nick")
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
def main() -> None:
|
| 31 |
-
st.header("X-Pipe Wiki 机器人 💬")
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
)
|
| 36 |
-
|
| 37 |
-
query
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
load_dotenv()
|
| 45 |
-
main()
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import sys
|
| 3 |
+
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision
|
| 7 |
+
from xpipe_wiki.robot_manager import XPipeWikiRobot, AzureOpenAIXPipeWikiRobot
|
| 8 |
+
|
| 9 |
+
logging.basicConfig(
|
| 10 |
+
stream=sys.stdout, level=logging.DEBUG
|
| 11 |
+
) # logging.DEBUG for more verbose output
|
| 12 |
+
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
|
| 13 |
+
|
| 14 |
+
# Sidebar contents
|
| 15 |
+
with st.sidebar:
|
| 16 |
+
st.title("🤗💬 LLM Chat App")
|
| 17 |
+
st.markdown(
|
| 18 |
+
"""
|
| 19 |
+
## About
|
| 20 |
+
This app is an LLM-powered chatbot built using:
|
| 21 |
+
- [Streamlit](https://streamlit.io/)
|
| 22 |
+
- [LangChain](https://python.langchain.com/)
|
| 23 |
+
- [X-Pipe](https://github.com/ctripcorp/x-pipe)
|
| 24 |
+
"""
|
| 25 |
+
)
|
| 26 |
+
# add_vertical_space(5)
|
| 27 |
+
st.write("Made by Nick")
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def main() -> None:
|
| 31 |
+
st.header("X-Pipe Wiki 机器人 💬")
|
| 32 |
+
robot_manager = XPipeRobotManagerFactory.get_or_create(
|
| 33 |
+
XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0
|
| 34 |
+
)
|
| 35 |
+
robot = robot_manager.get_robot()
|
| 36 |
+
query = st.text_input("X-Pipe Wiki 问题:")
|
| 37 |
+
if query:
|
| 38 |
+
response = robot.ask(question=query)
|
| 39 |
+
st.write(response)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
if __name__ == "__main__":
|
| 43 |
+
main()
|
|
|
|
|
|
core/__pycache__/__init__.cpython-310.pyc
CHANGED
|
Binary files a/core/__pycache__/__init__.cpython-310.pyc and b/core/__pycache__/__init__.cpython-310.pyc differ
|
|
|
core/__pycache__/lifecycle.cpython-310.pyc
CHANGED
|
Binary files a/core/__pycache__/lifecycle.cpython-310.pyc and b/core/__pycache__/lifecycle.cpython-310.pyc differ
|
|
|
core/__pycache__/logger_factory.cpython-310.pyc
CHANGED
|
Binary files a/core/__pycache__/logger_factory.cpython-310.pyc and b/core/__pycache__/logger_factory.cpython-310.pyc differ
|
|
|
core/helper.py
CHANGED
|
@@ -2,30 +2,23 @@ from core.lifecycle import Lifecycle
|
|
| 2 |
|
| 3 |
|
| 4 |
class LifecycleHelper:
|
|
|
|
| 5 |
@classmethod
|
| 6 |
def initialize_if_possible(cls, ls: Lifecycle) -> None:
|
| 7 |
-
if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_initialize(
|
| 8 |
-
ls.lifecycle_state.phase
|
| 9 |
-
):
|
| 10 |
ls.initialize()
|
| 11 |
|
| 12 |
@classmethod
|
| 13 |
def start_if_possible(cls, ls: Lifecycle) -> None:
|
| 14 |
-
if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_start(
|
| 15 |
-
ls.lifecycle_state.phase
|
| 16 |
-
):
|
| 17 |
ls.start()
|
| 18 |
|
| 19 |
@classmethod
|
| 20 |
def stop_if_possible(cls, ls: Lifecycle) -> None:
|
| 21 |
-
if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_stop(
|
| 22 |
-
ls.lifecycle_state.phase
|
| 23 |
-
):
|
| 24 |
ls.stop()
|
| 25 |
|
| 26 |
@classmethod
|
| 27 |
def dispose_if_possible(cls, ls: Lifecycle) -> None:
|
| 28 |
-
if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_dispose(
|
| 29 |
-
ls.lifecycle_state.phase
|
| 30 |
-
):
|
| 31 |
ls.dispose()
|
|
|
|
| 2 |
|
| 3 |
|
| 4 |
class LifecycleHelper:
|
| 5 |
+
|
| 6 |
@classmethod
|
| 7 |
def initialize_if_possible(cls, ls: Lifecycle) -> None:
|
| 8 |
+
if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_initialize(ls.lifecycle_state.phase):
|
|
|
|
|
|
|
| 9 |
ls.initialize()
|
| 10 |
|
| 11 |
@classmethod
|
| 12 |
def start_if_possible(cls, ls: Lifecycle) -> None:
|
| 13 |
+
if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_start(ls.lifecycle_state.phase):
|
|
|
|
|
|
|
| 14 |
ls.start()
|
| 15 |
|
| 16 |
@classmethod
|
| 17 |
def stop_if_possible(cls, ls: Lifecycle) -> None:
|
| 18 |
+
if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_stop(ls.lifecycle_state.phase):
|
|
|
|
|
|
|
| 19 |
ls.stop()
|
| 20 |
|
| 21 |
@classmethod
|
| 22 |
def dispose_if_possible(cls, ls: Lifecycle) -> None:
|
| 23 |
+
if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_dispose(ls.lifecycle_state.phase):
|
|
|
|
|
|
|
| 24 |
ls.dispose()
|
core/lifecycle.py
CHANGED
|
@@ -1,184 +1,184 @@
|
|
| 1 |
-
import enum
|
| 2 |
-
from abc import ABC, abstractmethod
|
| 3 |
-
from typing import TypeVar, Optional
|
| 4 |
-
|
| 5 |
-
from core import logger_factory
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
class Initializable(ABC):
|
| 9 |
-
@abstractmethod
|
| 10 |
-
def initialize(self) -> None:
|
| 11 |
-
pass
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
class Startable(ABC):
|
| 15 |
-
@abstractmethod
|
| 16 |
-
def start(self) -> None:
|
| 17 |
-
pass
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
class Stoppable(ABC):
|
| 21 |
-
@abstractmethod
|
| 22 |
-
def stop(self) -> None:
|
| 23 |
-
pass
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
class Disposable(ABC):
|
| 27 |
-
@abstractmethod
|
| 28 |
-
def dispose(self) -> None:
|
| 29 |
-
pass
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
class LifecycleAware(ABC):
|
| 33 |
-
def __init__(self, state: "LifecycleState") -> None:
|
| 34 |
-
"""
|
| 35 |
-
Args:
|
| 36 |
-
state(LifecycleState): lifecycle state
|
| 37 |
-
"""
|
| 38 |
-
self.state = state
|
| 39 |
-
|
| 40 |
-
def get_lifecycle_state(self) -> "LifecycleState":
|
| 41 |
-
return self.state
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
class Lifecycle(Initializable, Startable, Stoppable, Disposable, LifecycleAware, ABC):
|
| 45 |
-
def __init__(self) -> None:
|
| 46 |
-
self.logger = logger_factory.get_logger(self.__class__.__name__)
|
| 47 |
-
self.lifecycle_state = LifecycleState(lifecycle=self)
|
| 48 |
-
|
| 49 |
-
def initialize(self) -> None:
|
| 50 |
-
if not self.lifecycle_state.can_initialize(self.lifecycle_state.get_phase()):
|
| 51 |
-
self.logger.warning("[{}]cannot initialize".format(self.__class__.__name__))
|
| 52 |
-
return
|
| 53 |
-
self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZING)
|
| 54 |
-
self.do_init()
|
| 55 |
-
self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZED)
|
| 56 |
-
|
| 57 |
-
def start(self) -> None:
|
| 58 |
-
if not self.lifecycle_state.can_start(self.lifecycle_state.get_phase()):
|
| 59 |
-
self.logger.warning("[{}]cannot start".format(self.__class__.__name__))
|
| 60 |
-
return
|
| 61 |
-
self.lifecycle_state.set_phase(LifecyclePhase.STARTING)
|
| 62 |
-
self.do_start()
|
| 63 |
-
self.lifecycle_state.set_phase(LifecyclePhase.STARTED)
|
| 64 |
-
|
| 65 |
-
def stop(self) -> None:
|
| 66 |
-
if not self.lifecycle_state.can_stop(self.lifecycle_state.get_phase()):
|
| 67 |
-
self.logger.warning("[{}]cannot stop".format(self.__class__.__name__))
|
| 68 |
-
return
|
| 69 |
-
self.lifecycle_state.set_phase(LifecyclePhase.STOPPING)
|
| 70 |
-
self.do_stop()
|
| 71 |
-
self.lifecycle_state.set_phase(LifecyclePhase.STOPPED)
|
| 72 |
-
|
| 73 |
-
def dispose(self) -> None:
|
| 74 |
-
if not self.lifecycle_state.can_dispose(self.lifecycle_state.get_phase()):
|
| 75 |
-
self.logger.warning("[{}]cannot dispose".format(self.__class__.__name__))
|
| 76 |
-
return
|
| 77 |
-
self.lifecycle_state.set_phase(LifecyclePhase.DISPOSING)
|
| 78 |
-
self.do_dispose()
|
| 79 |
-
self.lifecycle_state.set_phase(LifecyclePhase.DISPOSED)
|
| 80 |
-
|
| 81 |
-
@abstractmethod
|
| 82 |
-
def do_init(self) -> None:
|
| 83 |
-
pass
|
| 84 |
-
|
| 85 |
-
@abstractmethod
|
| 86 |
-
def do_start(self) -> None:
|
| 87 |
-
pass
|
| 88 |
-
|
| 89 |
-
@abstractmethod
|
| 90 |
-
def do_stop(self) -> None:
|
| 91 |
-
pass
|
| 92 |
-
|
| 93 |
-
@abstractmethod
|
| 94 |
-
def do_dispose(self) -> None:
|
| 95 |
-
pass
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
class LifecyclePhase(enum.Enum):
|
| 99 |
-
INITIALIZING = 1
|
| 100 |
-
INITIALIZED = 2
|
| 101 |
-
STARTING = 3
|
| 102 |
-
STARTED = 4
|
| 103 |
-
STOPPING = 5
|
| 104 |
-
STOPPED = 6
|
| 105 |
-
DISPOSING = 7
|
| 106 |
-
DISPOSED = 8
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
class LifecycleController(ABC):
|
| 110 |
-
def can_initialize(self, phase: Optional[LifecyclePhase]) -> bool:
|
| 111 |
-
return phase is None or phase == LifecyclePhase.DISPOSED
|
| 112 |
-
|
| 113 |
-
def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
|
| 114 |
-
return phase is not None and (
|
| 115 |
-
|
| 116 |
-
)
|
| 117 |
-
|
| 118 |
-
def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
|
| 119 |
-
return phase is not None and phase == LifecyclePhase.STARTED
|
| 120 |
-
|
| 121 |
-
def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
|
| 122 |
-
return phase is not None and (
|
| 123 |
-
|
| 124 |
-
)
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
LS = TypeVar("LS", bound=Lifecycle)
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
class LifecycleState(LifecycleController, ABC):
|
| 131 |
-
phase: Optional[LifecyclePhase]
|
| 132 |
-
|
| 133 |
-
def __init__(self, lifecycle: LS) -> None:
|
| 134 |
-
self.phase = None
|
| 135 |
-
self.prev_phase = None
|
| 136 |
-
self.lifecycle = lifecycle
|
| 137 |
-
self.logger = logger_factory.get_logger(__name__)
|
| 138 |
-
|
| 139 |
-
def is_initializing(self) -> bool:
|
| 140 |
-
return self.phase == LifecyclePhase.INITIALIZING
|
| 141 |
-
|
| 142 |
-
def is_initialized(self) -> bool:
|
| 143 |
-
return self.phase == LifecyclePhase.INITIALIZED
|
| 144 |
-
|
| 145 |
-
def is_starting(self) -> bool:
|
| 146 |
-
return self.phase == LifecyclePhase.STARTING
|
| 147 |
-
|
| 148 |
-
def is_started(self) -> bool:
|
| 149 |
-
return self.phase == LifecyclePhase.STARTED
|
| 150 |
-
|
| 151 |
-
def is_stopping(self) -> bool:
|
| 152 |
-
return self.phase == LifecyclePhase.STOPPING
|
| 153 |
-
|
| 154 |
-
def is_stopped(self) -> bool:
|
| 155 |
-
return self.phase == LifecyclePhase.STOPPED
|
| 156 |
-
|
| 157 |
-
def is_disposing(self) -> bool:
|
| 158 |
-
return self.phase == LifecyclePhase.DISPOSING
|
| 159 |
-
|
| 160 |
-
def is_disposed(self) -> bool:
|
| 161 |
-
return self.phase == LifecyclePhase.DISPOSED
|
| 162 |
-
|
| 163 |
-
def get_phase(self) -> Optional[LifecyclePhase]:
|
| 164 |
-
return self.phase
|
| 165 |
-
|
| 166 |
-
def set_phase(self, phase: Optional[LifecyclePhase]) -> None:
|
| 167 |
-
prev = "None"
|
| 168 |
-
if self.phase is not None:
|
| 169 |
-
prev = self.phase.name
|
| 170 |
-
current = "None"
|
| 171 |
-
if phase is not None:
|
| 172 |
-
current = phase.name
|
| 173 |
-
self.logger.info(
|
| 174 |
-
"[setPhaseName][{}]{} --> {}".format(
|
| 175 |
-
self.lifecycle.__class__.__name__,
|
| 176 |
-
prev,
|
| 177 |
-
current,
|
| 178 |
-
)
|
| 179 |
-
)
|
| 180 |
-
self.phase = phase
|
| 181 |
-
|
| 182 |
-
def rollback(self, err: Exception) -> None:
|
| 183 |
-
self.phase = self.prev_phase
|
| 184 |
-
self.prev_phase = None
|
|
|
|
| 1 |
+
import enum
|
| 2 |
+
from abc import ABC, abstractmethod
|
| 3 |
+
from typing import TypeVar, Optional
|
| 4 |
+
|
| 5 |
+
from core import logger_factory
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Initializable(ABC):
|
| 9 |
+
@abstractmethod
|
| 10 |
+
def initialize(self) -> None:
|
| 11 |
+
pass
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class Startable(ABC):
|
| 15 |
+
@abstractmethod
|
| 16 |
+
def start(self) -> None:
|
| 17 |
+
pass
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class Stoppable(ABC):
|
| 21 |
+
@abstractmethod
|
| 22 |
+
def stop(self) -> None:
|
| 23 |
+
pass
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class Disposable(ABC):
|
| 27 |
+
@abstractmethod
|
| 28 |
+
def dispose(self) -> None:
|
| 29 |
+
pass
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class LifecycleAware(ABC):
|
| 33 |
+
def __init__(self, state: "LifecycleState") -> None:
|
| 34 |
+
"""
|
| 35 |
+
Args:
|
| 36 |
+
state(LifecycleState): lifecycle state
|
| 37 |
+
"""
|
| 38 |
+
self.state = state
|
| 39 |
+
|
| 40 |
+
def get_lifecycle_state(self) -> "LifecycleState":
|
| 41 |
+
return self.state
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class Lifecycle(Initializable, Startable, Stoppable, Disposable, LifecycleAware, ABC):
|
| 45 |
+
def __init__(self) -> None:
|
| 46 |
+
self.logger = logger_factory.get_logger(self.__class__.__name__)
|
| 47 |
+
self.lifecycle_state = LifecycleState(lifecycle=self)
|
| 48 |
+
|
| 49 |
+
def initialize(self) -> None:
|
| 50 |
+
if not self.lifecycle_state.can_initialize(self.lifecycle_state.get_phase()):
|
| 51 |
+
self.logger.warning("[{}]cannot initialize".format(self.__class__.__name__))
|
| 52 |
+
return
|
| 53 |
+
self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZING)
|
| 54 |
+
self.do_init()
|
| 55 |
+
self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZED)
|
| 56 |
+
|
| 57 |
+
def start(self) -> None:
|
| 58 |
+
if not self.lifecycle_state.can_start(self.lifecycle_state.get_phase()):
|
| 59 |
+
self.logger.warning("[{}]cannot start".format(self.__class__.__name__))
|
| 60 |
+
return
|
| 61 |
+
self.lifecycle_state.set_phase(LifecyclePhase.STARTING)
|
| 62 |
+
self.do_start()
|
| 63 |
+
self.lifecycle_state.set_phase(LifecyclePhase.STARTED)
|
| 64 |
+
|
| 65 |
+
def stop(self) -> None:
|
| 66 |
+
if not self.lifecycle_state.can_stop(self.lifecycle_state.get_phase()):
|
| 67 |
+
self.logger.warning("[{}]cannot stop".format(self.__class__.__name__))
|
| 68 |
+
return
|
| 69 |
+
self.lifecycle_state.set_phase(LifecyclePhase.STOPPING)
|
| 70 |
+
self.do_stop()
|
| 71 |
+
self.lifecycle_state.set_phase(LifecyclePhase.STOPPED)
|
| 72 |
+
|
| 73 |
+
def dispose(self) -> None:
|
| 74 |
+
if not self.lifecycle_state.can_dispose(self.lifecycle_state.get_phase()):
|
| 75 |
+
self.logger.warning("[{}]cannot dispose".format(self.__class__.__name__))
|
| 76 |
+
return
|
| 77 |
+
self.lifecycle_state.set_phase(LifecyclePhase.DISPOSING)
|
| 78 |
+
self.do_dispose()
|
| 79 |
+
self.lifecycle_state.set_phase(LifecyclePhase.DISPOSED)
|
| 80 |
+
|
| 81 |
+
@abstractmethod
|
| 82 |
+
def do_init(self) -> None:
|
| 83 |
+
pass
|
| 84 |
+
|
| 85 |
+
@abstractmethod
|
| 86 |
+
def do_start(self) -> None:
|
| 87 |
+
pass
|
| 88 |
+
|
| 89 |
+
@abstractmethod
|
| 90 |
+
def do_stop(self) -> None:
|
| 91 |
+
pass
|
| 92 |
+
|
| 93 |
+
@abstractmethod
|
| 94 |
+
def do_dispose(self) -> None:
|
| 95 |
+
pass
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
class LifecyclePhase(enum.Enum):
|
| 99 |
+
INITIALIZING = 1
|
| 100 |
+
INITIALIZED = 2
|
| 101 |
+
STARTING = 3
|
| 102 |
+
STARTED = 4
|
| 103 |
+
STOPPING = 5
|
| 104 |
+
STOPPED = 6
|
| 105 |
+
DISPOSING = 7
|
| 106 |
+
DISPOSED = 8
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
class LifecycleController(ABC):
|
| 110 |
+
def can_initialize(self, phase: Optional[LifecyclePhase]) -> bool:
|
| 111 |
+
return phase is None or phase == LifecyclePhase.DISPOSED
|
| 112 |
+
|
| 113 |
+
def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
|
| 114 |
+
return phase is not None and (
|
| 115 |
+
phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
|
| 116 |
+
)
|
| 117 |
+
|
| 118 |
+
def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
|
| 119 |
+
return phase is not None and phase == LifecyclePhase.STARTED
|
| 120 |
+
|
| 121 |
+
def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
|
| 122 |
+
return phase is not None and (
|
| 123 |
+
phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
|
| 124 |
+
)
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
LS = TypeVar("LS", bound=Lifecycle)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
class LifecycleState(LifecycleController, ABC):
|
| 131 |
+
phase: Optional[LifecyclePhase]
|
| 132 |
+
|
| 133 |
+
def __init__(self, lifecycle: LS) -> None:
|
| 134 |
+
self.phase = None
|
| 135 |
+
self.prev_phase = None
|
| 136 |
+
self.lifecycle = lifecycle
|
| 137 |
+
self.logger = logger_factory.get_logger(__name__)
|
| 138 |
+
|
| 139 |
+
def is_initializing(self) -> bool:
|
| 140 |
+
return self.phase == LifecyclePhase.INITIALIZING
|
| 141 |
+
|
| 142 |
+
def is_initialized(self) -> bool:
|
| 143 |
+
return self.phase == LifecyclePhase.INITIALIZED
|
| 144 |
+
|
| 145 |
+
def is_starting(self) -> bool:
|
| 146 |
+
return self.phase == LifecyclePhase.STARTING
|
| 147 |
+
|
| 148 |
+
def is_started(self) -> bool:
|
| 149 |
+
return self.phase == LifecyclePhase.STARTED
|
| 150 |
+
|
| 151 |
+
def is_stopping(self) -> bool:
|
| 152 |
+
return self.phase == LifecyclePhase.STOPPING
|
| 153 |
+
|
| 154 |
+
def is_stopped(self) -> bool:
|
| 155 |
+
return self.phase == LifecyclePhase.STOPPED
|
| 156 |
+
|
| 157 |
+
def is_disposing(self) -> bool:
|
| 158 |
+
return self.phase == LifecyclePhase.DISPOSING
|
| 159 |
+
|
| 160 |
+
def is_disposed(self) -> bool:
|
| 161 |
+
return self.phase == LifecyclePhase.DISPOSED
|
| 162 |
+
|
| 163 |
+
def get_phase(self) -> Optional[LifecyclePhase]:
|
| 164 |
+
return self.phase
|
| 165 |
+
|
| 166 |
+
def set_phase(self, phase: Optional[LifecyclePhase]) -> None:
|
| 167 |
+
prev = "None"
|
| 168 |
+
if self.phase is not None:
|
| 169 |
+
prev = self.phase.name
|
| 170 |
+
current = "None"
|
| 171 |
+
if phase is not None:
|
| 172 |
+
current = phase.name
|
| 173 |
+
self.logger.info(
|
| 174 |
+
"[setPhaseName][{}]{} --> {}".format(
|
| 175 |
+
self.lifecycle.__class__.__name__,
|
| 176 |
+
prev,
|
| 177 |
+
current,
|
| 178 |
+
)
|
| 179 |
+
)
|
| 180 |
+
self.phase = phase
|
| 181 |
+
|
| 182 |
+
def rollback(self, err: Exception) -> None:
|
| 183 |
+
self.phase = self.prev_phase
|
| 184 |
+
self.prev_phase = None
|
core/test_lifecycle.py
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
|
|
| 1 |
from unittest import TestCase
|
| 2 |
|
| 3 |
from core.lifecycle import Lifecycle
|
| 4 |
|
|
|
|
|
|
|
| 5 |
|
| 6 |
class SubLifecycle(Lifecycle):
|
| 7 |
def __init__(self) -> None:
|
|
|
|
| 1 |
+
import logging
|
| 2 |
from unittest import TestCase
|
| 3 |
|
| 4 |
from core.lifecycle import Lifecycle
|
| 5 |
|
| 6 |
+
logging.basicConfig()
|
| 7 |
+
|
| 8 |
|
| 9 |
class SubLifecycle(Lifecycle):
|
| 10 |
def __init__(self) -> None:
|
dataset/docstore.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
dataset/index_store.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"index_store/data": {"
|
|
|
|
| 1 |
+
{"index_store/data": {"0928a9d6-bc3f-467f-9464-1d03e5aa155d": {"__type__": "vector_store", "__data__": "{\"index_id\": \"0928a9d6-bc3f-467f-9464-1d03e5aa155d\", \"summary\": null, \"nodes_dict\": {\"a763d37e-d6ef-42ad-b0ce-143ed9698d4b\": \"a763d37e-d6ef-42ad-b0ce-143ed9698d4b\", \"9d8b60d0-64b1-4074-8fb3-a33c961270ed\": \"9d8b60d0-64b1-4074-8fb3-a33c961270ed\", \"b051986d-d812-4326-b811-4ae17ae3c012\": \"b051986d-d812-4326-b811-4ae17ae3c012\", \"68d8c48a-1f9d-4b05-bf9a-e3b9490821d5\": \"68d8c48a-1f9d-4b05-bf9a-e3b9490821d5\", \"8cfea388-ca09-4fb3-88d5-8570f6231d28\": \"8cfea388-ca09-4fb3-88d5-8570f6231d28\", \"f1edfc04-f95c-40c4-8d95-e7cd42745b62\": \"f1edfc04-f95c-40c4-8d95-e7cd42745b62\", \"1ec3388c-c4e5-4706-a412-525b68481002\": \"1ec3388c-c4e5-4706-a412-525b68481002\", \"c2fcee2a-5c0d-4d26-86a0-273e87963874\": \"c2fcee2a-5c0d-4d26-86a0-273e87963874\", \"bd151167-8a9b-47e4-be4b-b03cb10c65b1\": \"bd151167-8a9b-47e4-be4b-b03cb10c65b1\", \"3f5d936e-80a3-463c-ae17-fd9c376ff3e1\": \"3f5d936e-80a3-463c-ae17-fd9c376ff3e1\", \"91ab0928-bc7a-4fef-8693-308ad9764ef4\": \"91ab0928-bc7a-4fef-8693-308ad9764ef4\", \"fae01dbb-1bd5-47b5-989f-38f6029e2f4c\": \"fae01dbb-1bd5-47b5-989f-38f6029e2f4c\", \"3424533f-5d8c-4149-8ffe-3b53cadfb4fb\": \"3424533f-5d8c-4149-8ffe-3b53cadfb4fb\", \"00e72630-a81a-4bd8-bebe-a52f47bd2087\": \"00e72630-a81a-4bd8-bebe-a52f47bd2087\", \"a32f87c5-45c7-4e21-85cb-d49b6dd759ab\": \"a32f87c5-45c7-4e21-85cb-d49b6dd759ab\", \"4593da6f-276b-4f80-bb95-00e23bafb74b\": \"4593da6f-276b-4f80-bb95-00e23bafb74b\", \"578f3e77-3ec2-4b86-a2f8-a0a34d9ce810\": \"578f3e77-3ec2-4b86-a2f8-a0a34d9ce810\", \"8fc8d087-6e09-4fb4-bac0-5fa6bfed7a15\": \"8fc8d087-6e09-4fb4-bac0-5fa6bfed7a15\", \"c38b7421-554a-49eb-b0cc-7c0de3723ef9\": \"c38b7421-554a-49eb-b0cc-7c0de3723ef9\", \"739c5748-36c1-4087-b926-419639d4da27\": \"739c5748-36c1-4087-b926-419639d4da27\", \"b80c39d8-a895-48d1-a2dc-dc13ab03fb0a\": \"b80c39d8-a895-48d1-a2dc-dc13ab03fb0a\", \"01d4860e-11ef-430d-9b9c-ee1bb34680f7\": \"01d4860e-11ef-430d-9b9c-ee1bb34680f7\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
|
dataset/vector_store.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
docs/docs.pkl
CHANGED
|
Binary files a/docs/docs.pkl and b/docs/docs.pkl differ
|
|
|
github_retriever.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from llama_hub.github_repo import GithubRepositoryReader, GithubClient
|
| 2 |
+
from llama_index import download_loader, GPTVectorStoreIndex
|
| 3 |
+
from llama_index import LLMPredictor, VectorStoreIndex, ServiceContext
|
| 4 |
+
from langchain.llms import AzureOpenAI
|
| 5 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
| 6 |
+
from llama_index import LangchainEmbedding, ServiceContext
|
| 7 |
+
from llama_index import StorageContext, load_index_from_storage
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
import os
|
| 10 |
+
import pickle
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def main() -> None:
|
| 14 |
+
# define embedding
|
| 15 |
+
embedding = LangchainEmbedding(OpenAIEmbeddings(chunk_size=1))
|
| 16 |
+
# define LLM
|
| 17 |
+
llm_predictor = LLMPredictor(
|
| 18 |
+
llm=AzureOpenAI(
|
| 19 |
+
engine="text-davinci-003",
|
| 20 |
+
model_name="text-davinci-003",
|
| 21 |
+
)
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
# configure service context
|
| 25 |
+
service_context = ServiceContext.from_defaults(
|
| 26 |
+
llm_predictor=llm_predictor, embed_model=embedding
|
| 27 |
+
)
|
| 28 |
+
download_loader("GithubRepositoryReader")
|
| 29 |
+
docs = None
|
| 30 |
+
if os.path.exists("docs/docs.pkl"):
|
| 31 |
+
with open("docs/docs.pkl", "rb") as f:
|
| 32 |
+
docs = pickle.load(f)
|
| 33 |
+
|
| 34 |
+
if docs is None:
|
| 35 |
+
github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
|
| 36 |
+
loader = GithubRepositoryReader(
|
| 37 |
+
github_client,
|
| 38 |
+
owner="ctripcorp",
|
| 39 |
+
repo="x-pipe",
|
| 40 |
+
filter_directories=(
|
| 41 |
+
[".", "doc"],
|
| 42 |
+
GithubRepositoryReader.FilterType.INCLUDE,
|
| 43 |
+
),
|
| 44 |
+
filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
|
| 45 |
+
verbose=True,
|
| 46 |
+
concurrent_requests=10,
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
docs = loader.load_data(branch="master")
|
| 50 |
+
|
| 51 |
+
with open("docs/docs.pkl", "wb") as f:
|
| 52 |
+
pickle.dump(docs, f)
|
| 53 |
+
|
| 54 |
+
index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
|
| 55 |
+
|
| 56 |
+
query_engine = index.as_query_engine(service_context=service_context)
|
| 57 |
+
response = query_engine.query("如何使用X-Pipe?")
|
| 58 |
+
print(response)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
if __name__ == "__main__":
|
| 62 |
+
load_dotenv()
|
| 63 |
+
main()
|
langchain_manager/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (154 Bytes). View file
|
|
|
langchain_manager/manager.py
CHANGED
|
@@ -5,6 +5,8 @@ from langchain.embeddings.base import Embeddings as LCEmbeddings
|
|
| 5 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
| 6 |
from langchain.llms import AzureOpenAI
|
| 7 |
|
|
|
|
|
|
|
| 8 |
|
| 9 |
class BaseLangChainManager(ABC):
|
| 10 |
def __init__(self) -> None:
|
|
@@ -22,49 +24,16 @@ class BaseLangChainManager(ABC):
|
|
| 22 |
class LangChainAzureManager(BaseLangChainManager):
|
| 23 |
def __init__(self) -> None:
|
| 24 |
super().__init__()
|
| 25 |
-
self.embedding = OpenAIEmbeddings(client=None, chunk_size=1)
|
| 26 |
-
self.llm = AzureOpenAI(
|
| 27 |
-
deployment_name="text-davinci-003",
|
| 28 |
-
# model_name="text-davinci-003",
|
| 29 |
-
model="text-davinci-003",
|
| 30 |
-
client=None,
|
| 31 |
-
# temperature set to 0.0(default 0.7) to get a certain answer from OpenAI,
|
| 32 |
-
# as a wiki robot we won't want to get flexible answers
|
| 33 |
-
temperature=0.0,
|
| 34 |
-
# GPT-3 default is 4096, however, openai.py default is 256
|
| 35 |
-
max_tokens=2048,
|
| 36 |
-
)
|
| 37 |
|
| 38 |
# Override
|
| 39 |
def get_embedding(self) -> LCEmbeddings:
|
| 40 |
-
return
|
| 41 |
|
| 42 |
# Override
|
| 43 |
def get_llm(self) -> BaseLanguageModel:
|
| 44 |
-
return
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
class LangChainHuggingFaceManager(BaseLangChainManager):
|
| 48 |
-
def __init__(self) -> None:
|
| 49 |
-
super().__init__()
|
| 50 |
-
from transformers import AutoTokenizer, AutoModel
|
| 51 |
-
|
| 52 |
-
AutoTokenizer.from_pretrained("GanymedeNil/text2vec-large-chinese")
|
| 53 |
-
|
| 54 |
-
AutoModel.from_pretrained("GanymedeNil/text2vec-large-chinese")
|
| 55 |
-
|
| 56 |
-
self.embedding = OpenAIEmbeddings(client=None, chunk_size=1)
|
| 57 |
-
self.llm = AzureOpenAI(
|
| 58 |
deployment_name="text-davinci-003",
|
| 59 |
# model_name="text-davinci-003",
|
| 60 |
model="text-davinci-003",
|
| 61 |
client=None,
|
| 62 |
)
|
| 63 |
-
|
| 64 |
-
# Override
|
| 65 |
-
def get_embedding(self) -> LCEmbeddings:
|
| 66 |
-
return self.embedding
|
| 67 |
-
|
| 68 |
-
# Override
|
| 69 |
-
def get_llm(self) -> BaseLanguageModel:
|
| 70 |
-
return self.llm
|
|
|
|
| 5 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
| 6 |
from langchain.llms import AzureOpenAI
|
| 7 |
|
| 8 |
+
from core.lifecycle import Lifecycle
|
| 9 |
+
|
| 10 |
|
| 11 |
class BaseLangChainManager(ABC):
|
| 12 |
def __init__(self) -> None:
|
|
|
|
| 24 |
class LangChainAzureManager(BaseLangChainManager):
|
| 25 |
def __init__(self) -> None:
|
| 26 |
super().__init__()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
# Override
|
| 29 |
def get_embedding(self) -> LCEmbeddings:
|
| 30 |
+
return OpenAIEmbeddings(client=None, chunk_size=1)
|
| 31 |
|
| 32 |
# Override
|
| 33 |
def get_llm(self) -> BaseLanguageModel:
|
| 34 |
+
return AzureOpenAI(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
deployment_name="text-davinci-003",
|
| 36 |
# model_name="text-davinci-003",
|
| 37 |
model="text-davinci-003",
|
| 38 |
client=None,
|
| 39 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llama/{storage_context.py → context.py}
RENAMED
|
@@ -1,14 +1,62 @@
|
|
| 1 |
-
from llama_index import StorageContext
|
| 2 |
-
from typing import List
|
| 3 |
from abc import abstractmethod, ABC
|
| 4 |
|
| 5 |
-
from llama_index import Document
|
|
|
|
| 6 |
|
| 7 |
from core.lifecycle import Lifecycle
|
| 8 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
class StorageContextManager(Lifecycle, ABC):
|
|
|
|
| 12 |
@abstractmethod
|
| 13 |
def get_storage_context(self) -> StorageContext:
|
| 14 |
pass
|
|
@@ -17,11 +65,9 @@ class StorageContextManager(Lifecycle, ABC):
|
|
| 17 |
class LocalStorageContextManager(StorageContextManager):
|
| 18 |
storage_context: StorageContext
|
| 19 |
|
| 20 |
-
def __init__(
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
dataset_path: str = "./dataset",
|
| 24 |
-
) -> None:
|
| 25 |
super().__init__()
|
| 26 |
self.dataset_path = dataset_path
|
| 27 |
self.service_context_manager = service_context_manager
|
|
@@ -31,11 +77,8 @@ class LocalStorageContextManager(StorageContextManager):
|
|
| 31 |
|
| 32 |
def do_init(self) -> None:
|
| 33 |
from llama.utils import is_local_storage_files_ready
|
| 34 |
-
|
| 35 |
if is_local_storage_files_ready(self.dataset_path):
|
| 36 |
-
self.storage_context = StorageContext.from_defaults(
|
| 37 |
-
persist_dir=self.dataset_path
|
| 38 |
-
)
|
| 39 |
else:
|
| 40 |
docs = self._download()
|
| 41 |
self._indexing(docs)
|
|
@@ -51,17 +94,14 @@ class LocalStorageContextManager(StorageContextManager):
|
|
| 51 |
def do_dispose(self) -> None:
|
| 52 |
self.storage_context.persist(self.dataset_path)
|
| 53 |
|
| 54 |
-
def _download(self) ->
|
| 55 |
from llama.data_loader import GithubLoader
|
| 56 |
-
|
| 57 |
loader = GithubLoader()
|
| 58 |
return loader.load()
|
| 59 |
|
| 60 |
-
def _indexing(self, docs:
|
| 61 |
from llama_index import GPTVectorStoreIndex
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
docs, service_context=self.service_context_manager.get_service_context()
|
| 65 |
-
)
|
| 66 |
index.storage_context.persist(persist_dir=self.dataset_path)
|
| 67 |
self.storage_context = index.storage_context
|
|
|
|
|
|
|
|
|
|
| 1 |
from abc import abstractmethod, ABC
|
| 2 |
|
| 3 |
+
from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding, Document
|
| 4 |
+
from llama_index import StorageContext
|
| 5 |
|
| 6 |
from core.lifecycle import Lifecycle
|
| 7 |
+
from langchain_manager.manager import BaseLangChainManager
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class ServiceContextManager(Lifecycle, ABC):
|
| 11 |
+
|
| 12 |
+
@abstractmethod
|
| 13 |
+
def get_service_context(self) -> ServiceContext:
|
| 14 |
+
pass
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class AzureServiceContextManager(ServiceContextManager):
|
| 18 |
+
lc_manager: BaseLangChainManager
|
| 19 |
+
service_context: ServiceContext
|
| 20 |
+
|
| 21 |
+
def __init__(self, lc_manager: BaseLangChainManager):
|
| 22 |
+
super().__init__()
|
| 23 |
+
self.lc_manager = lc_manager
|
| 24 |
+
|
| 25 |
+
def get_service_context(self) -> ServiceContext:
|
| 26 |
+
if self.service_context is None:
|
| 27 |
+
raise ValueError(
|
| 28 |
+
"service context is not ready, check for lifecycle statement"
|
| 29 |
+
)
|
| 30 |
+
return self.service_context
|
| 31 |
+
|
| 32 |
+
def do_init(self) -> None:
|
| 33 |
+
# define embedding
|
| 34 |
+
embedding = LangchainEmbedding(self.lc_manager.get_embedding())
|
| 35 |
+
# define LLM
|
| 36 |
+
llm_predictor = LLMPredictor(llm=self.lc_manager.get_llm())
|
| 37 |
+
# configure service context
|
| 38 |
+
self.service_context = ServiceContext.from_defaults(
|
| 39 |
+
llm_predictor=llm_predictor, embed_model=embedding
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
def do_start(self) -> None:
|
| 43 |
+
self.logger.info("[do_start][embedding] last used usage: %d",
|
| 44 |
+
self.service_context.embed_model.total_tokens_used)
|
| 45 |
+
self.logger.info("[do_start][predict] last used usage: %d",
|
| 46 |
+
self.service_context.llm_predictor.total_tokens_used)
|
| 47 |
+
|
| 48 |
+
def do_stop(self) -> None:
|
| 49 |
+
self.logger.info("[do_stop][embedding] last used usage: %d",
|
| 50 |
+
self.service_context.embed_model.total_tokens_used)
|
| 51 |
+
self.logger.info("[do_stop][predict] last used usage: %d",
|
| 52 |
+
self.service_context.llm_predictor.total_tokens_used)
|
| 53 |
+
|
| 54 |
+
def do_dispose(self) -> None:
|
| 55 |
+
self.logger.info("[do_dispose] total used token: %d", self.service_context.llm_predictor.total_tokens_used)
|
| 56 |
|
| 57 |
|
| 58 |
class StorageContextManager(Lifecycle, ABC):
|
| 59 |
+
|
| 60 |
@abstractmethod
|
| 61 |
def get_storage_context(self) -> StorageContext:
|
| 62 |
pass
|
|
|
|
| 65 |
class LocalStorageContextManager(StorageContextManager):
|
| 66 |
storage_context: StorageContext
|
| 67 |
|
| 68 |
+
def __init__(self,
|
| 69 |
+
dataset_path: str = "./dataset",
|
| 70 |
+
service_context_manager: ServiceContextManager = None) -> None:
|
|
|
|
|
|
|
| 71 |
super().__init__()
|
| 72 |
self.dataset_path = dataset_path
|
| 73 |
self.service_context_manager = service_context_manager
|
|
|
|
| 77 |
|
| 78 |
def do_init(self) -> None:
|
| 79 |
from llama.utils import is_local_storage_files_ready
|
|
|
|
| 80 |
if is_local_storage_files_ready(self.dataset_path):
|
| 81 |
+
self.storage_context = StorageContext.from_defaults(persist_dir=self.dataset_path)
|
|
|
|
|
|
|
| 82 |
else:
|
| 83 |
docs = self._download()
|
| 84 |
self._indexing(docs)
|
|
|
|
| 94 |
def do_dispose(self) -> None:
|
| 95 |
self.storage_context.persist(self.dataset_path)
|
| 96 |
|
| 97 |
+
def _download(self) -> [Document]:
|
| 98 |
from llama.data_loader import GithubLoader
|
|
|
|
| 99 |
loader = GithubLoader()
|
| 100 |
return loader.load()
|
| 101 |
|
| 102 |
+
def _indexing(self, docs: [Document]):
|
| 103 |
from llama_index import GPTVectorStoreIndex
|
| 104 |
+
index = GPTVectorStoreIndex.from_documents(docs,
|
| 105 |
+
service_context=self.service_context_manager.get_service_context())
|
|
|
|
|
|
|
| 106 |
index.storage_context.persist(persist_dir=self.dataset_path)
|
| 107 |
self.storage_context = index.storage_context
|
llama/data_loader.py
CHANGED
|
@@ -16,10 +16,10 @@ class WikiLoader(ABC):
|
|
| 16 |
|
| 17 |
class GithubLoader(WikiLoader):
|
| 18 |
def __init__(
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
):
|
| 24 |
super().__init__()
|
| 25 |
self.owner = (
|
|
@@ -51,8 +51,7 @@ class GithubLoader(WikiLoader):
|
|
| 51 |
verbose=True,
|
| 52 |
concurrent_requests=10,
|
| 53 |
)
|
| 54 |
-
|
| 55 |
-
os.environ["https_proxy"] = "http://127.0.0.1:7890"
|
| 56 |
docs = loader.load_data(branch="master")
|
| 57 |
|
| 58 |
with open("docs/docs.pkl", "wb") as f:
|
|
|
|
| 16 |
|
| 17 |
class GithubLoader(WikiLoader):
|
| 18 |
def __init__(
|
| 19 |
+
self,
|
| 20 |
+
github_owner: Optional[str] = None,
|
| 21 |
+
repo: Optional[str] = None,
|
| 22 |
+
dirs: Optional[Sequence[str]] = None,
|
| 23 |
):
|
| 24 |
super().__init__()
|
| 25 |
self.owner = (
|
|
|
|
| 51 |
verbose=True,
|
| 52 |
concurrent_requests=10,
|
| 53 |
)
|
| 54 |
+
|
|
|
|
| 55 |
docs = loader.load_data(branch="master")
|
| 56 |
|
| 57 |
with open("docs/docs.pkl", "wb") as f:
|
llama/index.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from core.lifecycle import Lifecycle
|
| 2 |
+
from llama.context import ServiceContextManager
|
| 3 |
+
from llama_index.indices.vector_store import VectorStoreIndex
|
| 4 |
+
from typing import Optional
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class IndexManager(Lifecycle):
|
| 8 |
+
index: Optional[VectorStoreIndex]
|
| 9 |
+
|
| 10 |
+
def __init__(self, context_manager: ServiceContextManager) -> None:
|
| 11 |
+
super().__init__()
|
| 12 |
+
self.index = None
|
| 13 |
+
self.context_manager = context_manager
|
| 14 |
+
|
| 15 |
+
def get_index(self) -> Optional[VectorStoreIndex]:
|
| 16 |
+
if not self.lifecycle_state.is_started():
|
| 17 |
+
raise Exception("Lifecycle state is not correct")
|
| 18 |
+
return self.index
|
llama/service_context.py
DELETED
|
@@ -1,142 +0,0 @@
|
|
| 1 |
-
from abc import abstractmethod, ABC
|
| 2 |
-
|
| 3 |
-
from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding
|
| 4 |
-
|
| 5 |
-
from core.lifecycle import Lifecycle
|
| 6 |
-
from langchain_manager.manager import BaseLangChainManager
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
# def get_callback_manager() -> CallbackManager:
|
| 10 |
-
# from llama_index.callbacks import (
|
| 11 |
-
# WandbCallbackHandler,
|
| 12 |
-
# CallbackManager,
|
| 13 |
-
# LlamaDebugHandler,
|
| 14 |
-
# )
|
| 15 |
-
# llama_debug = LlamaDebugHandler(print_trace_on_end=True)
|
| 16 |
-
# # wandb.init args
|
| 17 |
-
# run_args = dict(
|
| 18 |
-
# project="llamaindex",
|
| 19 |
-
# )
|
| 20 |
-
# wandb_callback = WandbCallbackHandler(run_args=run_args)
|
| 21 |
-
# return CallbackManager([llama_debug, wandb_callback])
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
class ServiceContextManager(Lifecycle, ABC):
|
| 25 |
-
@abstractmethod
|
| 26 |
-
def get_service_context(self) -> ServiceContext:
|
| 27 |
-
pass
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
class AzureServiceContextManager(ServiceContextManager):
|
| 31 |
-
lc_manager: BaseLangChainManager
|
| 32 |
-
service_context: ServiceContext
|
| 33 |
-
|
| 34 |
-
def __init__(self, lc_manager: BaseLangChainManager):
|
| 35 |
-
super().__init__()
|
| 36 |
-
self.lc_manager = lc_manager
|
| 37 |
-
|
| 38 |
-
def get_service_context(self) -> ServiceContext:
|
| 39 |
-
if self.service_context is None:
|
| 40 |
-
raise ValueError(
|
| 41 |
-
"service context is not ready, check for lifecycle statement"
|
| 42 |
-
)
|
| 43 |
-
return self.service_context
|
| 44 |
-
|
| 45 |
-
def do_init(self) -> None:
|
| 46 |
-
# define embedding
|
| 47 |
-
embedding = LangchainEmbedding(self.lc_manager.get_embedding())
|
| 48 |
-
# define LLM
|
| 49 |
-
llm_predictor = LLMPredictor(llm=self.lc_manager.get_llm())
|
| 50 |
-
# configure service context
|
| 51 |
-
self.service_context = ServiceContext.from_defaults(
|
| 52 |
-
llm_predictor=llm_predictor,
|
| 53 |
-
embed_model=embedding,
|
| 54 |
-
# callback_manager=get_callback_manager(),
|
| 55 |
-
)
|
| 56 |
-
|
| 57 |
-
def do_start(self) -> None:
|
| 58 |
-
self.logger.info(
|
| 59 |
-
"[do_start][embedding] last used usage: %d",
|
| 60 |
-
self.service_context.embed_model.total_tokens_used,
|
| 61 |
-
)
|
| 62 |
-
self.logger.info(
|
| 63 |
-
"[do_start][predict] last used usage: %d",
|
| 64 |
-
self.service_context.llm_predictor.total_tokens_used,
|
| 65 |
-
)
|
| 66 |
-
|
| 67 |
-
def do_stop(self) -> None:
|
| 68 |
-
self.logger.info(
|
| 69 |
-
"[do_stop][embedding] last used usage: %d",
|
| 70 |
-
self.service_context.embed_model.total_tokens_used,
|
| 71 |
-
)
|
| 72 |
-
self.logger.info(
|
| 73 |
-
"[do_stop][predict] last used usage: %d",
|
| 74 |
-
self.service_context.llm_predictor.total_tokens_used,
|
| 75 |
-
)
|
| 76 |
-
|
| 77 |
-
def do_dispose(self) -> None:
|
| 78 |
-
self.logger.info(
|
| 79 |
-
"[do_dispose] total used token: %d",
|
| 80 |
-
self.service_context.llm_predictor.total_tokens_used,
|
| 81 |
-
)
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
class HuggingFaceChineseOptServiceContextManager(ServiceContextManager):
|
| 85 |
-
lc_manager: BaseLangChainManager
|
| 86 |
-
service_context: ServiceContext
|
| 87 |
-
|
| 88 |
-
def __init__(self, lc_manager: BaseLangChainManager):
|
| 89 |
-
super().__init__()
|
| 90 |
-
self.lc_manager = lc_manager
|
| 91 |
-
|
| 92 |
-
def get_service_context(self) -> ServiceContext:
|
| 93 |
-
if self.service_context is None:
|
| 94 |
-
raise ValueError(
|
| 95 |
-
"service context is not ready, check for lifecycle statement"
|
| 96 |
-
)
|
| 97 |
-
return self.service_context
|
| 98 |
-
|
| 99 |
-
def do_init(self) -> None:
|
| 100 |
-
# define embedding
|
| 101 |
-
from langchain.embeddings import HuggingFaceEmbeddings
|
| 102 |
-
|
| 103 |
-
model_name = "GanymedeNil/text2vec-large-chinese"
|
| 104 |
-
hf_embedding = HuggingFaceEmbeddings(
|
| 105 |
-
model_name=model_name, model_kwargs={"device": "cpu"}
|
| 106 |
-
)
|
| 107 |
-
|
| 108 |
-
embedding = LangchainEmbedding(hf_embedding)
|
| 109 |
-
# define LLM
|
| 110 |
-
llm_predictor = LLMPredictor(self.lc_manager.get_llm())
|
| 111 |
-
# configure service context
|
| 112 |
-
self.service_context = ServiceContext.from_defaults(
|
| 113 |
-
llm_predictor=llm_predictor,
|
| 114 |
-
embed_model=embedding,
|
| 115 |
-
# callback_manager=get_callback_manager()
|
| 116 |
-
)
|
| 117 |
-
|
| 118 |
-
def do_start(self) -> None:
|
| 119 |
-
self.logger.info(
|
| 120 |
-
"[do_start][embedding] last used usage: %d",
|
| 121 |
-
self.service_context.embed_model.total_tokens_used,
|
| 122 |
-
)
|
| 123 |
-
self.logger.info(
|
| 124 |
-
"[do_start][predict] last used usage: %d",
|
| 125 |
-
self.service_context.llm_predictor.total_tokens_used,
|
| 126 |
-
)
|
| 127 |
-
|
| 128 |
-
def do_stop(self) -> None:
|
| 129 |
-
self.logger.info(
|
| 130 |
-
"[do_stop][embedding] last used usage: %d",
|
| 131 |
-
self.service_context.embed_model.total_tokens_used,
|
| 132 |
-
)
|
| 133 |
-
self.logger.info(
|
| 134 |
-
"[do_stop][predict] last used usage: %d",
|
| 135 |
-
self.service_context.llm_predictor.total_tokens_used,
|
| 136 |
-
)
|
| 137 |
-
|
| 138 |
-
def do_dispose(self) -> None:
|
| 139 |
-
self.logger.info(
|
| 140 |
-
"[do_dispose] total used token: %d",
|
| 141 |
-
self.service_context.llm_predictor.total_tokens_used,
|
| 142 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llama/vector_storage.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from core.lifecycle import Lifecycle
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class VectorStorageManager(Lifecycle):
|
| 5 |
+
def __init__(self) -> None:
|
| 6 |
+
super().__init__()
|
| 7 |
+
|
| 8 |
+
def do_init(self) -> None:
|
| 9 |
+
pass
|
| 10 |
+
|
| 11 |
+
def do_start(self) -> None:
|
| 12 |
+
pass
|
| 13 |
+
|
| 14 |
+
def do_stop(self) -> None:
|
| 15 |
+
pass
|
| 16 |
+
|
| 17 |
+
def do_dispose(self) -> None:
|
| 18 |
+
pass
|
local-requirements.txt
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
python-dotenv
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,10 +1,6 @@
|
|
| 1 |
-
llama_index>=0.6.3
|
| 2 |
-
llama_hub
|
| 3 |
-
|
| 4 |
-
ruff
|
| 5 |
-
black
|
| 6 |
-
mypy
|
| 7 |
-
accelerate
|
| 8 |
-
python-dotenv
|
| 9 |
-
sentence_transformers
|
| 10 |
-
wandb
|
|
|
|
| 1 |
+
llama_index>=0.6.3
|
| 2 |
+
llama_hub
|
| 3 |
+
|
| 4 |
+
ruff
|
| 5 |
+
black
|
| 6 |
+
mypy
|
|
|
|
|
|
|
|
|
|
|
|
xpipe_wiki/manager_factory.py
CHANGED
|
@@ -4,69 +4,34 @@ import os
|
|
| 4 |
from core.helper import LifecycleHelper
|
| 5 |
from xpipe_wiki.robot_manager import XPipeWikiRobotManager, AzureXPipeWikiRobotManager
|
| 6 |
|
| 7 |
-
from multiprocessing import Lock
|
| 8 |
-
|
| 9 |
-
lock = Lock()
|
| 10 |
-
|
| 11 |
|
| 12 |
class XPipeRobotRevision(enum.Enum):
|
| 13 |
SIMPLE_OPENAI_VERSION_0 = 1
|
| 14 |
-
HUGGINGFACE_VERSION_0 = 2
|
| 15 |
|
| 16 |
|
| 17 |
-
|
| 18 |
-
"""
|
| 19 |
-
CAPABLE: Dict[XPipeRobotRevision, XPipeWikiRobotManager] =
|
| 20 |
-
{XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0: XPipeWikiRobotManager()}
|
| 21 |
-
"""
|
| 22 |
|
| 23 |
-
CAPABLE = dict() # type: dict[XPipeRobotRevision, XPipeWikiRobotManager]
|
| 24 |
|
|
|
|
| 25 |
@classmethod
|
| 26 |
def get_or_create(cls, revision: XPipeRobotRevision) -> XPipeWikiRobotManager:
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
manager = cls.create_huggingface_version_0()
|
| 34 |
-
cls.CAPABLE[revision] = manager
|
| 35 |
-
return manager
|
| 36 |
|
| 37 |
@classmethod
|
| 38 |
def create_simple_openai_version_0(cls) -> AzureXPipeWikiRobotManager:
|
| 39 |
-
from llama.
|
| 40 |
from langchain_manager.manager import LangChainAzureManager
|
| 41 |
|
| 42 |
service_context_manager = AzureServiceContextManager(
|
| 43 |
lc_manager=LangChainAzureManager()
|
| 44 |
)
|
| 45 |
-
from llama.storage_context import LocalStorageContextManager
|
| 46 |
-
|
| 47 |
-
dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
|
| 48 |
-
storage_context_manager = LocalStorageContextManager(
|
| 49 |
-
dataset_path=dataset_path, service_context_manager=service_context_manager
|
| 50 |
-
)
|
| 51 |
-
|
| 52 |
-
robot_manager = AzureXPipeWikiRobotManager(
|
| 53 |
-
service_context_manager=service_context_manager,
|
| 54 |
-
storage_context_manager=storage_context_manager,
|
| 55 |
-
)
|
| 56 |
-
LifecycleHelper.initialize_if_possible(robot_manager)
|
| 57 |
-
LifecycleHelper.start_if_possible(robot_manager)
|
| 58 |
-
return robot_manager
|
| 59 |
-
|
| 60 |
-
@classmethod
|
| 61 |
-
def create_huggingface_version_0(cls) -> AzureXPipeWikiRobotManager:
|
| 62 |
-
from llama.service_context import HuggingFaceChineseOptServiceContextManager
|
| 63 |
-
from langchain_manager.manager import LangChainAzureManager
|
| 64 |
-
|
| 65 |
-
service_context_manager = HuggingFaceChineseOptServiceContextManager(
|
| 66 |
-
lc_manager=LangChainAzureManager()
|
| 67 |
-
)
|
| 68 |
|
| 69 |
-
from llama.
|
| 70 |
|
| 71 |
dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
|
| 72 |
storage_context_manager = LocalStorageContextManager(
|
|
|
|
| 4 |
from core.helper import LifecycleHelper
|
| 5 |
from xpipe_wiki.robot_manager import XPipeWikiRobotManager, AzureXPipeWikiRobotManager
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
class XPipeRobotRevision(enum.Enum):
|
| 9 |
SIMPLE_OPENAI_VERSION_0 = 1
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
+
CAPABLE = dict()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
|
|
|
| 14 |
|
| 15 |
+
class XPipeRobotManagerFactory:
|
| 16 |
@classmethod
|
| 17 |
def get_or_create(cls, revision: XPipeRobotRevision) -> XPipeWikiRobotManager:
|
| 18 |
+
if CAPABLE.get(revision) is not None:
|
| 19 |
+
return CAPABLE[revision]
|
| 20 |
+
if revision == XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0:
|
| 21 |
+
manager = cls.create_simple_openai_version_0()
|
| 22 |
+
CAPABLE[revision] = manager
|
| 23 |
+
return manager
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
@classmethod
|
| 26 |
def create_simple_openai_version_0(cls) -> AzureXPipeWikiRobotManager:
|
| 27 |
+
from llama.context import AzureServiceContextManager
|
| 28 |
from langchain_manager.manager import LangChainAzureManager
|
| 29 |
|
| 30 |
service_context_manager = AzureServiceContextManager(
|
| 31 |
lc_manager=LangChainAzureManager()
|
| 32 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
+
from llama.context import LocalStorageContextManager
|
| 35 |
|
| 36 |
dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
|
| 37 |
storage_context_manager = LocalStorageContextManager(
|
xpipe_wiki/robot_manager.py
CHANGED
|
@@ -3,12 +3,10 @@ from typing import Any
|
|
| 3 |
|
| 4 |
from llama_index import load_index_from_storage
|
| 5 |
from llama_index.indices.query.base import BaseQueryEngine
|
| 6 |
-
from llama_index.indices.response import ResponseMode
|
| 7 |
|
| 8 |
from core.helper import LifecycleHelper
|
| 9 |
from core.lifecycle import Lifecycle
|
| 10 |
-
from llama.
|
| 11 |
-
from llama.storage_context import StorageContextManager
|
| 12 |
|
| 13 |
|
| 14 |
class XPipeWikiRobot(ABC):
|
|
@@ -25,10 +23,7 @@ class AzureOpenAIXPipeWikiRobot(XPipeWikiRobot):
|
|
| 25 |
self.query_engine = query_engine
|
| 26 |
|
| 27 |
def ask(self, question: str) -> Any:
|
| 28 |
-
|
| 29 |
-
response = self.query_engine.query(question)
|
| 30 |
-
print("response type: ", type(response))
|
| 31 |
-
return response.__str__()
|
| 32 |
|
| 33 |
|
| 34 |
class XPipeWikiRobotManager(Lifecycle):
|
|
@@ -64,10 +59,10 @@ class AzureXPipeWikiRobotManager(XPipeWikiRobotManager):
|
|
| 64 |
index = load_index_from_storage(
|
| 65 |
storage_context=self.storage_context_manager.get_storage_context(),
|
| 66 |
service_context=self.service_context_manager.get_service_context(),
|
|
|
|
| 67 |
)
|
| 68 |
self.query_engine = index.as_query_engine(
|
| 69 |
-
service_context=self.service_context_manager.get_service_context()
|
| 70 |
-
response_mode=ResponseMode.TREE_SUMMARIZE,
|
| 71 |
)
|
| 72 |
|
| 73 |
def do_stop(self) -> None:
|
|
|
|
| 3 |
|
| 4 |
from llama_index import load_index_from_storage
|
| 5 |
from llama_index.indices.query.base import BaseQueryEngine
|
|
|
|
| 6 |
|
| 7 |
from core.helper import LifecycleHelper
|
| 8 |
from core.lifecycle import Lifecycle
|
| 9 |
+
from llama.context import ServiceContextManager, StorageContextManager
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
class XPipeWikiRobot(ABC):
|
|
|
|
| 23 |
self.query_engine = query_engine
|
| 24 |
|
| 25 |
def ask(self, question: str) -> Any:
|
| 26 |
+
return self.query_engine.query(question)
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
class XPipeWikiRobotManager(Lifecycle):
|
|
|
|
| 59 |
index = load_index_from_storage(
|
| 60 |
storage_context=self.storage_context_manager.get_storage_context(),
|
| 61 |
service_context=self.service_context_manager.get_service_context(),
|
| 62 |
+
|
| 63 |
)
|
| 64 |
self.query_engine = index.as_query_engine(
|
| 65 |
+
service_context=self.service_context_manager.get_service_context()
|
|
|
|
| 66 |
)
|
| 67 |
|
| 68 |
def do_stop(self) -> None:
|