.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -56,7 +56,6 @@ coverage.xml
56
  .hypothesis/
57
  .pytest_cache/
58
  .ruff_cache
59
- wandb/
60
 
61
  # Translations
62
  *.mo
 
56
  .hypothesis/
57
  .pytest_cache/
58
  .ruff_cache
 
59
 
60
  # Translations
61
  *.mo
.idea/.gitignore DELETED
@@ -1,8 +0,0 @@
1
- # Default ignored files
2
- /shelf/
3
- /workspace.xml
4
- # Editor-based HTTP Client requests
5
- /httpRequests/
6
- # Datasource local storage ignored files
7
- /dataSources/
8
- /dataSources.local.xml
 
 
 
 
 
 
 
 
 
.idea/inspectionProfiles/profiles_settings.xml DELETED
@@ -1,6 +0,0 @@
1
- <component name="InspectionProjectProfileManager">
2
- <settings>
3
- <option name="USE_PROJECT_PROFILE" value="false" />
4
- <version value="1.0" />
5
- </settings>
6
- </component>
 
 
 
 
 
 
 
.idea/llama-xpipe.iml DELETED
@@ -1,11 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <module type="PYTHON_MODULE" version="4">
3
- <component name="NewModuleRootManager">
4
- <content url="file://$MODULE_DIR$">
5
- <excludeFolder url="file://$MODULE_DIR$/.venv" />
6
- <excludeFolder url="file://$MODULE_DIR$/venv" />
7
- </content>
8
- <orderEntry type="inheritedJdk" />
9
- <orderEntry type="sourceFolder" forTests="false" />
10
- </component>
11
- </module>
 
 
 
 
 
 
 
 
 
 
 
 
.idea/misc.xml DELETED
@@ -1,4 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (llama-xpipe)" project-jdk-type="Python SDK" />
4
- </project>
 
 
 
 
 
.idea/modules.xml DELETED
@@ -1,8 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="ProjectModuleManager">
4
- <modules>
5
- <module fileurl="file://$PROJECT_DIR$/.idea/llama-xpipe.iml" filepath="$PROJECT_DIR$/.idea/llama-xpipe.iml" />
6
- </modules>
7
- </component>
8
- </project>
 
 
 
 
 
 
 
 
 
.idea/vcs.xml DELETED
@@ -1,6 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="VcsDirectoryMappings">
4
- <mapping directory="" vcs="Git" />
5
- </component>
6
- </project>
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,45 +1,40 @@
1
- import logging
2
- import sys
3
-
4
- import streamlit as st
5
- from dotenv import load_dotenv
6
-
7
- from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision
8
-
9
- logging.basicConfig(
10
- stream=sys.stdout, level=logging.INFO
11
- ) # logging.DEBUG for more verbose output
12
- # logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
13
-
14
- # # Sidebar contents
15
- with st.sidebar:
16
- st.title("🤗💬 LLM Chat App")
17
- st.markdown(
18
- """
19
- ## About
20
- This app is an LLM-powered chatbot built using:
21
- - [Streamlit](https://streamlit.io/)
22
- - [LangChain](https://python.langchain.com/)
23
- - [X-Pipe](https://github.com/ctripcorp/x-pipe)
24
- """
25
- )
26
- # add_vertical_space(5)
27
- st.write("Made by Nick")
28
-
29
-
30
- def main() -> None:
31
- st.header("X-Pipe Wiki 机器人 💬")
32
-
33
- robot_manager = XPipeRobotManagerFactory.get_or_create(
34
- XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0
35
- )
36
- robot = robot_manager.get_robot()
37
- query = st.text_input("X-Pipe Wiki 问题:")
38
- if query:
39
- response = robot.ask(question=query)
40
- st.write(response)
41
-
42
-
43
- if __name__ == "__main__":
44
- load_dotenv()
45
- main()
 
1
+ import logging
2
+ import sys
3
+
4
+ import streamlit as st
5
+
6
+ from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision
7
+
8
+ logging.basicConfig(
9
+ stream=sys.stdout, level=logging.DEBUG
10
+ ) # logging.DEBUG for more verbose output
11
+ logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
12
+
13
+ # Sidebar contents
14
+ with st.sidebar:
15
+ st.title("🤗💬 LLM Chat App")
16
+ st.markdown(
17
+ """
18
+ ## About
19
+ This app is an LLM-powered chatbot built using:
20
+ - [Streamlit](https://streamlit.io/)
21
+ - [LangChain](https://python.langchain.com/)
22
+ - [X-Pipe](https://github.com/ctripcorp/x-pipe)
23
+ """
24
+ )
25
+ # add_vertical_space(5)
26
+ st.write("Made by Nick")
27
+
28
+
29
+ def main() -> None:
30
+ st.header("X-Pipe Wiki 机器人 💬")
31
+ robot_manager = XPipeRobotManagerFactory.get_or_create(XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0)
32
+ robot = robot_manager.get_robot()
33
+ query = st.text_input("X-Pipe Wiki 问题:")
34
+ if query:
35
+ response = robot.ask(question=query)
36
+ st.write(response)
37
+
38
+
39
+ if __name__ == "__main__":
40
+ main()
 
 
 
 
 
core/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/core/__pycache__/__init__.cpython-310.pyc and b/core/__pycache__/__init__.cpython-310.pyc differ
 
core/__pycache__/lifecycle.cpython-310.pyc CHANGED
Binary files a/core/__pycache__/lifecycle.cpython-310.pyc and b/core/__pycache__/lifecycle.cpython-310.pyc differ
 
core/__pycache__/logger_factory.cpython-310.pyc CHANGED
Binary files a/core/__pycache__/logger_factory.cpython-310.pyc and b/core/__pycache__/logger_factory.cpython-310.pyc differ
 
core/helper.py CHANGED
@@ -2,30 +2,23 @@ from core.lifecycle import Lifecycle
2
 
3
 
4
  class LifecycleHelper:
 
5
  @classmethod
6
  def initialize_if_possible(cls, ls: Lifecycle) -> None:
7
- if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_initialize(
8
- ls.lifecycle_state.phase
9
- ):
10
  ls.initialize()
11
 
12
  @classmethod
13
  def start_if_possible(cls, ls: Lifecycle) -> None:
14
- if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_start(
15
- ls.lifecycle_state.phase
16
- ):
17
  ls.start()
18
 
19
  @classmethod
20
  def stop_if_possible(cls, ls: Lifecycle) -> None:
21
- if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_stop(
22
- ls.lifecycle_state.phase
23
- ):
24
  ls.stop()
25
 
26
  @classmethod
27
  def dispose_if_possible(cls, ls: Lifecycle) -> None:
28
- if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_dispose(
29
- ls.lifecycle_state.phase
30
- ):
31
  ls.dispose()
 
2
 
3
 
4
  class LifecycleHelper:
5
+
6
  @classmethod
7
  def initialize_if_possible(cls, ls: Lifecycle) -> None:
8
+ if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_initialize(ls.lifecycle_state.phase):
 
 
9
  ls.initialize()
10
 
11
  @classmethod
12
  def start_if_possible(cls, ls: Lifecycle) -> None:
13
+ if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_start(ls.lifecycle_state.phase):
 
 
14
  ls.start()
15
 
16
  @classmethod
17
  def stop_if_possible(cls, ls: Lifecycle) -> None:
18
+ if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_stop(ls.lifecycle_state.phase):
 
 
19
  ls.stop()
20
 
21
  @classmethod
22
  def dispose_if_possible(cls, ls: Lifecycle) -> None:
23
+ if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_dispose(ls.lifecycle_state.phase):
 
 
24
  ls.dispose()
core/lifecycle.py CHANGED
@@ -1,184 +1,184 @@
1
- import enum
2
- from abc import ABC, abstractmethod
3
- from typing import TypeVar, Optional
4
-
5
- from core import logger_factory
6
-
7
-
8
- class Initializable(ABC):
9
- @abstractmethod
10
- def initialize(self) -> None:
11
- pass
12
-
13
-
14
- class Startable(ABC):
15
- @abstractmethod
16
- def start(self) -> None:
17
- pass
18
-
19
-
20
- class Stoppable(ABC):
21
- @abstractmethod
22
- def stop(self) -> None:
23
- pass
24
-
25
-
26
- class Disposable(ABC):
27
- @abstractmethod
28
- def dispose(self) -> None:
29
- pass
30
-
31
-
32
- class LifecycleAware(ABC):
33
- def __init__(self, state: "LifecycleState") -> None:
34
- """
35
- Args:
36
- state(LifecycleState): lifecycle state
37
- """
38
- self.state = state
39
-
40
- def get_lifecycle_state(self) -> "LifecycleState":
41
- return self.state
42
-
43
-
44
- class Lifecycle(Initializable, Startable, Stoppable, Disposable, LifecycleAware, ABC):
45
- def __init__(self) -> None:
46
- self.logger = logger_factory.get_logger(self.__class__.__name__)
47
- self.lifecycle_state = LifecycleState(lifecycle=self)
48
-
49
- def initialize(self) -> None:
50
- if not self.lifecycle_state.can_initialize(self.lifecycle_state.get_phase()):
51
- self.logger.warning("[{}]cannot initialize".format(self.__class__.__name__))
52
- return
53
- self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZING)
54
- self.do_init()
55
- self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZED)
56
-
57
- def start(self) -> None:
58
- if not self.lifecycle_state.can_start(self.lifecycle_state.get_phase()):
59
- self.logger.warning("[{}]cannot start".format(self.__class__.__name__))
60
- return
61
- self.lifecycle_state.set_phase(LifecyclePhase.STARTING)
62
- self.do_start()
63
- self.lifecycle_state.set_phase(LifecyclePhase.STARTED)
64
-
65
- def stop(self) -> None:
66
- if not self.lifecycle_state.can_stop(self.lifecycle_state.get_phase()):
67
- self.logger.warning("[{}]cannot stop".format(self.__class__.__name__))
68
- return
69
- self.lifecycle_state.set_phase(LifecyclePhase.STOPPING)
70
- self.do_stop()
71
- self.lifecycle_state.set_phase(LifecyclePhase.STOPPED)
72
-
73
- def dispose(self) -> None:
74
- if not self.lifecycle_state.can_dispose(self.lifecycle_state.get_phase()):
75
- self.logger.warning("[{}]cannot dispose".format(self.__class__.__name__))
76
- return
77
- self.lifecycle_state.set_phase(LifecyclePhase.DISPOSING)
78
- self.do_dispose()
79
- self.lifecycle_state.set_phase(LifecyclePhase.DISPOSED)
80
-
81
- @abstractmethod
82
- def do_init(self) -> None:
83
- pass
84
-
85
- @abstractmethod
86
- def do_start(self) -> None:
87
- pass
88
-
89
- @abstractmethod
90
- def do_stop(self) -> None:
91
- pass
92
-
93
- @abstractmethod
94
- def do_dispose(self) -> None:
95
- pass
96
-
97
-
98
- class LifecyclePhase(enum.Enum):
99
- INITIALIZING = 1
100
- INITIALIZED = 2
101
- STARTING = 3
102
- STARTED = 4
103
- STOPPING = 5
104
- STOPPED = 6
105
- DISPOSING = 7
106
- DISPOSED = 8
107
-
108
-
109
- class LifecycleController(ABC):
110
- def can_initialize(self, phase: Optional[LifecyclePhase]) -> bool:
111
- return phase is None or phase == LifecyclePhase.DISPOSED
112
-
113
- def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
114
- return phase is not None and (
115
- phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
116
- )
117
-
118
- def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
119
- return phase is not None and phase == LifecyclePhase.STARTED
120
-
121
- def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
122
- return phase is not None and (
123
- phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
124
- )
125
-
126
-
127
- LS = TypeVar("LS", bound=Lifecycle)
128
-
129
-
130
- class LifecycleState(LifecycleController, ABC):
131
- phase: Optional[LifecyclePhase]
132
-
133
- def __init__(self, lifecycle: LS) -> None:
134
- self.phase = None
135
- self.prev_phase = None
136
- self.lifecycle = lifecycle
137
- self.logger = logger_factory.get_logger(__name__)
138
-
139
- def is_initializing(self) -> bool:
140
- return self.phase == LifecyclePhase.INITIALIZING
141
-
142
- def is_initialized(self) -> bool:
143
- return self.phase == LifecyclePhase.INITIALIZED
144
-
145
- def is_starting(self) -> bool:
146
- return self.phase == LifecyclePhase.STARTING
147
-
148
- def is_started(self) -> bool:
149
- return self.phase == LifecyclePhase.STARTED
150
-
151
- def is_stopping(self) -> bool:
152
- return self.phase == LifecyclePhase.STOPPING
153
-
154
- def is_stopped(self) -> bool:
155
- return self.phase == LifecyclePhase.STOPPED
156
-
157
- def is_disposing(self) -> bool:
158
- return self.phase == LifecyclePhase.DISPOSING
159
-
160
- def is_disposed(self) -> bool:
161
- return self.phase == LifecyclePhase.DISPOSED
162
-
163
- def get_phase(self) -> Optional[LifecyclePhase]:
164
- return self.phase
165
-
166
- def set_phase(self, phase: Optional[LifecyclePhase]) -> None:
167
- prev = "None"
168
- if self.phase is not None:
169
- prev = self.phase.name
170
- current = "None"
171
- if phase is not None:
172
- current = phase.name
173
- self.logger.info(
174
- "[setPhaseName][{}]{} --> {}".format(
175
- self.lifecycle.__class__.__name__,
176
- prev,
177
- current,
178
- )
179
- )
180
- self.phase = phase
181
-
182
- def rollback(self, err: Exception) -> None:
183
- self.phase = self.prev_phase
184
- self.prev_phase = None
 
1
+ import enum
2
+ from abc import ABC, abstractmethod
3
+ from typing import TypeVar, Optional
4
+
5
+ from core import logger_factory
6
+
7
+
8
+ class Initializable(ABC):
9
+ @abstractmethod
10
+ def initialize(self) -> None:
11
+ pass
12
+
13
+
14
+ class Startable(ABC):
15
+ @abstractmethod
16
+ def start(self) -> None:
17
+ pass
18
+
19
+
20
+ class Stoppable(ABC):
21
+ @abstractmethod
22
+ def stop(self) -> None:
23
+ pass
24
+
25
+
26
+ class Disposable(ABC):
27
+ @abstractmethod
28
+ def dispose(self) -> None:
29
+ pass
30
+
31
+
32
+ class LifecycleAware(ABC):
33
+ def __init__(self, state: "LifecycleState") -> None:
34
+ """
35
+ Args:
36
+ state(LifecycleState): lifecycle state
37
+ """
38
+ self.state = state
39
+
40
+ def get_lifecycle_state(self) -> "LifecycleState":
41
+ return self.state
42
+
43
+
44
+ class Lifecycle(Initializable, Startable, Stoppable, Disposable, LifecycleAware, ABC):
45
+ def __init__(self) -> None:
46
+ self.logger = logger_factory.get_logger(self.__class__.__name__)
47
+ self.lifecycle_state = LifecycleState(lifecycle=self)
48
+
49
+ def initialize(self) -> None:
50
+ if not self.lifecycle_state.can_initialize(self.lifecycle_state.get_phase()):
51
+ self.logger.warning("[{}]cannot initialize".format(self.__class__.__name__))
52
+ return
53
+ self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZING)
54
+ self.do_init()
55
+ self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZED)
56
+
57
+ def start(self) -> None:
58
+ if not self.lifecycle_state.can_start(self.lifecycle_state.get_phase()):
59
+ self.logger.warning("[{}]cannot start".format(self.__class__.__name__))
60
+ return
61
+ self.lifecycle_state.set_phase(LifecyclePhase.STARTING)
62
+ self.do_start()
63
+ self.lifecycle_state.set_phase(LifecyclePhase.STARTED)
64
+
65
+ def stop(self) -> None:
66
+ if not self.lifecycle_state.can_stop(self.lifecycle_state.get_phase()):
67
+ self.logger.warning("[{}]cannot stop".format(self.__class__.__name__))
68
+ return
69
+ self.lifecycle_state.set_phase(LifecyclePhase.STOPPING)
70
+ self.do_stop()
71
+ self.lifecycle_state.set_phase(LifecyclePhase.STOPPED)
72
+
73
+ def dispose(self) -> None:
74
+ if not self.lifecycle_state.can_dispose(self.lifecycle_state.get_phase()):
75
+ self.logger.warning("[{}]cannot dispose".format(self.__class__.__name__))
76
+ return
77
+ self.lifecycle_state.set_phase(LifecyclePhase.DISPOSING)
78
+ self.do_dispose()
79
+ self.lifecycle_state.set_phase(LifecyclePhase.DISPOSED)
80
+
81
+ @abstractmethod
82
+ def do_init(self) -> None:
83
+ pass
84
+
85
+ @abstractmethod
86
+ def do_start(self) -> None:
87
+ pass
88
+
89
+ @abstractmethod
90
+ def do_stop(self) -> None:
91
+ pass
92
+
93
+ @abstractmethod
94
+ def do_dispose(self) -> None:
95
+ pass
96
+
97
+
98
+ class LifecyclePhase(enum.Enum):
99
+ INITIALIZING = 1
100
+ INITIALIZED = 2
101
+ STARTING = 3
102
+ STARTED = 4
103
+ STOPPING = 5
104
+ STOPPED = 6
105
+ DISPOSING = 7
106
+ DISPOSED = 8
107
+
108
+
109
+ class LifecycleController(ABC):
110
+ def can_initialize(self, phase: Optional[LifecyclePhase]) -> bool:
111
+ return phase is None or phase == LifecyclePhase.DISPOSED
112
+
113
+ def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
114
+ return phase is not None and (
115
+ phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
116
+ )
117
+
118
+ def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
119
+ return phase is not None and phase == LifecyclePhase.STARTED
120
+
121
+ def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
122
+ return phase is not None and (
123
+ phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
124
+ )
125
+
126
+
127
+ LS = TypeVar("LS", bound=Lifecycle)
128
+
129
+
130
+ class LifecycleState(LifecycleController, ABC):
131
+ phase: Optional[LifecyclePhase]
132
+
133
+ def __init__(self, lifecycle: LS) -> None:
134
+ self.phase = None
135
+ self.prev_phase = None
136
+ self.lifecycle = lifecycle
137
+ self.logger = logger_factory.get_logger(__name__)
138
+
139
+ def is_initializing(self) -> bool:
140
+ return self.phase == LifecyclePhase.INITIALIZING
141
+
142
+ def is_initialized(self) -> bool:
143
+ return self.phase == LifecyclePhase.INITIALIZED
144
+
145
+ def is_starting(self) -> bool:
146
+ return self.phase == LifecyclePhase.STARTING
147
+
148
+ def is_started(self) -> bool:
149
+ return self.phase == LifecyclePhase.STARTED
150
+
151
+ def is_stopping(self) -> bool:
152
+ return self.phase == LifecyclePhase.STOPPING
153
+
154
+ def is_stopped(self) -> bool:
155
+ return self.phase == LifecyclePhase.STOPPED
156
+
157
+ def is_disposing(self) -> bool:
158
+ return self.phase == LifecyclePhase.DISPOSING
159
+
160
+ def is_disposed(self) -> bool:
161
+ return self.phase == LifecyclePhase.DISPOSED
162
+
163
+ def get_phase(self) -> Optional[LifecyclePhase]:
164
+ return self.phase
165
+
166
+ def set_phase(self, phase: Optional[LifecyclePhase]) -> None:
167
+ prev = "None"
168
+ if self.phase is not None:
169
+ prev = self.phase.name
170
+ current = "None"
171
+ if phase is not None:
172
+ current = phase.name
173
+ self.logger.info(
174
+ "[setPhaseName][{}]{} --> {}".format(
175
+ self.lifecycle.__class__.__name__,
176
+ prev,
177
+ current,
178
+ )
179
+ )
180
+ self.phase = phase
181
+
182
+ def rollback(self, err: Exception) -> None:
183
+ self.phase = self.prev_phase
184
+ self.prev_phase = None
core/test_lifecycle.py CHANGED
@@ -1,7 +1,10 @@
 
1
  from unittest import TestCase
2
 
3
  from core.lifecycle import Lifecycle
4
 
 
 
5
 
6
  class SubLifecycle(Lifecycle):
7
  def __init__(self) -> None:
 
1
+ import logging
2
  from unittest import TestCase
3
 
4
  from core.lifecycle import Lifecycle
5
 
6
+ logging.basicConfig()
7
+
8
 
9
  class SubLifecycle(Lifecycle):
10
  def __init__(self) -> None:
dataset/docstore.json CHANGED
The diff for this file is too large to render. See raw diff
 
dataset/graph_store.json CHANGED
@@ -1 +1,3 @@
1
- {"graph_dict": {}}
 
 
 
1
+ {
2
+ "graph_dict": {}
3
+ }
dataset/index_store.json CHANGED
@@ -1 +1,8 @@
1
- {"index_store/data": {"da495c94-4541-47e1-b93f-8535192a5f28": {"__type__": "vector_store", "__data__": "{\"index_id\": \"da495c94-4541-47e1-b93f-8535192a5f28\", \"summary\": null, \"nodes_dict\": {\"59108663-a5e1-4e3e-bb21-626158eef136\": \"59108663-a5e1-4e3e-bb21-626158eef136\", \"50de4ec9-febb-466f-9f9a-cc9296895e83\": \"50de4ec9-febb-466f-9f9a-cc9296895e83\", \"aa413a53-0dda-4ac4-8ae9-6e8e340bb4f0\": \"aa413a53-0dda-4ac4-8ae9-6e8e340bb4f0\", \"a0cc4323-ec8f-4fed-9401-e44125134341\": \"a0cc4323-ec8f-4fed-9401-e44125134341\", \"5321cc7b-2a86-48b8-b56c-415dde7c149b\": \"5321cc7b-2a86-48b8-b56c-415dde7c149b\", \"9e19fb91-8258-4aca-9692-2d027073499e\": \"9e19fb91-8258-4aca-9692-2d027073499e\", \"02e856e5-4211-4a27-9204-e966907f1d74\": \"02e856e5-4211-4a27-9204-e966907f1d74\", \"f3074870-8fbf-4322-b1d2-2111e6aac9af\": \"f3074870-8fbf-4322-b1d2-2111e6aac9af\", \"82677fb9-abe3-4038-8263-5576c47da4f2\": \"82677fb9-abe3-4038-8263-5576c47da4f2\", \"a08364a6-c23d-4df5-8b5d-84137fbebd4e\": \"a08364a6-c23d-4df5-8b5d-84137fbebd4e\", \"e45b082d-c3ec-45aa-b630-6db49a62728b\": \"e45b082d-c3ec-45aa-b630-6db49a62728b\", \"2c55445c-04b1-4705-9871-adaa02f38f1b\": \"2c55445c-04b1-4705-9871-adaa02f38f1b\", \"d0de9736-ccad-450e-b4a1-49d4cdb8b941\": \"d0de9736-ccad-450e-b4a1-49d4cdb8b941\", \"fd0d2375-39e2-4bce-8e39-1182a122a1b4\": \"fd0d2375-39e2-4bce-8e39-1182a122a1b4\", \"13221de7-6c68-4367-b1be-f35b06fc3a74\": \"13221de7-6c68-4367-b1be-f35b06fc3a74\", \"9f448401-cda9-4b5f-9a80-c79e111f9963\": \"9f448401-cda9-4b5f-9a80-c79e111f9963\", \"3bc7dfc2-3ddf-4384-a60c-6cd52e1314f4\": \"3bc7dfc2-3ddf-4384-a60c-6cd52e1314f4\", \"ce3e530c-ce2d-4f5f-a171-72a790c3c624\": \"ce3e530c-ce2d-4f5f-a171-72a790c3c624\", \"85f764bd-e560-48ba-a51e-2287b6fe19db\": \"85f764bd-e560-48ba-a51e-2287b6fe19db\", \"3a8e4c7c-9f7d-4735-93e7-9d847cff98de\": \"3a8e4c7c-9f7d-4735-93e7-9d847cff98de\", \"af881b61-03f4-4851-8946-794015e3436c\": \"af881b61-03f4-4851-8946-794015e3436c\", 
\"31579820-439e-4029-b8c4-a0d6528daa59\": \"31579820-439e-4029-b8c4-a0d6528daa59\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
 
 
 
 
 
 
 
 
1
+ {
2
+ "index_store/data": {
3
+ "7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc": {
4
+ "__type__": "vector_store",
5
+ "__data__": "{\"index_id\": \"7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc\", \"summary\": null, \"nodes_dict\": {\"190d619c-e92d-4cc5-b43b-652f66f4d6e0\": \"190d619c-e92d-4cc5-b43b-652f66f4d6e0\", \"1158d24f-29db-457b-aeaa-2a051316796e\": \"1158d24f-29db-457b-aeaa-2a051316796e\", \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\": \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\", \"359cdd4d-6096-4be4-991d-f3cc691c07c9\": \"359cdd4d-6096-4be4-991d-f3cc691c07c9\", \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\": \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\", \"7aa9b872-c4df-4de6-88c5-55962b913ba9\": \"7aa9b872-c4df-4de6-88c5-55962b913ba9\", \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\": \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\", \"1bebdce9-981b-4100-914d-48da73dc5f96\": \"1bebdce9-981b-4100-914d-48da73dc5f96\", \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\": \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\", \"229d36cc-1f60-485a-b44f-c53701b423de\": \"229d36cc-1f60-485a-b44f-c53701b423de\", \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\": \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\", \"cdf5fa07-4780-4cc2-9073-851996c4f006\": \"cdf5fa07-4780-4cc2-9073-851996c4f006\", \"a27c62d4-e969-46a8-94d8-aa0ce552234e\": \"a27c62d4-e969-46a8-94d8-aa0ce552234e\", \"e85816a0-a950-44c7-af93-dcf15a889490\": \"e85816a0-a950-44c7-af93-dcf15a889490\", \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\": \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\", \"b3716ae7-21ec-4112-979e-d5963c8235d5\": \"b3716ae7-21ec-4112-979e-d5963c8235d5\", \"55362a6d-27e6-46de-8fa8-c87a2039020f\": \"55362a6d-27e6-46de-8fa8-c87a2039020f\", \"785291f8-266d-4458-ba2c-2c110fac1038\": \"785291f8-266d-4458-ba2c-2c110fac1038\", \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\": \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\", \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\": \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\", \"42256b49-3c47-4fa2-8c64-871a757b9993\": \"42256b49-3c47-4fa2-8c64-871a757b9993\", \"f0a7c6ca-206e-477c-8f0a-416a355532d5\": \"f0a7c6ca-206e-477c-8f0a-416a355532d5\"}, \"doc_id_dict\": {}, \"embeddings_dict\": 
{}}"
6
+ }
7
+ }
8
+ }
dataset/vector_store.json CHANGED
The diff for this file is too large to render. See raw diff
 
docs/docs.pkl CHANGED
Binary files a/docs/docs.pkl and b/docs/docs.pkl differ
 
github_retriever.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_hub.github_repo import GithubRepositoryReader, GithubClient
2
+ from llama_index import download_loader, GPTVectorStoreIndex
3
+ from llama_index import LLMPredictor, VectorStoreIndex, ServiceContext
4
+ from langchain.llms import AzureOpenAI
5
+ from langchain.embeddings.openai import OpenAIEmbeddings
6
+ from llama_index import LangchainEmbedding, ServiceContext
7
+ from llama_index import StorageContext, load_index_from_storage
8
+ from dotenv import load_dotenv
9
+ import os
10
+ import pickle
11
+
12
+
13
+ def main() -> None:
14
+ # define embedding
15
+ embedding = LangchainEmbedding(OpenAIEmbeddings(chunk_size=1))
16
+ # define LLM
17
+ llm_predictor = LLMPredictor(
18
+ llm=AzureOpenAI(
19
+ engine="text-davinci-003",
20
+ model_name="text-davinci-003",
21
+ )
22
+ )
23
+
24
+ # configure service context
25
+ service_context = ServiceContext.from_defaults(
26
+ llm_predictor=llm_predictor, embed_model=embedding
27
+ )
28
+ download_loader("GithubRepositoryReader")
29
+ docs = None
30
+ if os.path.exists("docs/docs.pkl"):
31
+ with open("docs/docs.pkl", "rb") as f:
32
+ docs = pickle.load(f)
33
+
34
+ if docs is None:
35
+ github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
36
+ loader = GithubRepositoryReader(
37
+ github_client,
38
+ owner="ctripcorp",
39
+ repo="x-pipe",
40
+ filter_directories=(
41
+ [".", "doc"],
42
+ GithubRepositoryReader.FilterType.INCLUDE,
43
+ ),
44
+ filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
45
+ verbose=True,
46
+ concurrent_requests=10,
47
+ )
48
+
49
+ docs = loader.load_data(branch="master")
50
+
51
+ with open("docs/docs.pkl", "wb") as f:
52
+ pickle.dump(docs, f)
53
+
54
+ index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
55
+
56
+ query_engine = index.as_query_engine(service_context=service_context)
57
+ response = query_engine.query("如何使用X-Pipe?")
58
+ print(response)
59
+
60
+
61
+ if __name__ == "__main__":
62
+ load_dotenv()
63
+ main()
langchain_manager/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (154 Bytes). View file
 
langchain_manager/manager.py CHANGED
@@ -1,12 +1,14 @@
1
  from abc import abstractmethod, ABC
2
 
3
- from langchain.base_language import BaseLanguageModel
4
- from langchain.embeddings.base import Embeddings as LCEmbeddings
5
- from langchain.embeddings.openai import OpenAIEmbeddings
6
- from langchain.llms import AzureOpenAI
7
 
 
8
 
9
- class BaseLangChainManager(ABC):
 
10
  def __init__(self) -> None:
11
  super().__init__()
12
 
@@ -22,49 +24,16 @@ class BaseLangChainManager(ABC):
22
  class LangChainAzureManager(BaseLangChainManager):
23
  def __init__(self) -> None:
24
  super().__init__()
25
- self.embedding = OpenAIEmbeddings(client=None, chunk_size=1)
26
- self.llm = AzureOpenAI(
27
- deployment_name="text-davinci-003",
28
- # model_name="text-davinci-003",
29
- model="text-davinci-003",
30
- client=None,
31
- # temperature set to 0.0(default 0.7) to get a certain answer from OpenAI,
32
- # as a wiki robot we won't want to get flexible answers
33
- temperature=0.0,
34
- # GPT-3 default is 4096, however, openai.py default is 256
35
- max_tokens=2048,
36
- )
37
 
38
  # Override
39
  def get_embedding(self) -> LCEmbeddings:
40
- return self.embedding
41
 
42
  # Override
43
  def get_llm(self) -> BaseLanguageModel:
44
- return self.llm
45
-
46
-
47
- class LangChainHuggingFaceManager(BaseLangChainManager):
48
- def __init__(self) -> None:
49
- super().__init__()
50
- from transformers import AutoTokenizer, AutoModel
51
-
52
- AutoTokenizer.from_pretrained("GanymedeNil/text2vec-large-chinese")
53
-
54
- AutoModel.from_pretrained("GanymedeNil/text2vec-large-chinese")
55
-
56
- self.embedding = OpenAIEmbeddings(client=None, chunk_size=1)
57
- self.llm = AzureOpenAI(
58
  deployment_name="text-davinci-003",
59
  # model_name="text-davinci-003",
60
  model="text-davinci-003",
61
  client=None,
62
  )
63
-
64
- # Override
65
- def get_embedding(self) -> LCEmbeddings:
66
- return self.embedding
67
-
68
- # Override
69
- def get_llm(self) -> BaseLanguageModel:
70
- return self.llm
 
1
  from abc import abstractmethod, ABC
2
 
3
+ from langchain_manager.base_language import BaseLanguageModel
4
+ from langchain_manager.embeddings.base import Embeddings as LCEmbeddings
5
+ from langchain_manager.embeddings.openai import OpenAIEmbeddings
6
+ from langchain_manager.llms import AzureOpenAI
7
 
8
+ from core.lifecycle import Lifecycle
9
 
10
+
11
+ class BaseLangChainManager(Lifecycle, ABC):
12
  def __init__(self) -> None:
13
  super().__init__()
14
 
 
24
  class LangChainAzureManager(BaseLangChainManager):
25
  def __init__(self) -> None:
26
  super().__init__()
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  # Override
29
  def get_embedding(self) -> LCEmbeddings:
30
+ return OpenAIEmbeddings(client=None, chunk_size=1)
31
 
32
  # Override
33
  def get_llm(self) -> BaseLanguageModel:
34
+ return AzureOpenAI(
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  deployment_name="text-davinci-003",
36
  # model_name="text-davinci-003",
37
  model="text-davinci-003",
38
  client=None,
39
  )
 
 
 
 
 
 
 
 
llama/{storage_context.py → context.py} RENAMED
@@ -1,14 +1,66 @@
1
- from llama_index import StorageContext
2
- from typing import List
3
  from abc import abstractmethod, ABC
4
 
5
- from llama_index import Document
 
6
 
7
  from core.lifecycle import Lifecycle
8
- from llama.service_context import ServiceContextManager
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
  class StorageContextManager(Lifecycle, ABC):
 
12
  @abstractmethod
13
  def get_storage_context(self) -> StorageContext:
14
  pass
@@ -17,11 +69,9 @@ class StorageContextManager(Lifecycle, ABC):
17
  class LocalStorageContextManager(StorageContextManager):
18
  storage_context: StorageContext
19
 
20
- def __init__(
21
- self,
22
- service_context_manager: ServiceContextManager,
23
- dataset_path: str = "./dataset",
24
- ) -> None:
25
  super().__init__()
26
  self.dataset_path = dataset_path
27
  self.service_context_manager = service_context_manager
@@ -31,37 +81,29 @@ class LocalStorageContextManager(StorageContextManager):
31
 
32
  def do_init(self) -> None:
33
  from llama.utils import is_local_storage_files_ready
34
-
35
  if is_local_storage_files_ready(self.dataset_path):
36
- self.storage_context = StorageContext.from_defaults(
37
- persist_dir=self.dataset_path
38
- )
39
  else:
40
  docs = self._download()
41
  self._indexing(docs)
42
 
43
  def do_start(self) -> None:
44
- # self.logger.info("[do_start]%", **self.storage_context.to_dict())
45
- pass
46
 
47
  def do_stop(self) -> None:
48
- # self.logger.info("[do_stop]%", **self.storage_context.to_dict())
49
- pass
50
 
51
  def do_dispose(self) -> None:
52
  self.storage_context.persist(self.dataset_path)
53
 
54
- def _download(self) -> List[Document]:
55
  from llama.data_loader import GithubLoader
56
-
57
  loader = GithubLoader()
58
  return loader.load()
59
 
60
- def _indexing(self, docs: List[Document]) -> None:
61
  from llama_index import GPTVectorStoreIndex
62
-
63
- index = GPTVectorStoreIndex.from_documents(
64
- docs, service_context=self.service_context_manager.get_service_context()
65
- )
66
  index.storage_context.persist(persist_dir=self.dataset_path)
67
  self.storage_context = index.storage_context
 
 
 
1
  from abc import abstractmethod, ABC
2
 
3
+ from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding, Document
4
+ from llama_index import StorageContext
5
 
6
  from core.lifecycle import Lifecycle
7
+ from langchain_manager.manager import BaseLangChainManager
8
+
9
+
10
class ServiceContextManager(Lifecycle, ABC):
    """Lifecycle-managed provider of a llama-index ServiceContext."""

    @abstractmethod
    def get_service_context(self) -> ServiceContext:
        """Return the managed ServiceContext; implementations may raise if not ready."""
        pass
15
+
16
+
17
class AzureServiceContextManager(ServiceContextManager):
    """ServiceContextManager that builds its context from a BaseLangChainManager.

    ``do_init`` wires the manager's embedding model and LLM into a llama-index
    ServiceContext; the remaining lifecycle hooks log token-usage counters.
    """

    lc_manager: BaseLangChainManager
    service_context: ServiceContext

    def __init__(self, lc_manager: BaseLangChainManager):
        super().__init__()
        self.lc_manager = lc_manager
        # BUG FIX: initialize explicitly so get_service_context() raises the
        # intended ValueError instead of an AttributeError before do_init().
        self.service_context = None

    def get_service_context(self) -> ServiceContext:
        """Return the context built in do_init().

        Raises:
            KeyError: if the manager has not been started yet.
            ValueError: if the context is missing despite being started.
        """
        # BUG FIX: the original raised when is_started() was True, i.e. the
        # context became unreachable exactly once it was ready.  Per the
        # error message, the intent is to reject callers *before* start.
        if not self.lifecycle_state.is_started():
            raise KeyError(
                "incorrect lifecycle state: {}".format(self.lifecycle_state.phase)
            )
        if self.service_context is None:
            raise ValueError(
                "service context is not ready, check for lifecycle statement"
            )
        return self.service_context

    def do_init(self) -> None:
        # define embedding
        embedding = LangchainEmbedding(self.lc_manager.get_embedding())
        # define LLM
        llm_predictor = LLMPredictor(llm=self.lc_manager.get_llm())
        # configure service context
        self.service_context = ServiceContext.from_defaults(
            llm_predictor=llm_predictor, embed_model=embedding
        )

    def do_start(self) -> None:
        # Lazy %-style logging args per stdlib logging convention.
        self.logger.info(
            "[do_start][embedding] last used usage: %d",
            self.service_context.embed_model.total_tokens_used,
        )
        self.logger.info(
            "[do_start][predict] last used usage: %d",
            self.service_context.llm_predictor.total_tokens_used,
        )

    def do_stop(self) -> None:
        self.logger.info(
            "[do_stop][embedding] last used usage: %d",
            self.service_context.embed_model.total_tokens_used,
        )
        self.logger.info(
            "[do_stop][predict] last used usage: %d",
            self.service_context.llm_predictor.total_tokens_used,
        )

    def do_dispose(self) -> None:
        self.logger.info(
            "[do_dispose] total used token: %d",
            self.service_context.llm_predictor.total_tokens_used,
        )
60
 
61
 
62
class StorageContextManager(Lifecycle, ABC):
    """Lifecycle-managed provider of a llama-index StorageContext."""

    @abstractmethod
    def get_storage_context(self) -> StorageContext:
        """Return the managed StorageContext."""
        pass
 
69
  class LocalStorageContextManager(StorageContextManager):
70
  storage_context: StorageContext
71
 
72
    def __init__(self,
                 dataset_path: str = "./dataset",
                 service_context_manager: ServiceContextManager = None) -> None:
        """Create a local storage context manager.

        Args:
            dataset_path: directory where the index is persisted.
            service_context_manager: used to build the index when no
                persisted data exists.  NOTE(review): defaults to None but
                ``_indexing`` dereferences it unconditionally — confirm
                callers always supply one.
        """
        super().__init__()
        self.dataset_path = dataset_path
        self.service_context_manager = service_context_manager
 
81
 
82
  def do_init(self) -> None:
83
  from llama.utils import is_local_storage_files_ready
 
84
  if is_local_storage_files_ready(self.dataset_path):
85
+ self.storage_context = StorageContext.from_defaults(persist_dir=self.dataset_path)
 
 
86
  else:
87
  docs = self._download()
88
  self._indexing(docs)
89
 
90
  def do_start(self) -> None:
91
+ self.logger.info("[do_start]%", **self.storage_context.to_dict())
 
92
 
93
  def do_stop(self) -> None:
94
+ self.logger.info("[do_stop]%", **self.storage_context.to_dict())
 
95
 
96
    def do_dispose(self) -> None:
        """Persist the storage context to dataset_path before shutdown."""
        self.storage_context.persist(self.dataset_path)
98
 
99
+ def _download(self) -> [Document]:
100
  from llama.data_loader import GithubLoader
 
101
  loader = GithubLoader()
102
  return loader.load()
103
 
104
+ def _indexing(self, docs: [Document]):
105
  from llama_index import GPTVectorStoreIndex
106
+ index = GPTVectorStoreIndex.from_documents(docs,
107
+ service_context=self.service_context_manager.get_service_context())
 
 
108
  index.storage_context.persist(persist_dir=self.dataset_path)
109
  self.storage_context = index.storage_context
llama/data_loader.py CHANGED
@@ -16,10 +16,10 @@ class WikiLoader(ABC):
16
 
17
  class GithubLoader(WikiLoader):
18
  def __init__(
19
- self,
20
- github_owner: Optional[str] = None,
21
- repo: Optional[str] = None,
22
- dirs: Optional[Sequence[str]] = None,
23
  ):
24
  super().__init__()
25
  self.owner = (
@@ -51,8 +51,7 @@ class GithubLoader(WikiLoader):
51
  verbose=True,
52
  concurrent_requests=10,
53
  )
54
- os.environ["http_proxy"] = "http://127.0.0.1:7890"
55
- os.environ["https_proxy"] = "http://127.0.0.1:7890"
56
  docs = loader.load_data(branch="master")
57
 
58
  with open("docs/docs.pkl", "wb") as f:
 
16
 
17
  class GithubLoader(WikiLoader):
18
  def __init__(
19
+ self,
20
+ github_owner: Optional[str] = None,
21
+ repo: Optional[str] = None,
22
+ dirs: Optional[Sequence[str]] = None,
23
  ):
24
  super().__init__()
25
  self.owner = (
 
51
  verbose=True,
52
  concurrent_requests=10,
53
  )
54
+
 
55
  docs = loader.load_data(branch="master")
56
 
57
  with open("docs/docs.pkl", "wb") as f:
llama/index.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from core.lifecycle import Lifecycle
2
+ from llama.context import ServiceContextManager
3
+ from llama_index.indices.vector_store import VectorStoreIndex
4
+ from typing import Optional
5
+
6
+
7
class IndexManager(Lifecycle):
    """Lifecycle wrapper holding a lazily-created VectorStoreIndex."""

    # None until the index is built during the lifecycle start phase.
    index: Optional[VectorStoreIndex]

    def __init__(self, context_manager: ServiceContextManager) -> None:
        super().__init__()
        self.index = None
        self.context_manager = context_manager

    def get_index(self) -> Optional[VectorStoreIndex]:
        """Return the index; only valid once the manager has been started.

        Raises:
            RuntimeError: if the lifecycle has not reached the started state.
        """
        # RuntimeError instead of bare Exception so callers can catch
        # something narrower; existing ``except Exception`` handlers still
        # work since RuntimeError subclasses Exception.
        if not self.lifecycle_state.is_started():
            raise RuntimeError("Lifecycle state is not correct")
        return self.index
llama/service_context.py DELETED
@@ -1,142 +0,0 @@
1
- from abc import abstractmethod, ABC
2
-
3
- from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding
4
-
5
- from core.lifecycle import Lifecycle
6
- from langchain_manager.manager import BaseLangChainManager
7
-
8
-
9
- # def get_callback_manager() -> CallbackManager:
10
- # from llama_index.callbacks import (
11
- # WandbCallbackHandler,
12
- # CallbackManager,
13
- # LlamaDebugHandler,
14
- # )
15
- # llama_debug = LlamaDebugHandler(print_trace_on_end=True)
16
- # # wandb.init args
17
- # run_args = dict(
18
- # project="llamaindex",
19
- # )
20
- # wandb_callback = WandbCallbackHandler(run_args=run_args)
21
- # return CallbackManager([llama_debug, wandb_callback])
22
-
23
-
24
- class ServiceContextManager(Lifecycle, ABC):
25
- @abstractmethod
26
- def get_service_context(self) -> ServiceContext:
27
- pass
28
-
29
-
30
- class AzureServiceContextManager(ServiceContextManager):
31
- lc_manager: BaseLangChainManager
32
- service_context: ServiceContext
33
-
34
- def __init__(self, lc_manager: BaseLangChainManager):
35
- super().__init__()
36
- self.lc_manager = lc_manager
37
-
38
- def get_service_context(self) -> ServiceContext:
39
- if self.service_context is None:
40
- raise ValueError(
41
- "service context is not ready, check for lifecycle statement"
42
- )
43
- return self.service_context
44
-
45
- def do_init(self) -> None:
46
- # define embedding
47
- embedding = LangchainEmbedding(self.lc_manager.get_embedding())
48
- # define LLM
49
- llm_predictor = LLMPredictor(llm=self.lc_manager.get_llm())
50
- # configure service context
51
- self.service_context = ServiceContext.from_defaults(
52
- llm_predictor=llm_predictor,
53
- embed_model=embedding,
54
- # callback_manager=get_callback_manager(),
55
- )
56
-
57
- def do_start(self) -> None:
58
- self.logger.info(
59
- "[do_start][embedding] last used usage: %d",
60
- self.service_context.embed_model.total_tokens_used,
61
- )
62
- self.logger.info(
63
- "[do_start][predict] last used usage: %d",
64
- self.service_context.llm_predictor.total_tokens_used,
65
- )
66
-
67
- def do_stop(self) -> None:
68
- self.logger.info(
69
- "[do_stop][embedding] last used usage: %d",
70
- self.service_context.embed_model.total_tokens_used,
71
- )
72
- self.logger.info(
73
- "[do_stop][predict] last used usage: %d",
74
- self.service_context.llm_predictor.total_tokens_used,
75
- )
76
-
77
- def do_dispose(self) -> None:
78
- self.logger.info(
79
- "[do_dispose] total used token: %d",
80
- self.service_context.llm_predictor.total_tokens_used,
81
- )
82
-
83
-
84
- class HuggingFaceChineseOptServiceContextManager(ServiceContextManager):
85
- lc_manager: BaseLangChainManager
86
- service_context: ServiceContext
87
-
88
- def __init__(self, lc_manager: BaseLangChainManager):
89
- super().__init__()
90
- self.lc_manager = lc_manager
91
-
92
- def get_service_context(self) -> ServiceContext:
93
- if self.service_context is None:
94
- raise ValueError(
95
- "service context is not ready, check for lifecycle statement"
96
- )
97
- return self.service_context
98
-
99
- def do_init(self) -> None:
100
- # define embedding
101
- from langchain.embeddings import HuggingFaceEmbeddings
102
-
103
- model_name = "GanymedeNil/text2vec-large-chinese"
104
- hf_embedding = HuggingFaceEmbeddings(
105
- model_name=model_name, model_kwargs={"device": "cpu"}
106
- )
107
-
108
- embedding = LangchainEmbedding(hf_embedding)
109
- # define LLM
110
- llm_predictor = LLMPredictor(self.lc_manager.get_llm())
111
- # configure service context
112
- self.service_context = ServiceContext.from_defaults(
113
- llm_predictor=llm_predictor,
114
- embed_model=embedding,
115
- # callback_manager=get_callback_manager()
116
- )
117
-
118
- def do_start(self) -> None:
119
- self.logger.info(
120
- "[do_start][embedding] last used usage: %d",
121
- self.service_context.embed_model.total_tokens_used,
122
- )
123
- self.logger.info(
124
- "[do_start][predict] last used usage: %d",
125
- self.service_context.llm_predictor.total_tokens_used,
126
- )
127
-
128
- def do_stop(self) -> None:
129
- self.logger.info(
130
- "[do_stop][embedding] last used usage: %d",
131
- self.service_context.embed_model.total_tokens_used,
132
- )
133
- self.logger.info(
134
- "[do_stop][predict] last used usage: %d",
135
- self.service_context.llm_predictor.total_tokens_used,
136
- )
137
-
138
- def do_dispose(self) -> None:
139
- self.logger.info(
140
- "[do_dispose] total used token: %d",
141
- self.service_context.llm_predictor.total_tokens_used,
142
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llama/vector_storage.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from core.lifecycle import Lifecycle
2
+
3
+
4
class VectorStorageManager(Lifecycle):
    """Placeholder lifecycle component for a future vector storage backend.

    All lifecycle hooks are currently no-ops.
    """

    def __init__(self) -> None:
        super().__init__()

    def do_init(self) -> None:
        # No initialization required yet.
        pass

    def do_start(self) -> None:
        pass

    def do_stop(self) -> None:
        pass

    def do_dispose(self) -> None:
        pass
local-requirements.txt DELETED
@@ -1 +0,0 @@
1
- python-dotenv
 
 
requirements.txt CHANGED
@@ -1,10 +1,6 @@
1
- llama_index>=0.6.3
2
- llama_hub
3
- streamlit
4
- ruff
5
- black
6
- mypy
7
- accelerate
8
- python-dotenv
9
- sentence_transformers
10
- wandb
 
1
+ llama_index>=0.6.3
2
+ llama_hub
3
+
4
+ ruff
5
+ black
6
+ mypy
 
 
 
 
xpipe_wiki/manager_factory.py CHANGED
@@ -4,79 +4,39 @@ import os
4
  from core.helper import LifecycleHelper
5
  from xpipe_wiki.robot_manager import XPipeWikiRobotManager, AzureXPipeWikiRobotManager
6
 
7
- from multiprocessing import Lock
8
-
9
- lock = Lock()
10
-
11
 
12
  class XPipeRobotRevision(enum.Enum):
13
  SIMPLE_OPENAI_VERSION_0 = 1
14
- HUGGINGFACE_VERSION_0 = 2
15
 
16
 
17
- class XPipeRobotManagerFactory:
18
- """
19
- CAPABLE: Dict[XPipeRobotRevision, XPipeWikiRobotManager] =
20
- {XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0: XPipeWikiRobotManager()}
21
- """
22
 
23
- CAPABLE = dict() # type: dict[XPipeRobotRevision, XPipeWikiRobotManager]
24
 
25
  @classmethod
26
  def get_or_create(cls, revision: XPipeRobotRevision) -> XPipeWikiRobotManager:
27
- with lock:
28
- if cls.CAPABLE.get(revision) is not None:
29
- return cls.CAPABLE[revision]
30
- if revision == XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0:
31
- manager = cls.create_simple_openai_version_0()
32
- elif revision == XPipeRobotRevision.HUGGINGFACE_VERSION_0:
33
- manager = cls.create_huggingface_version_0()
34
- cls.CAPABLE[revision] = manager
35
- return manager
36
 
37
  @classmethod
38
  def create_simple_openai_version_0(cls) -> AzureXPipeWikiRobotManager:
39
- from llama.service_context import AzureServiceContextManager
40
- from langchain_manager.manager import LangChainAzureManager
41
-
42
- service_context_manager = AzureServiceContextManager(
43
- lc_manager=LangChainAzureManager()
44
- )
45
- from llama.storage_context import LocalStorageContextManager
46
-
47
- dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
48
- storage_context_manager = LocalStorageContextManager(
49
- dataset_path=dataset_path, service_context_manager=service_context_manager
50
- )
51
-
52
- robot_manager = AzureXPipeWikiRobotManager(
53
- service_context_manager=service_context_manager,
54
- storage_context_manager=storage_context_manager,
55
- )
56
- LifecycleHelper.initialize_if_possible(robot_manager)
57
- LifecycleHelper.start_if_possible(robot_manager)
58
- return robot_manager
59
 
60
- @classmethod
61
- def create_huggingface_version_0(cls) -> AzureXPipeWikiRobotManager:
62
- from llama.service_context import HuggingFaceChineseOptServiceContextManager
63
  from langchain_manager.manager import LangChainAzureManager
 
64
 
65
- service_context_manager = HuggingFaceChineseOptServiceContextManager(
66
- lc_manager=LangChainAzureManager()
67
- )
68
-
69
- from llama.storage_context import LocalStorageContextManager
70
-
71
  dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
72
- storage_context_manager = LocalStorageContextManager(
73
- dataset_path=dataset_path, service_context_manager=service_context_manager
74
- )
75
 
76
- robot_manager = AzureXPipeWikiRobotManager(
77
- service_context_manager=service_context_manager,
78
- storage_context_manager=storage_context_manager,
79
- )
80
  LifecycleHelper.initialize_if_possible(robot_manager)
81
  LifecycleHelper.start_if_possible(robot_manager)
82
  return robot_manager
 
4
  from core.helper import LifecycleHelper
5
  from xpipe_wiki.robot_manager import XPipeWikiRobotManager, AzureXPipeWikiRobotManager
6
 
 
 
 
 
7
 
8
  class XPipeRobotRevision(enum.Enum):
9
  SIMPLE_OPENAI_VERSION_0 = 1
 
10
 
11
 
12
# Module-level cache of robot managers, one per revision.
# BUG FIX: the original bound CAPABLE to the generic-alias *type*
# ``dict[XPipeRobotRevision, XPipeWikiRobotManager]`` rather than a dict
# instance, so ``CAPABLE.get(...)`` and ``CAPABLE[revision] = ...`` failed
# at runtime.  Bind an empty dict and keep the alias as the annotation.
CAPABLE: dict[XPipeRobotRevision, XPipeWikiRobotManager] = {}


class XPipeRobotManagerFactory:
    """Creates and caches XPipeWikiRobotManager instances per revision."""

    @classmethod
    def get_or_create(cls, revision: XPipeRobotRevision) -> XPipeWikiRobotManager:
        """Return the cached manager for *revision*, creating it on first use.

        NOTE(review): not thread-safe — concurrent first calls may build the
        manager twice (a deleted version of this file held a lock here).

        Raises:
            ValueError: if *revision* has no registered builder (the original
                left ``manager`` unbound and raised NameError instead).
        """
        cached = CAPABLE.get(revision)
        if cached is not None:
            return cached
        if revision == XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0:
            manager = cls.create_simple_openai_version_0()
        else:
            raise ValueError("unsupported revision: {}".format(revision))
        CAPABLE[revision] = manager
        return manager

    @classmethod
    def create_simple_openai_version_0(cls) -> AzureXPipeWikiRobotManager:
        """Build, initialize and start an Azure-backed wiki robot manager."""
        from llama.context import AzureServiceContextManager, LocalStorageContextManager
        from langchain_manager.manager import LangChainAzureManager

        service_context_manager = AzureServiceContextManager(
            lc_manager=LangChainAzureManager()
        )

        # Dataset location is overridable via the environment.
        dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
        storage_context_manager = LocalStorageContextManager(
            dataset_path=dataset_path,
            service_context_manager=service_context_manager,
        )

        robot_manager = AzureXPipeWikiRobotManager(
            service_context_manager=service_context_manager,
            storage_context_manager=storage_context_manager,
        )
        LifecycleHelper.initialize_if_possible(robot_manager)
        LifecycleHelper.start_if_possible(robot_manager)
        return robot_manager
xpipe_wiki/robot_manager.py CHANGED
@@ -3,12 +3,11 @@ from typing import Any
3
 
4
  from llama_index import load_index_from_storage
5
  from llama_index.indices.query.base import BaseQueryEngine
6
- from llama_index.indices.response import ResponseMode
7
 
8
  from core.helper import LifecycleHelper
9
  from core.lifecycle import Lifecycle
10
- from llama.service_context import ServiceContextManager
11
- from llama.storage_context import StorageContextManager
12
 
13
 
14
  class XPipeWikiRobot(ABC):
@@ -17,42 +16,30 @@ class XPipeWikiRobot(ABC):
17
  pass
18
 
19
 
 
20
  class AzureOpenAIXPipeWikiRobot(XPipeWikiRobot):
21
  query_engine: BaseQueryEngine
22
 
23
- def __init__(self, query_engine: BaseQueryEngine) -> None:
24
- super().__init__()
25
- self.query_engine = query_engine
26
-
27
  def ask(self, question: str) -> Any:
28
- print("question: ", question)
29
- response = self.query_engine.query(question)
30
- print("response type: ", type(response))
31
- return response.__str__()
32
 
33
 
34
  class XPipeWikiRobotManager(Lifecycle):
 
35
  @abstractmethod
36
  def get_robot(self) -> XPipeWikiRobot:
37
  pass
38
 
39
 
 
40
  class AzureXPipeWikiRobotManager(XPipeWikiRobotManager):
41
  service_context_manager: ServiceContextManager
42
  storage_context_manager: StorageContextManager
43
- query_engine: BaseQueryEngine
44
-
45
- def __init__(
46
- self,
47
- service_context_manager: ServiceContextManager,
48
- storage_context_manager: StorageContextManager,
49
- ) -> None:
50
- super().__init__()
51
- self.service_context_manager = service_context_manager
52
- self.storage_context_manager = storage_context_manager
53
 
54
  def get_robot(self) -> XPipeWikiRobot:
55
- return AzureOpenAIXPipeWikiRobot(self.query_engine)
 
 
56
 
57
  def do_init(self) -> None:
58
  LifecycleHelper.initialize_if_possible(self.service_context_manager)
@@ -61,14 +48,6 @@ class AzureXPipeWikiRobotManager(XPipeWikiRobotManager):
61
  def do_start(self) -> None:
62
  LifecycleHelper.start_if_possible(self.service_context_manager)
63
  LifecycleHelper.start_if_possible(self.storage_context_manager)
64
- index = load_index_from_storage(
65
- storage_context=self.storage_context_manager.get_storage_context(),
66
- service_context=self.service_context_manager.get_service_context(),
67
- )
68
- self.query_engine = index.as_query_engine(
69
- service_context=self.service_context_manager.get_service_context(),
70
- response_mode=ResponseMode.TREE_SUMMARIZE,
71
- )
72
 
73
  def do_stop(self) -> None:
74
  LifecycleHelper.stop_if_possible(self.storage_context_manager)
 
3
 
4
  from llama_index import load_index_from_storage
5
  from llama_index.indices.query.base import BaseQueryEngine
6
+ from pydantic import dataclasses
7
 
8
  from core.helper import LifecycleHelper
9
  from core.lifecycle import Lifecycle
10
+ from llama.context import ServiceContextManager, StorageContextManager
 
11
 
12
 
13
  class XPipeWikiRobot(ABC):
 
16
  pass
17
 
18
 
19
# BUG FIX: the original decorated the class with ``@dataclasses`` — the
# imported pydantic ``dataclasses`` *module* — which raises a TypeError at
# class-creation time.  The decorator is ``dataclasses.dataclass``.
@dataclasses.dataclass
class AzureOpenAIXPipeWikiRobot(XPipeWikiRobot):
    """Wiki robot that answers questions through a llama-index query engine."""

    query_engine: BaseQueryEngine

    def ask(self, question: str) -> Any:
        """Run *question* through the query engine and return its response."""
        return self.query_engine.query(question)
 
 
 
25
 
26
 
27
class XPipeWikiRobotManager(Lifecycle):
    """Lifecycle-managed factory for XPipeWikiRobot instances."""

    @abstractmethod
    def get_robot(self) -> XPipeWikiRobot:
        """Return a robot ready to answer wiki questions."""
        pass
32
 
33
 
34
+ @dataclasses
35
  class AzureXPipeWikiRobotManager(XPipeWikiRobotManager):
36
  service_context_manager: ServiceContextManager
37
  storage_context_manager: StorageContextManager
 
 
 
 
 
 
 
 
 
 
38
 
39
    def get_robot(self) -> XPipeWikiRobot:
        """Build a robot from the managed storage and service contexts.

        NOTE(review): a fresh index and query engine are constructed on
        every call — confirm callers cache the robot if this path is hot.
        """
        index = load_index_from_storage(storage_context=self.storage_context_manager.get_storage_context())
        query_engine = index.as_query_engine(service_context=self.service_context_manager.get_service_context())
        return AzureOpenAIXPipeWikiRobot(query_engine)
43
 
44
  def do_init(self) -> None:
45
  LifecycleHelper.initialize_if_possible(self.service_context_manager)
 
48
  def do_start(self) -> None:
49
  LifecycleHelper.start_if_possible(self.service_context_manager)
50
  LifecycleHelper.start_if_possible(self.storage_context_manager)
 
 
 
 
 
 
 
 
51
 
52
  def do_stop(self) -> None:
53
  LifecycleHelper.stop_if_possible(self.storage_context_manager)