add chinese-large embedding optimize

#4
by NickNYU - opened
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -56,7 +56,6 @@ coverage.xml
56
  .hypothesis/
57
  .pytest_cache/
58
  .ruff_cache
59
- wandb/
60
 
61
  # Translations
62
  *.mo
 
56
  .hypothesis/
57
  .pytest_cache/
58
  .ruff_cache
 
59
 
60
  # Translations
61
  *.mo
.idea/.gitignore DELETED
@@ -1,8 +0,0 @@
1
- # Default ignored files
2
- /shelf/
3
- /workspace.xml
4
- # Editor-based HTTP Client requests
5
- /httpRequests/
6
- # Datasource local storage ignored files
7
- /dataSources/
8
- /dataSources.local.xml
 
 
 
 
 
 
 
 
 
.idea/inspectionProfiles/profiles_settings.xml DELETED
@@ -1,6 +0,0 @@
1
- <component name="InspectionProjectProfileManager">
2
- <settings>
3
- <option name="USE_PROJECT_PROFILE" value="false" />
4
- <version value="1.0" />
5
- </settings>
6
- </component>
 
 
 
 
 
 
 
.idea/llama-xpipe.iml DELETED
@@ -1,11 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <module type="PYTHON_MODULE" version="4">
3
- <component name="NewModuleRootManager">
4
- <content url="file://$MODULE_DIR$">
5
- <excludeFolder url="file://$MODULE_DIR$/.venv" />
6
- <excludeFolder url="file://$MODULE_DIR$/venv" />
7
- </content>
8
- <orderEntry type="inheritedJdk" />
9
- <orderEntry type="sourceFolder" forTests="false" />
10
- </component>
11
- </module>
 
 
 
 
 
 
 
 
 
 
 
 
.idea/misc.xml DELETED
@@ -1,4 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (llama-xpipe)" project-jdk-type="Python SDK" />
4
- </project>
 
 
 
 
 
.idea/modules.xml DELETED
@@ -1,8 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="ProjectModuleManager">
4
- <modules>
5
- <module fileurl="file://$PROJECT_DIR$/.idea/llama-xpipe.iml" filepath="$PROJECT_DIR$/.idea/llama-xpipe.iml" />
6
- </modules>
7
- </component>
8
- </project>
 
 
 
 
 
 
 
 
 
.idea/vcs.xml DELETED
@@ -1,6 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="VcsDirectoryMappings">
4
- <mapping directory="" vcs="Git" />
5
- </component>
6
- </project>
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,45 +1,43 @@
1
- import logging
2
- import sys
3
-
4
- import streamlit as st
5
- from dotenv import load_dotenv
6
-
7
- from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision
8
-
9
- logging.basicConfig(
10
- stream=sys.stdout, level=logging.INFO
11
- ) # logging.DEBUG for more verbose output
12
- # logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
13
-
14
- # # Sidebar contents
15
- with st.sidebar:
16
- st.title("🤗💬 LLM Chat App")
17
- st.markdown(
18
- """
19
- ## About
20
- This app is an LLM-powered chatbot built using:
21
- - [Streamlit](https://streamlit.io/)
22
- - [LangChain](https://python.langchain.com/)
23
- - [X-Pipe](https://github.com/ctripcorp/x-pipe)
24
- """
25
- )
26
- # add_vertical_space(5)
27
- st.write("Made by Nick")
28
-
29
-
30
- def main() -> None:
31
- st.header("X-Pipe Wiki 机器人 💬")
32
-
33
- robot_manager = XPipeRobotManagerFactory.get_or_create(
34
- XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0
35
- )
36
- robot = robot_manager.get_robot()
37
- query = st.text_input("X-Pipe Wiki 问题:")
38
- if query:
39
- response = robot.ask(question=query)
40
- st.write(response)
41
-
42
-
43
- if __name__ == "__main__":
44
- load_dotenv()
45
- main()
 
1
+ import logging
2
+ import sys
3
+
4
+ import streamlit as st
5
+
6
+ from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision
7
+ from xpipe_wiki.robot_manager import XPipeWikiRobot, AzureOpenAIXPipeWikiRobot
8
+
9
+ logging.basicConfig(
10
+ stream=sys.stdout, level=logging.DEBUG
11
+ ) # logging.DEBUG for more verbose output
12
+ logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
13
+
14
+ # Sidebar contents
15
+ with st.sidebar:
16
+ st.title("🤗💬 LLM Chat App")
17
+ st.markdown(
18
+ """
19
+ ## About
20
+ This app is an LLM-powered chatbot built using:
21
+ - [Streamlit](https://streamlit.io/)
22
+ - [LangChain](https://python.langchain.com/)
23
+ - [X-Pipe](https://github.com/ctripcorp/x-pipe)
24
+ """
25
+ )
26
+ # add_vertical_space(5)
27
+ st.write("Made by Nick")
28
+
29
+
30
+ def main() -> None:
31
+ st.header("X-Pipe Wiki 机器人 💬")
32
+ robot_manager = XPipeRobotManagerFactory.get_or_create(
33
+ XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0
34
+ )
35
+ robot = robot_manager.get_robot()
36
+ query = st.text_input("X-Pipe Wiki 问题:")
37
+ if query:
38
+ response = robot.ask(question=query)
39
+ st.write(response)
40
+
41
+
42
+ if __name__ == "__main__":
43
+ main()
 
 
core/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/core/__pycache__/__init__.cpython-310.pyc and b/core/__pycache__/__init__.cpython-310.pyc differ
 
core/__pycache__/lifecycle.cpython-310.pyc CHANGED
Binary files a/core/__pycache__/lifecycle.cpython-310.pyc and b/core/__pycache__/lifecycle.cpython-310.pyc differ
 
core/__pycache__/logger_factory.cpython-310.pyc CHANGED
Binary files a/core/__pycache__/logger_factory.cpython-310.pyc and b/core/__pycache__/logger_factory.cpython-310.pyc differ
 
core/helper.py CHANGED
@@ -2,30 +2,23 @@ from core.lifecycle import Lifecycle
2
 
3
 
4
  class LifecycleHelper:
 
5
  @classmethod
6
  def initialize_if_possible(cls, ls: Lifecycle) -> None:
7
- if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_initialize(
8
- ls.lifecycle_state.phase
9
- ):
10
  ls.initialize()
11
 
12
  @classmethod
13
  def start_if_possible(cls, ls: Lifecycle) -> None:
14
- if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_start(
15
- ls.lifecycle_state.phase
16
- ):
17
  ls.start()
18
 
19
  @classmethod
20
  def stop_if_possible(cls, ls: Lifecycle) -> None:
21
- if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_stop(
22
- ls.lifecycle_state.phase
23
- ):
24
  ls.stop()
25
 
26
  @classmethod
27
  def dispose_if_possible(cls, ls: Lifecycle) -> None:
28
- if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_dispose(
29
- ls.lifecycle_state.phase
30
- ):
31
  ls.dispose()
 
2
 
3
 
4
  class LifecycleHelper:
5
+
6
  @classmethod
7
  def initialize_if_possible(cls, ls: Lifecycle) -> None:
8
+ if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_initialize(ls.lifecycle_state.phase):
 
 
9
  ls.initialize()
10
 
11
  @classmethod
12
  def start_if_possible(cls, ls: Lifecycle) -> None:
13
+ if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_start(ls.lifecycle_state.phase):
 
 
14
  ls.start()
15
 
16
  @classmethod
17
  def stop_if_possible(cls, ls: Lifecycle) -> None:
18
+ if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_stop(ls.lifecycle_state.phase):
 
 
19
  ls.stop()
20
 
21
  @classmethod
22
  def dispose_if_possible(cls, ls: Lifecycle) -> None:
23
+ if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_dispose(ls.lifecycle_state.phase):
 
 
24
  ls.dispose()
core/lifecycle.py CHANGED
@@ -1,184 +1,184 @@
1
- import enum
2
- from abc import ABC, abstractmethod
3
- from typing import TypeVar, Optional
4
-
5
- from core import logger_factory
6
-
7
-
8
- class Initializable(ABC):
9
- @abstractmethod
10
- def initialize(self) -> None:
11
- pass
12
-
13
-
14
- class Startable(ABC):
15
- @abstractmethod
16
- def start(self) -> None:
17
- pass
18
-
19
-
20
- class Stoppable(ABC):
21
- @abstractmethod
22
- def stop(self) -> None:
23
- pass
24
-
25
-
26
- class Disposable(ABC):
27
- @abstractmethod
28
- def dispose(self) -> None:
29
- pass
30
-
31
-
32
- class LifecycleAware(ABC):
33
- def __init__(self, state: "LifecycleState") -> None:
34
- """
35
- Args:
36
- state(LifecycleState): lifecycle state
37
- """
38
- self.state = state
39
-
40
- def get_lifecycle_state(self) -> "LifecycleState":
41
- return self.state
42
-
43
-
44
- class Lifecycle(Initializable, Startable, Stoppable, Disposable, LifecycleAware, ABC):
45
- def __init__(self) -> None:
46
- self.logger = logger_factory.get_logger(self.__class__.__name__)
47
- self.lifecycle_state = LifecycleState(lifecycle=self)
48
-
49
- def initialize(self) -> None:
50
- if not self.lifecycle_state.can_initialize(self.lifecycle_state.get_phase()):
51
- self.logger.warning("[{}]cannot initialize".format(self.__class__.__name__))
52
- return
53
- self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZING)
54
- self.do_init()
55
- self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZED)
56
-
57
- def start(self) -> None:
58
- if not self.lifecycle_state.can_start(self.lifecycle_state.get_phase()):
59
- self.logger.warning("[{}]cannot start".format(self.__class__.__name__))
60
- return
61
- self.lifecycle_state.set_phase(LifecyclePhase.STARTING)
62
- self.do_start()
63
- self.lifecycle_state.set_phase(LifecyclePhase.STARTED)
64
-
65
- def stop(self) -> None:
66
- if not self.lifecycle_state.can_stop(self.lifecycle_state.get_phase()):
67
- self.logger.warning("[{}]cannot stop".format(self.__class__.__name__))
68
- return
69
- self.lifecycle_state.set_phase(LifecyclePhase.STOPPING)
70
- self.do_stop()
71
- self.lifecycle_state.set_phase(LifecyclePhase.STOPPED)
72
-
73
- def dispose(self) -> None:
74
- if not self.lifecycle_state.can_dispose(self.lifecycle_state.get_phase()):
75
- self.logger.warning("[{}]cannot dispose".format(self.__class__.__name__))
76
- return
77
- self.lifecycle_state.set_phase(LifecyclePhase.DISPOSING)
78
- self.do_dispose()
79
- self.lifecycle_state.set_phase(LifecyclePhase.DISPOSED)
80
-
81
- @abstractmethod
82
- def do_init(self) -> None:
83
- pass
84
-
85
- @abstractmethod
86
- def do_start(self) -> None:
87
- pass
88
-
89
- @abstractmethod
90
- def do_stop(self) -> None:
91
- pass
92
-
93
- @abstractmethod
94
- def do_dispose(self) -> None:
95
- pass
96
-
97
-
98
- class LifecyclePhase(enum.Enum):
99
- INITIALIZING = 1
100
- INITIALIZED = 2
101
- STARTING = 3
102
- STARTED = 4
103
- STOPPING = 5
104
- STOPPED = 6
105
- DISPOSING = 7
106
- DISPOSED = 8
107
-
108
-
109
- class LifecycleController(ABC):
110
- def can_initialize(self, phase: Optional[LifecyclePhase]) -> bool:
111
- return phase is None or phase == LifecyclePhase.DISPOSED
112
-
113
- def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
114
- return phase is not None and (
115
- phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
116
- )
117
-
118
- def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
119
- return phase is not None and phase == LifecyclePhase.STARTED
120
-
121
- def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
122
- return phase is not None and (
123
- phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
124
- )
125
-
126
-
127
- LS = TypeVar("LS", bound=Lifecycle)
128
-
129
-
130
- class LifecycleState(LifecycleController, ABC):
131
- phase: Optional[LifecyclePhase]
132
-
133
- def __init__(self, lifecycle: LS) -> None:
134
- self.phase = None
135
- self.prev_phase = None
136
- self.lifecycle = lifecycle
137
- self.logger = logger_factory.get_logger(__name__)
138
-
139
- def is_initializing(self) -> bool:
140
- return self.phase == LifecyclePhase.INITIALIZING
141
-
142
- def is_initialized(self) -> bool:
143
- return self.phase == LifecyclePhase.INITIALIZED
144
-
145
- def is_starting(self) -> bool:
146
- return self.phase == LifecyclePhase.STARTING
147
-
148
- def is_started(self) -> bool:
149
- return self.phase == LifecyclePhase.STARTED
150
-
151
- def is_stopping(self) -> bool:
152
- return self.phase == LifecyclePhase.STOPPING
153
-
154
- def is_stopped(self) -> bool:
155
- return self.phase == LifecyclePhase.STOPPED
156
-
157
- def is_disposing(self) -> bool:
158
- return self.phase == LifecyclePhase.DISPOSING
159
-
160
- def is_disposed(self) -> bool:
161
- return self.phase == LifecyclePhase.DISPOSED
162
-
163
- def get_phase(self) -> Optional[LifecyclePhase]:
164
- return self.phase
165
-
166
- def set_phase(self, phase: Optional[LifecyclePhase]) -> None:
167
- prev = "None"
168
- if self.phase is not None:
169
- prev = self.phase.name
170
- current = "None"
171
- if phase is not None:
172
- current = phase.name
173
- self.logger.info(
174
- "[setPhaseName][{}]{} --> {}".format(
175
- self.lifecycle.__class__.__name__,
176
- prev,
177
- current,
178
- )
179
- )
180
- self.phase = phase
181
-
182
- def rollback(self, err: Exception) -> None:
183
- self.phase = self.prev_phase
184
- self.prev_phase = None
 
1
+ import enum
2
+ from abc import ABC, abstractmethod
3
+ from typing import TypeVar, Optional
4
+
5
+ from core import logger_factory
6
+
7
+
8
+ class Initializable(ABC):
9
+ @abstractmethod
10
+ def initialize(self) -> None:
11
+ pass
12
+
13
+
14
+ class Startable(ABC):
15
+ @abstractmethod
16
+ def start(self) -> None:
17
+ pass
18
+
19
+
20
+ class Stoppable(ABC):
21
+ @abstractmethod
22
+ def stop(self) -> None:
23
+ pass
24
+
25
+
26
+ class Disposable(ABC):
27
+ @abstractmethod
28
+ def dispose(self) -> None:
29
+ pass
30
+
31
+
32
+ class LifecycleAware(ABC):
33
+ def __init__(self, state: "LifecycleState") -> None:
34
+ """
35
+ Args:
36
+ state(LifecycleState): lifecycle state
37
+ """
38
+ self.state = state
39
+
40
+ def get_lifecycle_state(self) -> "LifecycleState":
41
+ return self.state
42
+
43
+
44
+ class Lifecycle(Initializable, Startable, Stoppable, Disposable, LifecycleAware, ABC):
45
+ def __init__(self) -> None:
46
+ self.logger = logger_factory.get_logger(self.__class__.__name__)
47
+ self.lifecycle_state = LifecycleState(lifecycle=self)
48
+
49
+ def initialize(self) -> None:
50
+ if not self.lifecycle_state.can_initialize(self.lifecycle_state.get_phase()):
51
+ self.logger.warning("[{}]cannot initialize".format(self.__class__.__name__))
52
+ return
53
+ self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZING)
54
+ self.do_init()
55
+ self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZED)
56
+
57
+ def start(self) -> None:
58
+ if not self.lifecycle_state.can_start(self.lifecycle_state.get_phase()):
59
+ self.logger.warning("[{}]cannot start".format(self.__class__.__name__))
60
+ return
61
+ self.lifecycle_state.set_phase(LifecyclePhase.STARTING)
62
+ self.do_start()
63
+ self.lifecycle_state.set_phase(LifecyclePhase.STARTED)
64
+
65
+ def stop(self) -> None:
66
+ if not self.lifecycle_state.can_stop(self.lifecycle_state.get_phase()):
67
+ self.logger.warning("[{}]cannot stop".format(self.__class__.__name__))
68
+ return
69
+ self.lifecycle_state.set_phase(LifecyclePhase.STOPPING)
70
+ self.do_stop()
71
+ self.lifecycle_state.set_phase(LifecyclePhase.STOPPED)
72
+
73
+ def dispose(self) -> None:
74
+ if not self.lifecycle_state.can_dispose(self.lifecycle_state.get_phase()):
75
+ self.logger.warning("[{}]cannot dispose".format(self.__class__.__name__))
76
+ return
77
+ self.lifecycle_state.set_phase(LifecyclePhase.DISPOSING)
78
+ self.do_dispose()
79
+ self.lifecycle_state.set_phase(LifecyclePhase.DISPOSED)
80
+
81
+ @abstractmethod
82
+ def do_init(self) -> None:
83
+ pass
84
+
85
+ @abstractmethod
86
+ def do_start(self) -> None:
87
+ pass
88
+
89
+ @abstractmethod
90
+ def do_stop(self) -> None:
91
+ pass
92
+
93
+ @abstractmethod
94
+ def do_dispose(self) -> None:
95
+ pass
96
+
97
+
98
+ class LifecyclePhase(enum.Enum):
99
+ INITIALIZING = 1
100
+ INITIALIZED = 2
101
+ STARTING = 3
102
+ STARTED = 4
103
+ STOPPING = 5
104
+ STOPPED = 6
105
+ DISPOSING = 7
106
+ DISPOSED = 8
107
+
108
+
109
+ class LifecycleController(ABC):
110
+ def can_initialize(self, phase: Optional[LifecyclePhase]) -> bool:
111
+ return phase is None or phase == LifecyclePhase.DISPOSED
112
+
113
+ def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
114
+ return phase is not None and (
115
+ phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
116
+ )
117
+
118
+ def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
119
+ return phase is not None and phase == LifecyclePhase.STARTED
120
+
121
+ def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
122
+ return phase is not None and (
123
+ phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
124
+ )
125
+
126
+
127
+ LS = TypeVar("LS", bound=Lifecycle)
128
+
129
+
130
+ class LifecycleState(LifecycleController, ABC):
131
+ phase: Optional[LifecyclePhase]
132
+
133
+ def __init__(self, lifecycle: LS) -> None:
134
+ self.phase = None
135
+ self.prev_phase = None
136
+ self.lifecycle = lifecycle
137
+ self.logger = logger_factory.get_logger(__name__)
138
+
139
+ def is_initializing(self) -> bool:
140
+ return self.phase == LifecyclePhase.INITIALIZING
141
+
142
+ def is_initialized(self) -> bool:
143
+ return self.phase == LifecyclePhase.INITIALIZED
144
+
145
+ def is_starting(self) -> bool:
146
+ return self.phase == LifecyclePhase.STARTING
147
+
148
+ def is_started(self) -> bool:
149
+ return self.phase == LifecyclePhase.STARTED
150
+
151
+ def is_stopping(self) -> bool:
152
+ return self.phase == LifecyclePhase.STOPPING
153
+
154
+ def is_stopped(self) -> bool:
155
+ return self.phase == LifecyclePhase.STOPPED
156
+
157
+ def is_disposing(self) -> bool:
158
+ return self.phase == LifecyclePhase.DISPOSING
159
+
160
+ def is_disposed(self) -> bool:
161
+ return self.phase == LifecyclePhase.DISPOSED
162
+
163
+ def get_phase(self) -> Optional[LifecyclePhase]:
164
+ return self.phase
165
+
166
+ def set_phase(self, phase: Optional[LifecyclePhase]) -> None:
167
+ prev = "None"
168
+ if self.phase is not None:
169
+ prev = self.phase.name
170
+ current = "None"
171
+ if phase is not None:
172
+ current = phase.name
173
+ self.logger.info(
174
+ "[setPhaseName][{}]{} --> {}".format(
175
+ self.lifecycle.__class__.__name__,
176
+ prev,
177
+ current,
178
+ )
179
+ )
180
+ self.phase = phase
181
+
182
+ def rollback(self, err: Exception) -> None:
183
+ self.phase = self.prev_phase
184
+ self.prev_phase = None
core/test_lifecycle.py CHANGED
@@ -1,7 +1,10 @@
 
1
  from unittest import TestCase
2
 
3
  from core.lifecycle import Lifecycle
4
 
 
 
5
 
6
  class SubLifecycle(Lifecycle):
7
  def __init__(self) -> None:
 
1
+ import logging
2
  from unittest import TestCase
3
 
4
  from core.lifecycle import Lifecycle
5
 
6
+ logging.basicConfig()
7
+
8
 
9
  class SubLifecycle(Lifecycle):
10
  def __init__(self) -> None:
dataset/docstore.json CHANGED
The diff for this file is too large to render. See raw diff
 
dataset/index_store.json CHANGED
@@ -1 +1 @@
1
- {"index_store/data": {"da495c94-4541-47e1-b93f-8535192a5f28": {"__type__": "vector_store", "__data__": "{\"index_id\": \"da495c94-4541-47e1-b93f-8535192a5f28\", \"summary\": null, \"nodes_dict\": {\"59108663-a5e1-4e3e-bb21-626158eef136\": \"59108663-a5e1-4e3e-bb21-626158eef136\", \"50de4ec9-febb-466f-9f9a-cc9296895e83\": \"50de4ec9-febb-466f-9f9a-cc9296895e83\", \"aa413a53-0dda-4ac4-8ae9-6e8e340bb4f0\": \"aa413a53-0dda-4ac4-8ae9-6e8e340bb4f0\", \"a0cc4323-ec8f-4fed-9401-e44125134341\": \"a0cc4323-ec8f-4fed-9401-e44125134341\", \"5321cc7b-2a86-48b8-b56c-415dde7c149b\": \"5321cc7b-2a86-48b8-b56c-415dde7c149b\", \"9e19fb91-8258-4aca-9692-2d027073499e\": \"9e19fb91-8258-4aca-9692-2d027073499e\", \"02e856e5-4211-4a27-9204-e966907f1d74\": \"02e856e5-4211-4a27-9204-e966907f1d74\", \"f3074870-8fbf-4322-b1d2-2111e6aac9af\": \"f3074870-8fbf-4322-b1d2-2111e6aac9af\", \"82677fb9-abe3-4038-8263-5576c47da4f2\": \"82677fb9-abe3-4038-8263-5576c47da4f2\", \"a08364a6-c23d-4df5-8b5d-84137fbebd4e\": \"a08364a6-c23d-4df5-8b5d-84137fbebd4e\", \"e45b082d-c3ec-45aa-b630-6db49a62728b\": \"e45b082d-c3ec-45aa-b630-6db49a62728b\", \"2c55445c-04b1-4705-9871-adaa02f38f1b\": \"2c55445c-04b1-4705-9871-adaa02f38f1b\", \"d0de9736-ccad-450e-b4a1-49d4cdb8b941\": \"d0de9736-ccad-450e-b4a1-49d4cdb8b941\", \"fd0d2375-39e2-4bce-8e39-1182a122a1b4\": \"fd0d2375-39e2-4bce-8e39-1182a122a1b4\", \"13221de7-6c68-4367-b1be-f35b06fc3a74\": \"13221de7-6c68-4367-b1be-f35b06fc3a74\", \"9f448401-cda9-4b5f-9a80-c79e111f9963\": \"9f448401-cda9-4b5f-9a80-c79e111f9963\", \"3bc7dfc2-3ddf-4384-a60c-6cd52e1314f4\": \"3bc7dfc2-3ddf-4384-a60c-6cd52e1314f4\", \"ce3e530c-ce2d-4f5f-a171-72a790c3c624\": \"ce3e530c-ce2d-4f5f-a171-72a790c3c624\", \"85f764bd-e560-48ba-a51e-2287b6fe19db\": \"85f764bd-e560-48ba-a51e-2287b6fe19db\", \"3a8e4c7c-9f7d-4735-93e7-9d847cff98de\": \"3a8e4c7c-9f7d-4735-93e7-9d847cff98de\", \"af881b61-03f4-4851-8946-794015e3436c\": \"af881b61-03f4-4851-8946-794015e3436c\", 
\"31579820-439e-4029-b8c4-a0d6528daa59\": \"31579820-439e-4029-b8c4-a0d6528daa59\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
 
1
+ {"index_store/data": {"0928a9d6-bc3f-467f-9464-1d03e5aa155d": {"__type__": "vector_store", "__data__": "{\"index_id\": \"0928a9d6-bc3f-467f-9464-1d03e5aa155d\", \"summary\": null, \"nodes_dict\": {\"a763d37e-d6ef-42ad-b0ce-143ed9698d4b\": \"a763d37e-d6ef-42ad-b0ce-143ed9698d4b\", \"9d8b60d0-64b1-4074-8fb3-a33c961270ed\": \"9d8b60d0-64b1-4074-8fb3-a33c961270ed\", \"b051986d-d812-4326-b811-4ae17ae3c012\": \"b051986d-d812-4326-b811-4ae17ae3c012\", \"68d8c48a-1f9d-4b05-bf9a-e3b9490821d5\": \"68d8c48a-1f9d-4b05-bf9a-e3b9490821d5\", \"8cfea388-ca09-4fb3-88d5-8570f6231d28\": \"8cfea388-ca09-4fb3-88d5-8570f6231d28\", \"f1edfc04-f95c-40c4-8d95-e7cd42745b62\": \"f1edfc04-f95c-40c4-8d95-e7cd42745b62\", \"1ec3388c-c4e5-4706-a412-525b68481002\": \"1ec3388c-c4e5-4706-a412-525b68481002\", \"c2fcee2a-5c0d-4d26-86a0-273e87963874\": \"c2fcee2a-5c0d-4d26-86a0-273e87963874\", \"bd151167-8a9b-47e4-be4b-b03cb10c65b1\": \"bd151167-8a9b-47e4-be4b-b03cb10c65b1\", \"3f5d936e-80a3-463c-ae17-fd9c376ff3e1\": \"3f5d936e-80a3-463c-ae17-fd9c376ff3e1\", \"91ab0928-bc7a-4fef-8693-308ad9764ef4\": \"91ab0928-bc7a-4fef-8693-308ad9764ef4\", \"fae01dbb-1bd5-47b5-989f-38f6029e2f4c\": \"fae01dbb-1bd5-47b5-989f-38f6029e2f4c\", \"3424533f-5d8c-4149-8ffe-3b53cadfb4fb\": \"3424533f-5d8c-4149-8ffe-3b53cadfb4fb\", \"00e72630-a81a-4bd8-bebe-a52f47bd2087\": \"00e72630-a81a-4bd8-bebe-a52f47bd2087\", \"a32f87c5-45c7-4e21-85cb-d49b6dd759ab\": \"a32f87c5-45c7-4e21-85cb-d49b6dd759ab\", \"4593da6f-276b-4f80-bb95-00e23bafb74b\": \"4593da6f-276b-4f80-bb95-00e23bafb74b\", \"578f3e77-3ec2-4b86-a2f8-a0a34d9ce810\": \"578f3e77-3ec2-4b86-a2f8-a0a34d9ce810\", \"8fc8d087-6e09-4fb4-bac0-5fa6bfed7a15\": \"8fc8d087-6e09-4fb4-bac0-5fa6bfed7a15\", \"c38b7421-554a-49eb-b0cc-7c0de3723ef9\": \"c38b7421-554a-49eb-b0cc-7c0de3723ef9\", \"739c5748-36c1-4087-b926-419639d4da27\": \"739c5748-36c1-4087-b926-419639d4da27\", \"b80c39d8-a895-48d1-a2dc-dc13ab03fb0a\": \"b80c39d8-a895-48d1-a2dc-dc13ab03fb0a\", 
\"01d4860e-11ef-430d-9b9c-ee1bb34680f7\": \"01d4860e-11ef-430d-9b9c-ee1bb34680f7\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
dataset/vector_store.json CHANGED
The diff for this file is too large to render. See raw diff
 
docs/docs.pkl CHANGED
Binary files a/docs/docs.pkl and b/docs/docs.pkl differ
 
github_retriever.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_hub.github_repo import GithubRepositoryReader, GithubClient
2
+ from llama_index import download_loader, GPTVectorStoreIndex
3
+ from llama_index import LLMPredictor, VectorStoreIndex, ServiceContext
4
+ from langchain.llms import AzureOpenAI
5
+ from langchain.embeddings.openai import OpenAIEmbeddings
6
+ from llama_index import LangchainEmbedding, ServiceContext
7
+ from llama_index import StorageContext, load_index_from_storage
8
+ from dotenv import load_dotenv
9
+ import os
10
+ import pickle
11
+
12
+
13
+ def main() -> None:
14
+ # define embedding
15
+ embedding = LangchainEmbedding(OpenAIEmbeddings(chunk_size=1))
16
+ # define LLM
17
+ llm_predictor = LLMPredictor(
18
+ llm=AzureOpenAI(
19
+ engine="text-davinci-003",
20
+ model_name="text-davinci-003",
21
+ )
22
+ )
23
+
24
+ # configure service context
25
+ service_context = ServiceContext.from_defaults(
26
+ llm_predictor=llm_predictor, embed_model=embedding
27
+ )
28
+ download_loader("GithubRepositoryReader")
29
+ docs = None
30
+ if os.path.exists("docs/docs.pkl"):
31
+ with open("docs/docs.pkl", "rb") as f:
32
+ docs = pickle.load(f)
33
+
34
+ if docs is None:
35
+ github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
36
+ loader = GithubRepositoryReader(
37
+ github_client,
38
+ owner="ctripcorp",
39
+ repo="x-pipe",
40
+ filter_directories=(
41
+ [".", "doc"],
42
+ GithubRepositoryReader.FilterType.INCLUDE,
43
+ ),
44
+ filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
45
+ verbose=True,
46
+ concurrent_requests=10,
47
+ )
48
+
49
+ docs = loader.load_data(branch="master")
50
+
51
+ with open("docs/docs.pkl", "wb") as f:
52
+ pickle.dump(docs, f)
53
+
54
+ index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
55
+
56
+ query_engine = index.as_query_engine(service_context=service_context)
57
+ response = query_engine.query("如何使用X-Pipe?")
58
+ print(response)
59
+
60
+
61
+ if __name__ == "__main__":
62
+ load_dotenv()
63
+ main()
langchain_manager/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (154 Bytes). View file
 
langchain_manager/manager.py CHANGED
@@ -5,6 +5,8 @@ from langchain.embeddings.base import Embeddings as LCEmbeddings
5
  from langchain.embeddings.openai import OpenAIEmbeddings
6
  from langchain.llms import AzureOpenAI
7
 
 
 
8
 
9
  class BaseLangChainManager(ABC):
10
  def __init__(self) -> None:
@@ -22,49 +24,16 @@ class BaseLangChainManager(ABC):
22
  class LangChainAzureManager(BaseLangChainManager):
23
  def __init__(self) -> None:
24
  super().__init__()
25
- self.embedding = OpenAIEmbeddings(client=None, chunk_size=1)
26
- self.llm = AzureOpenAI(
27
- deployment_name="text-davinci-003",
28
- # model_name="text-davinci-003",
29
- model="text-davinci-003",
30
- client=None,
31
- # temperature set to 0.0(default 0.7) to get a certain answer from OpenAI,
32
- # as a wiki robot we won't want to get flexible answers
33
- temperature=0.0,
34
- # GPT-3 default is 4096, however, openai.py default is 256
35
- max_tokens=2048,
36
- )
37
 
38
  # Override
39
  def get_embedding(self) -> LCEmbeddings:
40
- return self.embedding
41
 
42
  # Override
43
  def get_llm(self) -> BaseLanguageModel:
44
- return self.llm
45
-
46
-
47
- class LangChainHuggingFaceManager(BaseLangChainManager):
48
- def __init__(self) -> None:
49
- super().__init__()
50
- from transformers import AutoTokenizer, AutoModel
51
-
52
- AutoTokenizer.from_pretrained("GanymedeNil/text2vec-large-chinese")
53
-
54
- AutoModel.from_pretrained("GanymedeNil/text2vec-large-chinese")
55
-
56
- self.embedding = OpenAIEmbeddings(client=None, chunk_size=1)
57
- self.llm = AzureOpenAI(
58
  deployment_name="text-davinci-003",
59
  # model_name="text-davinci-003",
60
  model="text-davinci-003",
61
  client=None,
62
  )
63
-
64
- # Override
65
- def get_embedding(self) -> LCEmbeddings:
66
- return self.embedding
67
-
68
- # Override
69
- def get_llm(self) -> BaseLanguageModel:
70
- return self.llm
 
5
  from langchain.embeddings.openai import OpenAIEmbeddings
6
  from langchain.llms import AzureOpenAI
7
 
8
+ from core.lifecycle import Lifecycle
9
+
10
 
11
  class BaseLangChainManager(ABC):
12
  def __init__(self) -> None:
 
24
  class LangChainAzureManager(BaseLangChainManager):
25
  def __init__(self) -> None:
26
  super().__init__()
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  # Override
29
  def get_embedding(self) -> LCEmbeddings:
30
+ return OpenAIEmbeddings(client=None, chunk_size=1)
31
 
32
  # Override
33
  def get_llm(self) -> BaseLanguageModel:
34
+ return AzureOpenAI(
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  deployment_name="text-davinci-003",
36
  # model_name="text-davinci-003",
37
  model="text-davinci-003",
38
  client=None,
39
  )
 
 
 
 
 
 
 
 
llama/{storage_context.py → context.py} RENAMED
@@ -1,14 +1,62 @@
1
- from llama_index import StorageContext
2
- from typing import List
3
  from abc import abstractmethod, ABC
4
 
5
- from llama_index import Document
 
6
 
7
  from core.lifecycle import Lifecycle
8
- from llama.service_context import ServiceContextManager
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
  class StorageContextManager(Lifecycle, ABC):
 
12
  @abstractmethod
13
  def get_storage_context(self) -> StorageContext:
14
  pass
@@ -17,11 +65,9 @@ class StorageContextManager(Lifecycle, ABC):
17
  class LocalStorageContextManager(StorageContextManager):
18
  storage_context: StorageContext
19
 
20
- def __init__(
21
- self,
22
- service_context_manager: ServiceContextManager,
23
- dataset_path: str = "./dataset",
24
- ) -> None:
25
  super().__init__()
26
  self.dataset_path = dataset_path
27
  self.service_context_manager = service_context_manager
@@ -31,11 +77,8 @@ class LocalStorageContextManager(StorageContextManager):
31
 
32
  def do_init(self) -> None:
33
  from llama.utils import is_local_storage_files_ready
34
-
35
  if is_local_storage_files_ready(self.dataset_path):
36
- self.storage_context = StorageContext.from_defaults(
37
- persist_dir=self.dataset_path
38
- )
39
  else:
40
  docs = self._download()
41
  self._indexing(docs)
@@ -51,17 +94,14 @@ class LocalStorageContextManager(StorageContextManager):
51
  def do_dispose(self) -> None:
52
  self.storage_context.persist(self.dataset_path)
53
 
54
- def _download(self) -> List[Document]:
55
  from llama.data_loader import GithubLoader
56
-
57
  loader = GithubLoader()
58
  return loader.load()
59
 
60
- def _indexing(self, docs: List[Document]) -> None:
61
  from llama_index import GPTVectorStoreIndex
62
-
63
- index = GPTVectorStoreIndex.from_documents(
64
- docs, service_context=self.service_context_manager.get_service_context()
65
- )
66
  index.storage_context.persist(persist_dir=self.dataset_path)
67
  self.storage_context = index.storage_context
 
 
 
1
  from abc import abstractmethod, ABC
2
 
3
+ from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding, Document
4
+ from llama_index import StorageContext
5
 
6
  from core.lifecycle import Lifecycle
7
+ from langchain_manager.manager import BaseLangChainManager
8
+
9
+
10
class ServiceContextManager(Lifecycle, ABC):
    """Abstract lifecycle component that exposes a llama_index ``ServiceContext``."""

    @abstractmethod
    def get_service_context(self) -> ServiceContext:
        """Return the ``ServiceContext`` owned by this manager."""
15
+
16
+
17
class AzureServiceContextManager(ServiceContextManager):
    """Build and hold a llama_index ``ServiceContext`` from a LangChain manager.

    The context is created in ``do_init`` from the manager's embedding and LLM;
    the remaining lifecycle hooks only log token-usage counters.
    """

    lc_manager: BaseLangChainManager
    service_context: ServiceContext

    def __init__(self, lc_manager: BaseLangChainManager):
        super().__init__()
        self.lc_manager = lc_manager

    def get_service_context(self) -> ServiceContext:
        """Return the service context created by ``do_init``.

        Raises:
            ValueError: if the context has not been created yet.
        """
        # `service_context` is only annotated on the class and never given a
        # default, so plain attribute access before `do_init` would raise
        # AttributeError and never reach the intended ValueError below.
        service_context = getattr(self, "service_context", None)
        if service_context is None:
            raise ValueError(
                "service context is not ready, check for lifecycle statement"
            )
        return service_context

    def do_init(self) -> None:
        """Create the service context from the LangChain embedding and LLM."""
        # define embedding
        embedding = LangchainEmbedding(self.lc_manager.get_embedding())
        # define LLM
        llm_predictor = LLMPredictor(llm=self.lc_manager.get_llm())
        # configure service context
        self.service_context = ServiceContext.from_defaults(
            llm_predictor=llm_predictor, embed_model=embedding
        )

    def do_start(self) -> None:
        """Log the embedding and LLM token counters at start."""
        self.logger.info(
            "[do_start][embedding] last used usage: %d",
            self.service_context.embed_model.total_tokens_used,
        )
        self.logger.info(
            "[do_start][predict] last used usage: %d",
            self.service_context.llm_predictor.total_tokens_used,
        )

    def do_stop(self) -> None:
        """Log the embedding and LLM token counters at stop."""
        self.logger.info(
            "[do_stop][embedding] last used usage: %d",
            self.service_context.embed_model.total_tokens_used,
        )
        self.logger.info(
            "[do_stop][predict] last used usage: %d",
            self.service_context.llm_predictor.total_tokens_used,
        )

    def do_dispose(self) -> None:
        """Log the LLM predictor's token counter at dispose."""
        self.logger.info(
            "[do_dispose] total used token: %d",
            self.service_context.llm_predictor.total_tokens_used,
        )
56
 
57
 
58
class StorageContextManager(Lifecycle, ABC):
    """Abstract lifecycle component that exposes a llama_index ``StorageContext``."""

    @abstractmethod
    def get_storage_context(self) -> StorageContext:
        """Return the ``StorageContext`` owned by this manager."""
 
65
  class LocalStorageContextManager(StorageContextManager):
66
  storage_context: StorageContext
67
 
68
+ def __init__(self,
69
+ dataset_path: str = "./dataset",
70
+ service_context_manager: ServiceContextManager = None) -> None:
 
 
71
  super().__init__()
72
  self.dataset_path = dataset_path
73
  self.service_context_manager = service_context_manager
 
77
 
78
  def do_init(self) -> None:
79
  from llama.utils import is_local_storage_files_ready
 
80
  if is_local_storage_files_ready(self.dataset_path):
81
+ self.storage_context = StorageContext.from_defaults(persist_dir=self.dataset_path)
 
 
82
  else:
83
  docs = self._download()
84
  self._indexing(docs)
 
94
  def do_dispose(self) -> None:
95
  self.storage_context.persist(self.dataset_path)
96
 
97
+ def _download(self) -> [Document]:
98
  from llama.data_loader import GithubLoader
 
99
  loader = GithubLoader()
100
  return loader.load()
101
 
102
+ def _indexing(self, docs: [Document]):
103
  from llama_index import GPTVectorStoreIndex
104
+ index = GPTVectorStoreIndex.from_documents(docs,
105
+ service_context=self.service_context_manager.get_service_context())
 
 
106
  index.storage_context.persist(persist_dir=self.dataset_path)
107
  self.storage_context = index.storage_context
llama/data_loader.py CHANGED
@@ -16,10 +16,10 @@ class WikiLoader(ABC):
16
 
17
  class GithubLoader(WikiLoader):
18
  def __init__(
19
- self,
20
- github_owner: Optional[str] = None,
21
- repo: Optional[str] = None,
22
- dirs: Optional[Sequence[str]] = None,
23
  ):
24
  super().__init__()
25
  self.owner = (
@@ -51,8 +51,7 @@ class GithubLoader(WikiLoader):
51
  verbose=True,
52
  concurrent_requests=10,
53
  )
54
- os.environ["http_proxy"] = "http://127.0.0.1:7890"
55
- os.environ["https_proxy"] = "http://127.0.0.1:7890"
56
  docs = loader.load_data(branch="master")
57
 
58
  with open("docs/docs.pkl", "wb") as f:
 
16
 
17
  class GithubLoader(WikiLoader):
18
  def __init__(
19
+ self,
20
+ github_owner: Optional[str] = None,
21
+ repo: Optional[str] = None,
22
+ dirs: Optional[Sequence[str]] = None,
23
  ):
24
  super().__init__()
25
  self.owner = (
 
51
  verbose=True,
52
  concurrent_requests=10,
53
  )
54
+
 
55
  docs = loader.load_data(branch="master")
56
 
57
  with open("docs/docs.pkl", "wb") as f:
llama/index.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from core.lifecycle import Lifecycle
2
+ from llama.context import ServiceContextManager
3
+ from llama_index.indices.vector_store import VectorStoreIndex
4
+ from typing import Optional
5
+
6
+
7
class IndexManager(Lifecycle):
    """Lifecycle component holding an optional ``VectorStoreIndex``.

    The index starts out as ``None``; nothing in this class assigns it.
    """

    index: Optional[VectorStoreIndex]

    def __init__(self, context_manager: ServiceContextManager) -> None:
        super().__init__()
        self.context_manager = context_manager
        self.index = None

    def get_index(self) -> Optional[VectorStoreIndex]:
        """Return the held index, which may be ``None``.

        Raises:
            Exception: if the lifecycle state does not report "started".
        """
        if self.lifecycle_state.is_started():
            return self.index
        raise Exception("Lifecycle state is not correct")
llama/service_context.py DELETED
@@ -1,142 +0,0 @@
1
- from abc import abstractmethod, ABC
2
-
3
- from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding
4
-
5
- from core.lifecycle import Lifecycle
6
- from langchain_manager.manager import BaseLangChainManager
7
-
8
-
9
- # def get_callback_manager() -> CallbackManager:
10
- # from llama_index.callbacks import (
11
- # WandbCallbackHandler,
12
- # CallbackManager,
13
- # LlamaDebugHandler,
14
- # )
15
- # llama_debug = LlamaDebugHandler(print_trace_on_end=True)
16
- # # wandb.init args
17
- # run_args = dict(
18
- # project="llamaindex",
19
- # )
20
- # wandb_callback = WandbCallbackHandler(run_args=run_args)
21
- # return CallbackManager([llama_debug, wandb_callback])
22
-
23
-
24
- class ServiceContextManager(Lifecycle, ABC):
25
- @abstractmethod
26
- def get_service_context(self) -> ServiceContext:
27
- pass
28
-
29
-
30
- class AzureServiceContextManager(ServiceContextManager):
31
- lc_manager: BaseLangChainManager
32
- service_context: ServiceContext
33
-
34
- def __init__(self, lc_manager: BaseLangChainManager):
35
- super().__init__()
36
- self.lc_manager = lc_manager
37
-
38
- def get_service_context(self) -> ServiceContext:
39
- if self.service_context is None:
40
- raise ValueError(
41
- "service context is not ready, check for lifecycle statement"
42
- )
43
- return self.service_context
44
-
45
- def do_init(self) -> None:
46
- # define embedding
47
- embedding = LangchainEmbedding(self.lc_manager.get_embedding())
48
- # define LLM
49
- llm_predictor = LLMPredictor(llm=self.lc_manager.get_llm())
50
- # configure service context
51
- self.service_context = ServiceContext.from_defaults(
52
- llm_predictor=llm_predictor,
53
- embed_model=embedding,
54
- # callback_manager=get_callback_manager(),
55
- )
56
-
57
- def do_start(self) -> None:
58
- self.logger.info(
59
- "[do_start][embedding] last used usage: %d",
60
- self.service_context.embed_model.total_tokens_used,
61
- )
62
- self.logger.info(
63
- "[do_start][predict] last used usage: %d",
64
- self.service_context.llm_predictor.total_tokens_used,
65
- )
66
-
67
- def do_stop(self) -> None:
68
- self.logger.info(
69
- "[do_stop][embedding] last used usage: %d",
70
- self.service_context.embed_model.total_tokens_used,
71
- )
72
- self.logger.info(
73
- "[do_stop][predict] last used usage: %d",
74
- self.service_context.llm_predictor.total_tokens_used,
75
- )
76
-
77
- def do_dispose(self) -> None:
78
- self.logger.info(
79
- "[do_dispose] total used token: %d",
80
- self.service_context.llm_predictor.total_tokens_used,
81
- )
82
-
83
-
84
- class HuggingFaceChineseOptServiceContextManager(ServiceContextManager):
85
- lc_manager: BaseLangChainManager
86
- service_context: ServiceContext
87
-
88
- def __init__(self, lc_manager: BaseLangChainManager):
89
- super().__init__()
90
- self.lc_manager = lc_manager
91
-
92
- def get_service_context(self) -> ServiceContext:
93
- if self.service_context is None:
94
- raise ValueError(
95
- "service context is not ready, check for lifecycle statement"
96
- )
97
- return self.service_context
98
-
99
- def do_init(self) -> None:
100
- # define embedding
101
- from langchain.embeddings import HuggingFaceEmbeddings
102
-
103
- model_name = "GanymedeNil/text2vec-large-chinese"
104
- hf_embedding = HuggingFaceEmbeddings(
105
- model_name=model_name, model_kwargs={"device": "cpu"}
106
- )
107
-
108
- embedding = LangchainEmbedding(hf_embedding)
109
- # define LLM
110
- llm_predictor = LLMPredictor(self.lc_manager.get_llm())
111
- # configure service context
112
- self.service_context = ServiceContext.from_defaults(
113
- llm_predictor=llm_predictor,
114
- embed_model=embedding,
115
- # callback_manager=get_callback_manager()
116
- )
117
-
118
- def do_start(self) -> None:
119
- self.logger.info(
120
- "[do_start][embedding] last used usage: %d",
121
- self.service_context.embed_model.total_tokens_used,
122
- )
123
- self.logger.info(
124
- "[do_start][predict] last used usage: %d",
125
- self.service_context.llm_predictor.total_tokens_used,
126
- )
127
-
128
- def do_stop(self) -> None:
129
- self.logger.info(
130
- "[do_stop][embedding] last used usage: %d",
131
- self.service_context.embed_model.total_tokens_used,
132
- )
133
- self.logger.info(
134
- "[do_stop][predict] last used usage: %d",
135
- self.service_context.llm_predictor.total_tokens_used,
136
- )
137
-
138
- def do_dispose(self) -> None:
139
- self.logger.info(
140
- "[do_dispose] total used token: %d",
141
- self.service_context.llm_predictor.total_tokens_used,
142
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llama/vector_storage.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from core.lifecycle import Lifecycle
2
+
3
+
4
class VectorStorageManager(Lifecycle):
    """Lifecycle component for vector storage; every hook is currently a no-op."""

    def __init__(self) -> None:
        super().__init__()

    def do_init(self) -> None:
        """Do nothing on init."""
        return None

    def do_start(self) -> None:
        """Do nothing on start."""
        return None

    def do_stop(self) -> None:
        """Do nothing on stop."""
        return None

    def do_dispose(self) -> None:
        """Do nothing on dispose."""
        return None
local-requirements.txt DELETED
@@ -1 +0,0 @@
1
- python-dotenv
 
 
requirements.txt CHANGED
@@ -1,10 +1,6 @@
1
- llama_index>=0.6.3
2
- llama_hub
3
- streamlit
4
- ruff
5
- black
6
- mypy
7
- accelerate
8
- python-dotenv
9
- sentence_transformers
10
- wandb
 
1
+ llama_index>=0.6.3
2
+ llama_hub
3
+
4
+ ruff
5
+ black
6
+ mypy
 
 
 
 
xpipe_wiki/manager_factory.py CHANGED
@@ -4,69 +4,34 @@ import os
4
  from core.helper import LifecycleHelper
5
  from xpipe_wiki.robot_manager import XPipeWikiRobotManager, AzureXPipeWikiRobotManager
6
 
7
- from multiprocessing import Lock
8
-
9
- lock = Lock()
10
-
11
 
12
  class XPipeRobotRevision(enum.Enum):
13
  SIMPLE_OPENAI_VERSION_0 = 1
14
- HUGGINGFACE_VERSION_0 = 2
15
 
16
 
17
- class XPipeRobotManagerFactory:
18
- """
19
- CAPABLE: Dict[XPipeRobotRevision, XPipeWikiRobotManager] =
20
- {XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0: XPipeWikiRobotManager()}
21
- """
22
 
23
- CAPABLE = dict() # type: dict[XPipeRobotRevision, XPipeWikiRobotManager]
24
 
 
25
  @classmethod
26
  def get_or_create(cls, revision: XPipeRobotRevision) -> XPipeWikiRobotManager:
27
- with lock:
28
- if cls.CAPABLE.get(revision) is not None:
29
- return cls.CAPABLE[revision]
30
- if revision == XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0:
31
- manager = cls.create_simple_openai_version_0()
32
- elif revision == XPipeRobotRevision.HUGGINGFACE_VERSION_0:
33
- manager = cls.create_huggingface_version_0()
34
- cls.CAPABLE[revision] = manager
35
- return manager
36
 
37
  @classmethod
38
  def create_simple_openai_version_0(cls) -> AzureXPipeWikiRobotManager:
39
- from llama.service_context import AzureServiceContextManager
40
  from langchain_manager.manager import LangChainAzureManager
41
 
42
  service_context_manager = AzureServiceContextManager(
43
  lc_manager=LangChainAzureManager()
44
  )
45
- from llama.storage_context import LocalStorageContextManager
46
-
47
- dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
48
- storage_context_manager = LocalStorageContextManager(
49
- dataset_path=dataset_path, service_context_manager=service_context_manager
50
- )
51
-
52
- robot_manager = AzureXPipeWikiRobotManager(
53
- service_context_manager=service_context_manager,
54
- storage_context_manager=storage_context_manager,
55
- )
56
- LifecycleHelper.initialize_if_possible(robot_manager)
57
- LifecycleHelper.start_if_possible(robot_manager)
58
- return robot_manager
59
-
60
- @classmethod
61
- def create_huggingface_version_0(cls) -> AzureXPipeWikiRobotManager:
62
- from llama.service_context import HuggingFaceChineseOptServiceContextManager
63
- from langchain_manager.manager import LangChainAzureManager
64
-
65
- service_context_manager = HuggingFaceChineseOptServiceContextManager(
66
- lc_manager=LangChainAzureManager()
67
- )
68
 
69
- from llama.storage_context import LocalStorageContextManager
70
 
71
  dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
72
  storage_context_manager = LocalStorageContextManager(
 
4
  from core.helper import LifecycleHelper
5
  from xpipe_wiki.robot_manager import XPipeWikiRobotManager, AzureXPipeWikiRobotManager
6
 
 
 
 
 
7
 
8
  class XPipeRobotRevision(enum.Enum):
9
  SIMPLE_OPENAI_VERSION_0 = 1
 
10
 
11
 
12
+ CAPABLE = dict()
 
 
 
 
13
 
 
14
 
15
+ class XPipeRobotManagerFactory:
16
  @classmethod
17
  def get_or_create(cls, revision: XPipeRobotRevision) -> XPipeWikiRobotManager:
18
+ if CAPABLE.get(revision) is not None:
19
+ return CAPABLE[revision]
20
+ if revision == XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0:
21
+ manager = cls.create_simple_openai_version_0()
22
+ CAPABLE[revision] = manager
23
+ return manager
 
 
 
24
 
25
  @classmethod
26
  def create_simple_openai_version_0(cls) -> AzureXPipeWikiRobotManager:
27
+ from llama.context import AzureServiceContextManager
28
  from langchain_manager.manager import LangChainAzureManager
29
 
30
  service_context_manager = AzureServiceContextManager(
31
  lc_manager=LangChainAzureManager()
32
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
+ from llama.context import LocalStorageContextManager
35
 
36
  dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
37
  storage_context_manager = LocalStorageContextManager(
xpipe_wiki/robot_manager.py CHANGED
@@ -3,12 +3,10 @@ from typing import Any
3
 
4
  from llama_index import load_index_from_storage
5
  from llama_index.indices.query.base import BaseQueryEngine
6
- from llama_index.indices.response import ResponseMode
7
 
8
  from core.helper import LifecycleHelper
9
  from core.lifecycle import Lifecycle
10
- from llama.service_context import ServiceContextManager
11
- from llama.storage_context import StorageContextManager
12
 
13
 
14
  class XPipeWikiRobot(ABC):
@@ -25,10 +23,7 @@ class AzureOpenAIXPipeWikiRobot(XPipeWikiRobot):
25
  self.query_engine = query_engine
26
 
27
  def ask(self, question: str) -> Any:
28
- print("question: ", question)
29
- response = self.query_engine.query(question)
30
- print("response type: ", type(response))
31
- return response.__str__()
32
 
33
 
34
  class XPipeWikiRobotManager(Lifecycle):
@@ -64,10 +59,10 @@ class AzureXPipeWikiRobotManager(XPipeWikiRobotManager):
64
  index = load_index_from_storage(
65
  storage_context=self.storage_context_manager.get_storage_context(),
66
  service_context=self.service_context_manager.get_service_context(),
 
67
  )
68
  self.query_engine = index.as_query_engine(
69
- service_context=self.service_context_manager.get_service_context(),
70
- response_mode=ResponseMode.TREE_SUMMARIZE,
71
  )
72
 
73
  def do_stop(self) -> None:
 
3
 
4
  from llama_index import load_index_from_storage
5
  from llama_index.indices.query.base import BaseQueryEngine
 
6
 
7
  from core.helper import LifecycleHelper
8
  from core.lifecycle import Lifecycle
9
+ from llama.context import ServiceContextManager, StorageContextManager
 
10
 
11
 
12
  class XPipeWikiRobot(ABC):
 
23
  self.query_engine = query_engine
24
 
25
  def ask(self, question: str) -> Any:
26
+ return self.query_engine.query(question)
 
 
 
27
 
28
 
29
  class XPipeWikiRobotManager(Lifecycle):
 
59
  index = load_index_from_storage(
60
  storage_context=self.storage_context_manager.get_storage_context(),
61
  service_context=self.service_context_manager.get_service_context(),
62
+
63
  )
64
  self.query_engine = index.as_query_engine(
65
+ service_context=self.service_context_manager.get_service_context()
 
66
  )
67
 
68
  def do_stop(self) -> None: